Posted to commits@singa.apache.org by wa...@apache.org on 2016/08/15 16:15:14 UTC

[01/22] incubator-singa git commit: SINGA-237 New documentation files for SINGA v1.0

Repository: incubator-singa
Updated Branches:
  refs/heads/dev 1db278417 -> 0a7642576


SINGA-237 New documentation files for SINGA v1.0

1. copy the 'examples' folder into docs/ to generate html files from the README.md files
2. add software_stack.md to describe the major data structures of v1.0
3. add device.rst to introduce the Device APIs


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/3299b0c2
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/3299b0c2
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/3299b0c2

Branch: refs/heads/dev
Commit: 3299b0c29e62f61dc7f74918fb01a0fa2c93a4a4
Parents: 6b2ff3c
Author: Wei Wang <wa...@comp.nus.edu.sg>
Authored: Thu Aug 11 23:13:12 2016 +0800
Committer: wangwei <wa...@gmail.com>
Committed: Fri Aug 12 00:04:20 2016 +0800

----------------------------------------------------------------------
 doc/Makefile                      |   1 +
 doc/_static/images/singav1-sw.png | Bin 0 -> 24326 bytes
 doc/conf.py                       |   9 +--
 doc/docs.rst                      |   6 +-
 doc/docs/device.rst               |  47 ++++++++++++++++
 doc/docs/examples.rst             |   6 ++
 doc/docs/index.rst                |  15 ++---
 doc/docs/jp/index.md              |  23 --------
 doc/docs/kr/index.md              |  23 --------
 doc/docs/software_stack.md        |  99 +++++++++++++++++++++++++++++++++
 doc/docs/tensor.md                |   7 +++
 doc/docs/zh/index.md              |  10 ++--
 12 files changed, 177 insertions(+), 69 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/3299b0c2/doc/Makefile
----------------------------------------------------------------------
diff --git a/doc/Makefile b/doc/Makefile
index 62a2236..c6eddf1 100644
--- a/doc/Makefile
+++ b/doc/Makefile
@@ -50,6 +50,7 @@ clean:
 
 .PHONY: html
 html:
+	cp -rf ../examples docs/
 	$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
 	@echo
 	@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/3299b0c2/doc/_static/images/singav1-sw.png
----------------------------------------------------------------------
diff --git a/doc/_static/images/singav1-sw.png b/doc/_static/images/singav1-sw.png
new file mode 100644
index 0000000..e443c6e
Binary files /dev/null and b/doc/_static/images/singav1-sw.png differ

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/3299b0c2/doc/conf.py
----------------------------------------------------------------------
diff --git a/doc/conf.py b/doc/conf.py
index 86dc031..20ba51a 100755
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -16,9 +16,10 @@
 # add these directories to sys.path here. If the directory is relative to the
 # documentation root, use os.path.abspath to make it absolute, like shown here.
 #
-# import os
-# import sys
-# sys.path.insert(0, os.path.abspath('.'))
+import os
+import sys
+sys.path.insert(0, os.path.abspath('.'))
+sys.path.insert(1, '../src/python/singa/')
 
 # -- General configuration ------------------------------------------------
 from recommonmark.parser import CommonMarkParser
@@ -35,7 +36,7 @@ source_parsers = {
 # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
 # ones.
 extensions = [
-   
+'sphinx.ext.autodoc'
 ]
 
 # Add any paths that contain templates here, relative to this directory.

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/3299b0c2/doc/docs.rst
----------------------------------------------------------------------
diff --git a/doc/docs.rst b/doc/docs.rst
index 2ebea60..400b12a 100644
--- a/doc/docs.rst
+++ b/doc/docs.rst
@@ -2,7 +2,5 @@ Documentation
 =============
 
 .. toctree::
-	docs/index
- 	docs/zh/index
-	docs/jp/index
-	docs/kr/index
+   docs/index
+   docs/zh/index

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/3299b0c2/doc/docs/device.rst
----------------------------------------------------------------------
diff --git a/doc/docs/device.rst b/doc/docs/device.rst
new file mode 100644
index 0000000..aa5defb
--- /dev/null
+++ b/doc/docs/device.rst
@@ -0,0 +1,47 @@
+Device
+=======
+
+
+The Device abstraction represents a hardware device with memory and computation units.
+
+Specific devices
+----------------
+Currently, SINGA has three Device implementations:
+
+1. CudaGPU for an Nvidia GPU card which runs CUDA code
+2. CppCPU for a CPU which runs C++ code
+3. OpenclGPU for a GPU card which runs OpenCL code
+
+
+Create devices
+---------------
+
+Python API
+~~~~~~~~~~
+
+.. autofunction:: device.create_cuda_gpus
+
+.. autofunction:: device.create_cuda_gpus_on
+
+.. autofunction:: device.create_cuda_gpu_on
+
+.. autofunction:: device.get_default_device
+
+
+The following code shows how to create devices:
+
+.. code:: python
+
+   from singa import device
+   cuda = device.create_cuda_gpu_on(0)  # use GPU card of ID 0
+   host = device.get_default_device()  # get the default host device (a CppCPU)
+   ary1 = device.create_cuda_gpus(2)  # create 2 devices, starting from ID 0
+   ary2 = device.create_cuda_gpus_on([0,2])  # create 2 devices on ID 0 and 2
+
+
+
+CPP API
+~~~~~~~
+
+
+

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/3299b0c2/doc/docs/examples.rst
----------------------------------------------------------------------
diff --git a/doc/docs/examples.rst b/doc/docs/examples.rst
new file mode 100644
index 0000000..b0b2af8
--- /dev/null
+++ b/doc/docs/examples.rst
@@ -0,0 +1,6 @@
+Examples
+========
+
+.. toctree::
+
+   examples/index

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/3299b0c2/doc/docs/index.rst
----------------------------------------------------------------------
diff --git a/doc/docs/index.rst b/doc/docs/index.rst
index a6a1b49..8a74976 100644
--- a/doc/docs/index.rst
+++ b/doc/docs/index.rst
@@ -2,15 +2,8 @@ English
 =======
 
 .. toctree::
-	overview
-        installation
-	quick-start
-        programming-guide
-        distributed-training
-        data
-        checkpoint
-        python
-        test
-        gpu
-        examples
 
+   installation
+   software_stack
+   device
+   examples

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/3299b0c2/doc/docs/jp/index.md
----------------------------------------------------------------------
diff --git a/doc/docs/jp/index.md b/doc/docs/jp/index.md
deleted file mode 100644
index 6679198..0000000
--- a/doc/docs/jp/index.md
+++ /dev/null
@@ -1,23 +0,0 @@
-# Latest Documentation
-
----
-
-* [Introduction](overview.html)
-* [Installation](installation.html)
-* [Quick Start](quick-start.html)
-* [Programming Guide](programming-guide.html)
-    * [NeuralNet](neural-net.html)
-        * [Layer](layer.html)
-        * [Param](param.html)
-    * [TrainOneBatch](train-one-batch.html)
-    * [Updater](updater.html)
-* [Distributed Training](distributed-training.html)
-* [Data Preparation](data.html)
-* [Checkpoint and Resume](checkpoint.html)
-* [Performance Test and Feature Extraction](test.html)
-* [Examples](examples.html)
-    * Feed-forward models
-        * [CNN](cnn.html)
-        * [MLP](mlp.html)
-    * [RBM + Auto-encoder](rbm.html)
-    * [RNN](rnn.html)

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/3299b0c2/doc/docs/kr/index.md
----------------------------------------------------------------------
diff --git a/doc/docs/kr/index.md b/doc/docs/kr/index.md
deleted file mode 100644
index 990d5d9..0000000
--- a/doc/docs/kr/index.md
+++ /dev/null
@@ -1,23 +0,0 @@
-# Latest Documentation
-
----
-
-* [Overview](overview.html)
-* [Installation](installation.html)
-* [Quick Start](quick-start.html)
-* [Programming Guide](programming-guide.html)
-    * [NeuralNet](neural-net.html)
-        * [Layer](layer.html)
-        * [Param](param.html)
-    * [TrainOneBatch](train-one-batch.html)
-    * [Updater](updater.html)
-* [Distributed Training](distributed-training.html)
-* [Data Preparation](data.html)
-* [Checkpoint and Resume](checkpoint.html)
-* [Performance Test and Feature Extraction](test.html)
-* [Examples](examples.html)
-    * Feed-forward models
-        * [CNN](cnn.html)
-        * [MLP](mlp.html)
-    * [RBM + Auto-encoder](rbm.html)
-    * [RNN](rnn.html)

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/3299b0c2/doc/docs/software_stack.md
----------------------------------------------------------------------
diff --git a/doc/docs/software_stack.md b/doc/docs/software_stack.md
new file mode 100644
index 0000000..c60b6a5
--- /dev/null
+++ b/doc/docs/software_stack.md
@@ -0,0 +1,99 @@
+# Software Stack
+
+SINGA's software stack includes three major components, namely, core, IO and
+model. Figure 1 illustrates these components together with the hardware.
+The core component provides memory management and tensor operations; IO has
+classes for reading (and writing) data from (to) disk and network; the model
+component provides data structures and algorithms for machine learning models,
+e.g., layers for neural network models, and optimizers/initializers/metrics/losses
+for general machine learning models.
+
+
+<img src="../_static/images/singav1-sw.png" align="center" width="500px"/>
+<br/>
+<span><strong>Figure 1 - SINGA V1 software stack.</strong></span>
+
+## Core
+
+[Tensor](tensor.html) and [Device](device.html) are two core abstractions in SINGA. The Tensor class
+represents a multi-dimensional array, which stores model variables and provides
+linear algebra operations for machine learning algorithms, including matrix
+multiplication and random functions. Each tensor instance (i.e. a tensor) is
+allocated on a Device instance. Each Device instance (i.e. a device) is created
+against one hardware device, e.g. a GPU card or a CPU core. Devices manage the
+memory of tensors and execute tensor operations on their execution units, e.g.
+CPU threads or CUDA streams.
+
+Depending on the hardware and the programming language, SINGA has implemented
+the following specific device classes:
+
+* **CudaGPU** represents an Nvidia GPU card. The execution units are the CUDA streams.
+* **CppCPU** represents a normal CPU. The execution units are the CPU threads.
+* **OpenclGPU** represents a normal GPU card from either Nvidia or AMD.
+  The execution units are the CommandQueues. Given that OpenCL is compatible with
+  many hardware devices, e.g. FPGA and ARM, the OpenclGPU has the potential to be
+  extended for other devices.
+
+Different types of devices use different programming languages to write the kernel
+functions for the tensor operations:
+
+* CppMath (tensor_math_cpp.h) implements the tensor operations using C++ for CppCPU
+* CudaMath (tensor_math_cuda.h) implements the tensor operations using CUDA for CudaGPU
+* OpenclMath (tensor_math_opencl.h) implements the tensor operations using OpenCL for OpenclGPU
+
+In addition, different types of data, such as float32 and float16, could be supported by adding
+the corresponding tensor functions.
+
+Typically, users create a device instance and pass it when creating multiple
+tensor instances. When users call the Tensor functions, these functions invoke
+the corresponding implementation (CppMath/CudaMath/OpenclMath) automatically. In
+other words, the implementation of Tensor operations is transparent to users.
+
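+For instance, the following sketch creates two tensors on a device and adds
+them; the calls mirror the Python API used elsewhere in these docs, and the
+shapes and values are illustrative:
+
+```python
+from singa import device, tensor
+
+dev = device.get_default_device()  # a CppCPU; or device.create_cuda_gpu()
+a = tensor.Tensor((2, 3), dev)     # tensors are allocated on a device
+a.gaussian(0, 0.02)
+b = tensor.Tensor((2, 3), dev)
+b.set_value(1.0)
+c = a + b  # dispatched to CppMath/CudaMath/OpenclMath based on the device
+```
+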
+Most machine learning algorithms could be expressed using (dense or sparse) tensors.
+Therefore, with the Tensor abstraction, SINGA is able to run a wide range of models,
+including deep learning models and traditional machine learning models.
+
+The Tensor and Device abstractions are extensible to support a wide range of hardware
+devices using different programming languages. A new hardware device can be supported
+by adding a new Device subclass and the corresponding implementation of the Tensor
+operations (xxxMath).
+
+Optimizations in terms of speed and memory can be implemented by Device, which
+manages both operation execution and memory malloc/free. More optimization details
+are described in the [Device page](device.html).
+
+
+## Model
+
+On top of the Tensor and Device abstractions, SINGA provides higher-level
+classes for building machine learning models; a short usage sketch follows the list below.
+
+* [Layer](layer.html) and its subclasses are specific for neural networks. Every layer provides
+  functions for forward propagating features and backward propagating gradients w.r.t. the
+  training loss. These classes wrap complex layer operations so that users can easily create
+  neural nets by connecting a set of layers.
+
+* [Initializer](initializer.html) and its subclasses provide various methods for initializing
+  model parameters (stored in Tensor instances), e.g., following uniform or Gaussian distributions.
+
+* [Loss](loss.html) and its subclasses define the training objective loss functions.
+  Each subclass implements both the computation of the loss value and the computation of
+  the gradient of the prediction w.r.t. the objective loss. Example loss functions include
+  squared error and cross entropy.
+
+* [Metric](metric.html) and its subclasses provide functions to measure the
+  performance of the model, e.g., the accuracy.
+
+* [Optimizer](optimizer.html) and its subclasses implement the methods for updating
+  model parameter values using parameter gradients, including SGD, AdaGrad, RMSProp, etc.
+
+
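+As a minimal sketch (layer/loss/optimizer names as used in the SINGA examples;
+hyper-parameters and shapes are illustrative), a dense layer, a loss and an
+optimizer could be combined as follows:
+
+```python
+from singa import layer, loss, optimizer, tensor
+from singa.proto import model_pb2
+
+layer.engine = 'singacpp'  # run on CPU, as in the examples
+dense = layer.Dense('dense', 10, input_sample_shape=(100,))
+for p in dense.param_values():  # initialize the weight and bias tensors
+    p.set_value(0.1)
+lossfun = loss.SoftmaxCrossEntropy()
+opt = optimizer.SGD(lr=0.01)
+
+x = tensor.Tensor((16, 100))  # a mini-batch of 16 samples
+x.gaussian(0, 0.1)
+y = dense.forward(model_pb2.kTrain, x)  # forward propagate features
+```
+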
+## IO
+
+The IO module consists of classes for data loading, data preprocessing and message
+passing; a toy sketch follows the list below.
+
+* Reader and its subclasses load string records from disk files.
+* Writer and its subclasses write string records to disk files.
+* Encoder and its subclasses encode Tensor instances into string records.
+* Decoder and its subclasses decode string records into Tensor instances.
+* Endpoint represents a communication endpoint and provides functions for sending and receiving messages.
+* Message represents a communication message between Endpoint instances. It carries both meta data and payload.
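+
+As a toy, self-contained illustration of this design (the classes below are
+simplified stand-ins, not SINGA's actual IO API):
+
+```python
+import numpy as np
+
+class Encoder(object):
+    def encode(self, ary):  # Tensor-like array -> string record
+        return ary.tobytes()
+
+class Decoder(object):
+    def decode(self, record):  # string record -> Tensor-like array
+        return np.frombuffer(record, dtype=np.float32)
+
+records = []  # stands in for a disk file managed by Writer/Reader
+records.append(Encoder().encode(np.ones(4, dtype=np.float32)))  # Writer role
+ary = Decoder().decode(records[0])  # Reader + Decoder roles
+```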

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/3299b0c2/doc/docs/tensor.md
----------------------------------------------------------------------
diff --git a/doc/docs/tensor.md b/doc/docs/tensor.md
new file mode 100644
index 0000000..eaf8362
--- /dev/null
+++ b/doc/docs/tensor.md
@@ -0,0 +1,7 @@
+# Tensor
+
+
+##
+
+
+##

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/3299b0c2/doc/docs/zh/index.md
----------------------------------------------------------------------
diff --git a/doc/docs/zh/index.md b/doc/docs/zh/index.md
index c44a2cf..4b49d5f 100644
--- a/doc/docs/zh/index.md
+++ b/doc/docs/zh/index.md
@@ -1,7 +1,9 @@
 SINGA Chinese Documentation
----
+==============
 
-* [Overview](overview.html)
-* [Installation](installation_source.html)
-* [Usage Guide](programming-guide.html)
+.. toctree::
+
+   overview
+   installation_source
+   programming-guide
 


[13/22] incubator-singa git commit: Merge commits for updating layer identifiers and commits for updating the installation page.

Posted by wa...@apache.org.
Merge commits for updating layer identifiers and commits for updating the installation page.

Conflicts:
	examples/cifar10/vgg.py
	src/python/singa/device.py
	src/python/singa/layer.py
	src/python/singa/optimizer.py
	src/python/singa/tensor.py
	test/CMakeLists.txt


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/5db7eb61
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/5db7eb61
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/5db7eb61

Branch: refs/heads/dev
Commit: 5db7eb61ed1e5e189aad9c4b02f85cae6fa23df9
Parents: 2c049d6 9c71bd6
Author: Wei Wang <wa...@comp.nus.edu.sg>
Authored: Mon Aug 15 20:04:54 2016 +0800
Committer: Wei Wang <wa...@comp.nus.edu.sg>
Committed: Mon Aug 15 20:04:54 2016 +0800

----------------------------------------------------------------------
 CMakeLists.txt                       |   9 +-
 cmake/Dependencies.cmake             |   7 ++
 cmake/Thirdparty/FindCUDNN.cmake     |   2 +-
 examples/char-rnn/train.py           |   2 +-
 examples/cifar10/alexnet.cc          |   2 +-
 examples/cifar10/alexnet.py          |   2 +-
 examples/cifar10/train.py            |   2 +-
 examples/cifar10/vgg-parallel.cc     |   1 +
 examples/cifar10/vgg.py              |  18 ++--
 examples/imagenet/alexnet.cc         |  50 ++++++-----
 examples/mnist/README.md             |  18 ++++
 examples/mnist/train.py              | 134 ++++++++++++++++++++++++++++++
 include/singa/core/device.h          |  15 ++--
 include/singa/core/tensor.h          |  19 +++++
 include/singa/model/layer.h          |  13 ++-
 include/singa/model/loss.h           |   1 -
 src/CMakeLists.txt                   |  10 +--
 src/core/device/cpp_cpu.cc           |   2 +-
 src/core/tensor/math_kernel.cu       |  53 +++++++++++-
 src/core/tensor/math_kernel.h        |  14 +++-
 src/core/tensor/tensor.cc            |  15 +++-
 src/core/tensor/tensor_math.h        |  24 ++++++
 src/core/tensor/tensor_math_cpp.h    |  42 ++++++++++
 src/core/tensor/tensor_math_cuda.h   |  40 ++++++++-
 src/model/layer/activation.cc        |  10 +++
 src/model/layer/batchnorm.cc         |   3 +
 src/model/layer/convolution.cc       |   1 +
 src/model/layer/cudnn_batchnorm.cc   |  16 ++--
 src/model/layer/dense.cc             |   3 +
 src/model/layer/dropout.cc           |   3 +
 src/model/layer/flatten.cc           |   3 +
 src/model/layer/lrn.cc               |   3 +
 src/model/layer/pooling.cc           |   1 +
 src/model/layer/prelu.cc             |   3 +
 src/model/layer/rnn.cc               |   3 +
 src/model/layer/softmax.cc           |   3 +
 src/python/singa/device.py           |   5 +-
 src/python/singa/layer.py            |  28 ++++---
 src/python/singa/optimizer.py        |  63 ++++++++------
 src/python/singa/tensor.py           |  45 +++++++---
 src/python/swig/core_device.i        |   4 +-
 src/python/swig/core_tensor.i        |  10 +++
 test/CMakeLists.txt                  |   1 -
 test/python/test_layer.py            |  11 +--
 test/singa/test_cpp_cpu.cc           |   2 +-
 test/singa/test_cudnn_batchnorm.cc   |  22 ++---
 test/singa/test_image_transformer.cc |  20 ++---
 test/singa/test_layer.cc             |   2 +-
 48 files changed, 602 insertions(+), 158 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/5db7eb61/src/python/singa/device.py
----------------------------------------------------------------------
diff --cc src/python/singa/device.py
index eff6783,897fdf5..65824c2
--- a/src/python/singa/device.py
+++ b/src/python/singa/device.py
@@@ -113,7 -113,7 +113,10 @@@ def create_cuda_gpu_on(device_id)
      devices = create_cuda_gpus_on([device_id])
      return devices[0]
  
 +
+ default_device = singa.Platform.GetDefaultDevice()
++
++
  def get_default_device():
      '''Get the default host device which is a CppCPU device'''
-     return singa.Platform.GetDefaultDevice()
+     return default_device

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/5db7eb61/src/python/singa/optimizer.py
----------------------------------------------------------------------
diff --cc src/python/singa/optimizer.py
index 7c8cc39,86e68af..14cf3c0
--- a/src/python/singa/optimizer.py
+++ b/src/python/singa/optimizer.py
@@@ -54,8 -54,8 +54,8 @@@ class Optimizer(object)
          lr (float): a constant for the learning rate, mutually exclusive with
              'lr_gen'.
          momentum (float): a constant for the momentum value
-         decay (float): the coefficent for L2 regularizer, which is mutually
 -        weight_decay (float): the coefficent for L2 regularizer, which is mutually
--            exclusive with 'regularizer'.
++        weight_decay (float): the coefficient for L2 regularizer, which is
++            mutually exclusive with 'regularizer'.
          lr_gen (function): a function returns the learning rate given
              the current training step/epoch. It is mutually exclusive with lr.
              If both are not set, the apply_with_lr function should be used for
@@@ -67,7 -67,7 +67,8 @@@
              constraint would be applied inside apply_with_lr(). Users can
              also do regularization outside.
      '''
-     def __init__(self, lr=None, momentum=None, decay=None, lr_gen=None,
++
+     def __init__(self, lr=None, momentum=None, weight_decay=None, lr_gen=None,
                   regularizer=None, constraint=None):
          if lr is not None:
              assert lr_gen is None, 'Cannot set lr and lr_gen at the same time'
@@@ -108,11 -108,14 +109,14 @@@
              name (str): parameter name
              specs (ParamSpec): protobuf obj, including regularizer and
                  constraint, multipliers for learning rate and weight decay.
+ 
          '''
 -	assert type(specs) == model_pb2.ParamSpec, \
 -		'specs should be model_pb2.ParamSpec instance'
++        assert isinstance(specs, model_pb2.ParamSpec), \
++            'specs should be model_pb2.ParamSpec instance'
          if specs.HasField('regularizer'):
-             self.regularizers[name] = CppRegularizer(specs.constraint)
+             self.regularizers[name] = CppRegularizer(specs.regularizer)
          if specs.HasField('constraint'):
-             self.constraints[name] = CppConstraint(specs.regularizer)
+             self.constraints[name] = CppConstraint(specs.constraint)
          if specs.lr_mult != 1:
              self.learning_rate_multiplier[name] = specs.lr_mult
          if specs.decay_mult != 1:
@@@ -190,12 -193,13 +194,20 @@@ class SGD(Optimizer)
      See the base Optimizer for all arguments.
      '''
  
-     def __init__(self, lr=None, momentum=None, decay=None, lr_gen=None,
+     def __init__(self, lr=None, momentum=None, weight_decay=None, lr_gen=None,
                   regularizer=None, constraint=None):
-         super(SGD, self).__init__(lr, momentum, decay, lr_gen, regularizer,
 -        super(SGD, self).__init__(lr, momentum, weight_decay, lr_gen, regularizer,
--                                  constraint)
++        super(
++            SGD,
++            self).__init__(
++            lr,
++            momentum,
++            weight_decay,
++            lr_gen,
++            regularizer,
++         constraint)
          conf = model_pb2.OptimizerConf()
-         conf.momentum = self.momentum
+         if self.momentum is not None:
+             conf.momentum = self.momentum
          conf.type = 'sgd'
          self.opt = singa.CreateOptimizer('SGD')
          self.opt.Setup(conf.SerializeToString())
@@@ -212,12 -216,13 +224,13 @@@ class Nesterov(Optimizer)
      See the base Optimizer for all arguments.
      '''
  
-     def __init__(self, lr=None, momentum=0.9, decay=None, lr_gen=None,
+     def __init__(self, lr=None, momentum=0.9, weight_decay=None, lr_gen=None,
                   regularizer=None, constraint=None):
-         super(Nesterov, self).__init__(lr, momentum, decay, lr_gen, regularizer,
 -        super(Nesterov, self).__init__(lr, momentum, weight_decay, lr_gen, regularizer,
--                                       constraint)
++        super(Nesterov, self).__init__(lr, momentum, weight_decay, lr_gen,
++                                       regularizer, constraint)
          conf = model_pb2.OptimizerConf()
-         conf.momentum = momentum
+         if self.momentum is not None:
+             conf.momentum = momentum
          conf.type = 'nesterov'
          self.opt = singa.CreateOptimizer('Nesterov')
          self.opt.Setup(conf.SerializeToString())
@@@ -236,9 -241,9 +249,10 @@@ class AdaGrad(Optimizer)
      Args:
          epsilon (float): small number for preventing numeric error.
      '''
-     def __init__(self, epsilon=1e-8, lr=None, decay=None, lr_gen=None,
++
+     def __init__(self, epsilon=1e-8, lr=None, weight_decay=None, lr_gen=None,
                   regularizer=None, constraint=None):
-         super(RMSProp, self).__init__(lr, decay, lr_gen, regularizer,
+         super(RMSProp, self).__init__(lr, weight_decay, lr_gen, regularizer,
                                        constraint)
          conf = model_pb2.OptimizerConf()
          conf.delta = epsilon
@@@ -262,9 -267,9 +276,9 @@@ class RMSProp(Optimizer)
          epsilon (float): small value for preventing numeric error
      '''
  
-     def __init__(self, rho=0.9, epsilon=1e-8, lr=None, decay=None, lr_gen=None,
 -    def __init__(self, rho=0.9, epsilon=1e-8, lr=None, weight_decay=None, lr_gen=None,
--                 regularizer=None, constraint=None):
-         super(RMSProp, self).__init__(lr, decay, lr_gen, regularizer,
++    def __init__(self, rho=0.9, epsilon=1e-8, lr=None, weight_decay=None,
++                 lr_gen=None, regularizer=None, constraint=None):
+         super(RMSProp, self).__init__(lr, weight_decay, lr_gen, regularizer,
                                        constraint)
          conf = model_pb2.OptimizerConf()
          conf.rho = rho
@@@ -333,6 -338,6 +347,7 @@@ class CppConstraint(Constraint)
      Args:
          conf (ConstraintConf): protobuf message for the configuration.
      '''
++
      def __init__(self, conf):
          self.constraint = singa.CreateConstraint(conf.type)
          self.constraint.Setup(conf.SerializeToString())

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/5db7eb61/src/python/singa/tensor.py
----------------------------------------------------------------------
diff --cc src/python/singa/tensor.py
index 2e60554,5086fdc..6641a71
--- a/src/python/singa/tensor.py
+++ b/src/python/singa/tensor.py
@@@ -206,7 -207,7 +207,7 @@@ class Tensor(object)
          Args:
              t (Tensor): source Tensor.
          '''
--        assert type(t) == Tensor, 't must be a singa Tensor instance'
++        assert isinstance(t, Tensor), 't must be a singa Tensor instance'
          self.singa_tensor.CopyData(t.singa_tensor)
  
      def clone(self):
@@@ -404,16 -403,28 +403,38 @@@
                                      self.singa_tensor, rhs)
  
      def __lt__(self, rhs):
-         return _call_singa_func(singa.LT_Tf, self.singa_tensor, rhs)
+         if isinstance(rhs, Tensor):
 -            return _call_singa_func(singa.LT_TT, self.singa_tensor, rhs.singa_tensor)
++            return _call_singa_func(singa.LT_TT, self.singa_tensor,
++                                    rhs.singa_tensor)
+         else:
+             return _call_singa_func(singa.LT_Tf, self.singa_tensor, rhs)
  
      def __le__(self, rhs):
-         return _call_singa_func(singa.LE_Tf, self.singa_tensor, rhs)
+         if isinstance(rhs, Tensor):
 -            return _call_singa_func(singa.LE_TT, self.singa_tensor, rhs.singa_tensor)
++            return _call_singa_func(
++                singa.LE_TT,
++                self.singa_tensor,
++                rhs.singa_tensor)
+         else:
+             return _call_singa_func(singa.LE_Tf, self.singa_tensor, rhs)
  
      def __gt__(self, rhs):
-         return _call_singa_func(singa.GT_Tf, self.singa_tensor, rhs)
+         if isinstance(rhs, Tensor):
 -            return _call_singa_func(singa.GT_TT, self.singa_tensor, rhs.singa_tensor)
++            return _call_singa_func(
++                singa.GT_TT,
++                self.singa_tensor,
++                rhs.singa_tensor)
+         else:
+             return _call_singa_func(singa.GT_Tf, self.singa_tensor, rhs)
  
      def __ge__(self, rhs):
-         return _call_singa_func(singa.GE_Tf, self.singa_tensor, rhs)
+         if isinstance(rhs, Tensor):
 -            return _call_singa_func(singa.GE_TT, self.singa_tensor, rhs.singa_tensor)
++            return _call_singa_func(
++                singa.GE_TT,
++                self.singa_tensor,
++                rhs.singa_tensor)
+         else:
+             return _call_singa_func(singa.GE_Tf, self.singa_tensor, rhs)
  
  
  ''' python functions for global functions in Tensor.h

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/5db7eb61/test/CMakeLists.txt
----------------------------------------------------------------------
diff --cc test/CMakeLists.txt
index 6e7dd84,6fc4d77..6c21034
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@@ -22,9 -22,5 +22,8 @@@ ADD_EXECUTABLE(test_singa "gtest/gtest_
  ADD_DEPENDENCIES(test_singa singa_core singa_utils)
  #MESSAGE(STATUS "link libs" ${singa_linker_libs})
  TARGET_LINK_LIBRARIES(test_singa gtest singa_core singa_utils singa_model
 -    singa_io singa_proto protobuf ${SINGA_LINKER_LIBS})
 -SET_TARGET_PROPERTIES(test_singa PROPERTIES LINK_FLAGS "${LINK_FLAGS} -pthread")
 +    singa_io proto protobuf ${SINGA_LINKER_LIBS})
 +IF(UNIX AND (NOT APPLE))
 +    LIST(APPEND LINK_FLAGS "-pthread")
 +ENDIF()
 +SET_TARGET_PROPERTIES(test_singa PROPERTIES LINK_FLAGS "${LINK_FLAGS}")
- 


[04/22] incubator-singa git commit: SINGA-237 New documentation files for SINGA v1.0

Posted by wa...@apache.org.
SINGA-237 New documentation files for SINGA v1.0

Added readme file for the cifar-10 examples.
Updated the uniform and gaussian methods in initializer.py to include the
fan_in and fan_out arguments.
Reformatted some Python files.
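
A minimal sketch of the updated initializer API, mirroring the calls that
appear in the diffs below (the weight shape is illustrative):

    from singa import tensor, initializer

    w = tensor.Tensor((100, 65))  # a dense-layer weight matrix
    initializer.uniform(w, w.shape[0], w.shape[1])  # pass fan_in and fan_out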


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/d3a57cfc
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/d3a57cfc
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/d3a57cfc

Branch: refs/heads/dev
Commit: d3a57cfc2b71abadf992e9f0900a4051da8e4232
Parents: 8cd5530
Author: Wei Wang <wa...@comp.nus.edu.sg>
Authored: Sun Aug 14 21:41:16 2016 +0800
Committer: Wei Wang <wa...@comp.nus.edu.sg>
Committed: Sun Aug 14 21:41:16 2016 +0800

----------------------------------------------------------------------
 doc/docs/examples.rst           |   6 --
 doc/docs/index.rst              |   2 +-
 doc/docs/initializer.rst        |   2 +-
 examples/char-rnn/README.md     |   2 +-
 examples/char-rnn/train.py      | 103 +++++++++++++++++++++--------------
 examples/cifar10/alexnet.py     |  48 +++++++++++++---
 examples/cifar10/predict.py     |  10 ++--
 examples/cifar10/vgg.py         |  12 ++--
 examples/index.rst              |   4 ++
 src/python/singa/initializer.py |  85 ++++++++++++++---------------
 src/python/singa/optimizer.py   |   4 +-
 11 files changed, 157 insertions(+), 121 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d3a57cfc/doc/docs/examples.rst
----------------------------------------------------------------------
diff --git a/doc/docs/examples.rst b/doc/docs/examples.rst
deleted file mode 100644
index b0b2af8..0000000
--- a/doc/docs/examples.rst
+++ /dev/null
@@ -1,6 +0,0 @@
-Examples
-========
-
-.. toctree::
-
-   examples/index

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d3a57cfc/doc/docs/index.rst
----------------------------------------------------------------------
diff --git a/doc/docs/index.rst b/doc/docs/index.rst
index 2294054..11f0ebb 100644
--- a/doc/docs/index.rst
+++ b/doc/docs/index.rst
@@ -12,4 +12,4 @@ English
    loss
    metric
    optimizer
-   examples
+   examples/index

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d3a57cfc/doc/docs/initializer.rst
----------------------------------------------------------------------
diff --git a/doc/docs/initializer.rst b/doc/docs/initializer.rst
index a190702..f334497 100644
--- a/doc/docs/initializer.rst
+++ b/doc/docs/initializer.rst
@@ -5,7 +5,7 @@ Python API
 ----------
 
 .. automodule:: singa.initializer
-   :members:
+   :members: uniform, gaussian
    :member-order: bysource
 
 CPP API

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d3a57cfc/examples/char-rnn/README.md
----------------------------------------------------------------------
diff --git a/examples/char-rnn/README.md b/examples/char-rnn/README.md
index d4cfa30..f6e5edc 100644
--- a/examples/char-rnn/README.md
+++ b/examples/char-rnn/README.md
@@ -1,4 +1,4 @@
-# Train Char-RNN using SINGA
+# Train Char-RNN over plain text
 
 Recurrent neural networks (RNN) are widely used for modelling sequential data,
 e.g., natural language sentences. This example describes how to implement a RNN

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d3a57cfc/examples/char-rnn/train.py
----------------------------------------------------------------------
diff --git a/examples/char-rnn/train.py b/examples/char-rnn/train.py
index fb5e71f..1273a57 100644
--- a/examples/char-rnn/train.py
+++ b/examples/char-rnn/train.py
@@ -19,8 +19,6 @@ The model is created following https://github.com/karpathy/char-rnn
 The train file could be any text file,
 e.g., http://cs.stanford.edu/people/karpathy/char-rnn/
 '''
-import sys
-import os
 import cPickle as pickle
 import numpy as np
 import argparse
@@ -32,12 +30,12 @@ from singa import device
 from singa import tensor
 from singa import optimizer
 from singa import initializer
-from singa.proto import core_pb2
 from singa.proto import model_pb2
 from singa import utils
 
 
 class Data(object):
+
     def __init__(self, fpath, batch_size=32, seq_length=100, train_ratio=0.8):
         '''Data object for loading a plain text file.
 
@@ -49,8 +47,8 @@ class Data(object):
         self.raw_data = open(fpath, 'r').read()  # read text file
         chars = list(set(self.raw_data))
         self.vocab_size = len(chars)
-        self.char_to_idx = {ch:i for i, ch in enumerate(chars)}
-        self.idx_to_char = {i:ch for i, ch in enumerate(chars)}
+        self.char_to_idx = {ch: i for i, ch in enumerate(chars)}
+        self.idx_to_char = {i: ch for i, ch in enumerate(chars)}
         data = [self.char_to_idx[c] for c in self.raw_data]
         # seq_length + 1 for the data + label
         nsamples = len(data) / (1 + seq_length)
@@ -69,10 +67,10 @@ class Data(object):
 
 def numpy2tensors(npx, npy, dev):
     '''batch, seq, dim -- > seq, batch, dim'''
-    tmpx=np.swapaxes(npx, 0, 1)
-    tmpy=np.swapaxes(npy, 0, 1)
-    inputs=[]
-    labels=[]
+    tmpx = np.swapaxes(npx, 0, 1)
+    tmpy = np.swapaxes(npy, 0, 1)
+    inputs = []
+    labels = []
     for t in range(tmpx.shape[0]):
         x = tensor.from_numpy(tmpx[t])
         y = tensor.from_numpy(tmpy[t])
@@ -99,25 +97,36 @@ def get_lr(epoch):
     return 0.001 / float(1 << (epoch / 50))
 
 
-def train(data, max_epoch, hidden_size =100, seq_length=100, batch_size=16,
-        num_stacks=1, lr=0.001, dropout = 0.5, model_path='model.bin'):
+def train(data, max_epoch, hidden_size=100, seq_length=100, batch_size=16,
+          num_stacks=1, lr=0.001, dropout=0.5, model_path='model.bin'):
     # SGD with L2 gradient normalization
     opt = optimizer.SGD(constraint=optimizer.L2Constraint(5))
     cuda = device.create_cuda_gpu()
-    rnn = layer.LSTM(name='lstm', hidden_size=hidden_size, num_stacks=num_stacks,
-            dropout=dropout, input_sample_shape=(data.vocab_size,))
+    rnn = layer.LSTM(
+        name='lstm',
+        hidden_size=hidden_size,
+        num_stacks=num_stacks,
+        dropout=dropout,
+        input_sample_shape=(
+            data.vocab_size,
+        ))
     rnn.to_device(cuda)
     print 'created rnn'
     rnn_w = rnn.param_values()[0]
-    initializer.uniform(rnn_w, -0.08, 0.08)  # init all rnn parameters
+    rnn_w.uniform(-0.08, 0.08)  # init all rnn parameters
     print 'rnn weight l1 = %f' % (rnn_w.l1())
-    dense = layer.Dense('dense', data.vocab_size, input_sample_shape=(hidden_size,))
+    dense = layer.Dense(
+        'dense',
+        data.vocab_size,
+        input_sample_shape=(
+            hidden_size,
+        ))
     dense.to_device(cuda)
     dense_w = dense.param_values()[0]
     dense_b = dense.param_values()[1]
     print 'dense w ', dense_w.shape
     print 'dense b ', dense_b.shape
-    initializer.xavier(dense_w) # init weight matrix using Xavier
+    initializer.uniform(dense_w, dense_w.shape[0], dense_w.shape[1])
     print 'dense weight l1 = %f' % (dense_w.l1())
     dense_b.set_value(0.0)
     print 'dense b l1 = %f' % (dense_b.l1())
@@ -125,18 +134,18 @@ def train(data, max_epoch, hidden_size =100, seq_length=100, batch_size=16,
     g_dense_w = tensor.Tensor(dense_w.shape, cuda)
     g_dense_b = tensor.Tensor(dense_b.shape, cuda)
 
-    lossfun = loss.SoftmaxCrossEntropy();
+    lossfun = loss.SoftmaxCrossEntropy()
     for epoch in range(max_epoch):
         train_loss = 0
         for b in range(data.num_train_batch):
             batch = data.train_dat[b * batch_size: (b + 1) * batch_size]
             inputs, labels = convert(batch, batch_size, seq_length,
-                    data.vocab_size, cuda)
+                                     data.vocab_size, cuda)
             inputs.append(tensor.Tensor())
             inputs.append(tensor.Tensor())
 
             outputs = rnn.forward(model_pb2.kTrain, inputs)[0:-2]
-            grads=[]
+            grads = []
             batch_loss = 0
             g_dense_w.set_value(0.0)
             g_dense_b.set_value(0.0)
@@ -149,52 +158,62 @@ def train(data, max_epoch, hidden_size =100, seq_length=100, batch_size=16,
                 grads.append(grad)
                 g_dense_w += gwb[0]
                 g_dense_b += gwb[1]
-                #print output.l1(), act.l1()
-            utils.update_progress(b * 1.0 / data.num_train_batch,
-                    'training loss = %f' % (batch_loss / seq_length))
+                # print output.l1(), act.l1()
+            utils.update_progress(
+                b * 1.0 / data.num_train_batch, 'training loss = %f' %
+                (batch_loss / seq_length))
             train_loss += batch_loss
 
             grads.append(tensor.Tensor())
             grads.append(tensor.Tensor())
-            g_rnn_w=rnn.backward(model_pb2.kTrain, grads)[1][0]
+            g_rnn_w = rnn.backward(model_pb2.kTrain, grads)[1][0]
             dense_w, dense_b = dense.param_values()
             opt.apply_with_lr(epoch, get_lr(epoch), g_rnn_w, rnn_w, 'rnnw')
-            opt.apply_with_lr(epoch, get_lr(epoch), g_dense_w, dense_w, 'dense_w')
-            opt.apply_with_lr(epoch, get_lr(epoch), g_dense_b, dense_b, 'dense_b')
-        print '\nEpoch %d, train loss is %f' % (epoch,
-                train_loss / data.num_train_batch / seq_length)
+            opt.apply_with_lr(
+                epoch, get_lr(epoch),
+                g_dense_w, dense_w, 'dense_w')
+            opt.apply_with_lr(
+                epoch, get_lr(epoch),
+                g_dense_b, dense_b, 'dense_b')
+        print '\nEpoch %d, train loss is %f' % \
+            (epoch, train_loss / data.num_train_batch / seq_length)
+
         eval_loss = 0
         for b in range(data.num_test_batch):
             batch = data.val_dat[b * batch_size: (b + 1) * batch_size]
             inputs, labels = convert(batch, batch_size, seq_length,
-                    data.vocab_size, cuda)
+                                     data.vocab_size, cuda)
             inputs.append(tensor.Tensor())
             inputs.append(tensor.Tensor())
             outputs = rnn.forward(model_pb2.kEval, inputs)[0:-2]
             for output, label in zip(outputs, labels):
                 output = dense.forward(model_pb2.kEval, output)
-                eval_loss += lossfun.forward(model_pb2.kEval, output, label).l1()
-        print 'Epoch %d, evaluation loss is %f' % (epoch,
-                eval_loss / data.num_test_batch / seq_length)
+                eval_loss += lossfun.forward(model_pb2.kEval,
+                                             output, label).l1()
+        print 'Epoch %d, evaluation loss is %f' % \
+            (epoch, eval_loss / data.num_test_batch / seq_length)
 
     # checkpoint the file model
     with open(model_path, 'wb') as fd:
         print 'saving model to %s' % model_path
-        d={}
-        for name, w in zip(['rnn_w', 'dense_w', 'dense_b'], [rnn_w, dense_w, dense_b]):
+        d = {}
+        for name, w in zip(
+                ['rnn_w', 'dense_w', 'dense_b'],
+                [rnn_w, dense_w, dense_b]):
             w.to_host()
-            d[name]=tensor.to_numpy(w)
-        d['idx_to_char']=data.idx_to_char
-        d['char_to_idx']=data.char_to_idx
-        d['hidden_size']=hidden_size
-        d['num_stacks']=num_stacks
-        d['dropout']=dropout
+            d[name] = tensor.to_numpy(w)
+        d['idx_to_char'] = data.idx_to_char
+        d['char_to_idx'] = data.char_to_idx
+        d['hidden_size'] = hidden_size
+        d['num_stacks'] = num_stacks
+        d['dropout'] = dropout
 
         pickle.dump(d, fd)
 
 if __name__ == '__main__':
-    parser = argparse.ArgumentParser(description='Train multi-stack LSTM for '\
-            'modeling  character sequence from plain text files')
+    parser = argparse.ArgumentParser(
+        description='Train multi-stack LSTM for '
+        'modeling character sequences from plain text files')
     parser.add_argument('data', type=str, help='training file')
     parser.add_argument('-b', type=int, default=32, help='batch_size')
     parser.add_argument('-l', type=int, default=64, help='sequence length')
@@ -204,4 +223,4 @@ if __name__ == '__main__':
     args = parser.parse_args()
     data = Data(args.data, batch_size=args.b, seq_length=args.l)
     train(data, args.m,  hidden_size=args.d, num_stacks=args.s,
-            seq_length=args.l, batch_size=args.b)
+          seq_length=args.l, batch_size=args.b)

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d3a57cfc/examples/cifar10/alexnet.py
----------------------------------------------------------------------
diff --git a/examples/cifar10/alexnet.py b/examples/cifar10/alexnet.py
index ddad1d5..34da95d 100644
--- a/examples/cifar10/alexnet.py
+++ b/examples/cifar10/alexnet.py
@@ -20,12 +20,8 @@ Following the same setting for hyper-parameters and data pre-processing, the fin
 validation accuracy would be about 82%.
 '''
 
-import sys
-import os
-
 # sys.path.append(os.path.join(os.path.dirname(__file__), '../../build/python'))
 from singa import layer
-from singa import initializer
 from singa import metric
 from singa import loss
 from singa import net as ffnet
@@ -40,23 +36,57 @@ def create_net(use_cpu=False):
     W1_specs = {'init': 'gaussian', 'mean': 0, 'std': 0.01}
     W2_specs = {'init': 'gaussian', 'mean': 0, 'std': 0.01, 'decay_mult': 250}
     b_specs = {'init': 'constant', 'value': 0, 'lt_mult': 2}
-    net.add(layer.Conv2D('conv1', 32, 5, 1, W_specs=W0_specs.copy(), b_specs=b_specs.copy(), pad=2, input_sample_shape=(3,32,32,)))
+    net.add(
+        layer.Conv2D(
+            'conv1',
+            32,
+            5,
+            1,
+            W_specs=W0_specs.copy(),
+            b_specs=b_specs.copy(),
+            pad=2,
+            input_sample_shape=(
+                3,
+                32,
+                32,
+            )))
     net.add(layer.MaxPooling2D('pool1', 3, 2, pad=1))
     net.add(layer.Activation('relu1'))
     net.add(layer.LRN(name='lrn1'))
-    net.add(layer.Conv2D('conv2', 32, 5, 1, W_specs=W1_specs.copy(), b_specs=b_specs.copy(), pad=2))
+    net.add(
+        layer.Conv2D(
+            'conv2',
+            32,
+            5,
+            1,
+            W_specs=W1_specs.copy(),
+            b_specs=b_specs.copy(),
+         pad=2))
     net.add(layer.Activation('relu2'))
     net.add(layer.MaxPooling2D('pool2', 3, 2,  pad=1))
     net.add(layer.LRN('lrn2'))
-    net.add(layer.Conv2D('conv3', 64, 5, 1, W_specs=W1_specs.copy(), b_specs=b_specs.copy(), pad=2))
+    net.add(
+        layer.Conv2D(
+            'conv3',
+            64,
+            5,
+            1,
+            W_specs=W1_specs.copy(),
+            b_specs=b_specs.copy(),
+         pad=2))
     net.add(layer.Activation('relu3'))
     net.add(layer.MaxPooling2D('pool3', 3, 2, pad=1))
     net.add(layer.Flatten('flat'))
-    net.add(layer.Dense('dense', 10, W_specs=W2_specs.copy(), b_specs=b_specs.copy()))
+    net.add(
+        layer.Dense(
+            'dense',
+            10,
+            W_specs=W2_specs.copy(),
+         b_specs=b_specs.copy()))
     for (p, specs) in zip(net.param_values(), net.param_specs()):
         filler = specs.filler
         if filler.type == 'gaussian':
-            initializer.gaussian(p, filler.mean, filler.std)
+            p.gaussian(filler.mean, filler.std)
         else:
             p.set_value(0)
         print specs.name, filler.type, p.l1()

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d3a57cfc/examples/cifar10/predict.py
----------------------------------------------------------------------
diff --git a/examples/cifar10/predict.py b/examples/cifar10/predict.py
index 8a9ea4e..307a610 100644
--- a/examples/cifar10/predict.py
+++ b/examples/cifar10/predict.py
@@ -16,28 +16,26 @@
 # =============================================================================
 import cPickle as pickle
 import numpy as np
-import sys
-import os
 
-#sys.path.append(os.path.join(os.path.dirname(__file__), '../../build/python'))
+# sys.path.append(os.path.join(os.path.dirname(__file__), '../../build/python'))
 
 from singa import device
 from singa import tensor
 import net as ffnet
 
 
-def predict(net, images, cuda, topk=5):
+def predict(net, images, dev, topk=5):
     '''Predict the label of each image.
 
     Args:
         net, a pretrained neural net
         images, a batch of images [batch_size, 3, 32, 32], which have been
             pre-processed
-        cuda, the cuda device
+        dev, the training device
         topk, return the topk labels for each image.
     '''
     x = tensor.from_numpy(images.astype(np.float32))
-    x.to_device(cuda)
+    x.to_device(dev)
     y = net.predict(x)
     y.to_host()
     y = tensor.to_numpy(y)

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d3a57cfc/examples/cifar10/vgg.py
----------------------------------------------------------------------
diff --git a/examples/cifar10/vgg.py b/examples/cifar10/vgg.py
index 327592f..29a4b40 100644
--- a/examples/cifar10/vgg.py
+++ b/examples/cifar10/vgg.py
@@ -20,11 +20,7 @@ The performance could be improved by tuning some hyper-parameters, including
 learning rate, weight decay, max_epoch, parameter initialization, etc.
 """
 
-import sys
-import os
-import math
-
-#sys.path.append(os.path.join(os.path.dirname(__file__), '../../build/python'))
+# sys.path.append(os.path.join(os.path.dirname(__file__), '../../build/python'))
 
 from singa import layer
 from singa import initializer
@@ -86,11 +82,11 @@ def create_net(use_cpu=False):
             elif 'var' in name:
                 p.set_value(1.0)
             elif 'gamma' in name:
-                initializer.uniform(p, 0, 1)
+                p.uniform(0, 1)
             elif 'conv' in name:
-                initializer.gaussian(p, 0, math.sqrt(2.0/(9.0 * p.shape[0])))
+                initializer.gaussian(p, 0, 3 * 3 * p.shape[0])
             else:
-                initializer.gaussian(p, 0, 0.02)
+                p.gaussian(0, 0.02)
         else:
             p.set_value(0)
         print name, p.l1()

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d3a57cfc/examples/index.rst
----------------------------------------------------------------------
diff --git a/examples/index.rst b/examples/index.rst
index d6faf5d..4bb5b49 100644
--- a/examples/index.rst
+++ b/examples/index.rst
@@ -1,5 +1,9 @@
+Examples
+========
+
 .. toctree::
 
+   cifar10/README
    char-rnn/README
    imagenet/README
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d3a57cfc/src/python/singa/initializer.py
----------------------------------------------------------------------
diff --git a/src/python/singa/initializer.py b/src/python/singa/initializer.py
index 277fd2f..fb99663 100644
--- a/src/python/singa/initializer.py
+++ b/src/python/singa/initializer.py
@@ -23,77 +23,68 @@ Example usages::
     from singa import initializer
 
     x = tensor.Tensor((3, 5))
-    initializer.xavier(x)
+    initializer.uniform(x, 3, 5) # use both fan_in and fan_out
+    initializer.uniform(x, 3, 0)  # use only fan_in
 '''
 
 import math
 
 
-'''
-TODO(wangwei) update the uniform and gaussian initializers
-
 def uniform(t, fan_in=0, fan_out=0):
-    typically, for conv layer weight: fan_in = nb_filter * kh * kw,
-    fan_out = nb_channel * kh * kw
-    for dense layer weight, fan_in = input_feature_length,
-    fan_out = output_feature_length
-    # Ref: [Bengio and Glorot 2010]: Understanding the difficulty of
+    '''Initialize the values of the input tensor following a uniform
+    distribution with specific bounds.
+
+    Args:
+        fan_in(int): for the weight Tensor of a convolution layer,
+            fan_in = nb_channel * kh * kw; for dense layer,
+            fan_in = input_feature_length
+        fan_out(int): for the convolution layer weight Tensor,
+            fan_out = nb_filter * kh * kw; for the weight Tensor of a dense
+            layer, fan_out = output_feature_length
+
+    Ref: [Bengio and Glorot 2010]: Understanding the difficulty of
     training deep feedforward neuralnetworks.
 
-    assert fan_in >0 or fan_out > 0, \
+    '''
+    assert fan_in > 0 or fan_out > 0, \
         'fan_in and fan_out cannot be 0 at the same time'
-    avg = 1
+    avg = 2
     if fan_in * fan_out == 0:
-      avg = 2
-    x = math.sqrt(3.0f * avg / (fan_in + fan_out))
+        avg = 1
+    x = math.sqrt(3.0 * avg / (fan_in + fan_out))
     t.uniform(-x, x)
 
 
 def gaussian(t, fan_in=0, fan_out=0):
-    typically, for conv layer weight: fan_in = nb_filter * kh * kw,
-    fan_out = nb_channel * kh * kw
-    for dense layer weight, fan_in = input_feature_length,
-    fan_out = output_feature_length
+    '''Initialize the values of the input tensor following a Gaussian
+    distribution with specific std.
+
+    Args:
+        fan_in(int): for the weight Tensor of a convolution layer,
+            fan_in = nb_channel * kh * kw; for dense layer,
+            fan_in = input_feature_length
+        fan_out(int): for the convolution layer weight Tensor,
+            fan_out = nb_filter * kh * kw; for the weight Tensor of a dense
+            layer, fan_out = output_feature_length
 
     Ref Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun: Delving Deep into
     Rectifiers: Surpassing Human-Level Performance on ImageNet Classification
-
-    assert fan_in >0 or fan_out > 0, \
+    '''
+    assert fan_in > 0 or fan_out > 0, \
         'fan_in and fan_out cannot be 0 at the same time'
-    avg = 1
+    avg = 2
     if fan_in * fan_out == 0:
-      avg = 2
-    std = math.sqrt(2.0f * avg / (fan_in + fan_out))
+        avg = 1
+    std = math.sqrt(2.0 * avg / (fan_in + fan_out))
     t.gaussian(0, std)
-'''
-
-
-def uniform(t, low=0, high=1):
-    '''Initialize the parameter values following an Uniform distribution.
-
-    Args:
-        t (Tensor): the parater tensor
-        low (float): lower bound
-        high (float): higher bound
-    '''
-    t.uniform(low, high)
-
-
-def gaussian(t, mean=0, std=0.01):
-    '''Initialize the parameter values following an Gaussian distribution.
-
-    Args:
-        t (Tensor): the parater tensor
-        mean (float): mean of the distribution
-        std (float): standard variance
-    '''
-    t.gaussian(mean, std)
 
 
 def xavier(t):
     '''Initialize the matrix parameter follow a Uniform distribution from
     [-sqrt(6/(fan_in + fan_out)), sqrt(6/(fan_in + fan_out))].
 
+    Deprecated. Please use uniform()
+
     Args:
         t (Tensor): the parater tensor
     '''
@@ -106,6 +97,8 @@ def glorot(t):
     '''Initialize the matrix parameter follow a Gaussian distribution with
     mean = 0 and std = sqrt(2.0 / (nb_row + nb_col))
 
+    Deprecated. Please use gaussian()
+
     Args:
         t (Tensor): the parater tensor
     '''
@@ -118,6 +111,8 @@ def msra(t):
     '''Initialize the matrix parameter follow a Guassian distribution with
     mean = 0, std = math.sqrt(2.0 / nb_row).
 
+    Deprecated. Please use gaussian()
+
     Ref [He, Zhang, Ren and Sun 2015]: Specifically accounts for ReLU
     nonlinearities.
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d3a57cfc/src/python/singa/optimizer.py
----------------------------------------------------------------------
diff --git a/src/python/singa/optimizer.py b/src/python/singa/optimizer.py
index 5d38997..7c8cc39 100644
--- a/src/python/singa/optimizer.py
+++ b/src/python/singa/optimizer.py
@@ -44,8 +44,8 @@ class Optimizer(object):
 
     1. construct the optimizer
     2. (optional) register each parameter with its specs.
-    3. use the optimizer to update parameter values given parameter
-        gradients and other optional info
+    3. use the optimizer to update parameter values given parameter gradients
+       and other optional info
 
     The subclasses should override the apply_with_lr function to do the real
     parameter update.

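A minimal sketch of this workflow, using calls that appear in the examples of
this commit set (the learning rate, shapes and parameter name are illustrative):

    from singa import optimizer, tensor

    opt = optimizer.SGD(lr=0.01, momentum=0.9)  # 1. construct the optimizer
    w = tensor.Tensor((3, 5))  # a parameter tensor
    w.gaussian(0, 0.02)
    g = tensor.Tensor((3, 5))  # its gradient
    g.set_value(0.1)
    opt.apply_with_lr(0, 0.01, g, w, 'w')  # 3. update w given g at epoch 0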

[12/22] incubator-singa git commit: SINGA-237 New documentation files for SINGA v1.0

Posted by wa...@apache.org.
SINGA-237 New documentation files for SINGA v1.0

Update the layer identifiers to be consistent with the documentation.
In particular, the identifier prefix should be one of
['cudnn','singacpp', 'singacuda', 'singacl']. The complete identifier
would be <prefix>_xxxx, e.g., cudnn_convolution and
singacpp_convolution. The identifier should not have upper case letters.
If the implementation is transparent to cpp/cuda/opencl, then register all
possible identifiers. For instance, Dropout is registered three times,
RegisterLayerClass("singacpp_dropout", Dropout)
RegisterLayerClass("singacl_dropout", Dropout)
RegisterLayerClass("singacuda_dropout", Dropout)


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/9c71bd67
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/9c71bd67
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/9c71bd67

Branch: refs/heads/dev
Commit: 9c71bd6745450019f9d4cbb748949142cf687616
Parents: cdd718e
Author: Wei Wang <wa...@gmail.com>
Authored: Sun Aug 14 23:41:03 2016 +0800
Committer: Wei Wang <wa...@comp.nus.edu.sg>
Committed: Mon Aug 15 19:55:17 2016 +0800

----------------------------------------------------------------------
 examples/char-rnn/train.py       |  2 +-
 examples/cifar10/alexnet.cc      |  2 +-
 examples/cifar10/alexnet.py      |  2 +-
 examples/cifar10/vgg-parallel.cc |  1 +
 examples/cifar10/vgg.py          |  4 ++--
 examples/imagenet/alexnet.cc     |  1 +
 examples/mnist/train.py          |  8 ++++----
 include/singa/core/device.h      | 15 +++++++-------
 include/singa/model/layer.h      | 13 +++++++++++-
 src/core/device/cpp_cpu.cc       |  2 +-
 src/model/layer/activation.cc    | 10 +++++++++
 src/model/layer/batchnorm.cc     |  3 +++
 src/model/layer/convolution.cc   |  1 +
 src/model/layer/dense.cc         |  3 +++
 src/model/layer/dropout.cc       |  3 +++
 src/model/layer/flatten.cc       |  3 +++
 src/model/layer/lrn.cc           |  3 +++
 src/model/layer/pooling.cc       |  1 +
 src/model/layer/prelu.cc         |  3 +++
 src/model/layer/rnn.cc           |  3 +++
 src/model/layer/softmax.cc       |  3 +++
 src/python/singa/device.py       |  4 ++--
 src/python/singa/layer.py        | 28 ++++++++++++++++----------
 src/python/singa/optimizer.py    | 38 ++++++++++++++++++-----------------
 src/python/singa/tensor.py       | 13 ++++++------
 src/python/swig/core_device.i    |  4 ++--
 test/python/test_layer.py        | 11 +++++-----
 test/singa/test_cpp_cpu.cc       |  2 +-
 test/singa/test_layer.cc         |  2 +-
 29 files changed, 123 insertions(+), 65 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9c71bd67/examples/char-rnn/train.py
----------------------------------------------------------------------
diff --git a/examples/char-rnn/train.py b/examples/char-rnn/train.py
index 1273a57..83771c2 100644
--- a/examples/char-rnn/train.py
+++ b/examples/char-rnn/train.py
@@ -128,7 +128,7 @@ def train(data, max_epoch, hidden_size=100, seq_length=100, batch_size=16,
     print 'dense b ', dense_b.shape
     initializer.uniform(dense_w, dense_w.shape[0], dense_w.shape[1])
     print 'dense weight l1 = %f' % (dense_w.l1())
-    dense_b.set_value(0.0)
+    dense_b.set_value(0)
     print 'dense b l1 = %f' % (dense_b.l1())
 
     g_dense_w = tensor.Tensor(dense_w.shape, cuda)

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9c71bd67/examples/cifar10/alexnet.cc
----------------------------------------------------------------------
diff --git a/examples/cifar10/alexnet.cc b/examples/cifar10/alexnet.cc
index e1363e4..fa953f8 100644
--- a/examples/cifar10/alexnet.cc
+++ b/examples/cifar10/alexnet.cc
@@ -27,7 +27,7 @@
 #include "singa/utils/channel.h"
 #include "singa/utils/string.h"
 namespace singa {
-
+// currently supports 'cudnn' and 'singacpp'
 const std::string engine = "cudnn";
 LayerConf GenConvConf(string name, int nb_filter, int kernel, int stride,
                       int pad, float std) {

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9c71bd67/examples/cifar10/alexnet.py
----------------------------------------------------------------------
diff --git a/examples/cifar10/alexnet.py b/examples/cifar10/alexnet.py
index 34da95d..17b6a89 100644
--- a/examples/cifar10/alexnet.py
+++ b/examples/cifar10/alexnet.py
@@ -29,7 +29,7 @@ from singa import net as ffnet
 
 def create_net(use_cpu=False):
     if use_cpu:
-        layer.engine = 'singa'
+        layer.engine = 'singacpp'
 
     net = ffnet.FeedForwardNet(loss.SoftmaxCrossEntropy(), metric.Accuracy())
     W0_specs = {'init': 'gaussian', 'mean': 0, 'std': 0.0001}

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9c71bd67/examples/cifar10/vgg-parallel.cc
----------------------------------------------------------------------
diff --git a/examples/cifar10/vgg-parallel.cc b/examples/cifar10/vgg-parallel.cc
index 149cb21..90e9fce 100644
--- a/examples/cifar10/vgg-parallel.cc
+++ b/examples/cifar10/vgg-parallel.cc
@@ -34,6 +34,7 @@
 
 namespace singa {
 
+// currently supports 'cudnn' and 'singacpp'
 const std::string engine = "cudnn";
 const float default_wd  = 0.0005f;
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9c71bd67/examples/cifar10/vgg.py
----------------------------------------------------------------------
diff --git a/examples/cifar10/vgg.py b/examples/cifar10/vgg.py
index e8e3602..89c6fe8 100644
--- a/examples/cifar10/vgg.py
+++ b/examples/cifar10/vgg.py
@@ -38,7 +38,7 @@ def ConvBnReLU(net, name, nb_filers, sample_shape=None):
 
 def create_net(use_cpu=False):
     if use_cpu:
-        layer.engine = 'singa'
+        layer.engine = 'singacpp'
     net = ffnet.FeedForwardNet(loss.SoftmaxCrossEntropy(), metric.Accuracy())
     ConvBnReLU(net, 'conv1_1', 64, (3, 32, 32))
     net.add(layer.Dropout('drop1', 0.3))
@@ -84,7 +84,7 @@ def create_net(use_cpu=False):
             initializer.uniform(p, 0, 1)
         elif len(p.shape) > 1:
             if 'conv' in name:
-                p.gaussian(0, 0, 3 * 3 * p.shape[0])
+                initializer.gaussian(p, 0, 3 * 3 * p.shape[0])
             else:
                 p.gaussian(0, 0.02)
         else:

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9c71bd67/examples/imagenet/alexnet.cc
----------------------------------------------------------------------
diff --git a/examples/imagenet/alexnet.cc b/examples/imagenet/alexnet.cc
index 26b2d96..4ac1130 100644
--- a/examples/imagenet/alexnet.cc
+++ b/examples/imagenet/alexnet.cc
@@ -33,6 +33,7 @@
 #include "singa/utils/timer.h"
 namespace singa {
 
+// currently supports 'cudnn' and 'singacpp'
 const std::string engine = "cudnn";
 LayerConf GenConvConf(string name, int nb_filter, int kernel, int stride,
                       int pad, float std, float bias = .0f) {

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9c71bd67/examples/mnist/train.py
----------------------------------------------------------------------
diff --git a/examples/mnist/train.py b/examples/mnist/train.py
index 43b8e26..55c7cbb 100644
--- a/examples/mnist/train.py
+++ b/examples/mnist/train.py
@@ -85,7 +85,7 @@ def train(data_file, use_gpu, num_epoch=10, batch_size=100):
             tposhidsample = tensor.gt(tposhidprob, tposhidrandom)
 
             # negative phase
-            tnegdata = tensor.mult(tposhidsample, tweight.transpose())
+            tnegdata = tensor.mult(tposhidsample, tweight.T())
             tnegdata.add_row(tvbias)
             tnegdata = tensor.sigmoid(tnegdata)
 
@@ -95,8 +95,8 @@ def train(data_file, use_gpu, num_epoch=10, batch_size=100):
             error = tensor.sum(tensor.square((tdata - tnegdata)))
             trainerrorsum = error + trainerrorsum
 
-            tgweight = tensor.mult(tnegdata.transpose(), tneghidprob) -\
-                    tensor.mult(tdata.transpose(), tposhidprob)
+            tgweight = tensor.mult(tnegdata.T(), tneghidprob) -\
+                    tensor.mult(tdata.T(), tposhidprob)
             tgvbias = tensor.sum(tnegdata, 0) - tensor.sum(tdata, 0)
             tghbias = tensor.sum(tneghidprob, 0) - tensor.sum(tposhidprob, 0)
 
@@ -115,7 +115,7 @@ def train(data_file, use_gpu, num_epoch=10, batch_size=100):
         initializer.uniform(tvalidposhidrandom, 0.0, 1.0)
         tvalidposhidsample = tensor.gt(tvalidposhidprob, tvalidposhidrandom)
 
-        tvalidnegdata = tensor.mult(tvalidposhidsample, tweight.transpose())
+        tvalidnegdata = tensor.mult(tvalidposhidsample, tweight.T())
         tvalidnegdata.add_row(tvbias)
         tvalidnegdata = tensor.sigmoid(tvalidnegdata)
 

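For reference, the transpose calls above compute the standard CD-1 weight
gradient of an RBM; in the script's notation (this formula is a reading of the
code, not text from the commit):

    g_W = \tilde{v}^{\top} \, p(h \mid \tilde{v}) \;-\; v^{\top} \, p(h \mid v)

where v is the data batch (tdata), \tilde{v} the reconstruction (tnegdata),
and p(h | .) the hidden activation probabilities (tposhidprob, tneghidprob);
tgvbias and tghbias are the corresponding column sums.
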
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9c71bd67/include/singa/core/device.h
----------------------------------------------------------------------
diff --git a/include/singa/core/device.h b/include/singa/core/device.h
index a564524..810d41f 100644
--- a/include/singa/core/device.h
+++ b/include/singa/core/device.h
@@ -152,6 +152,7 @@ class CppCPU : public Device {
 
   std::shared_ptr<Device> host() const override { return defaultDevice;}
   void SetRandSeed(unsigned seed) override;
+
  protected:
   void DoExec(function<void(Context*)>&& fn, int executor) override;
 
@@ -303,10 +304,15 @@ private:
 /// If CUDA or OPENCL are not enabled, then the respective related methods should
 /// return something that indicates their absence (for example, 0 devices);
 /// however they should always be available regardless of compile-time switches.
-#ifdef USE_CUDA
 class Platform {
 public:
 
+  /// Return the default host device
+  static std::shared_ptr<Device> GetDefaultDevice() {
+    return defaultDevice;
+  }
+
+#ifdef USE_CUDA
   /// Return the number of total available GPUs
   static int GetNumGPUs();
 
@@ -322,11 +328,6 @@ public:
   /// Return a string containing all hardware info, e.g., version, memory size.
   static const std::string DeviceQuery(int id, bool verbose = false);
 
-  /// Return the defualt host device
-  static std::shared_ptr<Device> GetDefaultDevice() {
-    return defaultDevice;
-  }
-
   /// Create a set of CudaGPU Device using 'num_devices' free GPUs.
   static const std::vector<std::shared_ptr<Device>>
   CreateCudaGPUs(const size_t num_devices, size_t init_size = 0);
@@ -334,6 +335,7 @@ public:
   /// Create a set of CudaGPU Device using given GPU IDs.
   static const std::vector<std::shared_ptr<Device>>
   CreateCudaGPUsOn(const std::vector<int> &devices, size_t init_size = 0);
+#endif // USE_CUDA
 
   /// Create a \p num_devices set of valid OpenCL devices, regardless of
   /// platforms.  If there are fewer valid devices than requested, then this
@@ -373,7 +375,6 @@ private:
 #endif  // USE_OPENCL
 };
 
-#endif // USE_CUDA
 
 }  // namespace singa
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9c71bd67/include/singa/model/layer.h
----------------------------------------------------------------------
diff --git a/include/singa/model/layer.h b/include/singa/model/layer.h
index 58f0f4b..e67fcc5 100644
--- a/include/singa/model/layer.h
+++ b/include/singa/model/layer.h
@@ -222,6 +222,17 @@ class Layer {
   vector<ParamSpec> param_specs_;
 };
 
+/// Name should be formatted as cudnn_xxx, singacpp_xxx, singacuda_xxx,
+/// singacl_xxx, where xxx is the real layer type, e.g., convolution, relu, etc.
+/// xxx should contain only lower case letters.
+/// If the implementation is transparent to cpp/cuda/opencl, then register all
+/// possible identifiers. For instance, Dropout is registered three times,
+/// RegisterLayerClass("singacpp_dropout", Dropout)
+/// RegisterLayerClass("singacl_dropout", Dropout)
+/// RegisterLayerClass("singacuda_dropout", Dropout)
+/// To be compatible with previous commits, the following identifier is
+/// registered. Better to avoid using it, as it will be deprecated.
+/// RegisterLayerClass("singa_dropout", Dropout)
 #define RegisterLayerClass(Name, SubLayer) \
   static Registra<Layer, SubLayer> Name##SubLayer(#Name);
 
@@ -234,7 +245,7 @@ inline const std::vector<std::string> GetRegisteredLayers() {
   vector<std::string> ret;
   for (const string type : Factory<Layer>::GetIDs()) {
     auto layer = CreateLayer(type);
-    ret.push_back("Register type: " + type + " --> " + layer->layer_type());
+    ret.push_back("Register type: " + type);
   }
   return ret;
 }

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9c71bd67/src/core/device/cpp_cpu.cc
----------------------------------------------------------------------
diff --git a/src/core/device/cpp_cpu.cc b/src/core/device/cpp_cpu.cc
index 2b3e63b..04209ab 100644
--- a/src/core/device/cpp_cpu.cc
+++ b/src/core/device/cpp_cpu.cc
@@ -22,7 +22,7 @@ namespace singa {
 
 std::shared_ptr<Device> defaultDevice=std::make_shared<CppCPU>();
 
-CppCPU::CppCPU() : Device(0, 1) {
+CppCPU::CppCPU() : Device(-1, 1) {
   lang_ = kCpp;
   //host_ = nullptr;
 }

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9c71bd67/src/model/layer/activation.cc
----------------------------------------------------------------------
diff --git a/src/model/layer/activation.cc b/src/model/layer/activation.cc
index aa40edb..eb90d87 100644
--- a/src/model/layer/activation.cc
+++ b/src/model/layer/activation.cc
@@ -25,6 +25,16 @@ RegisterLayerClass(singa_relu, Activation);
 RegisterLayerClass(singa_sigmoid, Activation);
 RegisterLayerClass(singa_tanh, Activation);
 
+RegisterLayerClass(singacpp_relu, Activation);
+RegisterLayerClass(singacuda_relu, Activation);
+RegisterLayerClass(singacl_relu, Activation);
+RegisterLayerClass(singacpp_sigmoid, Activation);
+RegisterLayerClass(singacuda_sigmoid, Activation);
+RegisterLayerClass(singacl_sigmoid, Activation);
+RegisterLayerClass(singacpp_tanh, Activation);
+RegisterLayerClass(singacuda_tanh, Activation);
+RegisterLayerClass(singacl_tanh, Activation);
+
 void Activation::Setup(const Shape& in_sample, const LayerConf& conf) {
   Layer::Setup(in_sample, conf);
   auto pos = conf.type().find_first_of('_');

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9c71bd67/src/model/layer/batchnorm.cc
----------------------------------------------------------------------
diff --git a/src/model/layer/batchnorm.cc b/src/model/layer/batchnorm.cc
index f348661..b345c6b 100644
--- a/src/model/layer/batchnorm.cc
+++ b/src/model/layer/batchnorm.cc
@@ -22,6 +22,9 @@
 
 namespace singa {
 RegisterLayerClass(singa_batchnorm, BatchNorm);
+RegisterLayerClass(singacpp_batchnorm, BatchNorm);
+RegisterLayerClass(singacuda_batchnorm, BatchNorm);
+RegisterLayerClass(singacl_batchnorm, BatchNorm);
 void BatchNorm::Setup(const Shape& in_sample, const LayerConf& conf) {
   Layer::Setup(in_sample, conf);
   out_sample_shape_ = in_sample;

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9c71bd67/src/model/layer/convolution.cc
----------------------------------------------------------------------
diff --git a/src/model/layer/convolution.cc b/src/model/layer/convolution.cc
index 4fc209f..0d1751d 100644
--- a/src/model/layer/convolution.cc
+++ b/src/model/layer/convolution.cc
@@ -24,6 +24,7 @@ namespace singa {
 using std::vector;
 
 RegisterLayerClass(singa_convolution, Convolution);
+RegisterLayerClass(singacpp_convolution, Convolution);
 void Convolution::Setup(const Shape &in_sample, const LayerConf &conf) {
   Layer::Setup(in_sample, conf);
   ConvolutionConf conv_conf = conf.convolution_conf();

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9c71bd67/src/model/layer/dense.cc
----------------------------------------------------------------------
diff --git a/src/model/layer/dense.cc b/src/model/layer/dense.cc
index 1a2d16e..7470154 100644
--- a/src/model/layer/dense.cc
+++ b/src/model/layer/dense.cc
@@ -24,6 +24,9 @@ namespace singa {
 using std::vector;
 
 RegisterLayerClass(singa_dense, Dense);
+RegisterLayerClass(singacpp_dense, Dense);
+RegisterLayerClass(singacuda_dense, Dense);
+RegisterLayerClass(singacl_dense, Dense);
 Dense::~Dense() {
   // delete weight_;
   // delete bias_;

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9c71bd67/src/model/layer/dropout.cc
----------------------------------------------------------------------
diff --git a/src/model/layer/dropout.cc b/src/model/layer/dropout.cc
index 35801b4..d7397a1 100644
--- a/src/model/layer/dropout.cc
+++ b/src/model/layer/dropout.cc
@@ -21,6 +21,9 @@
 namespace singa {
 
 RegisterLayerClass(singa_dropout, Dropout);
+RegisterLayerClass(singacpp_dropout, Dropout);
+RegisterLayerClass(singacuda_dropout, Dropout);
+RegisterLayerClass(singacl_dropout, Dropout);
 void Dropout::Setup(const Shape& in_sample, const LayerConf& conf) {
   Layer::Setup(in_sample, conf);
   dropout_ratio_ = conf.dropout_conf().dropout_ratio();

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9c71bd67/src/model/layer/flatten.cc
----------------------------------------------------------------------
diff --git a/src/model/layer/flatten.cc b/src/model/layer/flatten.cc
index d89361e..561c310 100644
--- a/src/model/layer/flatten.cc
+++ b/src/model/layer/flatten.cc
@@ -21,6 +21,9 @@
 namespace singa {
 
 RegisterLayerClass(singa_flatten, Flatten);
+RegisterLayerClass(singacpp_flatten, Flatten);
+RegisterLayerClass(singacuda_flatten, Flatten);
+RegisterLayerClass(singacl_flatten, Flatten);
 void Flatten::Setup(const Shape& in_sample, const LayerConf &conf) {
   Layer::Setup(in_sample, conf);
   axis_ = conf.flatten_conf().axis();

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9c71bd67/src/model/layer/lrn.cc
----------------------------------------------------------------------
diff --git a/src/model/layer/lrn.cc b/src/model/layer/lrn.cc
index 6b5a618..4fdb5c9 100644
--- a/src/model/layer/lrn.cc
+++ b/src/model/layer/lrn.cc
@@ -23,6 +23,9 @@
 
 namespace singa {
 RegisterLayerClass(singa_lrn, LRN);
+RegisterLayerClass(singacpp_lrn, LRN);
+RegisterLayerClass(singacuda_lrn, LRN);
+RegisterLayerClass(singacl_lrn, LRN);
 void LRN::Setup(const Shape& in_sample, const LayerConf& conf) {
   Layer::Setup(in_sample, conf);
   out_sample_shape_ = in_sample;

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9c71bd67/src/model/layer/pooling.cc
----------------------------------------------------------------------
diff --git a/src/model/layer/pooling.cc b/src/model/layer/pooling.cc
index 5e7ba1d..23969da 100644
--- a/src/model/layer/pooling.cc
+++ b/src/model/layer/pooling.cc
@@ -21,6 +21,7 @@
 namespace singa {
 
 RegisterLayerClass(singa_pooling, Pooling);
+RegisterLayerClass(singacpp_pooling, Pooling);
 void Pooling::Setup(const Shape& in_sample, const LayerConf& conf) {
   Layer::Setup(in_sample, conf);
   PoolingConf pool_conf = conf.pooling_conf();

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9c71bd67/src/model/layer/prelu.cc
----------------------------------------------------------------------
diff --git a/src/model/layer/prelu.cc b/src/model/layer/prelu.cc
index a20972c..e567172 100644
--- a/src/model/layer/prelu.cc
+++ b/src/model/layer/prelu.cc
@@ -21,6 +21,9 @@
 namespace singa {
 
 RegisterLayerClass(singa_prelu, PReLU);
+RegisterLayerClass(singacpp_prelu, PReLU);
+RegisterLayerClass(singacuda_prelu, PReLU);
+RegisterLayerClass(singacl_prelu, PReLU);
 void PReLU::Setup(const Shape& in_sample, const LayerConf &conf) {
   Layer::Setup(in_sample, conf);
   out_sample_shape_ = in_sample;

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9c71bd67/src/model/layer/rnn.cc
----------------------------------------------------------------------
diff --git a/src/model/layer/rnn.cc b/src/model/layer/rnn.cc
index 524b462..b811f9d 100644
--- a/src/model/layer/rnn.cc
+++ b/src/model/layer/rnn.cc
@@ -23,6 +23,9 @@
 
 namespace singa {
 RegisterLayerClass(singa_rnn, RNN);
+RegisterLayerClass(singacpp_rnn, RNN);
+RegisterLayerClass(singacuda_rnn, RNN);
+RegisterLayerClass(singacl_rnn, RNN);
 void RNN::Setup(const Shape& in_sample, const LayerConf &conf) {
   Layer::Setup(in_sample, conf);
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9c71bd67/src/model/layer/softmax.cc
----------------------------------------------------------------------
diff --git a/src/model/layer/softmax.cc b/src/model/layer/softmax.cc
index 6a49131..2cbd264 100644
--- a/src/model/layer/softmax.cc
+++ b/src/model/layer/softmax.cc
@@ -20,6 +20,9 @@
 namespace singa {
 
 RegisterLayerClass(singa_softmax, Softmax);
+RegisterLayerClass(singacpp_softmax, Softmax);
+RegisterLayerClass(singacuda_softmax, Softmax);
+RegisterLayerClass(singacl_softmax, Softmax);
 void Softmax::Setup(const Shape& in_sample, const LayerConf& conf) {
   Layer::Setup(in_sample, conf);
   CHECK_EQ(in_sample.size(), 1u);

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9c71bd67/src/python/singa/device.py
----------------------------------------------------------------------
diff --git a/src/python/singa/device.py b/src/python/singa/device.py
index eff6783..897fdf5 100644
--- a/src/python/singa/device.py
+++ b/src/python/singa/device.py
@@ -113,7 +113,7 @@ def create_cuda_gpu_on(device_id):
     devices = create_cuda_gpus_on([device_id])
     return devices[0]
 
-
+default_device = singa.Platform.GetDefaultDevice()
 def get_default_device():
     '''Get the default host device which is a CppCPU device'''
-    return singa.Platform.GetDefaultDevice()
+    return default_device

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9c71bd67/src/python/singa/layer.py
----------------------------------------------------------------------
diff --git a/src/python/singa/layer.py b/src/python/singa/layer.py
index 0759716..b0fdb5e 100644
--- a/src/python/singa/layer.py
+++ b/src/python/singa/layer.py
@@ -152,9 +152,9 @@ class Layer(object):
             for t in x:
                 xs.append(t.singa_tensor)
         else:
-            assert isinstance(input, tensor.Tensor), \
+            assert isinstance(x, tensor.Tensor), \
                 'input must be a Tensor or a list of Tensor'
-            xs = x
+            xs = x.singa_tensor
         y = self.layer.Forward(flag, xs)
         if type(y) == list:
             return tensor.from_raw_tensors(y)
@@ -266,7 +266,7 @@ class Conv2D(Layer):
         self.conf.param.extend([bspecs])
         self.param_specs.append(bspecs)
 
-        _check_engine(engine, ['cudnn', 'singa'])
+        _check_engine(engine, ['cudnn', 'singacpp'])
         self.layer = _create_layer(engine, 'Convolution')
         if input_sample_shape is not None:
             self.setup(input_sample_shape)
@@ -322,7 +322,7 @@ class Pooling2D(Layer):
         conf = self.conf.pooling_conf
         conf = _set_kernel_stride_pad(conf, kernel, stride, border_mode, pad)
         conf.pool = mode
-        _check_engine(engine, ['cudnn', 'singa'])
+        _check_engine(engine, ['cudnn', 'singacpp'])
         self.layer = _create_layer(engine, 'Pooling')
         if input_sample_shape is not None:
             self.setup(input_sample_shape)
@@ -439,7 +439,7 @@ class BatchNormalization(Layer):
         self.param_specs.append(_construct_param_specs_from_dict(beta_specs))
         self.param_specs.append(_construct_param_specs_from_dict(mean_specs))
         self.param_specs.append(_construct_param_specs_from_dict(var_specs))
-        _check_engine(engine, ['cudnn', 'singa'])
+        _check_engine(engine, ['cudnn', 'singacpp', 'singacuda', 'singacl'])
         self.layer = _create_layer(engine, 'BatchNorm')
         if input_sample_shape is not None:
             self.setup(input_sample_shape)
@@ -466,7 +466,7 @@ class LRN(Layer):
         # TODO(wangwei) enable mode = 'within_channel'
         assert mode == 'cross_channel', 'only support mode="cross_channel"'
         conf.norm_region = model_pb2.LRNConf.ACROSS_CHANNELS
-        _check_engine(engine, ['cudnn', 'singa'])
+        _check_engine(engine, ['cudnn', 'singacpp', 'singacuda', 'singacl'])
         self.layer = _create_layer(engine, 'LRN')
         if input_sample_shape is not None:
             self.setup(input_sample_shape)
@@ -513,7 +513,10 @@ class Dense(Layer):
         self.conf.param.extend([_construct_param_specs_from_dict(b_specs)])
         self.param_specs.append(_construct_param_specs_from_dict(b_specs))
         # dense layer is transparent to engine.
-        self.layer = _create_layer('singa', 'Dense')
+        if engine == 'cudnn':
+            self.layer = _create_layer('singacuda', 'Dense')
+        else:
+            self.layer = _create_layer(engine, 'Dense')
         if input_sample_shape is not None:
             self.setup(input_sample_shape)
 
@@ -533,7 +536,7 @@ class Dropout(Layer):
         # 'cudnn' works for v>=5.0
         #  if engine.lower() == 'cudnn':
         #      engine = 'cuda'
-        _check_engine(engine, ['cudnn', 'singa'])
+        _check_engine(engine, ['cudnn', 'singacpp', 'singacuda', 'singacl'])
         self.layer = _create_layer(engine, 'Dropout')
         if input_sample_shape is not None:
             self.setup(input_sample_shape)
@@ -549,8 +552,8 @@ class Activation(Layer):
     """
     def __init__(self, name, mode='relu', input_sample_shape=None):
         super(Activation, self).__init__(name)
+        _check_engine(engine, ['cudnn', 'singacpp', 'singacuda', 'singacl'])
         self.conf.type = (engine + '_' + mode).lower()
-        _check_engine(engine, ['cudnn', 'singa'])
         self.layer = _create_layer(engine, mode)
         if input_sample_shape is not None:
             self.setup(input_sample_shape)
@@ -568,7 +571,7 @@ class Softmax(Layer):
         super(Softmax, self).__init__(name)
         # conf = self.conf.softmax_conf
         # conf.axis = axis
-        _check_engine(engine, ['cudnn', 'singa'])
+        _check_engine(engine, ['cudnn', 'singacpp', 'singacl', 'singacuda'])
         self.layer = _create_layer(engine, 'Softmax')
         if input_sample_shape is not None:
             self.setup(input_sample_shape)
@@ -587,7 +590,10 @@ class Flatten(Layer):
         conf = self.conf.flatten_conf
         conf.axis = axis
         # fltten layer is transparent to engine
-        self.layer = _create_layer('singa', 'Flatten')
+        if engine == 'cudnn':
+            self.layer = _create_layer('singacuda', 'Flatten')
+        else:
+            self.layer = _create_layer(engine, 'Flatten')
         if input_sample_shape is not None:
             self.setup(input_sample_shape)
 

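The Dense and Flatten hunks above make engine-transparent layers fall back
from 'cudnn' to 'singacuda', since there is no cuDNN kernel for them. A rough
sketch of that dispatch rule (the function below is illustrative only; the
real code inlines this logic around _create_layer):

    # Hedged sketch of the engine-transparent dispatch used by Dense/Flatten.
    # 'cudnn' has no kernel for these layers, so fall back to 'singacuda'.
    def pick_engine_for_transparent_layer(engine):
        if engine == 'cudnn':
            return 'singacuda'
        return engine  # 'singacpp', 'singacuda' or 'singacl' pass through

    assert pick_engine_for_transparent_layer('cudnn') == 'singacuda'
    assert pick_engine_for_transparent_layer('singacpp') == 'singacpp'
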
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9c71bd67/src/python/singa/optimizer.py
----------------------------------------------------------------------
diff --git a/src/python/singa/optimizer.py b/src/python/singa/optimizer.py
index 338c6b0..86e68af 100644
--- a/src/python/singa/optimizer.py
+++ b/src/python/singa/optimizer.py
@@ -54,7 +54,7 @@ class Optimizer(object):
         lr (float): a constant for the learning rate, mutually exclusive with
             'lr_gen'.
         momentum (float): a constant for the momentum value
-        decay (float): the coefficent for L2 regularizer, which is mutually
+        weight_decay (float): the coefficient for L2 regularizer, which is mutually
             exclusive with 'regularizer'.
         lr_gen (function): a function returns the learning rate given
             the current training step/epoch. It is mutually exclusive with lr.
@@ -67,7 +67,7 @@ class Optimizer(object):
             constraint would be applied inside apply_with_lr(). Users can
             also do regularization outside.
     '''
-    def __init__(self, lr=None, momentum=None, decay=None, lr_gen=None,
+    def __init__(self, lr=None, momentum=None, weight_decay=None, lr_gen=None,
                  regularizer=None, constraint=None):
         if lr is not None:
             assert lr_gen is None, 'Cannot set lr and lr_gen at the same time'
@@ -76,10 +76,10 @@ class Optimizer(object):
                 return lr
         self.lr_gen = lr_gen
         self.momentum = momentum
-        if decay is not None:
+        if weight_decay is not None:
             assert regularizer is None, \
-                'Cannot set decay and regularizer at the same time'
-            regularizer = L2Regularizer(decay)
+                'Cannot set weight_decay and regularizer at the same time'
+            regularizer = L2Regularizer(weight_decay)
         if regularizer is not None:
             if isinstance(regularizer, model_pb2.RegularizerConf):
                 self.regularizer = CppRegularizer(regularizer)
@@ -121,7 +121,7 @@ class Optimizer(object):
         if specs.decay_mult != 1:
             self.decay_multiplier[name] = specs.decay_mult
 
-    def apply_regularizer_constraint(self, value, grad, name=None, epoch=None):
+    def apply_regularizer_constraint(self, epoch, value, grad, name=None):
         '''Apply regularization and constraint if available.
 
         If there are both global regularizer (constraint) and param specific
@@ -137,12 +137,12 @@ class Optimizer(object):
             the updated gradient Tensor
         '''
         if name is not None and name in self.constraints:
-            self.constraints[name].apply(value, grad, epoch)
+            self.constraints[name].apply(epoch, value, grad)
         elif self.constraint is not None:
             self.constraint.apply(epoch, value, grad)
 
         if name is not None and name in self.regularizers:
-            self.regularizers[name].apply(value, grad, epoch)
+            self.regularizers[name].apply(epoch, value, grad)
         elif self.regularizer is not None:
             self.regularizer.apply(epoch, value, grad)
         return grad
@@ -193,12 +193,13 @@ class SGD(Optimizer):
     See the base Optimizer for all arguments.
     '''
 
-    def __init__(self, lr=None, momentum=None, decay=None, lr_gen=None,
+    def __init__(self, lr=None, momentum=None, weight_decay=None, lr_gen=None,
                  regularizer=None, constraint=None):
-        super(SGD, self).__init__(lr, momentum, decay, lr_gen, regularizer,
+        super(SGD, self).__init__(lr, momentum, weight_decay, lr_gen, regularizer,
                                   constraint)
         conf = model_pb2.OptimizerConf()
-        conf.momentum = self.momentum
+        if self.momentum is not None:
+            conf.momentum = self.momentum
         conf.type = 'sgd'
         self.opt = singa.CreateOptimizer('SGD')
         self.opt.Setup(conf.SerializeToString())
@@ -215,12 +216,13 @@ class Nesterov(Optimizer):
     See the base Optimizer for all arguments.
     '''
 
-    def __init__(self, lr=None, momentum=0.9, decay=None, lr_gen=None,
+    def __init__(self, lr=None, momentum=0.9, weight_decay=None, lr_gen=None,
                  regularizer=None, constraint=None):
-        super(Nesterov, self).__init__(lr, momentum, decay, lr_gen, regularizer,
+        super(Nesterov, self).__init__(lr, momentum, weight_decay, lr_gen, regularizer,
                                        constraint)
         conf = model_pb2.OptimizerConf()
-        conf.momentum = momentum
+        if self.momentum is not None:
+            conf.momentum = momentum
         conf.type = 'nesterov'
         self.opt = singa.CreateOptimizer('Nesterov')
         self.opt.Setup(conf.SerializeToString())
@@ -239,9 +241,9 @@ class AdaGrad(Optimizer):
     Args:
         epsilon (float): small number for preventing numeric error.
     '''
-    def __init__(self, epsilon=1e-8, lr=None, decay=None, lr_gen=None,
+    def __init__(self, epsilon=1e-8, lr=None, weight_decay=None, lr_gen=None,
                  regularizer=None, constraint=None):
-        super(RMSProp, self).__init__(lr, decay, lr_gen, regularizer,
+        super(AdaGrad, self).__init__(lr, None, weight_decay, lr_gen, regularizer,
                                       constraint)
         conf = model_pb2.OptimizerConf()
         conf.delta = epsilon
@@ -265,9 +267,9 @@ class RMSProp(Optimizer):
         epsilon (float): small value for preventing numeric error
     '''
 
-    def __init__(self, rho=0.9, epsilon=1e-8, lr=None, decay=None, lr_gen=None,
+    def __init__(self, rho=0.9, epsilon=1e-8, lr=None, weight_decay=None, lr_gen=None,
                  regularizer=None, constraint=None):
-        super(RMSProp, self).__init__(lr, decay, lr_gen, regularizer,
+        super(RMSProp, self).__init__(lr, None, weight_decay, lr_gen, regularizer,
                                       constraint)
         conf = model_pb2.OptimizerConf()
         conf.rho = rho

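After this rename, callers pass weight_decay instead of decay, and
apply_regularizer_constraint takes epoch as its first argument. A minimal
usage sketch, assuming the singa Python package is built and importable and
with arbitrary hyper-parameter values:

    from singa import optimizer

    # 'weight_decay' replaces the old 'decay' argument; it is mutually
    # exclusive with passing an explicit regularizer.
    sgd = optimizer.SGD(lr=0.01, momentum=0.9, weight_decay=0.0005)

    # The regularizer/constraint hook now takes epoch first:
    # grad = sgd.apply_regularizer_constraint(epoch, value, grad, name='w')
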
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9c71bd67/src/python/singa/tensor.py
----------------------------------------------------------------------
diff --git a/src/python/singa/tensor.py b/src/python/singa/tensor.py
index e2572d3..5086fdc 100644
--- a/src/python/singa/tensor.py
+++ b/src/python/singa/tensor.py
@@ -56,7 +56,7 @@ import numpy as np
 from functools import reduce
 from .proto import core_pb2
 from . import singa_wrap as singa
-import device
+import device as pydevice
 
 
 class Tensor(object):
@@ -81,7 +81,8 @@ class Tensor(object):
             assert isinstance(shape, tuple), 'shape should be tuple'
             vs = list(shape)
             if device is None:
-                self.singa_tensor = singa.Tensor(vs, dtype)
+                device = pydevice.get_default_device()
+                self.singa_tensor = singa.Tensor(vs, device, dtype)
             else:
                 self.singa_tensor = singa.Tensor(vs, device, dtype)
             self.shape = shape
@@ -225,12 +226,10 @@ class Tensor(object):
         '''
         return _call_singa_func(self.singa_tensor.T)
 
-    '''
     def copy(self):
-        shallow copy
-            call copy constructor of singa::Tensor
+        '''Shallow copy; calls the copy constructor of singa::Tensor.
+        '''
         return _call_singa_func(singa.Tensor, self.singa_tensor)
-    '''
 
     def deepcopy(self):
         '''Same as clone().
@@ -513,7 +512,7 @@ def to_numpy(t):
     Returns:
         a numpy array
     '''
-    assert t.device == device.get_default_device() or t.device is None, \
+    assert (t.device.id() == -1) or (t.device is None), \
         'Please move the tensor onto the default host device'
 
     if t.dtype == core_pb2.kFloat32:

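With the host device id changed to -1 and to_numpy() checking it, a tensor
must reside on the default host device before conversion. A hedged usage
sketch (again assuming a built singa package):

    from singa import tensor

    t = tensor.Tensor((2, 3))   # now defaults to the CppCPU host device
    t.set_value(1.0)
    a = tensor.to_numpy(t)      # fine: t.device.id() == -1

    # For a GPU tensor, move it back to the host first:
    # t.to_host()
    # a = tensor.to_numpy(t)
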
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9c71bd67/src/python/swig/core_device.i
----------------------------------------------------------------------
diff --git a/src/python/swig/core_device.i b/src/python/swig/core_device.i
index 21b97b4..b3521be 100644
--- a/src/python/swig/core_device.i
+++ b/src/python/swig/core_device.i
@@ -49,9 +49,9 @@ class Device {
   int id() const;
 };
 
-#if USE_CUDA
 class Platform {
  public:
+#if USE_CUDA
   static int GetNumGPUs();
   static const std::vector<int> GetGPUIDs();
   static const std::pair<size_t, size_t> GetGPUMemSize(const int device);
@@ -61,9 +61,9 @@ class Platform {
   CreateCudaGPUs(const size_t num_devices, size_t init_size = 0);
   static const std::vector<std::shared_ptr<Device>>
   CreateCudaGPUsOn(const std::vector<int> &devices, size_t init_size = 0);
+#endif // USE_CUDA
   static std::shared_ptr<Device> GetDefaultDevice();
 };
 
-#endif // USE_CUDA
 }
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9c71bd67/test/python/test_layer.py
----------------------------------------------------------------------
diff --git a/test/python/test_layer.py b/test/python/test_layer.py
index 7078240..441f352 100644
--- a/test/python/test_layer.py
+++ b/test/python/test_layer.py
@@ -25,6 +25,7 @@ class TestPythonLayer(unittest.TestCase):
                          )
 
     def setUp(self):
+        layer.engine = 'singacpp'
         self.w = {'init': 'Xavier', 'regularizer': 1e-4}
         self.b = {'init': 'Constant', 'value': 0}
         self.sample_shape = None
@@ -40,8 +41,8 @@ class TestPythonLayer(unittest.TestCase):
         in_sample_shape = (1, 3, 3)
         conv = layer.Conv2D('conv', 1, 3, 2, W_specs=self.w, b_specs=self.b,
                             pad=1, input_sample_shape=in_sample_shape)
-        cuda = device.create_cuda_gpu()
-        conv.to_device(cuda)
+        # cuda = device.create_cuda_gpu()
+        # conv.to_device(cuda)
         params = conv.param_values()
 
         raw_x = np.arange(9, dtype=np.float32) + 1
@@ -51,9 +52,9 @@ class TestPythonLayer(unittest.TestCase):
         params[0].copy_from_numpy(w)
         params[1].set_value(1.0)
 
-        x.to_device(cuda)
+        # x.to_device(cuda)
         y = conv.forward(model_pb2.kTrain, x)
-        y.to_host()
+        # y.to_host()
         npy = tensor.to_numpy(y).flatten()
 
         self.assertAlmostEqual(3.0, npy[0])
@@ -63,7 +64,7 @@ class TestPythonLayer(unittest.TestCase):
 
         dy = np.asarray([0.1, 0.2, 0.3, 0.4], dtype=np.float32).reshape(y.shape)
         grad = tensor.from_numpy(dy)
-        grad.to_device(cuda)
+        # grad.to_device(cuda)
         (dx, [dw, db]) = conv.backward(model_pb2.kTrain, grad)
         dx.to_host()
         dw.to_host()

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9c71bd67/test/singa/test_cpp_cpu.cc
----------------------------------------------------------------------
diff --git a/test/singa/test_cpp_cpu.cc b/test/singa/test_cpp_cpu.cc
index 35bd108..5f3308a 100644
--- a/test/singa/test_cpp_cpu.cc
+++ b/test/singa/test_cpp_cpu.cc
@@ -27,7 +27,7 @@ using singa::CppCPU;
 using singa::Block;
 TEST(CppCPU, Constructor) {
   CppCPU dev;
-  EXPECT_EQ(0, dev.id());
+  EXPECT_EQ(-1, dev.id());
 }
 
 TEST(CppCPU, MemoryMallocFree) {

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9c71bd67/test/singa/test_layer.cc
----------------------------------------------------------------------
diff --git a/test/singa/test_layer.cc b/test/singa/test_layer.cc
index aa01746..bb33dba 100644
--- a/test/singa/test_layer.cc
+++ b/test/singa/test_layer.cc
@@ -7,7 +7,7 @@ TEST(Layer, CreateLayer) {
       "convolution", "dense", "dropout", "relu", "batchnorm",
       "flatten",     "lrn",   "pooling", "prelu",      "softmax"};
   for (auto type : types) {
-    auto layer = singa::CreateLayer("singa_" + type);
+    auto layer = singa::CreateLayer("singacpp_" + type);
     // EXPECT_EQ(layer->layer_type(), type);
   }
 }



[17/22] incubator-singa git commit: SINGA-223 Use Sphinx to create the website.

Posted by wa...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/31ae6bd4/doc/zh/_templates/layout.html
----------------------------------------------------------------------
diff --git a/doc/zh/_templates/layout.html b/doc/zh/_templates/layout.html
new file mode 100755
index 0000000..6b9f2c5
--- /dev/null
+++ b/doc/zh/_templates/layout.html
@@ -0,0 +1,61 @@
+{#
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements.  See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership.  The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+#}
+{% extends "!layout.html" %}
+
+{% block extrahead %}
+    <link href="{{ pathto("_static/style.css", True) }}" rel="stylesheet" type="text/css">
+{% endblock %}
+     
+{% block footer %}
+
+<div class="rst-versions shift-up" data-toggle="rst-versions" role="note" aria-label="versions">
+<a href="http://incubator.apache.org/">
+<img src= "{{pathto('_static/'+ 'apache.jpg' , 1) }}">  
+</a>
+ 
+  <span class="rst-current-version" data-toggle="rst-current-version">
+    <span class="fa fa-book"> incubator-singa </span>
+    v: {{ version }}
+    <span class="fa fa-caret-down"></span>
+  </span>
+  <div class="rst-other-versions">
+    <dl>
+      <dt>Languages</dt>
+      <dd><a href="{{pathto(''+ '../index.html' , 1) }}">English</a></dd>
+      <dd><a href="">\u4e2d\u6587</a></dd>	  
+	  <!--dd><a href="/jp/latest/">\u65e5\u672c\u8a9e</a></dd>
+	  <dd><a href="/kr/latest/">\ud55c\uad6d\uc5b4</a></dd>
+	  <dd><a href="/it/latest/">Italiano</a></dd>
+	  <dd><a href="/ar/latest/">\u0627\u0644\u0639\u0631\u0628\u064a\u0629</a></dd-->
+    </dl>
+    <dl>
+      <dt>Versions</dt>
+      <dd><a href="/{{ language }}/latest/">latest</a></dd>
+      <dd><a href="/{{ language }}/0.3.0/">v0.3.0</a></dd>
+    </dl>
+  </div>
+</div>
+
+ <a href="https://github.com/apache/incubator-singa">
+    <img style="position: absolute; top: 0; right: 0; border: 0; z-index: 10000;"
+        src="https://s3.amazonaws.com/github/ribbons/forkme_right_orange_ff7600.png"
+        alt="Fork me on GitHub">
+</a>
+
+{{ super() }}
+{% endblock %}

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/31ae6bd4/doc/zh/conf.py
----------------------------------------------------------------------
diff --git a/doc/zh/conf.py b/doc/zh/conf.py
new file mode 100755
index 0000000..332a0d1
--- /dev/null
+++ b/doc/zh/conf.py
@@ -0,0 +1,339 @@
+# -*- coding: utf-8 -*-
+#
+# incubator-singa documentation build configuration file, created by
+# sphinx-quickstart on Sat Jul  9 20:36:57 2016.
+#
+# This file is execfile()d with the current directory set to its
+# containing dir.
+#
+# Note that not all possible configuration values are present in this
+# autogenerated file.
+#
+# All configuration values have a default; values that are commented out
+# serve to show the default.
+
+# If extensions (or modules to document with autodoc) are in another directory,
+# add these directories to sys.path here. If the directory is relative to the
+# documentation root, use os.path.abspath to make it absolute, like shown here.
+#
+import os
+import sys
+sys.path.insert(0, os.path.abspath('.'))
+sys.path.insert(1, os.path.abspath('../build/python'))
+
+# -- General configuration ------------------------------------------------
+from recommonmark.parser import CommonMarkParser
+
+source_parsers = {
+    '.md': CommonMarkParser,
+}
+
+# If your documentation needs a minimal Sphinx version, state it here.
+#
+# needs_sphinx = '1.0'
+
+# Add any Sphinx extension module names here, as strings. They can be
+# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
+# ones.
+extensions = ['sphinx.ext.autodoc', 'sphinx.ext.napoleon']
+napoleon_google_docstring = True
+
+# Add any paths that contain templates here, relative to this directory.
+templates_path = ['_templates']
+
+# The suffix(es) of source filenames.
+# You can specify multiple suffix as a list of string:
+#
+# source_suffix = ['.rst', '.md']
+source_suffix = ['.rst', '.md']
+
+# The encoding of source files.
+#
+source_encoding = 'utf-8-sig'
+
+# The master toctree document.
+master_doc = 'index'
+
+# General information about the project.
+project = u'incubator-singa'
+copyright = u'2016 The Apache Software Foundation. All rights reserved. Apache Singa, Apache, the Apache feather logo, and the Apache Singa project logos are trademarks of The Apache Software Foundation. All other marks mentioned may be trademarks or registered trademarks of their respective owners.'
+author = u'moaz'
+
+# The version info for the project you're documenting, acts as replacement for
+# |version| and |release|, also used in various other places throughout the
+# built documents.
+#
+# The short X.Y version.
+version = u'1.0.0'
+# The full version, including alpha/beta/rc tags.
+release = u'1.0.0'
+
+# The language for content autogenerated by Sphinx. Refer to documentation
+# for a list of supported languages.
+#
+# This is also used if you do content translation via gettext catalogs.
+# Usually you set "language" from the command line for these cases.
+language = None
+
+# There are two options for replacing |today|: either, you set today to some
+# non-false value, then it is used:
+#
+# today = ''
+#
+# Else, today_fmt is used as the format for a strftime call.
+#
+# today_fmt = '%B %d, %Y'
+
+# List of patterns, relative to source directory, that match files and
+# directories to ignore when looking for source files.
+# This patterns also effect to html_static_path and html_extra_path
+exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
+
+# The reST default role (used for this markup: `text`) to use for all
+# documents.
+#
+# default_role = None
+
+# If true, '()' will be appended to :func: etc. cross-reference text.
+#
+# add_function_parentheses = True
+
+# If true, the current module name will be prepended to all description
+# unit titles (such as .. function::).
+#
+# add_module_names = True
+
+# If true, sectionauthor and moduleauthor directives will be shown in the
+# output. They are ignored by default.
+#
+# show_authors = False
+
+# The name of the Pygments (syntax highlighting) style to use.
+pygments_style = 'sphinx'
+
+# A list of ignored prefixes for module index sorting.
+# modindex_common_prefix = []
+
+# If true, keep warnings as "system message" paragraphs in the built documents.
+# keep_warnings = False
+
+# If true, `todo` and `todoList` produce output, else they produce nothing.
+todo_include_todos = False
+
+
+# -- Options for HTML output ----------------------------------------------
+
+# The theme to use for HTML and HTML Help pages.  See the documentation for
+# a list of builtin themes.
+#
+html_theme = 'sphinx_rtd_theme'
+
+# Theme options are theme-specific and customize the look and feel of a theme
+# further.  For a list of options available for each theme, see the
+# documentation.
+#
+# html_theme_options = {}
+
+# Add any paths that contain custom themes here, relative to this directory.
+# html_theme_path = []
+
+# The name for this set of Sphinx documents.
+# "<project> v<release> documentation" by default.
+#
+# html_title = u'Singa v1.0.0'
+
+# A shorter title for the navigation bar.  Default is the same as html_title.
+#
+# html_short_title = None
+
+# The name of an image file (relative to this directory) to place at the top
+# of the sidebar.
+#
+html_logo = 'image/singa.png'
+
+# The name of an image file (relative to this directory) to use as a favicon of
+# the docs.  This file should be a Windows icon file (.ico) being 16x16 or 32x32
+# pixels large.
+#
+# html_favicon = None
+
+# Add any paths that contain custom static files (such as style sheets) here,
+# relative to this directory. They are copied after the builtin static files,
+# so a file named "default.css" will overwrite the builtin "default.css".
+html_static_path = ['../_static']
+
+# Add any extra paths that contain custom files (such as robots.txt or
+# .htaccess) here, relative to this directory. These files are copied
+# directly to the root of the documentation.
+#
+# html_extra_path = []
+
+# If not None, a 'Last updated on:' timestamp is inserted at every page
+# bottom, using the given strftime format.
+# The empty string is equivalent to '%b %d, %Y'.
+#
+# html_last_updated_fmt = None
+
+# If true, SmartyPants will be used to convert quotes and dashes to
+# typographically correct entities.
+#
+# html_use_smartypants = True
+
+# Custom sidebar templates, maps document names to template names.
+#
+# html_sidebars = {}
+
+# Additional templates that should be rendered to pages, maps page names to
+# template names.
+#
+# html_additional_pages = {}
+
+# If false, no module index is generated.
+#
+# html_domain_indices = True
+
+# If false, no index is generated.
+#
+# html_use_index = True
+
+# If true, the index is split into individual pages for each letter.
+#
+# html_split_index = False
+
+# If true, links to the reST sources are added to the pages.
+#
+html_show_sourcelink = False
+
+# If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
+#
+# html_show_sphinx = True
+
+# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
+#
+# html_show_copyright = True
+
+# If true, an OpenSearch description file will be output, and all pages will
+# contain a <link> tag referring to it.  The value of this option must be the
+# base URL from which the finished HTML is served.
+#
+# html_use_opensearch = ''
+
+# This is the file name suffix for HTML files (e.g. ".xhtml").
+# html_file_suffix = None
+
+# Language to be used for generating the HTML full-text search index.
+# Sphinx supports the following languages:
+#   'da', 'de', 'en', 'es', 'fi', 'fr', 'hu', 'it', 'ja'
+#   'nl', 'no', 'pt', 'ro', 'ru', 'sv', 'tr', 'zh'
+#
+# html_search_language = 'en'
+
+# A dictionary with options for the search language support, empty by default.
+# 'ja' uses this config value.
+# 'zh' user can custom change `jieba` dictionary path.
+#
+# html_search_options = {'type': 'default'}
+
+# The name of a javascript file (relative to the configuration directory) that
+# implements a search results scorer. If empty, the default will be used.
+#
+# html_search_scorer = 'scorer.js'
+
+# Output file base name for HTML help builder.
+htmlhelp_basename = 'Singadoc'
+
+# -- Options for LaTeX output ---------------------------------------------
+
+latex_elements = {
+     # The paper size ('letterpaper' or 'a4paper').
+     #
+     # 'papersize': 'letterpaper',
+
+     # The font size ('10pt', '11pt' or '12pt').
+     #
+     # 'pointsize': '10pt',
+
+     # Additional stuff for the LaTeX preamble.
+     #
+     # 'preamble': '',
+
+     # Latex figure (float) alignment
+     #
+     # 'figure_align': 'htbp',
+}
+
+# Grouping the document tree into LaTeX files. List of tuples
+# (source start file, target name, title,
+#  author, documentclass [howto, manual, or own class]).
+latex_documents = [
+    (master_doc, 'incubator-singa.tex', u'incubator-singa Documentation',
+     u'moaz', 'manual'),
+]
+
+# The name of an image file (relative to this directory) to place at the top of
+# the title page.
+#
+# latex_logo = None
+
+# For "manual" documents, if this is true, then toplevel headings are parts,
+# not chapters.
+#
+# latex_use_parts = False
+
+# If true, show page references after internal links.
+#
+# latex_show_pagerefs = False
+
+# If true, show URL addresses after external links.
+#
+# latex_show_urls = False
+
+# Documents to append as an appendix to all manuals.
+#
+# latex_appendices = []
+
+# If false, no module index is generated.
+#
+# latex_domain_indices = True
+
+
+# -- Options for manual page output ---------------------------------------
+
+# One entry per manual page. List of tuples
+# (source start file, name, description, authors, manual section).
+man_pages = [
+    (master_doc, 'incubator-singa', u'incubator-singa Documentation',
+     [author], 1)
+]
+
+# If true, show URL addresses after external links.
+#
+# man_show_urls = False
+
+
+# -- Options for Texinfo output -------------------------------------------
+
+# Grouping the document tree into Texinfo files. List of tuples
+# (source start file, target name, title, author,
+#  dir menu entry, description, category)
+texinfo_documents = [
+    (master_doc, 'incubator-singa', u'incubator-singa Documentation',
+     author, 'incubator-singa', 'One line description of project.',
+     'Miscellaneous'),
+]
+
+# Documents to append as an appendix to all manuals.
+#
+# texinfo_appendices = []
+
+# If false, no module index is generated.
+#
+# texinfo_domain_indices = True
+
+# How to display URL addresses: 'footnote', 'no', or 'inline'.
+#
+# texinfo_show_urls = 'footnote'
+
+# If true, do not generate a @detailmenu in the "Top" node's menu.
+#
+# texinfo_no_detailmenu = False

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/31ae6bd4/doc/zh/index.md
----------------------------------------------------------------------
diff --git a/doc/zh/index.md b/doc/zh/index.md
new file mode 100644
index 0000000..4b49d5f
--- /dev/null
+++ b/doc/zh/index.md
@@ -0,0 +1,9 @@
+SINGA 中文文档
+==============
+
+.. toctree::
+
+   overview
+   installation_source
+   programming-guide
+

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/31ae6bd4/examples/index.rst
----------------------------------------------------------------------
diff --git a/examples/index.rst b/examples/index.rst
index d6faf5d..3a9c0f0 100644
--- a/examples/index.rst
+++ b/examples/index.rst
@@ -1,3 +1,6 @@
+Examples
+--------
+
 .. toctree::
 
    char-rnn/README


[16/22] incubator-singa git commit: SINGA-237 New documentation files for SINGA v1.0

Posted by wa...@apache.org.
SINGA-237 New documentation files for SINGA v1.0

1. copy the 'examples' folder into docs/ to generate html files using the README.md files
2. add software_stack.md to describe the major data structures of v1.0
3. add device.rst to introduce the Device APIs


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/e963363a
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/e963363a
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/e963363a

Branch: refs/heads/dev
Commit: e963363a6d99825d8f4472130559814347845194
Parents: 6b2ff3c
Author: Wei Wang <wa...@comp.nus.edu.sg>
Authored: Thu Aug 11 23:13:12 2016 +0800
Committer: Wei Wang <wa...@comp.nus.edu.sg>
Committed: Mon Aug 15 21:02:47 2016 +0800

----------------------------------------------------------------------
 doc/Makefile                      |   1 +
 doc/_static/images/singav1-sw.png | Bin 0 -> 24326 bytes
 doc/conf.py                       |  18 +++---
 doc/docs.rst                      |   6 +-
 doc/docs/device.rst               |  38 +++++++++++++
 doc/docs/examples.rst             |   6 ++
 doc/docs/index.rst                |  16 ++----
 doc/docs/jp/index.md              |  23 --------
 doc/docs/kr/index.md              |  23 --------
 doc/docs/software_stack.md        |  99 +++++++++++++++++++++++++++++++++
 doc/docs/tensor.rst               |  54 ++++++++++++++++++
 doc/docs/zh/index.md              |  10 ++--
 doc/index.rst                     |  28 +++++-----
 examples/index.rst                |   6 ++
 src/python/singa/device.py        |  31 +++++++++++
 src/python/singa/tensor.py        |  49 +++++++++++++---
 16 files changed, 311 insertions(+), 97 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/e963363a/doc/Makefile
----------------------------------------------------------------------
diff --git a/doc/Makefile b/doc/Makefile
index 62a2236..c6eddf1 100644
--- a/doc/Makefile
+++ b/doc/Makefile
@@ -50,6 +50,7 @@ clean:
 
 .PHONY: html
 html:
+	cp -rf ../examples docs/
 	$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
 	@echo
 	@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/e963363a/doc/_static/images/singav1-sw.png
----------------------------------------------------------------------
diff --git a/doc/_static/images/singav1-sw.png b/doc/_static/images/singav1-sw.png
new file mode 100644
index 0000000..e443c6e
Binary files /dev/null and b/doc/_static/images/singav1-sw.png differ

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/e963363a/doc/conf.py
----------------------------------------------------------------------
diff --git a/doc/conf.py b/doc/conf.py
index 86dc031..9d4480e 100755
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -16,9 +16,10 @@
 # add these directories to sys.path here. If the directory is relative to the
 # documentation root, use os.path.abspath to make it absolute, like shown here.
 #
-# import os
-# import sys
-# sys.path.insert(0, os.path.abspath('.'))
+import os
+import sys
+sys.path.insert(0, os.path.abspath('.'))
+sys.path.insert(1, os.path.abspath('../build/python'))
 
 # -- General configuration ------------------------------------------------
 from recommonmark.parser import CommonMarkParser
@@ -34,9 +35,8 @@ source_parsers = {
 # Add any Sphinx extension module names here, as strings. They can be
 # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
 # ones.
-extensions = [
-   
-]
+extensions = ['sphinx.ext.autodoc', 'sphinx.ext.napoleon']
+napoleon_google_docstring = True
 
 # Add any paths that contain templates here, relative to this directory.
 templates_path = ['_templates']
@@ -49,7 +49,7 @@ source_suffix = ['.rst', '.md']
 
 # The encoding of source files.
 #
-# source_encoding = 'utf-8-sig'
+source_encoding = 'utf-8-sig'
 
 # The master toctree document.
 master_doc = 'index'
@@ -149,7 +149,7 @@ html_theme = 'sphinx_rtd_theme'
 # The name of an image file (relative to this directory) to place at the top
 # of the sidebar.
 #
-html_logo = '/singa.png'
+html_logo = 'image/singa.png'
 
 # The name of an image file (relative to this directory) to use as a favicon of
 # the docs.  This file should be a Windows icon file (.ico) being 16x16 or 32x32
@@ -202,7 +202,7 @@ html_static_path = ['_static']
 
 # If true, links to the reST sources are added to the pages.
 #
-html_show_sourcelink = False
+# html_show_sourcelink = True
 
 # If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
 #

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/e963363a/doc/docs.rst
----------------------------------------------------------------------
diff --git a/doc/docs.rst b/doc/docs.rst
index 2ebea60..400b12a 100644
--- a/doc/docs.rst
+++ b/doc/docs.rst
@@ -2,7 +2,5 @@ Documentation
 =============
 
 .. toctree::
-	docs/index
- 	docs/zh/index
-	docs/jp/index
-	docs/kr/index
+   docs/index
+   docs/zh/index

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/e963363a/doc/docs/device.rst
----------------------------------------------------------------------
diff --git a/doc/docs/device.rst b/doc/docs/device.rst
new file mode 100644
index 0000000..e79d87a
--- /dev/null
+++ b/doc/docs/device.rst
@@ -0,0 +1,38 @@
+Device
+=======
+
+
+The Device abstraction represents any hardware device with memory and computation units.
+All [Tensor operations](tensor.html) are scheduled by the resident device for execution.
+Tensor memory is also managed by the device's memory manager. Therefore, optimizations
+of memory and execution are implemented in the Device class.
+
+Specific devices
+----------------
+Currently, SINGA has three Device implementations,
+
+1. CudaGPU for an Nvidia GPU card which runs Cuda code
+2. CppCPU for a CPU which runs Cpp code
+3. OpenclGPU for a GPU card which runs OpenCL code
+
+
+Python API
+----------
+
+.. automodule:: singa.device
+   :members: create_cuda_gpus, create_cuda_gpus_on, get_default_device
+
+
+The following code provides examples of creating devices,
+
+.. code:: python
+
+   from singa import device
+   cuda = device.create_cuda_gpu_on(0)  # use GPU card of ID 0
+   host = device.get_default_device()  # get the default host device (a CppCPU)
+   ary1 = device.create_cuda_gpus(2)  # create 2 devices, starting from ID 0
+   ary2 = device.create_cuda_gpus([0,2])  # create 2 devices on ID 0 and 2
+
+
+CPP API
+---------

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/e963363a/doc/docs/examples.rst
----------------------------------------------------------------------
diff --git a/doc/docs/examples.rst b/doc/docs/examples.rst
new file mode 100644
index 0000000..b0b2af8
--- /dev/null
+++ b/doc/docs/examples.rst
@@ -0,0 +1,6 @@
+Examples
+========
+
+.. toctree::
+
+   examples/index

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/e963363a/doc/docs/index.rst
----------------------------------------------------------------------
diff --git a/doc/docs/index.rst b/doc/docs/index.rst
index a6a1b49..2f6352e 100644
--- a/doc/docs/index.rst
+++ b/doc/docs/index.rst
@@ -2,15 +2,9 @@ English
 =======
 
 .. toctree::
-	overview
-        installation
-	quick-start
-        programming-guide
-        distributed-training
-        data
-        checkpoint
-        python
-        test
-        gpu
-        examples
 
+   installation
+   software_stack
+   device
+   tensor
+   examples

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/e963363a/doc/docs/jp/index.md
----------------------------------------------------------------------
diff --git a/doc/docs/jp/index.md b/doc/docs/jp/index.md
deleted file mode 100644
index 6679198..0000000
--- a/doc/docs/jp/index.md
+++ /dev/null
@@ -1,23 +0,0 @@
-# Latest Documentation
-
----
-
-* [Introduction](overview.html)
-* [Installation](installation.html)
-* [Quick Start](quick-start.html)
-* [Programming Guide](programming-guide.html)
-    * [NeuralNet](neural-net.html)
-        * [Layer](layer.html)
-        * [Param](param.html)
-    * [TrainOneBatch](train-one-batch.html)
-    * [Updater](updater.html)
-* [Distributed Training](distributed-training.html)
-* [Data Preparation](data.html)
-* [Checkpoint and Resume](checkpoint.html)
-* [Performance Test and Feature Extraction](test.html)
-* [Examples](examples.html)
-    * Feed-forward models
-        * [CNN](cnn.html)
-        * [MLP](mlp.html)
-    * [RBM + Auto-encoder](rbm.html)
-    * [RNN](rnn.html)

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/e963363a/doc/docs/kr/index.md
----------------------------------------------------------------------
diff --git a/doc/docs/kr/index.md b/doc/docs/kr/index.md
deleted file mode 100644
index 990d5d9..0000000
--- a/doc/docs/kr/index.md
+++ /dev/null
@@ -1,23 +0,0 @@
-# Latest Documentation
-
----
-
-* [Overview](overview.html)
-* [Installation](installation.html)
-* [Quick Start](quick-start.html)
-* [Programming Guide](programming-guide.html)
-    * [NeuralNet](neural-net.html)
-        * [Layer](layer.html)
-        * [Param](param.html)
-    * [TrainOneBatch](train-one-batch.html)
-    * [Updater](updater.html)
-* [Distributed Training](distributed-training.html)
-* [Data Preparation](data.html)
-* [Checkpoint and Resume](checkpoint.html)
-* [Performance Test and Feature Extraction](test.html)
-* [Examples](examples.html)
-    * Feed-forward models
-        * [CNN](cnn.html)
-        * [MLP](mlp.html)
-    * [RBM + Auto-encoder](rbm.html)
-    * [RNN](rnn.html)

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/e963363a/doc/docs/software_stack.md
----------------------------------------------------------------------
diff --git a/doc/docs/software_stack.md b/doc/docs/software_stack.md
new file mode 100644
index 0000000..c60b6a5
--- /dev/null
+++ b/doc/docs/software_stack.md
@@ -0,0 +1,99 @@
+# Software Stack
+
+SINGA's software stack includes three major components, namely, core, IO and
+model. Figure 1 illustrates these components together with the hardware.
+The core component provides memory management and tensor operations;
+IO has classes for reading (and writing) data from (to) disk and network; the
+model component provides data structures and algorithms for machine learning models,
+e.g., layers for neural network models, and optimizers/initializers/metrics/losses for
+general machine learning models.
+
+
+<img src="../_static/images/singav1-sw.png" align="center" width="500px"/>
+<br/>
+<span><strong>Figure 1 - SINGA V1 software stack.</strong></span>
+
+## Core
+
+[Tensor](tensor.html) and [Device](device.html) are two core abstractions in SINGA. The Tensor class represents a
+multi-dimensional array, which stores model variables and provides linear algebra
+operations for machine learning
+algorithms, including matrix multiplication and random functions. Each tensor
+instance (i.e. a tensor) is allocated on a Device instance.
+Each Device instance (i.e. a device) is created against one hardware device,
+e.g. a GPU card or a CPU core. Devices manage the memory of tensors and execute
+tensor operations on their execution units, e.g. CPU threads or CUDA streams.
+
+Depending on the hardware and the programming language, SINGA has implemented
+the following specific device classes:
+
+* **CudaGPU** represents an Nvidia GPU card. The execution units are the CUDA streams.
+* **CppCPU** represents a normal CPU. The execution units are the CPU threads.
+* **OpenclGPU** represents a normal GPU card from either Nvidia or AMD.
+  The execution units are the CommandQueues. Given that OpenCL is compatible with
+  many hardware devices, e.g. FPGA and ARM, the OpenclGPU has the potential to be
+  extended for other devices.
+
+Different types of devices use different programming languages to write the kernel
+functions for tensor operations,
+
+* CppMath (tensor_math_cpp.h) implements the tensor operations using Cpp for CppCPU
+* CudaMath (tensor_math_cuda.h) implements the tensor operations using CUDA for CudaGPU
+* OpenclMath (tensor_math_opencl.h) implements the tensor operations using OpenCL for OpenclGPU
+
+In addition, different types of data, such as float32 and float16, could be supported by adding
+the corresponding tensor functions.
+
+Typically, users would create a device instance and pass it to create multiple
+tensor instances. When users call the Tensor functions, these functions invoke
+the corresponding implementation (CppMath/CudaMath/OpenclMath) automatically. In
+other words, the implementation of Tensor operations is transparent to users.
+
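+The following sketch illustrates this workflow, assuming the Python package is
+importable; the functions used here are documented on the [Device](device.html)
+and [Tensor](tensor.html) pages:
+
+```python
+from singa import device, tensor
+
+gpu = device.create_cuda_gpu()   # create a device instance
+a = tensor.Tensor((2, 3), gpu)   # allocate a tensor on that device
+a.set_value(1.0)
+b = tensor.Tensor((2, 3), gpu)
+b.gaussian(0, 0.1)               # filled using the device's random functions
+c = a + b                        # dispatched to CudaMath automatically
+```
+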
+Most machine learning algorithms could be expressed using (dense or sparse) tensors.
+Therefore, with the Tensor abstraction, SINGA would be able to run a wide range of models,
+including deep learning models and other traditional machine learning models.
+
+The Tensor and Device abstractions are extensible to support a wide range of hardware devices
+using different programming languages. A new hardware device would be supported by
+adding a new Device subclass and the corresponding implementation of the Tensor
+operations (xxxMath).
+
+Optimizations in terms of speed and memory could be implemented by Device, which
+manages both operation execution and memory malloc/free. More optimization details
+would be described in the [Device page](device.html).
+
+
+## Model
+
+On top of the Tensor and Device abstractions, SINGA provides some higher level
+classes for machine learning modules, listed below; a short usage sketch follows the list.
+
+* [Layer](layer.html) and its subclasses are specific to neural networks. Every layer provides
+  functions for forward propagating features and backward propagating gradients w.r.t. the training loss functions.
+  They wrap the complex layer operations so that users can easily create neural nets
+  by connecting a set of layers.
+
+* [Initializer](initializer.html) and its subclasses provide various methods for initializing
+  model parameters (stored in Tensor instances), e.g., following uniform or Gaussian distributions.
+
+* [Loss](loss.html) and its subclasses define the training objective loss functions.
+  Both the function for computing the loss value and the function for computing the gradient of the prediction w.r.t. the
+  objective loss are implemented. Example loss functions include squared error and cross entropy.
+
+* [Metric](metric.html) and its subclasses provide functions to measure the
+  performance of the model, e.g., the accuracy.
+
+* [Optimizer](optimizer.html) and its subclasses implement the methods for updating
+  model parameter values using parameter gradients, including SGD, AdaGrad, RMSProp etc.
+
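+A hedged sketch of how these modules compose in a single training step (the
+names and signatures follow the char-rnn and cifar10 examples; the shapes and
+the dummy labels are illustrative only):
+
+```python
+import numpy as np
+from singa import layer, loss, optimizer, tensor
+from singa.proto import model_pb2
+
+dense = layer.Dense('dense', 10, input_sample_shape=(100,))
+lossfun = loss.SoftmaxCrossEntropy()
+opt = optimizer.SGD()
+
+x = tensor.Tensor((16, 100))   # a mini-batch of 16 samples
+x.gaussian(0, 0.1)
+y = tensor.from_numpy(np.zeros((16,), dtype=np.float32))  # dummy labels
+
+out = dense.forward(model_pb2.kTrain, x)           # forward features
+l = lossfun.forward(model_pb2.kTrain, out, y)      # loss value
+grad = lossfun.backward()                          # gradient w.r.t. out
+_, (gw, gb) = dense.backward(model_pb2.kTrain, grad)
+w, b = dense.param_values()
+opt.apply_with_lr(0, 0.01, gw, w, 'w')             # epoch 0, lr 0.01
+```
+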
+
+## IO
+
+The IO module consists of classes for data loading, data preprocessing and message passing.
+
+* Reader and its subclasses load string records from disk files
+* Writer and its subclasses write string records to disk files
+* Encoder and its subclasses encode Tensor instances into string records
+* Decoder and its subclasses decode string records into Tensor instances
+* Endpoint represents a communication endpoint which provides functions for passing messages between endpoints.
+* Message represents communication messages between Endpoint instances. It carries both meta data and payload.

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/e963363a/doc/docs/tensor.rst
----------------------------------------------------------------------
diff --git a/doc/docs/tensor.rst b/doc/docs/tensor.rst
new file mode 100644
index 0000000..87d26ea
--- /dev/null
+++ b/doc/docs/tensor.rst
@@ -0,0 +1,54 @@
+Tensor
+========
+
+Each Tensor instance is a multi-dimensional array allocated on a specific
+Device instance. Tensor instances store variables and provide
+linear algebra operations over different types of hardware devices without users
+being aware of the underlying implementation. Note that, except for copy functions,
+users need to make sure the tensor operands are allocated on the same device.
+
+
+Tensor implementation
+---------------------
+
+SINGA has three different sets of implementations of Tensor functions, one for each
+type of Device.
+
+* 'tensor_math_cpp.h' implements operations using Cpp (with CBLAS) for CppCPU devices.
+* 'tensor_math_cuda.h' implements operations using Cuda (with cuBLAS) for CudaGPU devices.
+* 'tensor_math_opencl.h' implements operations using OpenCL for OpenclGPU devices.
+
+Python API
+----------
+
+There are two sets of tensor functions:
+
+1. Tensor member functions, which change the internal state of the Tensor instance.
+2. tensor module functions, which accept Tensor instances as arguments and return
+   new Tensor instances.
+
+
+Create Tensor instances
+~~~~~~~~~~~~~~~~~~~~~~~
+
+.. autoclass:: singa.tensor.Tensor
+
+
+Tensor instances can be constructed from a numpy array,
+
+.. automodule:: singa.tensor
+   :members: from_numpy
+
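+For example (a short sketch; ``to_numpy`` is the inverse conversion provided
+by the tensor module),
+
+.. code:: python
+
+   import numpy as np
+   from singa import tensor
+
+   x = tensor.from_numpy(np.array([[1, 2], [3, 4]], dtype=np.float32))
+   npy = tensor.to_numpy(x)   # copy the values back into a numpy array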
+
+Set Tensor values
+~~~~~~~~~~~~~~~~~
+
+
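+Member functions such as ``set_value`` and ``copy_data`` change the state of
+an existing tensor in place. A minimal sketch using the member functions of
+``singa.tensor.Tensor``:
+
+.. code:: python
+
+   t = tensor.Tensor((3, 5))
+   t.set_value(0.0)       # assign 0.0 to every element
+   t2 = t.clone()
+   t2.copy_data(t)        # copy values from another tensor on the same device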
+
+
+
+
+
+
+
+
+

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/e963363a/doc/docs/zh/index.md
----------------------------------------------------------------------
diff --git a/doc/docs/zh/index.md b/doc/docs/zh/index.md
index c44a2cf..4b49d5f 100644
--- a/doc/docs/zh/index.md
+++ b/doc/docs/zh/index.md
@@ -1,7 +1,9 @@
 SINGA Chinese Documentation
----
+==============================
 
-* [Introduction](overview.html)
-* [Installation](installation_source.html)
-* [Programming Guide](programming-guide.html)
+.. toctree::
+
+   overview
+   installation_source
+   programming-guide
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/e963363a/doc/index.rst
----------------------------------------------------------------------
diff --git a/doc/index.rst b/doc/index.rst
index ec727b1..50c65d7 100755
--- a/doc/index.rst
+++ b/doc/index.rst
@@ -7,9 +7,9 @@ Welcome to Apache Singa
 =======================
 
 Recent News
-===========
+-----------
 
-* The **third release** is now available, 20 April, 2016. `Download SINGA v0.3.0 <downloads.html>`_ 
+* The **third release** is now available, 20 April, 2016. `Download SINGA v0.3.0 <downloads.html>`_
 
 * The **second release** is now available, 14 Jan, 2016. `Download SINGA v0.2.0 <downloads.html>`_.
 
@@ -34,7 +34,7 @@ Recent News
 * SINGA has been accepted by `Apache Incubator <http://incubator.apache.org/>`_, 17 March, 2015.
 
 Getting Started
-===============
+---------------
 * The `Introduction <docs/overview.html>`_ page gives an overview of SINGA.
 
 * The `Installation <docs/installation.html>`_ guide describes details on downloading and installing SINGA.
@@ -42,7 +42,7 @@ Getting Started
 * Please follow the `Quick Start <docs/quick-start.html>`_ guide to run simple applications on SINGA.
 
 Documentation
-=============
+-------------
 
 * Documentations are listed `here <docs.html>`_.
 
@@ -51,8 +51,8 @@ Documentation
 * Research publication list is available `here <http://www.comp.nus.edu.sg/~dbsystem/singa/research/publication/>`_.
 
 How to contribute
-=================
-  
+----------------------
+
 * Please subscribe to our development mailing list dev-subscribe@singa.incubator.apache.org.
 
 * If you find any issues using SINGA, please report it to the `Issue Tracker <https://issues.apache.org/jira/browse/singa>`_.
@@ -62,17 +62,17 @@ How to contribute
 More details on contributing to SINGA is described `here <develop/how-contribute.html>`_ .
 
 Citing SINGA
-============
+------------
 
 Please cite the following two papers if you use SINGA in your research:
 
 * B. C. Ooi, K.-L. Tan, S. Wang, W. Wang, Q. Cai, G. Chen, J. Gao, Z. Luo, A. K. H. Tung, Y. Wang, Z. Xie, M. Zhang, and K. Zheng. `SINGA: A distributed deep learning platform <http://www.comp.nus.edu.sg/~ooibc/singaopen-mm15.pdf>`_. ACM Multimedia (Open Source Software Competition) 2015 (`BibTex <http://www.comp.nus.edu.sg/~dbsystem/singa//assets/file/bib-oss.txt>`_).
 
-* W. Wang, G. Chen, T. T. A. Dinh, B. C. Ooi, K.-L.Tan, J. Gao, and S. Wang. `SINGA: putting deep learning in the hands of multimedia users <http://www.comp.nus.edu.sg/~ooibc/singa-mm15.pdf>`_. ACM Multimedia 2015 (`BibTex <http://www.comp.nus.edu.sg/~dbsystem/singa//assets/file/bib-singa.txt>`_, `Slides <files/mm2015.ppt>`_). 
+* W. Wang, G. Chen, T. T. A. Dinh, B. C. Ooi, K.-L.Tan, J. Gao, and S. Wang. `SINGA: putting deep learning in the hands of multimedia users <http://www.comp.nus.edu.sg/~ooibc/singa-mm15.pdf>`_. ACM Multimedia 2015 (`BibTex <http://www.comp.nus.edu.sg/~dbsystem/singa//assets/file/bib-singa.txt>`_, `Slides <files/mm2015.ppt>`_).
 
 .. toctree::
    :hidden:
-   
+
    downloads
    docs
 
@@ -85,25 +85,25 @@ Please cite the following two papers if you use SINGA in your research:
    develop/how-contribute
    develop/contribute-code
    develop/contribute-docs
-   
+
 .. toctree::
    :hidden:
    :maxdepth: 2
    :caption: Community
-   
+
    community/source-repository
    community/mail-lists
    community/issue-tracking
    community/team-list
-   
+
 
 
 License
-=======
+----------
 SINGA is released under `Apache License Version 2.0 <http://www.apache.org/licenses/LICENSE-2.0>`_.
 
 Disclaimers
-===========
+-----------
 
 Apache SINGA is an effort undergoing incubation at The Apache Software Foundation (ASF), sponsored by the Apache Incubator. Incubation is required of all newly accepted projects until a further review indicates that the infrastructure, communications, and decision making process have stabilized in a manner consistent with other successful ASF projects. While incubation status is not necessarily a reflection of the completeness or stability of the code, it does indicate that the project has yet to be fully endorsed by the ASF.
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/e963363a/examples/index.rst
----------------------------------------------------------------------
diff --git a/examples/index.rst b/examples/index.rst
new file mode 100644
index 0000000..d6faf5d
--- /dev/null
+++ b/examples/index.rst
@@ -0,0 +1,6 @@
+.. toctree::
+
+   char-rnn/README
+   imagenet/README
+
+

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/e963363a/src/python/singa/device.py
----------------------------------------------------------------------
diff --git a/src/python/singa/device.py b/src/python/singa/device.py
index aff3587..eff6783 100644
--- a/src/python/singa/device.py
+++ b/src/python/singa/device.py
@@ -68,21 +68,52 @@ def device_query(id, verbose=False):
 
 
 def create_cuda_gpus(num):
+    '''Create a list of CudaGPU devices.
+
+    Args:
+        num (int): the number of devices to create.
+    Returns:
+        a list of swig converted CudaGPU devices.
+    '''
+
     return singa.Platform.CreateCudaGPUs(num)
 
 
 def create_cuda_gpu():
+    '''Create a single CudaGPU device.
+
+    Returns:
+        a swig converted CudaGPU device.
+    '''
+
     return singa.Platform.CreateCudaGPUs(1)[0]
 
 
 def create_cuda_gpus_on(device_ids):
+    '''Create a list of CudaGPU devices.
+
+    Args:
+        device_ids (list): a list of GPU card IDs.
+
+    Returns:
+        a list of swig converted CudaGPU devices.
+    '''
     return singa.Platform.CreateCudaGPUsOn(device_ids)
 
 
 def create_cuda_gpu_on(device_id):
+    '''Create a CudaGPU device on the given device ID.
+
+    Args:
+        device_id (int): GPU card ID.
+
+    Returns:
+        a swig converted CudaGPU device.
+    '''
     devices = create_cuda_gpus_on([device_id])
     return devices[0]
 
 
 def get_default_device():
+    '''Get the default host device which is a CppCPU device'''
     return singa.Platform.GetDefaultDevice()

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/e963363a/src/python/singa/tensor.py
----------------------------------------------------------------------
diff --git a/src/python/singa/tensor.py b/src/python/singa/tensor.py
index 6e84a4f..3645ff8 100644
--- a/src/python/singa/tensor.py
+++ b/src/python/singa/tensor.py
@@ -21,17 +21,21 @@ to call singa::Tensor and its methods
 """
 
 import numpy as np
+from functools import reduce
 from .proto import core_pb2
 from . import singa_wrap as singa
-from functools import reduce
+import device
 
 
 class Tensor(object):
-    ''' Class and member functions for singa::Tensor
-    '''
-
-    def __init__(self, shape=None, device=None, dtype=core_pb2.kFloat32):
-        ''' shape = (tuple)
+    def __init__(self, shape=None, device=None, dtype=core_pb2.kFloat32):
+        '''Create a Py Tensor, which wraps a swig-converted Tensor from the
+            C++ singa::Tensor.
+
+        Args:
+            shape (list): a list of integers for the tensor shape. If shape
+                is not specified, the created tensor is called a dummy tensor.
+            device: a Device instance created from :py:mod:`device`. If it is
+                None, then the default host device would be used.
+            dtype: data type. Currently, most operations only accept kFloat32.
         '''
         if shape is None:
             # call constructor of singa::Tensor
@@ -111,11 +115,19 @@ class Tensor(object):
         return self.singa_tensor.L1()
 
     def set_value(self, x):
+        '''Set all elements of the tensor to the given value.
+
+        Args:
+            x (float): the value assigned to every element.
+        '''
         # assert type(x) == float, 'set value only accepts float input'
         # if isinstance(x, float):
         self.singa_tensor.floatSetValue(x)
 
     def copy_data(self, t):
+        '''Copy data from another Tensor instance.
+        '''
+        assert type(t) == Tensor, 't must be a singa Tensor instance'
         self.singa_tensor.CopyData(t.singa_tensor)
 
     def clone(self):
@@ -285,16 +297,35 @@ def copy_data_to_from(dst, src, size, dst_offset=0, src_offset=0):
 
 
 def from_numpy(np_array):
+    '''Create a Tensor instance with the shape, dtype and values from the numpy
+        array.
+
+    Args:
+        np_array: the numpy array.
+
+    Returns:
+        A Tensor instance allocated on the default CppCPU device.
+    '''
     ret = Tensor(np_array.shape)
     ret.copy_from_numpy(np_array)
     return ret
 
 
 def to_numpy(t):
-    ''' this method gets the values of tensor data and
-        returns it as numpy array
-        TODO(wangwei) clone t to host
+    '''Convert the tensor into a numpy array.
+
+    Since numpy arrays are allocated on the CPU, the input Tensor instance
+    must be on the default CppCPU device.
+
+    Args:
+        t (Tensor): a Tensor on the default CppCPU device.
+
+    Returns:
+        a numpy array
     '''
+    assert t.device == device.get_default_device() or t.device is None, \
+        'Please move the tensor onto the default host device'
+
     if t.dtype == core_pb2.kFloat32:
         np_array = t.singa_tensor.floatGetValue(int(t.size()))
     elif t.dtype == core_pb2.kInt:


[08/22] incubator-singa git commit: SINGA-237 New documentation files for SINGA v1.0

Posted by wa...@apache.org.
SINGA-237 New documentation files for SINGA v1.0

Added a README file for the cifar-10 examples.
Updated the uniform and gaussian methods in initializer.py to include the
fan_in and fan_out arguments.
Reformatted some Python files.


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/cdd718ed
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/cdd718ed
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/cdd718ed

Branch: refs/heads/dev
Commit: cdd718ed946acfd829ccfd6e5b43999f990fd634
Parents: 33992c9
Author: Wei Wang <wa...@comp.nus.edu.sg>
Authored: Sun Aug 14 21:41:16 2016 +0800
Committer: Wei Wang <wa...@gmail.com>
Committed: Sun Aug 14 23:47:24 2016 +0800

----------------------------------------------------------------------
 doc/docs/examples.rst           |   6 --
 doc/docs/index.rst              |   2 +-
 doc/docs/initializer.rst        |   2 +-
 examples/char-rnn/README.md     |   2 +-
 examples/char-rnn/train.py      | 103 +++++++++++++++++++++--------------
 examples/cifar10/alexnet.py     |  48 +++++++++++++---
 examples/cifar10/predict.py     |  10 ++--
 examples/cifar10/vgg.py         |  10 +---
 examples/index.rst              |   4 ++
 src/python/singa/initializer.py |  85 ++++++++++++++---------------
 src/python/singa/optimizer.py   |   4 +-
 11 files changed, 156 insertions(+), 120 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/cdd718ed/doc/docs/examples.rst
----------------------------------------------------------------------
diff --git a/doc/docs/examples.rst b/doc/docs/examples.rst
deleted file mode 100644
index b0b2af8..0000000
--- a/doc/docs/examples.rst
+++ /dev/null
@@ -1,6 +0,0 @@
-Examples
-========
-
-.. toctree::
-
-   examples/index

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/cdd718ed/doc/docs/index.rst
----------------------------------------------------------------------
diff --git a/doc/docs/index.rst b/doc/docs/index.rst
index 2294054..11f0ebb 100644
--- a/doc/docs/index.rst
+++ b/doc/docs/index.rst
@@ -12,4 +12,4 @@ English
    loss
    metric
    optimizer
-   examples
+   examples/index

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/cdd718ed/doc/docs/initializer.rst
----------------------------------------------------------------------
diff --git a/doc/docs/initializer.rst b/doc/docs/initializer.rst
index a190702..f334497 100644
--- a/doc/docs/initializer.rst
+++ b/doc/docs/initializer.rst
@@ -5,7 +5,7 @@ Python API
 ----------
 
 .. automodule:: singa.initializer
-   :members:
+   :members: uniform, gaussian
    :member-order: bysource
 
 CPP API

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/cdd718ed/examples/char-rnn/README.md
----------------------------------------------------------------------
diff --git a/examples/char-rnn/README.md b/examples/char-rnn/README.md
index d4cfa30..f6e5edc 100644
--- a/examples/char-rnn/README.md
+++ b/examples/char-rnn/README.md
@@ -1,4 +1,4 @@
-# Train Char-RNN using SINGA
+# Train Char-RNN over plain text
 
 Recurrent neural networks (RNN) are widely used for modelling sequential data,
 e.g., natural language sentences. This example describes how to implement a RNN

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/cdd718ed/examples/char-rnn/train.py
----------------------------------------------------------------------
diff --git a/examples/char-rnn/train.py b/examples/char-rnn/train.py
index fb5e71f..1273a57 100644
--- a/examples/char-rnn/train.py
+++ b/examples/char-rnn/train.py
@@ -19,8 +19,6 @@ The model is created following https://github.com/karpathy/char-rnn
 The train file could be any text file,
 e.g., http://cs.stanford.edu/people/karpathy/char-rnn/
 '''
-import sys
-import os
 import cPickle as pickle
 import numpy as np
 import argparse
@@ -32,12 +30,12 @@ from singa import device
 from singa import tensor
 from singa import optimizer
 from singa import initializer
-from singa.proto import core_pb2
 from singa.proto import model_pb2
 from singa import utils
 
 
 class Data(object):
+
     def __init__(self, fpath, batch_size=32, seq_length=100, train_ratio=0.8):
         '''Data object for loading a plain text file.
 
@@ -49,8 +47,8 @@ class Data(object):
         self.raw_data = open(fpath, 'r').read()  # read text file
         chars = list(set(self.raw_data))
         self.vocab_size = len(chars)
-        self.char_to_idx = {ch:i for i, ch in enumerate(chars)}
-        self.idx_to_char = {i:ch for i, ch in enumerate(chars)}
+        self.char_to_idx = {ch: i for i, ch in enumerate(chars)}
+        self.idx_to_char = {i: ch for i, ch in enumerate(chars)}
         data = [self.char_to_idx[c] for c in self.raw_data]
         # seq_length + 1 for the data + label
         nsamples = len(data) / (1 + seq_length)
@@ -69,10 +67,10 @@ class Data(object):
 
 def numpy2tensors(npx, npy, dev):
     '''batch, seq, dim -- > seq, batch, dim'''
-    tmpx=np.swapaxes(npx, 0, 1)
-    tmpy=np.swapaxes(npy, 0, 1)
-    inputs=[]
-    labels=[]
+    tmpx = np.swapaxes(npx, 0, 1)
+    tmpy = np.swapaxes(npy, 0, 1)
+    inputs = []
+    labels = []
     for t in range(tmpx.shape[0]):
         x = tensor.from_numpy(tmpx[t])
         y = tensor.from_numpy(tmpy[t])
@@ -99,25 +97,36 @@ def get_lr(epoch):
     return 0.001 / float(1 << (epoch / 50))
 
 
-def train(data, max_epoch, hidden_size =100, seq_length=100, batch_size=16,
-        num_stacks=1, lr=0.001, dropout = 0.5, model_path='model.bin'):
+def train(data, max_epoch, hidden_size=100, seq_length=100, batch_size=16,
+          num_stacks=1, lr=0.001, dropout=0.5, model_path='model.bin'):
     # SGD with L2 gradient normalization
     opt = optimizer.SGD(constraint=optimizer.L2Constraint(5))
     cuda = device.create_cuda_gpu()
-    rnn = layer.LSTM(name='lstm', hidden_size=hidden_size, num_stacks=num_stacks,
-            dropout=dropout, input_sample_shape=(data.vocab_size,))
+    rnn = layer.LSTM(
+        name='lstm',
+        hidden_size=hidden_size,
+        num_stacks=num_stacks,
+        dropout=dropout,
+        input_sample_shape=(
+            data.vocab_size,
+        ))
     rnn.to_device(cuda)
     print 'created rnn'
     rnn_w = rnn.param_values()[0]
-    initializer.uniform(rnn_w, -0.08, 0.08)  # init all rnn parameters
+    rnn_w.uniform(-0.08, 0.08)  # init all rnn parameters
     print 'rnn weight l1 = %f' % (rnn_w.l1())
-    dense = layer.Dense('dense', data.vocab_size, input_sample_shape=(hidden_size,))
+    dense = layer.Dense(
+        'dense',
+        data.vocab_size,
+        input_sample_shape=(
+            hidden_size,
+        ))
     dense.to_device(cuda)
     dense_w = dense.param_values()[0]
     dense_b = dense.param_values()[1]
     print 'dense w ', dense_w.shape
     print 'dense b ', dense_b.shape
-    initializer.xavier(dense_w) # init weight matrix using Xavier
+    initializer.uniform(dense_w, dense_w.shape[0], dense_w.shape[1])
     print 'dense weight l1 = %f' % (dense_w.l1())
     dense_b.set_value(0.0)
     print 'dense b l1 = %f' % (dense_b.l1())
@@ -125,18 +134,18 @@ def train(data, max_epoch, hidden_size =100, seq_length=100, batch_size=16,
     g_dense_w = tensor.Tensor(dense_w.shape, cuda)
     g_dense_b = tensor.Tensor(dense_b.shape, cuda)
 
-    lossfun = loss.SoftmaxCrossEntropy();
+    lossfun = loss.SoftmaxCrossEntropy()
     for epoch in range(max_epoch):
         train_loss = 0
         for b in range(data.num_train_batch):
             batch = data.train_dat[b * batch_size: (b + 1) * batch_size]
             inputs, labels = convert(batch, batch_size, seq_length,
-                    data.vocab_size, cuda)
+                                     data.vocab_size, cuda)
             inputs.append(tensor.Tensor())
             inputs.append(tensor.Tensor())
 
             outputs = rnn.forward(model_pb2.kTrain, inputs)[0:-2]
-            grads=[]
+            grads = []
             batch_loss = 0
             g_dense_w.set_value(0.0)
             g_dense_b.set_value(0.0)
@@ -149,52 +158,62 @@ def train(data, max_epoch, hidden_size =100, seq_length=100, batch_size=16,
                 grads.append(grad)
                 g_dense_w += gwb[0]
                 g_dense_b += gwb[1]
-                #print output.l1(), act.l1()
-            utils.update_progress(b * 1.0 / data.num_train_batch,
-                    'training loss = %f' % (batch_loss / seq_length))
+                # print output.l1(), act.l1()
+            utils.update_progress(
+                b * 1.0 / data.num_train_batch, 'training loss = %f' %
+                (batch_loss / seq_length))
             train_loss += batch_loss
 
             grads.append(tensor.Tensor())
             grads.append(tensor.Tensor())
-            g_rnn_w=rnn.backward(model_pb2.kTrain, grads)[1][0]
+            g_rnn_w = rnn.backward(model_pb2.kTrain, grads)[1][0]
             dense_w, dense_b = dense.param_values()
             opt.apply_with_lr(epoch, get_lr(epoch), g_rnn_w, rnn_w, 'rnnw')
-            opt.apply_with_lr(epoch, get_lr(epoch), g_dense_w, dense_w, 'dense_w')
-            opt.apply_with_lr(epoch, get_lr(epoch), g_dense_b, dense_b, 'dense_b')
-        print '\nEpoch %d, train loss is %f' % (epoch,
-                train_loss / data.num_train_batch / seq_length)
+            opt.apply_with_lr(
+                epoch, get_lr(epoch),
+                g_dense_w, dense_w, 'dense_w')
+            opt.apply_with_lr(
+                epoch, get_lr(epoch),
+                g_dense_b, dense_b, 'dense_b')
+        print '\nEpoch %d, train loss is %f' % \
+            (epoch, train_loss / data.num_train_batch / seq_length)
+
         eval_loss = 0
         for b in range(data.num_test_batch):
             batch = data.val_dat[b * batch_size: (b + 1) * batch_size]
             inputs, labels = convert(batch, batch_size, seq_length,
-                    data.vocab_size, cuda)
+                                     data.vocab_size, cuda)
             inputs.append(tensor.Tensor())
             inputs.append(tensor.Tensor())
             outputs = rnn.forward(model_pb2.kEval, inputs)[0:-2]
             for output, label in zip(outputs, labels):
                 output = dense.forward(model_pb2.kEval, output)
-                eval_loss += lossfun.forward(model_pb2.kEval, output, label).l1()
-        print 'Epoch %d, evaluation loss is %f' % (epoch,
-                eval_loss / data.num_test_batch / seq_length)
+                eval_loss += lossfun.forward(model_pb2.kEval,
+                                             output, label).l1()
+        print 'Epoch %d, evaluation loss is %f' % \
+            (epoch, eval_loss / data.num_test_batch / seq_length)
 
     # checkpoint the file model
     with open(model_path, 'wb') as fd:
         print 'saving model to %s' % model_path
-        d={}
-        for name, w in zip(['rnn_w', 'dense_w', 'dense_b'], [rnn_w, dense_w, dense_b]):
+        d = {}
+        for name, w in zip(
+                ['rnn_w', 'dense_w', 'dense_b'],
+                [rnn_w, dense_w, dense_b]):
             w.to_host()
-            d[name]=tensor.to_numpy(w)
-        d['idx_to_char']=data.idx_to_char
-        d['char_to_idx']=data.char_to_idx
-        d['hidden_size']=hidden_size
-        d['num_stacks']=num_stacks
-        d['dropout']=dropout
+            d[name] = tensor.to_numpy(w)
+        d['idx_to_char'] = data.idx_to_char
+        d['char_to_idx'] = data.char_to_idx
+        d['hidden_size'] = hidden_size
+        d['num_stacks'] = num_stacks
+        d['dropout'] = dropout
 
         pickle.dump(d, fd)
 
 if __name__ == '__main__':
-    parser = argparse.ArgumentParser(description='Train multi-stack LSTM for '\
-            'modeling  character sequence from plain text files')
+    parser = argparse.ArgumentParser(
+        description='Train multi-stack LSTM for '
+        'modeling  character sequence from plain text files')
     parser.add_argument('data', type=str, help='training file')
     parser.add_argument('-b', type=int, default=32, help='batch_size')
     parser.add_argument('-l', type=int, default=64, help='sequence length')
@@ -204,4 +223,4 @@ if __name__ == '__main__':
     args = parser.parse_args()
     data = Data(args.data, batch_size=args.b, seq_length=args.l)
     train(data, args.m,  hidden_size=args.d, num_stacks=args.s,
-            seq_length=args.l, batch_size=args.b)
+          seq_length=args.l, batch_size=args.b)

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/cdd718ed/examples/cifar10/alexnet.py
----------------------------------------------------------------------
diff --git a/examples/cifar10/alexnet.py b/examples/cifar10/alexnet.py
index ddad1d5..34da95d 100644
--- a/examples/cifar10/alexnet.py
+++ b/examples/cifar10/alexnet.py
@@ -20,12 +20,8 @@ Following the same setting for hyper-parameters and data pre-processing, the fin
 validation accuracy would be about 82%.
 '''
 
-import sys
-import os
-
 # sys.path.append(os.path.join(os.path.dirname(__file__), '../../build/python'))
 from singa import layer
-from singa import initializer
 from singa import metric
 from singa import loss
 from singa import net as ffnet
@@ -40,23 +36,57 @@ def create_net(use_cpu=False):
     W1_specs = {'init': 'gaussian', 'mean': 0, 'std': 0.01}
     W2_specs = {'init': 'gaussian', 'mean': 0, 'std': 0.01, 'decay_mult': 250}
     b_specs = {'init': 'constant', 'value': 0, 'lt_mult': 2}
-    net.add(layer.Conv2D('conv1', 32, 5, 1, W_specs=W0_specs.copy(), b_specs=b_specs.copy(), pad=2, input_sample_shape=(3,32,32,)))
+    net.add(
+        layer.Conv2D(
+            'conv1',
+            32,
+            5,
+            1,
+            W_specs=W0_specs.copy(),
+            b_specs=b_specs.copy(),
+            pad=2,
+            input_sample_shape=(
+                3,
+                32,
+                32,
+            )))
     net.add(layer.MaxPooling2D('pool1', 3, 2, pad=1))
     net.add(layer.Activation('relu1'))
     net.add(layer.LRN(name='lrn1'))
-    net.add(layer.Conv2D('conv2', 32, 5, 1, W_specs=W1_specs.copy(), b_specs=b_specs.copy(), pad=2))
+    net.add(
+        layer.Conv2D(
+            'conv2',
+            32,
+            5,
+            1,
+            W_specs=W1_specs.copy(),
+            b_specs=b_specs.copy(),
+         pad=2))
     net.add(layer.Activation('relu2'))
     net.add(layer.MaxPooling2D('pool2', 3, 2,  pad=1))
     net.add(layer.LRN('lrn2'))
-    net.add(layer.Conv2D('conv3', 64, 5, 1, W_specs=W1_specs.copy(), b_specs=b_specs.copy(), pad=2))
+    net.add(
+        layer.Conv2D(
+            'conv3',
+            64,
+            5,
+            1,
+            W_specs=W1_specs.copy(),
+            b_specs=b_specs.copy(),
+         pad=2))
     net.add(layer.Activation('relu3'))
     net.add(layer.MaxPooling2D('pool3', 3, 2, pad=1))
     net.add(layer.Flatten('flat'))
-    net.add(layer.Dense('dense', 10, W_specs=W2_specs.copy(), b_specs=b_specs.copy()))
+    net.add(
+        layer.Dense(
+            'dense',
+            10,
+            W_specs=W2_specs.copy(),
+         b_specs=b_specs.copy()))
     for (p, specs) in zip(net.param_values(), net.param_specs()):
         filler = specs.filler
         if filler.type == 'gaussian':
-            initializer.gaussian(p, filler.mean, filler.std)
+            p.gaussian(filler.mean, filler.std)
         else:
             p.set_value(0)
         print specs.name, filler.type, p.l1()

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/cdd718ed/examples/cifar10/predict.py
----------------------------------------------------------------------
diff --git a/examples/cifar10/predict.py b/examples/cifar10/predict.py
index 8a9ea4e..307a610 100644
--- a/examples/cifar10/predict.py
+++ b/examples/cifar10/predict.py
@@ -16,28 +16,26 @@
 # =============================================================================
 import cPickle as pickle
 import numpy as np
-import sys
-import os
 
-#sys.path.append(os.path.join(os.path.dirname(__file__), '../../build/python'))
+# sys.path.append(os.path.join(os.path.dirname(__file__), '../../build/python'))
 
 from singa import device
 from singa import tensor
 import net as ffnet
 
 
-def predict(net, images, cuda, topk=5):
+def predict(net, images, dev, topk=5):
     '''Predict the label of each image.
 
     Args:
         net, a pretrained neural net
         images, a batch of images [batch_size, 3, 32, 32], which have been
             pre-processed
-        cuda, the cuda device
+        dev, the device on which the prediction is performed
         topk, return the topk labels for each image.
     '''
     x = tensor.from_numpy(images.astype(np.float32))
-    x.to_device(cuda)
+    x.to_device(dev)
     y = net.predict(x)
     y.to_host()
     y = tensor.to_numpy(y)

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/cdd718ed/examples/cifar10/vgg.py
----------------------------------------------------------------------
diff --git a/examples/cifar10/vgg.py b/examples/cifar10/vgg.py
index 35ef00d..e8e3602 100644
--- a/examples/cifar10/vgg.py
+++ b/examples/cifar10/vgg.py
@@ -20,11 +20,7 @@ The performance could be improved by tuning some hyper-parameters, including
 learning rate, weight decay, max_epoch, parameter initialization, etc.
 """
 
-import sys
-import os
-import math
-
-#sys.path.append(os.path.join(os.path.dirname(__file__), '../../build/python'))
+# sys.path.append(os.path.join(os.path.dirname(__file__), '../../build/python'))
 
 from singa import layer
 from singa import initializer
@@ -88,9 +84,9 @@ def create_net(use_cpu=False):
             p.uniform(0, 1)
         elif len(p.shape) > 1:
             if 'conv' in name:
-                initializer.gaussian(p, 0, math.sqrt(2.0/(9.0 * p.shape[0])))
+                p.gaussian(0, (2.0 / (3 * 3 * p.shape[0])) ** 0.5)
             else:
-                initializer.gaussian(p, 0, 0.02)
+                p.gaussian(0, 0.02)
         else:
             p.set_value(0)
         print name, p.l1()

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/cdd718ed/examples/index.rst
----------------------------------------------------------------------
diff --git a/examples/index.rst b/examples/index.rst
index d6faf5d..4bb5b49 100644
--- a/examples/index.rst
+++ b/examples/index.rst
@@ -1,5 +1,9 @@
+Examples
+========
+
 .. toctree::
 
+   cifar10/README
    char-rnn/README
    imagenet/README
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/cdd718ed/src/python/singa/initializer.py
----------------------------------------------------------------------
diff --git a/src/python/singa/initializer.py b/src/python/singa/initializer.py
index 277fd2f..fb99663 100644
--- a/src/python/singa/initializer.py
+++ b/src/python/singa/initializer.py
@@ -23,77 +23,68 @@ Example usages::
     from singa import initializer
 
     x = tensor.Tensor((3, 5))
-    initializer.xavier(x)
+    initializer.uniform(x, 3, 5) # use both fan_in and fan_out
+    initializer.uniform(x, 3, 0)  # use only fan_in
 '''
 
 import math
 
 
-'''
-TODO(wangwei) update the uniform and gaussian initializers
-
 def uniform(t, fan_in=0, fan_out=0):
-    typically, for conv layer weight: fan_in = nb_filter * kh * kw,
-    fan_out = nb_channel * kh * kw
-    for dense layer weight, fan_in = input_feature_length,
-    fan_out = output_feature_length
-    # Ref: [Bengio and Glorot 2010]: Understanding the difficulty of
+    '''Initialize the values of the input tensor following a uniform
+    distribution with specific bounds.
+
+    Args:
+        t (Tensor): the parameter tensor to initialize in place
+        fan_in(int): for the weight Tensor of a convolution layer,
+            fan_in = nb_channel * kh * kw; for a dense layer,
+            fan_in = input_feature_length
+        fan_out(int): for the convolution layer weight Tensor,
+            fan_out = nb_filter * kh * kw; for the weight Tensor of a dense
+            layer, fan_out = output_feature_length
+
+    Ref: [Bengio and Glorot 2010]: Understanding the difficulty of
     training deep feedforward neural networks.
 
-    assert fan_in >0 or fan_out > 0, \
+    '''
+    assert fan_in > 0 or fan_out > 0, \
         'fan_in and fan_out cannot be 0 at the same time'
-    avg = 1
+    avg = 2
     if fan_in * fan_out == 0:
-      avg = 2
-    x = math.sqrt(3.0f * avg / (fan_in + fan_out))
+        avg = 1
+    x = math.sqrt(3.0 * avg / (fan_in + fan_out))
     t.uniform(-x, x)
 
 
 def gaussian(t, fan_in=0, fan_out=0):
-    typically, for conv layer weight: fan_in = nb_filter * kh * kw,
-    fan_out = nb_channel * kh * kw
-    for dense layer weight, fan_in = input_feature_length,
-    fan_out = output_feature_length
+    '''Initialize the values of the input tensor following a Gaussian
+    distribution with specific std.
+
+    Args:
+        t (Tensor): the parameter tensor to initialize in place
+        fan_in(int): for the weight Tensor of a convolution layer,
+            fan_in = nb_channel * kh * kw; for a dense layer,
+            fan_in = input_feature_length
+        fan_out(int): for the convolution layer weight Tensor,
+            fan_out = nb_filter * kh * kw; for the weight Tensor of a dense
+            layer, fan_out = output_feature_length
 
     Ref Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun: Delving Deep into
     Rectifiers: Surpassing Human-Level Performance on ImageNet Classification
-
-    assert fan_in >0 or fan_out > 0, \
+    '''
+    assert fan_in > 0 or fan_out > 0, \
         'fan_in and fan_out cannot be 0 at the same time'
-    avg = 1
+    avg = 2
     if fan_in * fan_out == 0:
-      avg = 2
-    std = math.sqrt(2.0f * avg / (fan_in + fan_out))
+        avg = 1
+    std = math.sqrt(2.0 * avg / (fan_in + fan_out))
     t.gaussian(0, std)
-'''
-
-
-def uniform(t, low=0, high=1):
-    '''Initialize the parameter values following an Uniform distribution.
-
-    Args:
-        t (Tensor): the parater tensor
-        low (float): lower bound
-        high (float): higher bound
-    '''
-    t.uniform(low, high)
-
-
-def gaussian(t, mean=0, std=0.01):
-    '''Initialize the parameter values following an Gaussian distribution.
-
-    Args:
-        t (Tensor): the parater tensor
-        mean (float): mean of the distribution
-        std (float): standard variance
-    '''
-    t.gaussian(mean, std)
 
 
 def xavier(t):
     '''Initialize the matrix parameter to follow a uniform distribution from
     [-sqrt(6/(fan_in + fan_out)), sqrt(6/(fan_in + fan_out))].
 
+    Deprecated. Please use uniform()
+
     Args:
         t (Tensor): the parameter tensor
     '''
@@ -106,6 +97,8 @@ def glorot(t):
     '''Initialize the matrix parameter to follow a Gaussian distribution with
     mean = 0 and std = sqrt(2.0 / (nb_row + nb_col))
 
+    Deprecated. Please use gaussian()
+
     Args:
         t (Tensor): the parameter tensor
     '''
@@ -118,6 +111,8 @@ def msra(t):
     '''Initialize the matrix parameter to follow a Gaussian distribution with
     mean = 0, std = math.sqrt(2.0 / nb_row).
 
+    Deprecated. Please use gaussian()
+
     Ref [He, Zhang, Ren and Sun 2015]: Specifically accounts for ReLU
     nonlinearities.
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/cdd718ed/src/python/singa/optimizer.py
----------------------------------------------------------------------
diff --git a/src/python/singa/optimizer.py b/src/python/singa/optimizer.py
index a964f16..338c6b0 100644
--- a/src/python/singa/optimizer.py
+++ b/src/python/singa/optimizer.py
@@ -44,8 +44,8 @@ class Optimizer(object):
 
     1. construct the optimizer
     2. (optional) register each parameter with its specs.
-    3. use the optimizer to update parameter values given parameter
-        gradients and other optional info
+    3. use the optimizer to update parameter values given parameter gradients
+       and other optional info
 
     The subclasses should override the apply_with_lr function to do the real
     parameter update.
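+
+    A minimal sketch (the SGD constructor and the apply_with_lr call follow
+    the char-rnn example; ``g`` and ``p`` denote a gradient tensor and the
+    corresponding parameter tensor)::
+
+        opt = SGD(constraint=L2Constraint(5))
+        opt.apply_with_lr(epoch, 0.01, g, p, 'p')  # update p using g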


[05/22] incubator-singa git commit: SINGA-237 New documentation files for SINGA v1.0

Posted by wa...@apache.org.
SINGA-237 New documentation files for SINGA v1.0

1. copy the 'examples' folder into docs/ to generate htmls files using the README.md files
2. add software_stack.md to describe the major data structures of v1.0
3. add device.rst to introduce the Device APIs


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/bc822cd2
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/bc822cd2
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/bc822cd2

Branch: refs/heads/dev
Commit: bc822cd208655cf7fd9aea5931e2618f4e2bbe45
Parents: 1db2784
Author: Wei Wang <wa...@comp.nus.edu.sg>
Authored: Thu Aug 11 23:13:12 2016 +0800
Committer: Wei Wang <wa...@gmail.com>
Committed: Sun Aug 14 23:44:56 2016 +0800

----------------------------------------------------------------------
 doc/Makefile                      |   1 +
 doc/_static/images/singav1-sw.png | Bin 0 -> 24326 bytes
 doc/conf.py                       |   9 +--
 doc/docs.rst                      |   6 +-
 doc/docs/device.rst               |  47 ++++++++++++++++
 doc/docs/examples.rst             |   6 ++
 doc/docs/index.rst                |  15 ++---
 doc/docs/jp/index.md              |  23 --------
 doc/docs/kr/index.md              |  23 --------
 doc/docs/software_stack.md        |  99 +++++++++++++++++++++++++++++++++
 doc/docs/tensor.md                |   7 +++
 doc/docs/zh/index.md              |  10 ++--
 12 files changed, 177 insertions(+), 69 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/bc822cd2/doc/Makefile
----------------------------------------------------------------------
diff --git a/doc/Makefile b/doc/Makefile
index 62a2236..c6eddf1 100644
--- a/doc/Makefile
+++ b/doc/Makefile
@@ -50,6 +50,7 @@ clean:
 
 .PHONY: html
 html:
+	cp -rf ../examples docs/
 	$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
 	@echo
 	@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/bc822cd2/doc/_static/images/singav1-sw.png
----------------------------------------------------------------------
diff --git a/doc/_static/images/singav1-sw.png b/doc/_static/images/singav1-sw.png
new file mode 100644
index 0000000..e443c6e
Binary files /dev/null and b/doc/_static/images/singav1-sw.png differ

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/bc822cd2/doc/conf.py
----------------------------------------------------------------------
diff --git a/doc/conf.py b/doc/conf.py
index 86dc031..20ba51a 100755
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -16,9 +16,10 @@
 # add these directories to sys.path here. If the directory is relative to the
 # documentation root, use os.path.abspath to make it absolute, like shown here.
 #
-# import os
-# import sys
-# sys.path.insert(0, os.path.abspath('.'))
+import os
+import sys
+sys.path.insert(0, os.path.abspath('.'))
+sys.path.insert(1, '../src/python/singa/')
 
 # -- General configuration ------------------------------------------------
 from recommonmark.parser import CommonMarkParser
@@ -35,7 +36,7 @@ source_parsers = {
 # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
 # ones.
 extensions = [
-   
+'sphinx.ext.autodoc'
 ]
 
 # Add any paths that contain templates here, relative to this directory.

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/bc822cd2/doc/docs.rst
----------------------------------------------------------------------
diff --git a/doc/docs.rst b/doc/docs.rst
index 2ebea60..400b12a 100644
--- a/doc/docs.rst
+++ b/doc/docs.rst
@@ -2,7 +2,5 @@ Documentation
 =============
 
 .. toctree::
-	docs/index
- 	docs/zh/index
-	docs/jp/index
-	docs/kr/index
+   docs/index
+   docs/zh/index

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/bc822cd2/doc/docs/device.rst
----------------------------------------------------------------------
diff --git a/doc/docs/device.rst b/doc/docs/device.rst
new file mode 100644
index 0000000..aa5defb
--- /dev/null
+++ b/doc/docs/device.rst
@@ -0,0 +1,47 @@
+Device
+=======
+
+
+The Device abstraction represents a hardware device with memory and computation units.
+
+Specific devices
+----------------
+Currently, SINGA has three Device implementations:
+
+1. CudaGPU for an Nvidia GPU card which runs Cuda code
+2. CppCPU for a CPU which runs Cpp code
+3. OpenclGPU for a GPU card which runs OpenCL code
+
+
+Create devices
+---------------
+
+Python API
+~~~~~~~~~~
+
+.. autofunction:: device.create_cuda_gpus
+
+.. autofunction:: device.create_cuda_gpus_on
+
+.. autofunction:: device.create_cuda_gpu_on
+
+.. autofunction:: device.get_default_device
+
+
+The following code shows how to create devices,
+
+.. code:: python
+
+   from singa import device
+   cuda = device.create_cuda_gpu_on(0)  # use GPU card of ID 0
+   host = device.get_default_device()  # get the default host device (a CppCPU)
+   ary1 = device.create_cuda_gpus(2)  # create 2 devices, starting from ID 0
+   ary2 = device.create_cuda_gpus_on([0, 2])  # create 2 devices on ID 0 and 2
+
+
+
+CPP API
+~~~~~~~
+
+
+

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/bc822cd2/doc/docs/examples.rst
----------------------------------------------------------------------
diff --git a/doc/docs/examples.rst b/doc/docs/examples.rst
new file mode 100644
index 0000000..b0b2af8
--- /dev/null
+++ b/doc/docs/examples.rst
@@ -0,0 +1,6 @@
+Examples
+========
+
+.. toctree::
+
+   examples/index

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/bc822cd2/doc/docs/index.rst
----------------------------------------------------------------------
diff --git a/doc/docs/index.rst b/doc/docs/index.rst
index a6a1b49..8a74976 100644
--- a/doc/docs/index.rst
+++ b/doc/docs/index.rst
@@ -2,15 +2,8 @@ English
 =======
 
 .. toctree::
-	overview
-        installation
-	quick-start
-        programming-guide
-        distributed-training
-        data
-        checkpoint
-        python
-        test
-        gpu
-        examples
 
+   installation
+   software_stack
+   device
+   examples

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/bc822cd2/doc/docs/jp/index.md
----------------------------------------------------------------------
diff --git a/doc/docs/jp/index.md b/doc/docs/jp/index.md
deleted file mode 100644
index 6679198..0000000
--- a/doc/docs/jp/index.md
+++ /dev/null
@@ -1,23 +0,0 @@
-# Latest Documentation
-
----
-
-* [Introduction](overview.html)
-* [Installation](installation.html)
-* [Quick Start](quick-start.html)
-* [Programming Guide](programming-guide.html)
-    * [NeuralNet](neural-net.html)
-        * [Layer](layer.html)
-        * [Param](param.html)
-    * [TrainOneBatch](train-one-batch.html)
-    * [Updater](updater.html)
-* [Distributed Training](distributed-training.html)
-* [Data Preparation](data.html)
-* [Checkpoint and Resume](checkpoint.html)
-* [Performance Test and Feature Extraction](test.html)
-* [Examples](examples.html)
-    * Feed-forward models
-        * [CNN](cnn.html)
-        * [MLP](mlp.html)
-    * [RBM + Auto-encoder](rbm.html)
-    * [RNN](rnn.html)

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/bc822cd2/doc/docs/kr/index.md
----------------------------------------------------------------------
diff --git a/doc/docs/kr/index.md b/doc/docs/kr/index.md
deleted file mode 100644
index 990d5d9..0000000
--- a/doc/docs/kr/index.md
+++ /dev/null
@@ -1,23 +0,0 @@
-# Latest Documentation
-
----
-
-* [Overview](overview.html)
-* [Installation](installation.html)
-* [Quick Start](quick-start.html)
-* [Programming Guide](programming-guide.html)
-    * [NeuralNet](neural-net.html)
-        * [Layer](layer.html)
-        * [Param](param.html)
-    * [TrainOneBatch](train-one-batch.html)
-    * [Updater](updater.html)
-* [Distributed Training](distributed-training.html)
-* [Data Preparation](data.html)
-* [Checkpoint and Resume](checkpoint.html)
-* [Performance Test and Feature Extraction](test.html)
-* [Examples](examples.html)
-    * Feed-forward models
-        * [CNN](cnn.html)
-        * [MLP](mlp.html)
-    * [RBM + Auto-encoder](rbm.html)
-    * [RNN](rnn.html)

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/bc822cd2/doc/docs/software_stack.md
----------------------------------------------------------------------
diff --git a/doc/docs/software_stack.md b/doc/docs/software_stack.md
new file mode 100644
index 0000000..c60b6a5
--- /dev/null
+++ b/doc/docs/software_stack.md
@@ -0,0 +1,99 @@
+# Software Stack
+
+SINGA's software stack includes three major components, namely, core, IO and
+model. Figure 1 illustrates these components together with the hardware.
+The core component provides memory management and tensor operations;
+IO has classes for reading (and writing) data from (to) disk and network; the
+model component provides data structures and algorithms for machine learning models,
+e.g., layers for neural network models, and optimizers/initializers/metrics/losses for
+general machine learning models.
+
+
+<img src="../_static/images/singav1-sw.png" align="center" width="500px"/>
+<br/>
+<span><strong>Figure 1 - SINGA V1 software stack.</strong></span>
+
+## Core
+
+[Tensor](tensor.html) and [Device](device.html) are two core abstractions in SINGA. The Tensor class represents a
+multi-dimensional array, which stores model variables and provides linear algebra
+operations for machine learning
+algorithms, including matrix multiplication and random functions. Each tensor
+instance (i.e. a tensor) is allocated on a Device instance.
+Each Device instance (i.e. a device) is created against one hardware device,
+e.g. a GPU card or a CPU core. Devices manage the memory of tensors and execute
+tensor operations on their execution units, e.g. CPU threads or CUDA streams.
+
+Depending on the hardware and the programming language, SINGA has implemented
+the following specific device classes:
+
+* **CudaGPU** represents an Nvidia GPU card. The execution units are the CUDA streams.
+* **CppCPU** represents a normal CPU. The execution units are the CPU threads.
+* **OpenclGPU** represents a normal GPU card from either Nvidia or AMD.
+  The execution units are the CommandQueues. Given that OpenCL is compatible with
+  many hardware devices, e.g. FPGA and ARM, the OpenclGPU has the potential to be
+  extended for other devices.
+
+Different types of devices use different programming languages to write the kernel
+functions for tensor operations,
+
+* CppMath (tensor_math_cpp.h) implements the tensor operations using Cpp for CppCPU
+* CudaMath (tensor_math_cuda.h) implements the tensor operations using CUDA for CudaGPU
+* OpenclMath (tensor_math_opencl.h) implements the tensor operations using OpenCL for OpenclGPU
+
+In addition, different types of data, such as float32 and float16, could be supported by adding
+the corresponding tensor functions.
+
+Typically, users would create a device instance and pass it to create multiple
+tensor instances. When users call the Tensor functions, these functions invoke
+the corresponding implementation (CppMath/CudaMath/OpenclMath) automatically. In
+other words, the implementation of Tensor operations is transparent to users.
+
+Most machine learning algorithms could be expressed using (dense or sparse) tensors.
+Therefore, with the Tensor abstraction, SINGA would be able to run a wide range of models,
+including deep learning models and other traditional machine learning models.
+
+The Tensor and Device abstractions are extensible to support a wide range of hardware devices
+using different programming languages. A new hardware device would be supported by
+adding a new Device subclass and the corresponding implementation of the Tensor
+operations (xxxMath).
+
+Optimizations in terms of speed and memory can be implemented by the Device, which
+manages both operation execution and memory malloc/free. More optimization details
+are described in the [Device page](device.html).
+
+
+## Model
+
+On top of the Tensor and Device abstractions, SINGA provides some higher level
+classes for machine learning modules.
+
+* [Layer](layer.html) and its subclasses are specific to neural networks. Every layer provides
+  functions for forward propagating features and backward propagating gradients w.r.t the training loss function.
+  They wrap the complex layer operations so that users can easily create neural nets
+  by connecting a set of layers.
+
+* [Initializer](initializer.html) and its subclasses provide various methods for initializing
+  model parameters (stored in Tensor instances), e.g., following a uniform or Gaussian distribution.
+
+* [Loss](loss.html) and its subclasses define the training objective loss functions.
+  Both the function for computing the loss value and the function for computing the gradient
+  of the prediction w.r.t the objective loss are implemented. Example loss functions include
+  squared error and cross entropy (see the sketch after this list).
+
+* [Metric](metric.html) and its subclasses provide the function to measure the
+  performance of the model, e.g., the accuracy.
+
+* [Optimizer](optimizer.html) and its subclasses implement the methods for updating
+  model parameter values using parameter gradients, including SGD, AdaGrad, RMSProp, etc.
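+
+As a small illustration of this division of labour, a squared-error loss could be
+written directly with the tensor module functions (a sketch only; the hypothetical
+`squared_error` helper is for illustration, while the actual Loss subclasses wrap
+this kind of logic together with the gradient computation):
+
+    from singa import tensor
+
+    def squared_error(pred, target):
+        '''Return the mean squared error and the (unscaled) gradient w.r.t pred.'''
+        d = tensor.sub(pred, target)
+        l = tensor.sum(tensor.square(d)) / pred.size()
+        return l, d  # the exact gradient scales d by 2 / pred.size()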
+
+
+## IO
+
+The IO module consists of classes for data loading, data preprocessing and message passing
+(a sketch of how the first four classes fit together follows the list).
+
+* Reader and its subclasses load string records from disk files
+* Writer and its subclasses write string records to disk files
+* Encoder and its subclasses encode Tensor instances into string records
+* Decoder and its subclasses decode string records into Tensor instances
+* Endpoint represents a communication endpoint and provides functions for passing messages between endpoints.
+* Message represents communication messages between Endpoint instances. It carries both meta data and payload.
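+
+A typical round trip, written as pseudo-Python with hypothetical instance and
+method names (the concrete signatures are defined by the subclasses above):
+
+    # encode a tensor into a string record and persist it, then read it back;
+    # `encoder`, `writer`, `reader` and `decoder` are hypothetical instances
+    record = encoder.encode(x)   # Encoder: Tensor -> string record
+    writer.write(record)         # Writer: string record -> disk file
+
+    record = reader.read()       # Reader: disk file -> string record
+    y = decoder.decode(record)   # Decoder: string record -> Tensor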

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/bc822cd2/doc/docs/tensor.md
----------------------------------------------------------------------
diff --git a/doc/docs/tensor.md b/doc/docs/tensor.md
new file mode 100644
index 0000000..eaf8362
--- /dev/null
+++ b/doc/docs/tensor.md
@@ -0,0 +1,7 @@
+# Tensor
+
+
+##
+
+
+##

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/bc822cd2/doc/docs/zh/index.md
----------------------------------------------------------------------
diff --git a/doc/docs/zh/index.md b/doc/docs/zh/index.md
index c44a2cf..4b49d5f 100644
--- a/doc/docs/zh/index.md
+++ b/doc/docs/zh/index.md
@@ -1,7 +1,9 @@
 SINGA 中文文档
----
+==============
 
-* [简介](overview.html)
-* [安装](installation_source.html)
-* [使用指南](programming-guide.html)
+.. toctree::
+
+   overview
+   installation_source
+   programming-guide
 


[09/22] incubator-singa git commit: SINGA-237 New documentation files for SINGA v1.0 - add python installation instruction, remove flask dependency

Posted by wa...@apache.org.
SINGA-237 New documentation files for SINGA v1.0
  - add python installation instruction, remove flask dependency


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/410f238a
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/410f238a
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/410f238a

Branch: refs/heads/dev
Commit: 410f238af4388c174e9a2725baf40153cacb0915
Parents: d3a57cf
Author: aaronwwf <dc...@gmail.com>
Authored: Sun Aug 14 23:51:03 2016 +0800
Committer: aaronwwf <dc...@gmail.com>
Committed: Mon Aug 15 16:21:23 2016 +0800

----------------------------------------------------------------------
 doc/docs/installation.md     |  73 +++++++++++-
 src/python/setup.py.in       |   5 +-
 src/python/singa/__init__.py | 240 --------------------------------------
 src/python/singa/command.py  | 240 ++++++++++++++++++++++++++++++++++++++
 4 files changed, 313 insertions(+), 245 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/410f238a/doc/docs/installation.md
----------------------------------------------------------------------
diff --git a/doc/docs/installation.md b/doc/docs/installation.md
index 8ab617f..6bfdee3 100755
--- a/doc/docs/installation.md
+++ b/doc/docs/installation.md
@@ -3,7 +3,7 @@
 ## Dependencies
 
 ### Required
-* Google Protobuf (>=2.5)
+* Google Protobuf (>=2.5,<3)
 * BLAS (tested with OpenBLAS >=0.2.10)
 * CUDA (tested with 6.5, 7.0 and 7.5)
 * CUDNN (v4 and v5)
@@ -52,7 +52,7 @@ Note that if you are using CUDNN, you need to let cmake know the paths to CUDNN,
     $ export CMAKE_INCLUDE_PATH=<path to cudnn>/include:$CMAKE_INCLUDE_PATH
     $ export CMAKE_LIBRARY_PATH=<path to cudnn>/lib64:$CMAKE_LIBRARY_PATH
 
-You can use `ccmake ..` to configure the compilation options including using
+You can use `cmake ..` to configure the compilation options including using
 LMDB, GLOG, etc.
 
 After compiling SINGA, you can run the unit tests by
 tests, then you have successfully installed SINGA. Please proceed to try the examples!
 
 ### MacOS
 
+Currently only the Linux OS is officially supported.
 
 ### Windows
+
+Currently only the Linux OS is officially supported.
+
+
+# Install SINGA Python Module
+
+SINGA provides a Python binding for Python programmers. Users can install it either
+from source or from a pre-built wheel file.
+
+## Install from source
+
+### Required
+* python(==2.7)   
+* pip(>=1.5)
+* SWIG(>=3.0)   
+* numpy(>=1.11.0)   
+* Google protobuf(>=2.5,<3)   
+
+
+### Configuration
+To build the SINGA Python package, users should turn on the Python building switch in the cmake config file "CMakeLists.txt":
+
+    OPTION(USE_PYTHON "Generate py wrappers" ON)
+
+### Instructions
+Follow the instructions in the above sections to build SINGA from source.
+
+After that, execute the following commands:
+
+    # under the build directory
+    $ cd python
+    $ sudo pip install . 
+
+Then the singa package should be installed into the corresponding Python library path.
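+
+To verify the installation, you may try importing the package from Python (assuming
+pip installed it into the default Python environment):
+
+    $ python -c "from singa import tensor, device"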
+
+## Pip Install from wheel 
+
+Install pip if it is not already installed:
+
+    $ sudo apt-get install python-pip python-dev
+
+Then, select the correct binary to install:
+
+    # Ubuntu/Linux 64-bit, CPU only, Python 2.7, Protobuf 2.5
+    $ export SINGA_WHEEL_URL=http://comp.nus.edu.sg/~dbsystem/singa/assets/file/pb2.5/singa-1.0.0-cp27-none-linux_x86_64.whl
+
+    # Ubuntu/Linux 64-bit, CPU only, Python 2.7, Protobuf 2.6
+    $ export SINGA_WHEEL_URL=http://comp.nus.edu.sg/~dbsystem/singa/assets/file/pb2.6/singa-1.0.0-cp27-none-linux_x86_64.whl
+
+    # Ubuntu/Linux 64-bit, GPU enabled, Python 2.7, Protobuf 2.5, CUDA toolkit 7.5 and CuDNN v5
+    $ export SINGA_WHEEL_URL=http://comp.nus.edu.sg/~dbsystem/singa/assets/file/pb2.5-cuda7.5-cudnn5/singa-1.0.0-cp27-none-linux_x86_64.whl
+   
+    # Ubuntu/Linux 64-bit, GPU enabled, Python 2.7, Protobuf 2.6, CUDA toolkit 7.5 and CuDNN v5
+    $ export SINGA_WHEEL_URL=http://comp.nus.edu.sg/~dbsystem/singa/assets/file/pb2.6-cuda7.5-cudnn5/singa-1.0.0-cp27-none-linux_x86_64.whl
+   
+Install SINGA:
+
+    $ sudo pip install --upgrade $SINGA_WHEEL_URL
+
+### Build wheel file from source
+
+Users can build the wheel file from source. After building SINGA, execute the following commands:
+
+    # under the build directory
+    $ cd python
+    $ python setup.py bdist_wheel
+
+Then users can find the built wheel file under the "dist" directory.
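+
+The wheel can then be installed with pip (the exact file name depends on the build,
+e.g. the version and platform tags):
+
+    # under the build/python directory
+    $ sudo pip install dist/singa-*.whl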

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/410f238a/src/python/setup.py.in
----------------------------------------------------------------------
diff --git a/src/python/setup.py.in b/src/python/setup.py.in
index d1ac3c9..f2cd9f3 100644
--- a/src/python/setup.py.in
+++ b/src/python/setup.py.in
@@ -42,8 +42,7 @@ setup(
 
     install_requires=[
         'numpy>=1.11.0',
-        'protobuf>=2.5.0,<3',
-        'flask>=0.10.1'
+        'protobuf>=2.5.0,<3'
         ],
 
     #List additional groups of dependencies here (e.g. development
@@ -75,7 +74,7 @@ setup(
 
     entry_points={
         'console_scripts': [
-            'singa=singa:main',
+            'singa=singa.command:main',
         ],
     },
 )

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/410f238a/src/python/singa/__init__.py
----------------------------------------------------------------------
diff --git a/src/python/singa/__init__.py b/src/python/singa/__init__.py
index f14c8c5..e69de29 100644
--- a/src/python/singa/__init__.py
+++ b/src/python/singa/__init__.py
@@ -1,240 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-# =============================================================================
-
-'''
-This script is the main entrance for user to run singa inside a model workspace
-
-To use this script, user sudo install these dependencies: flask pillow and protobuf
-'''
-
-import sys, glob, os, random, shutil, time
-from flask import Flask, request, redirect, url_for
-import numpy as np
-import ConfigParser
-import urllib, traceback
-
-
-from argparse import ArgumentParser
-from argparse import RawDescriptionHelpFormatter
-sys.path.append(os.getcwd())
-
-__all__ = []
-__version__ = 0.1
-__date__ = '2016-07-20'
-__updated__ = '2016-07-20'
-__shortdesc__ = '''
-welcome to singa
-'''
-
-app = Flask(__name__)
-config = ConfigParser.RawConfigParser()
-service = {}
-data_path = "data_"
-parameter_path = "parameter_"
-
-debug = False
-
-class CLIError(Exception):
-    '''Generic exception to raise and log different fatal errors.'''
-    def __init__(self, msg):
-        super(CLIError).__init__(type(self))
-        self.msg = "E: %s" % msg
-    def __str__(self):
-        return self.msg
-    def __unicode__(self):
-        return self.msg
-
-def main(argv=None): # IGNORE:C0111
-    '''Command line options.'''
-
-    from . import device
-
-    if argv is None:
-        argv = sys.argv
-    else:
-        sys.argv.extend(argv)
-
-    program_name = os.path.basename(sys.argv[0])
-    program_version = "v%s" % __version__
-    program_build_date = str(__updated__)
-    program_version_message = '%%(prog)s %s (%s)' % (program_version, program_build_date)
-    program_shortdesc = __shortdesc__
-    program_license = '''%s
-
-  Created by dbsystem group on %s.
-  Copyright 2016 NUS School of Computing. All rights reserved.
-
-  Licensed under the Apache License 2.0
-  http://www.apache.org/licenses/LICENSE-2.0
-
-  Distributed on an "AS IS" basis without warranties
-  or conditions of any kind, either express or implied.
-
-USAGE
-''' % (program_shortdesc, str(__date__))
-
-    global debug
-
-    try:
-        # Setup argument parser
-        parser = ArgumentParser(description=program_license, formatter_class=RawDescriptionHelpFormatter)
-        parser.add_argument("-p", "--port", dest="port", default=5000, help="the port to listen to, default is 5000")
-        parser.add_argument("-param", "--parameter", dest="parameter",  help="the parameter file path to be loaded")
-        parser.add_argument("-D", "--debug", dest="debug", action="store_true", help="whether need to debug")
-        parser.add_argument("-R", "--reload", dest="reload_data", action="store_true", help="whether need to reload data")
-        parser.add_argument("-C", "--cpu", dest="use_cpu", action="store_true", help="Using cpu or not, default is using gpu")
-        parser.add_argument("-m", "--mode", dest="mode", choices=['train','test','serve'], default='serve', help="On Which mode (train,test,serve) to run singa")
-        parser.add_argument('-V', '--version', action='version', version=program_version_message)
-
-        # Process arguments
-        args = parser.parse_args()
-
-        port = args.port
-        parameter_file = args.parameter
-        mode = args.mode
-        need_reload = args.reload_data
-        use_cpu = args.use_cpu
-        debug = args.debug
-
-        #prepare data files
-        config.read('file.cfg')
-        file_prepare(need_reload)
-
-
-        import network as net
-        model = net.create()
-
-        #load parameter
-        parameter_file=get_parameter(parameter_file)
-
-        if parameter_file:
-            print "load parameter file: %s" % parameter_file
-            model.load(parameter_file)
-
-        if use_cpu:
-            raise CLIError("Currently cpu is not support!")
-        else:
-            print "runing with gpu"
-            d = device.create_cuda_gpu()
-
-        model.to_device(d)
-
-        if mode == "serve":
-            print "runing singa in serve mode, listen to  port: %s " % port
-            global service
-            from serve import Service
-            service =Service(model,d)
-
-            app.debug = debug
-            app.run(host='0.0.0.0', port= port)
-        elif mode == "train":
-            print "runing singa in train mode"
-            global trainer
-            from train import Trainer
-            trainer= Trainer(model,d)
-            if not parameter_file:
-                trainer.initialize()
-            trainer.train()
-        else:
-            raise CLIError("Currently only serve mode is surpported!")
-        return 0
-    except KeyboardInterrupt:
-        ### handle keyboard interrupt ###
-        return 0
-    except Exception, e:
-        if debug:
-            traceback.print_exc()
-            raise(e)
-        indent = len(program_name) * " "
-        sys.stderr.write(program_name + ": " + str(e) + "\n")
-        sys.stderr.write(indent + "  for help use --help \n\n")
-        return 2
-
-def file_prepare(reload_data=False):
-    '''
-        download all files and generate data.py
-    '''
-    if not reload_data and os.path.exists("data_.py"):
-        return
-
-    print "download file"
-    #clean data
-    shutil.rmtree("data_.py",ignore_errors=True)
-    shutil.rmtree("data_",ignore_errors=True)
-
-    data_py=open("data_.py",'w')
-    data_py.write("#%s" % "This file is Generated by SINGA, please don't edit\n\n")
-    if config.has_section("data"):
-        file_list = config.items("data")
-        #download files
-        for f in file_list:
-            name,path=download_file(f[0],f[1],data_path)
-            data_py.write("%s=\"%s\"\n" % (name,path))
-
-    data_py.flush()
-    data_py.close()
-
-    if config.has_section("parameter"):
-        parameter_list = config.items("parameter")
-        for p in parameter_list:
-            download_file(p[0],p[1],parameter_path)
-
-def download_file(name,path,dest):
-    '''
-    download one file to dest
-    '''
-    if not os.path.exists(dest):
-        os.makedirs(dest)
-    if (path.startswith('http')):
-        file_name = path.split('/')[-1]
-        target = os.path.join(dest,file_name)
-        urllib.urlretrieve(path,target)
-    return name,target
-
-
-def get_parameter(file_name=None):
-    '''
-    get the paticular file name or get the last parameter file
-    '''
-    if not os.path.exists(parameter_path):
-        os.makedirs(parameter_path)
-        return
-
-    if file_name:
-	return os.path.join(parameter_path,file_name)
-
-    parameter_list = [ os.path.join(parameter_path,f) for f in os.listdir(parameter_path)]
-    if len(parameter_list)==0:
-        return
-    parameter_list.sort()
-
-    return parameter_list[-1]
-
-@app.route("/")
-def index():
-    return "Hello SINGA User!"
-
-@app.route('/predict', methods=['POST'])
-def predict():
-    if request.method == 'POST':
-        try:
-            response=service.serve(request)
-        except Exception as e:
-            return e
-        return response
-    return "error, should be post request"

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/410f238a/src/python/singa/command.py
----------------------------------------------------------------------
diff --git a/src/python/singa/command.py b/src/python/singa/command.py
new file mode 100644
index 0000000..f14c8c5
--- /dev/null
+++ b/src/python/singa/command.py
@@ -0,0 +1,240 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# =============================================================================
+
+'''
+This script is the main entry point for users to run SINGA inside a model workspace.
+
+To use this script, users should install these dependencies: flask, pillow and protobuf.
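+
+Example (the `singa` console script is registered by setup.py)::
+
+    # serve a trained model on port 8080 using the GPU
+    $ singa -m serve -p 8080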
+'''
+
+import sys, glob, os, random, shutil, time
+from flask import Flask, request, redirect, url_for
+import numpy as np
+import ConfigParser
+import urllib, traceback
+
+
+from argparse import ArgumentParser
+from argparse import RawDescriptionHelpFormatter
+sys.path.append(os.getcwd())
+
+__all__ = []
+__version__ = 0.1
+__date__ = '2016-07-20'
+__updated__ = '2016-07-20'
+__shortdesc__ = '''
+welcome to singa
+'''
+
+app = Flask(__name__)
+config = ConfigParser.RawConfigParser()
+service = {}
+data_path = "data_"
+parameter_path = "parameter_"
+
+debug = False
+
+class CLIError(Exception):
+    '''Generic exception to raise and log different fatal errors.'''
+    def __init__(self, msg):
+        super(CLIError, self).__init__(msg)
+        self.msg = "E: %s" % msg
+    def __str__(self):
+        return self.msg
+    def __unicode__(self):
+        return self.msg
+
+def main(argv=None): # IGNORE:C0111
+    '''Command line options.'''
+
+    from . import device
+
+    if argv is None:
+        argv = sys.argv
+    else:
+        sys.argv.extend(argv)
+
+    program_name = os.path.basename(sys.argv[0])
+    program_version = "v%s" % __version__
+    program_build_date = str(__updated__)
+    program_version_message = '%%(prog)s %s (%s)' % (program_version, program_build_date)
+    program_shortdesc = __shortdesc__
+    program_license = '''%s
+
+  Created by dbsystem group on %s.
+  Copyright 2016 NUS School of Computing. All rights reserved.
+
+  Licensed under the Apache License 2.0
+  http://www.apache.org/licenses/LICENSE-2.0
+
+  Distributed on an "AS IS" basis without warranties
+  or conditions of any kind, either express or implied.
+
+USAGE
+''' % (program_shortdesc, str(__date__))
+
+    global debug
+
+    try:
+        # Setup argument parser
+        parser = ArgumentParser(description=program_license, formatter_class=RawDescriptionHelpFormatter)
+        parser.add_argument("-p", "--port", dest="port", default=5000, help="the port to listen to, default is 5000")
+        parser.add_argument("-param", "--parameter", dest="parameter",  help="the parameter file path to be loaded")
+        parser.add_argument("-D", "--debug", dest="debug", action="store_true", help="whether need to debug")
+        parser.add_argument("-R", "--reload", dest="reload_data", action="store_true", help="whether need to reload data")
+        parser.add_argument("-C", "--cpu", dest="use_cpu", action="store_true", help="Using cpu or not, default is using gpu")
+        parser.add_argument("-m", "--mode", dest="mode", choices=['train','test','serve'], default='serve', help="On Which mode (train,test,serve) to run singa")
+        parser.add_argument('-V', '--version', action='version', version=program_version_message)
+
+        # Process arguments
+        args = parser.parse_args()
+
+        port = args.port
+        parameter_file = args.parameter
+        mode = args.mode
+        need_reload = args.reload_data
+        use_cpu = args.use_cpu
+        debug = args.debug
+
+        #prepare data files
+        config.read('file.cfg')
+        file_prepare(need_reload)
+
+
+        import network as net
+        model = net.create()
+
+        #load parameter
+        parameter_file=get_parameter(parameter_file)
+
+        if parameter_file:
+            print "load parameter file: %s" % parameter_file
+            model.load(parameter_file)
+
+        if use_cpu:
+            raise CLIError("Currently cpu is not support!")
+        else:
+            print "runing with gpu"
+            d = device.create_cuda_gpu()
+
+        model.to_device(d)
+
+        if mode == "serve":
+            print "runing singa in serve mode, listen to  port: %s " % port
+            global service
+            from serve import Service
+            service =Service(model,d)
+
+            app.debug = debug
+            app.run(host='0.0.0.0', port= port)
+        elif mode == "train":
+            print "runing singa in train mode"
+            global trainer
+            from train import Trainer
+            trainer= Trainer(model,d)
+            if not parameter_file:
+                trainer.initialize()
+            trainer.train()
+        else:
+            raise CLIError("Currently only serve mode is surpported!")
+        return 0
+    except KeyboardInterrupt:
+        ### handle keyboard interrupt ###
+        return 0
+    except Exception, e:
+        if debug:
+            traceback.print_exc()
+            raise(e)
+        indent = len(program_name) * " "
+        sys.stderr.write(program_name + ": " + str(e) + "\n")
+        sys.stderr.write(indent + "  for help use --help \n\n")
+        return 2
+
+def file_prepare(reload_data=False):
+    '''
+        download all files and generate data.py
+    '''
+    if not reload_data and os.path.exists("data_.py"):
+        return
+
+    print "download file"
+    #clean data
+    shutil.rmtree("data_.py",ignore_errors=True)
+    shutil.rmtree("data_",ignore_errors=True)
+
+    data_py=open("data_.py",'w')
+    data_py.write("#%s" % "This file is Generated by SINGA, please don't edit\n\n")
+    if config.has_section("data"):
+        file_list = config.items("data")
+        #download files
+        for f in file_list:
+            name,path=download_file(f[0],f[1],data_path)
+            data_py.write("%s=\"%s\"\n" % (name,path))
+
+    data_py.flush()
+    data_py.close()
+
+    if config.has_section("parameter"):
+        parameter_list = config.items("parameter")
+        for p in parameter_list:
+            download_file(p[0],p[1],parameter_path)
+
+def download_file(name,path,dest):
+    '''
+    download one file to dest
+    '''
+    if not os.path.exists(dest):
+        os.makedirs(dest)
+    if (path.startswith('http')):
+        file_name = path.split('/')[-1]
+        target = os.path.join(dest,file_name)
+        urllib.urlretrieve(path,target)
+    return name,target
+
+
+def get_parameter(file_name=None):
+    '''
+    get the particular file name or get the last parameter file
+    '''
+    if not os.path.exists(parameter_path):
+        os.makedirs(parameter_path)
+        return
+
+    if file_name:
+        return os.path.join(parameter_path, file_name)
+
+    parameter_list = [ os.path.join(parameter_path,f) for f in os.listdir(parameter_path)]
+    if len(parameter_list)==0:
+        return
+    parameter_list.sort()
+
+    return parameter_list[-1]
+
+@app.route("/")
+def index():
+    return "Hello SINGA User!"
+
+@app.route('/predict', methods=['POST'])
+def predict():
+    if request.method == 'POST':
+        try:
+            response=service.serve(request)
+        except Exception as e:
+            return e
+        return response
+    return "error, should be post request"


[02/22] incubator-singa git commit: SINGA-237 New documentation files for SINGA v1.0

Posted by wa...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/8cd55300/src/python/singa/tensor.py
----------------------------------------------------------------------
diff --git a/src/python/singa/tensor.py b/src/python/singa/tensor.py
index 6e84a4f..2e60554 100644
--- a/src/python/singa/tensor.py
+++ b/src/python/singa/tensor.py
@@ -16,23 +16,63 @@
 # under the License.
 # =============================================================================
 """
-This script includes Tensor class and its methods for python users
-to call singa::Tensor and its methods
+Example usage::
+
+    import numpy as np
+    from singa import tensor
+    from singa import device
+
+    # create a tensor with shape (2,3), default CppCPU device and float32
+    x = tensor.Tensor((2, 3))
+    x.set_value(0.4)
+
+    # create a tensor from a numpy array
+    y = tensor.from_numpy(np.zeros((3, 3), dtype=np.float32))
+    y.uniform(-1, 1)
+
+    z = tensor.mult(x, y)  # gemm -> z of shape (2, 3)
+
+    x += z  # element-wise addition
+
+    dev = device.create_cuda_gpu()
+    x.to_device(dev)  # move the data to a gpu device
+
+    r = tensor.relu(x)
+
+    r.to_host()  # move the data back to host cpu
+    s = tensor.to_numpy(r)  # tensor -> numpy array; r must be on cpu
+
+
+There are two sets of tensor functions:
+
+Tensor member functions
+    which change the internal state of the Tensor instance.
+Tensor module functions
+    which accept Tensor instances as arguments and return new Tensor instances.
+
+Every Tensor instance must be initialized before reading data from it.
 """
 
 import numpy as np
+from functools import reduce
 from .proto import core_pb2
 from . import singa_wrap as singa
-from functools import reduce
+import device
 
 
 class Tensor(object):
-    ''' Class and member functions for singa::Tensor
+    '''Create a Python Tensor, which wraps a swig-converted Tensor from the C++ Tensor class.
+
+    The three arguments are three attributes of the Tensor.
+
+    Args:
+        shape (list<int>): a list of integers for the tensor shape. If shape is
+            not specified, the created tensor is called a dummy tensor.
+        device: a swig-converted Device instance created using the device module. If it
+            is None, the default host device is used.
+        dtype: data type. Currently, most operations only accept kFloat32.
     '''
 
     def __init__(self, shape=None, device=None, dtype=core_pb2.kFloat32):
-        ''' shape = (tuple)
-        '''
         if shape is None:
             # call constructor of singa::Tensor
             self.singa_tensor = singa.Tensor()
@@ -48,125 +88,230 @@ class Tensor(object):
             self.device = device
             self.dtype = dtype
 
-    def copy_from_numpy(self, np_array, offset=0):
-        ''' this method stores the values of numpy array into tensor data
-            from the position of offset
-        '''
-        assert np_array.size == self.size(), 'tensor shape should be the same'
-        if not np_array.ndim == 1:
-            np_array = np_array.flatten()
-        dt = np_array.dtype
-        if dt == np.float32:
-            self.singa_tensor.floatCopyDataFromHostPtr(np_array)
-        elif dt == np.int or dt == np.int32:
-            self.singa_tensor.intCopyDataFromHostPtr(np_array)
-        else:
-            print 'Not implemented yet for ', dt
-
-    # deprecated, access the member data_type directly
-    def data_type(self):
-        return self.singa_tensor.data_type()
-
-    # deprecated, access the member shape directly
-    def shape(self, axis=None):
-        if axis is None:
-            return self.singa_tensor.shape()
-        else:
-            return self.singa_tensor.shape(axis)
-
     def ndim(self):
+        '''
+        Returns:
+            the number of dimensions of the tensor.
+        '''
         return self.singa_tensor.nDim()
 
-    def is_transpose(self):  # TODO(wangwei) make transpose a member
+    def is_transpose(self):
+        '''
+        Returns:
+            True if the internal data is transposed; otherwise False.
+        '''
         return self.singa_tensor.transpose()
 
     def size(self):  # TODO(wangwei) compute size
+        '''
+        Returns:
+            the number of elements of the tensor.
+        '''
         return self.singa_tensor.Size()
 
     def memsize(self):
+        '''
+        Returns:
+            the number of Bytes allocated for this tensor.
+        '''
         return self.singa_tensor.MemSize()
 
     def reshape(self, shape):
+        '''Change the tensor shape.
+
+        Args:
+            shape (list<int>): new shape, which should have the same volume as
+                the original shape.
+        '''
         assert product(self.shape) == product(shape), \
-               'product of shape should be equal'
+            'product of shape should be equal'
         self.shape = shape
-        self.singa_tensor.Reshape(_tuple_to_vector(shape))
+        self.singa_tensor.Reshape(list(shape))
 
     def reset_like(self, t):
+        '''Reset the shape, dtype and device as the given tensor.
+
+        Args:
+            t (Tensor)
+        '''
         self.singa_tensor.ResetLike(t.singa_tensor)
 
+    '''
     def as_type(self, dtype):
+        Change the data type.
+
+        Args:
+            dtype:
         self.singa_tensor.AsType(dtype)
+    '''
 
     def to_device(self, device):
+        '''Move the tensor data onto a given device.
+
+        Args:
+            device: a swig Device converted from CudaGPU or CppCPU or OpenclGPU
+        '''
         self.singa_tensor.ToDevice(device)
 
     def to_host(self):
+        '''Move the tensor data onto the default host CppCPU device.
+        '''
         self.singa_tensor.ToHost()
 
     def l2(self):
+        '''
+        Returns:
+            the L2 norm.
+        '''
         return self.singa_tensor.L2()
 
     def l1(self):
+        '''
+        Returns:
+            the L1 norm.
+        '''
         return self.singa_tensor.L1()
 
     def set_value(self, x):
+        '''Set all elements of the tensor to the given value.
+
+        Args:
+            x (float): the value to be set to all elements.
+        '''
         # assert type(x) == float, 'set value only accepts float input'
         # if isinstance(x, float):
         self.singa_tensor.floatSetValue(x)
 
+    def copy_from_numpy(self, np_array, offset=0):
+        ''' Copy the data from the numpy array.
+
+        Args:
+            np_array: source numpy array
+            offset (int): destination offset
+        '''
+        assert np_array.size == self.size(), 'tensor shape should be the same'
+        if not np_array.ndim == 1:
+            np_array = np_array.flatten()
+        dt = np_array.dtype
+        if dt == np.float32:
+            self.singa_tensor.floatCopyDataFromHostPtr(np_array)
+        elif dt == np.int or dt == np.int32:
+            self.singa_tensor.intCopyDataFromHostPtr(np_array)
+        else:
+            print 'Not implemented yet for ', dt
+
     def copy_data(self, t):
+        '''Copy data from other Tensor instance.
+
+        Args:
+            t (Tensor): source Tensor.
+        '''
+        assert type(t) == Tensor, 't must be a singa Tensor instance'
         self.singa_tensor.CopyData(t.singa_tensor)
 
     def clone(self):
-        ''' it does deep copy
-            call singa::Tensor::Clone()
+        '''
+        Returns:
+            a new Tensor which does deep copy of this tensor
         '''
         return _call_singa_func(self.singa_tensor.Clone)
 
-    def transpose(self):
-        ''' shallow copy, negate the transpose field
-            call singa::Tensor::T()
+    def T(self):
+        '''Shallow copy; the transpose field is negated.
+
+        Returns:
+            a new Tensor which shares the underlying data memory (shallow copy)
+            but is marked as a transposed version of this tensor.
         '''
         return _call_singa_func(self.singa_tensor.T)
 
+    '''
     def copy(self):
-        ''' shallow copy
+        shallow copy
             call copy constructor of singa::Tensor
-        '''
         return _call_singa_func(singa.Tensor, self.singa_tensor)
+    '''
 
     def deepcopy(self):
-        ''' deep copy
-            call singa::Tensor::Clone()
+        '''Same as clone().
+
+        Returns:
+            a new Tensor
         '''
         return self.clone()
 
     def bernoulli(self, p):
+        '''Sample 0/1 for each element according to the given probability.
+
+        Args:
+            p (float): with probability p, each element is sampled to 1.
+        '''
         singa.floatBernoulli(float(p), self.singa_tensor)
 
     def gaussian(self, mean, std):
+        '''Generate a value for each element following a Gaussian distribution.
+
+        Args:
+            mean (float): mean of the distribution
+            std (float): standard deviation of the distribution
+        '''
         singa.floatGaussian(float(mean), float(std), self.singa_tensor)
 
     def uniform(self, low, high):
+        '''Generate a value for each element following a uniform distribution.
+
+        Args:
+            low (float): the lower bound
+            high (float): the upper bound
+        '''
         singa.floatUniform(float(low), float(high), self.singa_tensor)
 
     def add_column(self, v):
+        '''Add a tensor to each column of this tensor.
+
+        Args:
+            v (Tensor): a Tensor to be added as a column to this tensor.
+        '''
         singa.AddColumn(v.singa_tensor, self.singa_tensor)
 
     def add_row(self, v):
+        '''Add a tensor to each row of this tensor.
+
+        Args:
+            v (Tensor): a Tensor to be added as a row to this tensor.
+        '''
         singa.AddRow(v.singa_tensor, self.singa_tensor)
 
     def div_column(self, v):
+        '''Divide each column of this tensor by v.
+
+        Args:
+            v (Tensor): 1d tensor of the same length as a column of self.
+        '''
         singa.DivColumn(v.singa_tensor, self.singa_tensor)
 
     def div_row(self, v):
+        '''Divide each row of this tensor by v.
+
+        Args:
+            v (Tensor): 1d tensor of the same length as a row of self.
+        '''
         singa.DivRow(v.singa_tensor, self.singa_tensor)
 
     def mult_column(self, v):
+        '''Multiply each column of this tensor by v element-wise.
+
+        Args:
+            v (Tensor): 1d tensor of the same length as a column of self.
+        '''
         singa.MultColumn(v.singa_tensor, self.singa_tensor)
 
     def mult_row(self, v):
+        '''Multiply each row of this tensor by v element-wise.
+
+        Args:
+            v (Tensor): 1d tensor of the same length as a row of self.
+        '''
         singa.MultRow(v.singa_tensor, self.singa_tensor)
 
     '''
@@ -174,6 +319,11 @@ class Tensor(object):
     '''
 
     def __iadd__(self, x):
+        ''' inplace element-wise addition with a tensor or a float value.
+
+        Args:
+            x (float or Tensor):
+        '''
         if isinstance(x, Tensor):
             self.singa_tensor += x.singa_tensor
         else:
@@ -181,6 +331,12 @@ class Tensor(object):
         return self
 
     def __isub__(self, x):
+        ''' inplace element-wise subtraction with a tensor or a float value.
+
+        Args:
+            x (float or Tensor):
+        '''
+
         if isinstance(x, Tensor):
             self.singa_tensor -= x.singa_tensor
         else:
@@ -188,6 +344,11 @@ class Tensor(object):
         return self
 
     def __imul__(self, x):
+        ''' inplace element-wise multiplication with a tensor or a float value.
+
+        Args:
+            x (float or Tensor):
+        '''
         if isinstance(x, Tensor):
             self.singa_tensor *= x.singa_tensor
         else:
@@ -195,6 +356,11 @@ class Tensor(object):
         return self
 
     def __idiv__(self, x):
+        ''' inplace element-wise division by a tensor or a float value.
+
+        Args:
+            x (float or Tensor):
+        '''
         if isinstance(x, Tensor):
             self.singa_tensor /= x.singa_tensor
         else:
@@ -272,29 +438,72 @@ def product(shape):
 
 
 def sizeof(dtype):
+    '''
+    Returns:
+        the number of bytes of the given SINGA data type defined in core.proto
+    '''
     return singa.SizeOf(dtype)
 
 
 def reshape(t, s):
+    '''Reshape the input tensor with the given shape.
+
+    Args:
+        t (Tensor): the tensor to be changed
+        s (list<int>): the new shape, which should have the same volume as the
+            old shape.
+
+    Returns:
+        the new Tensor
+    '''
     return _call_singa_func(singa.Reshape, t.singa_tensor, s)
 
 
 def copy_data_to_from(dst, src, size, dst_offset=0, src_offset=0):
+    '''Copy the data between two Tensor instances which could be on different
+    devices.
+
+    Args:
+        dst (Tensor): destination Tensor
+        src (Tensor): source Tensor
+        size (int) : number of elements to copy
+        dst_offset (int): offset in terms of elements to the start of dst
+        src_offset (int): offset in terms of elements to the start of src
+    '''
     singa.CopyDataToFrom(dst.singa_tensor, src.singa_tensor, size,
                          dst_offset, src_offset)
 
 
 def from_numpy(np_array):
+    '''Create a Tensor instance with the shape, dtype and values from the numpy
+    array.
+
+    Args:
+        np_array: the numpy array.
+
+    Returns:
+        A Tensor instance allocated on the default CppCPU device.
+    '''
     ret = Tensor(np_array.shape)
     ret.copy_from_numpy(np_array)
     return ret
 
 
 def to_numpy(t):
-    ''' this method gets the values of tensor data and
-        returns it as numpy array
-        TODO(wangwei) clone t to host
+    '''Convert the tensor into a numpy array.
+
+    Since numpy array is allocated on CPU devices, the input Tensor instance
+    must be on the default CppCPU device.
+
+    Args:
+        t (Tensor): a Tensor on the default CppCPU device.
+
+    Returns:
+        a numpy array
     '''
+    assert t.device == device.get_default_device() or t.device is None, \
+        'Please move the tensor onto the default host device'
+
     if t.dtype == core_pb2.kFloat32:
         np_array = t.singa_tensor.floatGetValue(int(t.size()))
     elif t.dtype == core_pb2.kInt:
@@ -305,34 +514,96 @@ def to_numpy(t):
 
 
 def abs(t):
+    '''
+    Args:
+        t (Tensor): input Tensor
+
+    Returns:
+        a new Tensor whose element y = abs(x), x is an element of t
+    '''
     return _call_singa_func(singa.Abs, t.singa_tensor)
 
 
 def exp(t):
+    '''
+    Args:
+        t (Tensor): input Tensor
+
+    Returns:
+        a new Tensor whose element y = exp(x), x is an element of t
+    '''
     return _call_singa_func(singa.Exp, t.singa_tensor)
 
 
 def log(t):
+    '''
+    Args:
+        t (Tensor): input Tensor
+
+    Returns:
+        a new Tensor whose element y = log(x), x is an element of t
+    '''
     return _call_singa_func(singa.Log, t.singa_tensor)
 
 
 def relu(t):
+    '''
+    Args:
+        t (Tensor): input Tensor
+
+    Returns:
+        a new Tensor whose element y = x if x >0; otherwise 0; x is an element
+        of t
+    '''
     return _call_singa_func(singa.ReLU, t.singa_tensor)
 
 
 def sigmoid(t):
+    '''
+    Args:
+        t (Tensor): input Tensor
+
+    Returns:
+        a new Tensor whose element y = sigmoid(x); x is an element of t
+    '''
     return _call_singa_func(singa.Sigmoid, t.singa_tensor)
 
 
 def square(t):
+    '''
+    Args:
+        t (Tensor): input Tensor
+
+    Returns:
+        a new Tensor whose element y = x * x, x is an element of t
+    '''
     return _call_singa_func(singa.Square, t.singa_tensor)
 
 
 def tanh(t):
+    '''
+    Args:
+        t (Tensor): input Tensor
+
+    Returns:
+        a new Tensor whose element y = tanh(x), x is an element of t
+    '''
     return _call_singa_func(singa.Tanh, t.singa_tensor)
 
 
 def sum(t, axis=None):
+    '''Sum elements of the input tensor along the given axis.
+
+    Args:
+        t (Tensor): input Tensor
+        axis (int, optional): if None, the summation is done over all elements;
+            if axis is provided, then it is calculated along the given axis,
+            e.g. 0 -- sum each column; 1 -- sum each row.
+
+    Returns:
+        a float value as the sum of all elements, or a new Tensor
+    '''
+
     if axis is None:
         return singa.floatSum(t.singa_tensor)
     else:
@@ -340,6 +611,17 @@ def sum(t, axis=None):
 
 
 def pow(t, x, out=None):
+    '''
+    Args:
+        t (Tensor): input tensor
+        x (float or Tensor): y[i] = t[i]^x if x is a float value; otherwise,
+            y[i]= t[i]^x[i] if x is a tensor.
+        out (None or Tensor): if None, a new Tensor would be constructed to
+            store the result; otherwise, the result is put into out.
+
+    Returns:
+        the result tensor.
+    '''
     if out is None:
         if isinstance(x, Tensor):
             return _call_singa_func(singa.Pow, t.singa_tensor, x.singa_tensor)
@@ -353,7 +635,17 @@ def pow(t, x, out=None):
         return out
 
 
-def average(t, axis=0):
+def average(t, axis=None):
+    '''
+    Args:
+        t (Tensor): input Tensor
+        axis (int, optional): if None, average all elements; otherwise average
+            along the given dimension. 0 for averaging each column; 1 for
+            averaging each row.
+
+    Returns:
+        a float value if axis is None; otherwise, a new Tensor for the result.
+    '''
     if t.ndim() > 1:
         return _call_singa_func(singa.Average, t.singa_tensor, axis)
     else:
@@ -361,6 +653,15 @@ def average(t, axis=0):
 
 
 def softmax(t, out=None):
+    '''Apply SoftMax for each row of the Tensor.
+
+    Args:
+        t (Tensor): the input 1d or 2d tensor
+        out (Tensor, optional): if not None, it is used to store the result
+
+    Returns:
+        the result Tensor
+    '''
     if out is None:
         return _call_singa_func(singa.SoftMax, t.singa_tensor)
     else:
@@ -369,22 +670,73 @@ def softmax(t, out=None):
 
 
 def lt(t, x):
+    '''Element-wise comparison for t < x.
+
+    Args:
+        t (Tensor): left hand side operand
+        x (Tensor or float): right hand side operand
+
+    Returns:
+        a Tensor with each element being t[i] < x ? 1.0f:0.0f,
+        or t[i] < x[i] ? 1.0f:0.0f
+    '''
     return t < x
 
 
 def le(t, x):
+    '''Element-wise comparison for t <= x.
+
+    Args:
+        t (Tensor): left hand side operand
+        x (Tensor or float): right hand side operand
+
+    Returns:
+        a Tensor with each element being t[i] <= x ? 1.0f:0.0f,
+        or t[i] <= x[i] ? 1.0f:0.0f
+    '''
     return t <= x
 
 
 def gt(t, x):
+    '''Element-wise comparison for t > x.
+
+    Args:
+        t (Tensor): left hand side operand
+        x (Tensor or float): right hand side operand
+
+    Returns:
+        a Tensor with each element being t[i] > x ? 1.0f:0.0f,
+        or t[i] > x[i] ? 1.0f:0.0f
+    '''
     return t > x
 
 
 def ge(t, x):
+    '''Element-wise comparison for t >= x.
+
+    Args:
+        t (Tensor): left hand side operand
+        x (Tensor or float): right hand side operand
+
+    Returns:
+        a Tensor with each element being t[i] >= x ? 1.0f:0.0f,
+        or t[i] >= x[i] ? 1.0f:0.0f
+    '''
     return t >= x
 
 
 def add(lhs, rhs, ret=None):
+    '''Element-wise addition.
+
+    Args:
+        lhs (Tensor)
+        rhs (Tensor)
+        ret (Tensor, optional): if not None, the result is stored in it;
+            otherwise, a new Tensor would be created for the result.
+
+    Returns:
+        the result Tensor
+    '''
     if ret is None:
         # call Tensor.__add__()
         return lhs + rhs
@@ -397,6 +749,17 @@ def add(lhs, rhs, ret=None):
 
 
 def sub(lhs, rhs, ret=None):
+    '''Element-wise subtraction.
+
+    Args:
+        lhs (Tensor)
+        rhs (Tensor)
+        ret (Tensor, optional): if not None, the result is stored in it;
+            otherwise, a new Tensor would be created for the result.
+
+    Returns:
+        the result Tensor
+    '''
     if ret is None:
         # call Tensor.__sub__()
         return lhs - rhs
@@ -409,6 +772,18 @@ def sub(lhs, rhs, ret=None):
 
 
 def eltwise_mult(lhs, rhs, ret=None):
+    '''Element-wise multiplication.
+
+    Args:
+        lhs (Tensor)
+        rhs (Tensor)
+        ret (Tensor, optional): if not None, the result is stored in it;
+            otherwise, a new Tensor would be created for the result.
+
+    Returns:
+        the result Tensor
+    '''
+
     if ret is None:
         # call Tensor.__mul__()
         return lhs * rhs
@@ -423,8 +798,21 @@ def eltwise_mult(lhs, rhs, ret=None):
 
 
 def mult(A, B, C=None, alpha=1.0, beta=0.0):
-    '''
+    '''Do matrix-matrix or matrix-vector multiplication.
+
     This function returns C = alpha * A * B + beta * C
+
+    Args:
+        A (Tensor): 2d Tensor
+        B (Tensor): If B is a 1d Tensor, GEMV would be invoked for matrix-vector
+            multiplication; otherwise GEMM would be invoked.
+        C (Tensor, optional): for storing the result; If None, a new Tensor
+            would be created.
+        alpha (float)
+        beta (float)
+
+    Returns:
+        the result Tensor
     '''
     if C is None:
         return _call_singa_func(singa.Mult, A.singa_tensor, B.singa_tensor)
@@ -435,6 +823,17 @@ def mult(A, B, C=None, alpha=1.0, beta=0.0):
 
 
 def div(lhs, rhs, ret=None):
+    '''Element-wise division.
+
+    Args:
+        lhs (Tensor)
+        rhs (Tensor)
+        ret (Tensor, optional): if not None, the result is stored in it;
+            otherwise, a new Tensor would be created for the result.
+
+    Returns:
+        the result Tensor
+    '''
     if ret is None:
         # call Tensor.__div__()
         return lhs / rhs
@@ -447,51 +846,125 @@ def div(lhs, rhs, ret=None):
 
 
 def axpy(alpha, x, y):
-    if isinstance(alpha, float):
-        singa.floatAxpy(alpha, x.singa_tensor, y.singa_tensor)
+    '''Element-wise operation for y += alpha * x.
+
+    Args:
+        alpha (float)
+        x (Tensor)
+        y (Tensor)
+
+    Returns:
+        y
+    '''
+    singa.floatAxpy(float(alpha), x.singa_tensor, y.singa_tensor)
     return y
 
 
 def bernoulli(p, t):
-    if isinstance(p, float):
-        singa.floatBernoulli(p, t.singa_tensor)
+    '''Generate a binary value for each element of t.
+
+    Args:
+        p (float): each element is 1 with probability p; and 0 with 1 - p
+        t (Tensor): the results are put into t
+
+    Returns:
+        t
+    '''
+    singa.floatBernoulli(float(p), t.singa_tensor)
     return t
 
 
 def gaussian(mean, std, t):
-    if isinstance(mean, float):
-        singa.floatGaussian(mean, std, t.singa_tensor)
+    '''Generate values following a Gaussian distribution.
+
+    Args:
+        mean (float): the mean of the Gaussian distribution.
+        std (float): the standard deviation of the Gaussian distribution.
+        t (Tensor): the results are put into t
+
+    Returns:
+        t
+    '''
+    singa.floatGaussian(float(mean), float(std), t.singa_tensor)
     return t
 
 
 def uniform(low, high, t):
-    if isinstance(low, float):
-        singa.floatUniform(low, high, t.singa_tensor)
+    '''Generate values following a Uniform distribution.
+
+    Args:
+        low (float): the lower bound
+        high (float): the upper bound
+        t (Tensor): the results are put into t
+
+    Returns:
+        t
+    '''
+    singa.floatUniform(float(low), float(high), t.singa_tensor)
     return t
 
 
 def add_column(alpha, v, beta, M):
-    singa.floatAddColumn(alpha, beta, v.singa_tensor, M.singa_tensor)
+    '''Add v to each column of M.
+
+    Denote each column of M as m, m = alpha * v + beta * m
+
+    Args:
+        alpha (float)
+        v (Tensor)
+        beta (float)
+        M (Tensor): 2d tensor
+    Returns:
+        M
+    '''
+    singa.floatAddColumn(float(alpha), float(beta), v.singa_tensor,
+                         M.singa_tensor)
     return M
 
 
 def add_row(alpha, v, beta, M):
+    '''Add v to each row of M.
+
+    Denote each row of M as m, m = alpha * v + beta * m
+
+    Args:
+        alpha (float)
+        v (Tensor)
+        beta (float)
+        M (Tensor): 2d tensor
+    Returns:
+        M
+    '''
     singa.floatAddRow(alpha, beta, v.singa_tensor, M.singa_tensor)
     return M
 
 
 def sum_columns(M):
+    '''Sum all columns into a single column.
+
+    Args:
+        M (Tensor): the input 2d tensor.
+
+    Returns:
+        a new Tensor as the resulting column.
+    '''
     assert M.ndim() == 2, 'M.nDim() is supposed to be 2'
-    nb_col = M.shape(0)
-    ret = Tensor((nb_col, 1))
+    ret = Tensor((M.shape[0], 1))
     singa.SumColumns(M.singa_tensor, ret.singa_tensor)
     return ret
 
 
 def sum_rows(M):
+    '''Sum all rows into a single row.
+
+    Args:
+        M (Tensor): the input 2d tensor.
+
+    Returns:
+        a new Tensor as the resulting row.
+    '''
     assert M.ndim() == 2, 'M.nDim() is supposed to be 2'
-    nb_row = M.shape(1)
-    ret = Tensor((1, nb_row))
+    ret = Tensor((1, M.shape[1]))
     singa.SumRows(M.singa_tensor, ret.singa_tensor)
     return ret
 
@@ -500,15 +973,6 @@ def sum_rows(M):
 '''
 
 
-def _tuple_to_vector(tshape):
-    ''' this function converts tuple to std::vector<int>
-    '''
-    vs = singa.Shape(len(tshape))
-    for i in range(len(tshape)):
-        vs[i] = tshape[i]
-    return vs
-
-
 def _call_singa_func(_singa_func, *args):
     ''' this function calls singa global functions that returns Tensor
         and create new python Tensor instance
@@ -516,7 +980,7 @@ def _call_singa_func(_singa_func, *args):
     '''
     new_t = Tensor()
     new_t.singa_tensor = _singa_func(*args)
-    new_t.shape = new_t.singa_tensor.shape()
+    new_t.shape = tuple(new_t.singa_tensor.shape())
     new_t.device = new_t.singa_tensor.device()
     new_t.dtype = new_t.singa_tensor.data_type()
     return new_t


[21/22] incubator-singa git commit: Merge branch commits for doc layout change and commits for other doc and code changes

Posted by wa...@apache.org.
Merge branch commits for doc layout change and commits for other doc and code changes

Conflicts:
	doc/Makefile
	doc/docs.rst
	doc/docs/index.rst
	doc/docs/zh/index.md
	doc/en/conf.py
	examples/index.rst
	src/python/singa/device.py
	src/python/singa/tensor.py


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/72d736a6
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/72d736a6
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/72d736a6

Branch: refs/heads/dev
Commit: 72d736a6ef665e6bea4e60f6d577ae3a38306e8c
Parents: c2173b3 31ae6bd
Author: Wei Wang <wa...@comp.nus.edu.sg>
Authored: Mon Aug 15 21:10:29 2016 +0800
Committer: Wei Wang <wa...@comp.nus.edu.sg>
Committed: Mon Aug 15 21:10:29 2016 +0800

----------------------------------------------------------------------
 doc/Makefile                          | 200 +----------------
 doc/_static/style.css                 |   3 +
 doc/_templates/layout.html            |  58 -----
 doc/build.sh                          |  33 ---
 doc/community/issue-tracking.md       |   9 -
 doc/community/mail-lists.rst          |  10 -
 doc/community/source-repository.md    |  22 --
 doc/community/team-list.rst           |  64 ------
 doc/conf.py                           | 340 -----------------------------
 doc/develop/contribute-code.md        |  60 -----
 doc/develop/contribute-docs.md        |  28 ---
 doc/develop/how-contribute.md         |  11 -
 doc/develop/schedule.rst              |  40 ----
 doc/docs.rst                          |   6 -
 doc/docs/cnn.md                       | 141 ------------
 doc/docs/device.rst                   |  36 ---
 doc/docs/index.rst                    |  15 --
 doc/docs/initializer.rst              |  12 -
 doc/docs/installation.md              | 226 -------------------
 doc/docs/layer.rst                    |  14 --
 doc/docs/loss.rst                     |   7 -
 doc/docs/metric.rst                   |   8 -
 doc/docs/neural-net.md                | 327 ---------------------------
 doc/docs/optimizer.rst                |  11 -
 doc/docs/overview.rst                 |  99 ---------
 doc/docs/software_stack.md            |  99 ---------
 doc/docs/tensor.rst                   |  30 ---
 doc/docs/utils.rst                    |   6 -
 doc/docs/zh/index.md                  |   9 -
 doc/downloads.md                      |  67 ------
 doc/en/_templates/layout.html         |  61 ++++++
 doc/en/community/issue-tracking.md    |   9 +
 doc/en/community/mail-lists.rst       |  10 +
 doc/en/community/source-repository.md |  22 ++
 doc/en/community/team-list.rst        |  64 ++++++
 doc/en/conf.py                        | 339 ++++++++++++++++++++++++++++
 doc/en/develop/contribute-code.md     |  60 +++++
 doc/en/develop/contribute-docs.md     |  28 +++
 doc/en/develop/how-contribute.md      |  11 +
 doc/en/develop/schedule.rst           |  40 ++++
 doc/en/docs.rst                       |   6 +
 doc/en/docs/cnn.md                    | 141 ++++++++++++
 doc/en/docs/device.rst                |  38 ++++
 doc/en/docs/index.rst                 |  10 +
 doc/en/docs/installation.md           | 226 +++++++++++++++++++
 doc/en/docs/neural-net.md             | 327 +++++++++++++++++++++++++++
 doc/en/docs/overview.rst              |  99 +++++++++
 doc/en/docs/software_stack.md         |  99 +++++++++
 doc/en/docs/tensor.rst                |  54 +++++
 doc/en/downloads.md                   |  67 ++++++
 doc/en/index.rst                      | 109 +++++++++
 doc/index.rst                         | 109 ---------
 doc/make.bat                          | 281 ------------------------
 doc/zh/_templates/layout.html         |  61 ++++++
 doc/zh/conf.py                        | 339 ++++++++++++++++++++++++++++
 doc/zh/index.md                       |   9 +
 src/python/singa/device.py            |   1 +
 57 files changed, 2240 insertions(+), 2371 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/72d736a6/doc/Makefile
----------------------------------------------------------------------
diff --cc doc/Makefile
index c6eddf1,436a661..b5282b7
--- a/doc/Makefile
+++ b/doc/Makefile
@@@ -50,8 -26,9 +26,14 @@@ clean
  
  .PHONY: html
  html:
++<<<<<<< HEAD
 +	cp -rf ../examples docs/
 +	$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
++=======
+ 	cp -rf ../examples en/docs/
+ 	$(SPHINXBUILD) -b html  -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) en $(BUILDDIR)/html
+ 	$(SPHINXBUILD) -b html  -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) zh $(BUILDDIR)/html/zh
++>>>>>>> v1doc
  	@echo
  	@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
  

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/72d736a6/doc/en/conf.py
----------------------------------------------------------------------
diff --cc doc/en/conf.py
index 0000000,332a0d1..36080d9
mode 000000,100755..100755
--- a/doc/en/conf.py
+++ b/doc/en/conf.py
@@@ -1,0 -1,339 +1,339 @@@
 -# -*- coding: utf-8 -*-
 -#
 -# incubator-singa documentation build configuration file, created by
 -# sphinx-quickstart on Sat Jul  9 20:36:57 2016.
 -#
 -# This file is execfile()d with the current directory set to its
 -# containing dir.
 -#
 -# Note that not all possible configuration values are present in this
 -# autogenerated file.
 -#
 -# All configuration values have a default; values that are commented out
 -# serve to show the default.
 -
 -# If extensions (or modules to document with autodoc) are in another directory,
 -# add these directories to sys.path here. If the directory is relative to the
 -# documentation root, use os.path.abspath to make it absolute, like shown here.
 -#
 -import os
 -import sys
 -sys.path.insert(0, os.path.abspath('.'))
 -sys.path.insert(1, os.path.abspath('../build/python'))
 -
 -# -- General configuration ------------------------------------------------
 -from recommonmark.parser import CommonMarkParser
 -
 -source_parsers = {
 -    '.md': CommonMarkParser,
 -}
 -
 -# If your documentation needs a minimal Sphinx version, state it here.
 -#
 -# needs_sphinx = '1.0'
 -
 -# Add any Sphinx extension module names here, as strings. They can be
 -# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
 -# ones.
 -extensions = ['sphinx.ext.autodoc', 'sphinx.ext.napoleon']
 -napoleon_google_docstring = True
 -
 -# Add any paths that contain templates here, relative to this directory.
 -templates_path = ['_templates']
 -
 -# The suffix(es) of source filenames.
 -# You can specify multiple suffix as a list of string:
 -#
 -# source_suffix = ['.rst', '.md']
 -source_suffix = ['.rst', '.md']
 -
 -# The encoding of source files.
 -#
 -source_encoding = 'utf-8-sig'
 -
 -# The master toctree document.
 -master_doc = 'index'
 -
 -# General information about the project.
 -project = u'incubator-singa'
 -copyright = u'2016 The Apache Software Foundation. All rights reserved. Apache Singa, Apache, the Apache feather logo, and the Apache Singa project logos are trademarks of The Apache Software Foundation. All other marks mentioned may be trademarks or registered trademarks of their respective owners.'
 -author = u'moaz'
 -
 -# The version info for the project you're documenting, acts as replacement for
 -# |version| and |release|, also used in various other places throughout the
 -# built documents.
 -#
 -# The short X.Y version.
 -version = u'1.0.0'
 -# The full version, including alpha/beta/rc tags.
 -release = u'1.0.0'
 -
 -# The language for content autogenerated by Sphinx. Refer to documentation
 -# for a list of supported languages.
 -#
 -# This is also used if you do content translation via gettext catalogs.
 -# Usually you set "language" from the command line for these cases.
 -language = None
 -
 -# There are two options for replacing |today|: either, you set today to some
 -# non-false value, then it is used:
 -#
 -# today = ''
 -#
 -# Else, today_fmt is used as the format for a strftime call.
 -#
 -# today_fmt = '%B %d, %Y'
 -
 -# List of patterns, relative to source directory, that match files and
 -# directories to ignore when looking for source files.
 -# This patterns also effect to html_static_path and html_extra_path
 -exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
 -
 -# The reST default role (used for this markup: `text`) to use for all
 -# documents.
 -#
 -# default_role = None
 -
 -# If true, '()' will be appended to :func: etc. cross-reference text.
 -#
 -# add_function_parentheses = True
 -
 -# If true, the current module name will be prepended to all description
 -# unit titles (such as .. function::).
 -#
 -# add_module_names = True
 -
 -# If true, sectionauthor and moduleauthor directives will be shown in the
 -# output. They are ignored by default.
 -#
 -# show_authors = False
 -
 -# The name of the Pygments (syntax highlighting) style to use.
 -pygments_style = 'sphinx'
 -
 -# A list of ignored prefixes for module index sorting.
 -# modindex_common_prefix = []
 -
 -# If true, keep warnings as "system message" paragraphs in the built documents.
 -# keep_warnings = False
 -
 -# If true, `todo` and `todoList` produce output, else they produce nothing.
 -todo_include_todos = False
 -
 -
 -# -- Options for HTML output ----------------------------------------------
 -
 -# The theme to use for HTML and HTML Help pages.  See the documentation for
 -# a list of builtin themes.
 -#
 -html_theme = 'sphinx_rtd_theme'
 -
 -# Theme options are theme-specific and customize the look and feel of a theme
 -# further.  For a list of options available for each theme, see the
 -# documentation.
 -#
 -# html_theme_options = {}
 -
 -# Add any paths that contain custom themes here, relative to this directory.
 -# html_theme_path = []
 -
 -# The name for this set of Sphinx documents.
 -# "<project> v<release> documentation" by default.
 -#
 -# html_title = u'Singa v1.0.0'
 -
 -# A shorter title for the navigation bar.  Default is the same as html_title.
 -#
 -# html_short_title = None
 -
 -# The name of an image file (relative to this directory) to place at the top
 -# of the sidebar.
 -#
 -html_logo = 'image/singa.png'
 -
 -# The name of an image file (relative to this directory) to use as a favicon of
 -# the docs.  This file should be a Windows icon file (.ico) being 16x16 or 32x32
 -# pixels large.
 -#
 -# html_favicon = None
 -
 -# Add any paths that contain custom static files (such as style sheets) here,
 -# relative to this directory. They are copied after the builtin static files,
 -# so a file named "default.css" will overwrite the builtin "default.css".
 -html_static_path = ['../_static']
 -
 -# Add any extra paths that contain custom files (such as robots.txt or
 -# .htaccess) here, relative to this directory. These files are copied
 -# directly to the root of the documentation.
 -#
 -# html_extra_path = []
 -
 -# If not None, a 'Last updated on:' timestamp is inserted at every page
 -# bottom, using the given strftime format.
 -# The empty string is equivalent to '%b %d, %Y'.
 -#
 -# html_last_updated_fmt = None
 -
 -# If true, SmartyPants will be used to convert quotes and dashes to
 -# typographically correct entities.
 -#
 -# html_use_smartypants = True
 -
 -# Custom sidebar templates, maps document names to template names.
 -#
 -# html_sidebars = {}
 -
 -# Additional templates that should be rendered to pages, maps page names to
 -# template names.
 -#
 -# html_additional_pages = {}
 -
 -# If false, no module index is generated.
 -#
 -# html_domain_indices = True
 -
 -# If false, no index is generated.
 -#
 -# html_use_index = True
 -
 -# If true, the index is split into individual pages for each letter.
 -#
 -# html_split_index = False
 -
 -# If true, links to the reST sources are added to the pages.
 -#
 -html_show_sourcelink = False
 -
 -# If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
 -#
 -# html_show_sphinx = True
 -
 -# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
 -#
 -# html_show_copyright = True
 -
 -# If true, an OpenSearch description file will be output, and all pages will
 -# contain a <link> tag referring to it.  The value of this option must be the
 -# base URL from which the finished HTML is served.
 -#
 -# html_use_opensearch = ''
 -
 -# This is the file name suffix for HTML files (e.g. ".xhtml").
 -# html_file_suffix = None
 -
 -# Language to be used for generating the HTML full-text search index.
 -# Sphinx supports the following languages:
 -#   'da', 'de', 'en', 'es', 'fi', 'fr', 'hu', 'it', 'ja'
 -#   'nl', 'no', 'pt', 'ro', 'ru', 'sv', 'tr', 'zh'
 -#
 -# html_search_language = 'en'
 -
 -# A dictionary with options for the search language support, empty by default.
 -# 'ja' uses this config value.
 -# 'zh' user can custom change `jieba` dictionary path.
 -#
 -# html_search_options = {'type': 'default'}
 -
 -# The name of a javascript file (relative to the configuration directory) that
 -# implements a search results scorer. If empty, the default will be used.
 -#
 -# html_search_scorer = 'scorer.js'
 -
 -# Output file base name for HTML help builder.
 -htmlhelp_basename = 'Singadoc'
 -
 -# -- Options for LaTeX output ---------------------------------------------
 -
 -latex_elements = {
 -     # The paper size ('letterpaper' or 'a4paper').
 -     #
 -     # 'papersize': 'letterpaper',
 -
 -     # The font size ('10pt', '11pt' or '12pt').
 -     #
 -     # 'pointsize': '10pt',
 -
 -     # Additional stuff for the LaTeX preamble.
 -     #
 -     # 'preamble': '',
 -
 -     # Latex figure (float) alignment
 -     #
 -     # 'figure_align': 'htbp',
 -}
 -
 -# Grouping the document tree into LaTeX files. List of tuples
 -# (source start file, target name, title,
 -#  author, documentclass [howto, manual, or own class]).
 -latex_documents = [
 -    (master_doc, 'incubator-singa.tex', u'incubator-singa Documentation',
 -     u'moaz', 'manual'),
 -]
 -
 -# The name of an image file (relative to this directory) to place at the top of
 -# the title page.
 -#
 -# latex_logo = None
 -
 -# For "manual" documents, if this is true, then toplevel headings are parts,
 -# not chapters.
 -#
 -# latex_use_parts = False
 -
 -# If true, show page references after internal links.
 -#
 -# latex_show_pagerefs = False
 -
 -# If true, show URL addresses after external links.
 -#
 -# latex_show_urls = False
 -
 -# Documents to append as an appendix to all manuals.
 -#
 -# latex_appendices = []
 -
 -# If false, no module index is generated.
 -#
 -# latex_domain_indices = True
 -
 -
 -# -- Options for manual page output ---------------------------------------
 -
 -# One entry per manual page. List of tuples
 -# (source start file, name, description, authors, manual section).
 -man_pages = [
 -    (master_doc, 'incubator-singa', u'incubator-singa Documentation',
 -     [author], 1)
 -]
 -
 -# If true, show URL addresses after external links.
 -#
 -# man_show_urls = False
 -
 -
 -# -- Options for Texinfo output -------------------------------------------
 -
 -# Grouping the document tree into Texinfo files. List of tuples
 -# (source start file, target name, title, author,
 -#  dir menu entry, description, category)
 -texinfo_documents = [
 -    (master_doc, 'incubator-singa', u'incubator-singa Documentation',
 -     author, 'incubator-singa', 'One line description of project.',
 -     'Miscellaneous'),
 -]
 -
 -# Documents to append as an appendix to all manuals.
 -#
 -# texinfo_appendices = []
 -
 -# If false, no module index is generated.
 -#
 -# texinfo_domain_indices = True
 -
 -# How to display URL addresses: 'footnote', 'no', or 'inline'.
 -#
 -# texinfo_show_urls = 'footnote'
 -
 -# If true, do not generate a @detailmenu in the "Top" node's menu.
 -#
 -# texinfo_no_detailmenu = False
++# -*- coding: utf-8 -*-
++#
++# incubator-singa documentation build configuration file, created by
++# sphinx-quickstart on Sat Jul  9 20:36:57 2016.
++#
++# This file is execfile()d with the current directory set to its
++# containing dir.
++#
++# Note that not all possible configuration values are present in this
++# autogenerated file.
++#
++# All configuration values have a default; values that are commented out
++# serve to show the default.
++
++# If extensions (or modules to document with autodoc) are in another directory,
++# add these directories to sys.path here. If the directory is relative to the
++# documentation root, use os.path.abspath to make it absolute, like shown here.
++#
++import os
++import sys
++sys.path.insert(0, os.path.abspath('.'))
++sys.path.insert(1, os.path.abspath('../build/python'))
++
++# -- General configuration ------------------------------------------------
++from recommonmark.parser import CommonMarkParser
++
++source_parsers = {
++    '.md': CommonMarkParser,
++}
++
++# If your documentation needs a minimal Sphinx version, state it here.
++#
++# needs_sphinx = '1.0'
++
++# Add any Sphinx extension module names here, as strings. They can be
++# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
++# ones.
++extensions = ['sphinx.ext.autodoc', 'sphinx.ext.napoleon']
++napoleon_google_docstring = True
++
++# Add any paths that contain templates here, relative to this directory.
++templates_path = ['_templates']
++
++# The suffix(es) of source filenames.
++# You can specify multiple suffixes as a list of strings:
++#
++# source_suffix = ['.rst', '.md']
++source_suffix = ['.rst', '.md']
++
++# The encoding of source files.
++#
++source_encoding = 'utf-8-sig'
++
++# The master toctree document.
++master_doc = 'index'
++
++# General information about the project.
++project = u'incubator-singa'
++copyright = u'2016 The Apache Software Foundation. All rights reserved. Apache Singa, Apache, the Apache feather logo, and the Apache Singa project logos are trademarks of The Apache Software Foundation. All other marks mentioned may be trademarks or registered trademarks of their respective owners.'
++author = u'moaz'
++
++# The version info for the project you're documenting, acts as replacement for
++# |version| and |release|, also used in various other places throughout the
++# built documents.
++#
++# The short X.Y version.
++version = u'1.0.0'
++# The full version, including alpha/beta/rc tags.
++release = u'1.0.0'
++
++# The language for content autogenerated by Sphinx. Refer to documentation
++# for a list of supported languages.
++#
++# This is also used if you do content translation via gettext catalogs.
++# Usually you set "language" from the command line for these cases.
++language = None
++
++# There are two options for replacing |today|: either, you set today to some
++# non-false value, then it is used:
++#
++# today = ''
++#
++# Else, today_fmt is used as the format for a strftime call.
++#
++# today_fmt = '%B %d, %Y'
++
++# List of patterns, relative to source directory, that match files and
++# directories to ignore when looking for source files.
++# These patterns also affect html_static_path and html_extra_path
++exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
++
++# The reST default role (used for this markup: `text`) to use for all
++# documents.
++#
++# default_role = None
++
++# If true, '()' will be appended to :func: etc. cross-reference text.
++#
++# add_function_parentheses = True
++
++# If true, the current module name will be prepended to all description
++# unit titles (such as .. function::).
++#
++# add_module_names = True
++
++# If true, sectionauthor and moduleauthor directives will be shown in the
++# output. They are ignored by default.
++#
++# show_authors = False
++
++# The name of the Pygments (syntax highlighting) style to use.
++pygments_style = 'sphinx'
++
++# A list of ignored prefixes for module index sorting.
++# modindex_common_prefix = []
++
++# If true, keep warnings as "system message" paragraphs in the built documents.
++# keep_warnings = False
++
++# If true, `todo` and `todoList` produce output, else they produce nothing.
++todo_include_todos = False
++
++
++# -- Options for HTML output ----------------------------------------------
++
++# The theme to use for HTML and HTML Help pages.  See the documentation for
++# a list of builtin themes.
++#
++html_theme = 'sphinx_rtd_theme'
++
++# Theme options are theme-specific and customize the look and feel of a theme
++# further.  For a list of options available for each theme, see the
++# documentation.
++#
++# html_theme_options = {}
++
++# Add any paths that contain custom themes here, relative to this directory.
++# html_theme_path = []
++
++# The name for this set of Sphinx documents.
++# "<project> v<release> documentation" by default.
++#
++# html_title = u'Singa v1.0.0'
++
++# A shorter title for the navigation bar.  Default is the same as html_title.
++#
++# html_short_title = None
++
++# The name of an image file (relative to this directory) to place at the top
++# of the sidebar.
++#
++html_logo = 'image/singa.png'
++
++# The name of an image file (relative to this directory) to use as a favicon of
++# the docs.  This file should be a Windows icon file (.ico) being 16x16 or 32x32
++# pixels large.
++#
++# html_favicon = None
++
++# Add any paths that contain custom static files (such as style sheets) here,
++# relative to this directory. They are copied after the builtin static files,
++# so a file named "default.css" will overwrite the builtin "default.css".
++html_static_path = ['../_static']
++
++# Add any extra paths that contain custom files (such as robots.txt or
++# .htaccess) here, relative to this directory. These files are copied
++# directly to the root of the documentation.
++#
++# html_extra_path = []
++
++# If not None, a 'Last updated on:' timestamp is inserted at every page
++# bottom, using the given strftime format.
++# The empty string is equivalent to '%b %d, %Y'.
++#
++# html_last_updated_fmt = None
++
++# If true, SmartyPants will be used to convert quotes and dashes to
++# typographically correct entities.
++#
++# html_use_smartypants = True
++
++# Custom sidebar templates, maps document names to template names.
++#
++# html_sidebars = {}
++
++# Additional templates that should be rendered to pages, maps page names to
++# template names.
++#
++# html_additional_pages = {}
++
++# If false, no module index is generated.
++#
++# html_domain_indices = True
++
++# If false, no index is generated.
++#
++# html_use_index = True
++
++# If true, the index is split into individual pages for each letter.
++#
++# html_split_index = False
++
++# If true, links to the reST sources are added to the pages.
++#
++html_show_sourcelink = False
++
++# If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
++#
++# html_show_sphinx = True
++
++# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
++#
++# html_show_copyright = True
++
++# If true, an OpenSearch description file will be output, and all pages will
++# contain a <link> tag referring to it.  The value of this option must be the
++# base URL from which the finished HTML is served.
++#
++# html_use_opensearch = ''
++
++# This is the file name suffix for HTML files (e.g. ".xhtml").
++# html_file_suffix = None
++
++# Language to be used for generating the HTML full-text search index.
++# Sphinx supports the following languages:
++#   'da', 'de', 'en', 'es', 'fi', 'fr', 'hu', 'it', 'ja'
++#   'nl', 'no', 'pt', 'ro', 'ru', 'sv', 'tr', 'zh'
++#
++# html_search_language = 'en'
++
++# A dictionary with options for the search language support, empty by default.
++# 'ja' uses this config value.
++# 'zh' user can custom change `jieba` dictionary path.
++#
++# html_search_options = {'type': 'default'}
++
++# The name of a javascript file (relative to the configuration directory) that
++# implements a search results scorer. If empty, the default will be used.
++#
++# html_search_scorer = 'scorer.js'
++
++# Output file base name for HTML help builder.
++htmlhelp_basename = 'Singadoc'
++
++# -- Options for LaTeX output ---------------------------------------------
++
++latex_elements = {
++     # The paper size ('letterpaper' or 'a4paper').
++     #
++     # 'papersize': 'letterpaper',
++
++     # The font size ('10pt', '11pt' or '12pt').
++     #
++     # 'pointsize': '10pt',
++
++     # Additional stuff for the LaTeX preamble.
++     #
++     # 'preamble': '',
++
++     # Latex figure (float) alignment
++     #
++     # 'figure_align': 'htbp',
++}
++
++# Grouping the document tree into LaTeX files. List of tuples
++# (source start file, target name, title,
++#  author, documentclass [howto, manual, or own class]).
++latex_documents = [
++    (master_doc, 'incubator-singa.tex', u'incubator-singa Documentation',
++     u'moaz', 'manual'),
++]
++
++# The name of an image file (relative to this directory) to place at the top of
++# the title page.
++#
++# latex_logo = None
++
++# For "manual" documents, if this is true, then toplevel headings are parts,
++# not chapters.
++#
++# latex_use_parts = False
++
++# If true, show page references after internal links.
++#
++# latex_show_pagerefs = False
++
++# If true, show URL addresses after external links.
++#
++# latex_show_urls = False
++
++# Documents to append as an appendix to all manuals.
++#
++# latex_appendices = []
++
++# If false, no module index is generated.
++#
++# latex_domain_indices = True
++
++
++# -- Options for manual page output ---------------------------------------
++
++# One entry per manual page. List of tuples
++# (source start file, name, description, authors, manual section).
++man_pages = [
++    (master_doc, 'incubator-singa', u'incubator-singa Documentation',
++     [author], 1)
++]
++
++# If true, show URL addresses after external links.
++#
++# man_show_urls = False
++
++
++# -- Options for Texinfo output -------------------------------------------
++
++# Grouping the document tree into Texinfo files. List of tuples
++# (source start file, target name, title, author,
++#  dir menu entry, description, category)
++texinfo_documents = [
++    (master_doc, 'incubator-singa', u'incubator-singa Documentation',
++     author, 'incubator-singa', 'One line description of project.',
++     'Miscellaneous'),
++]
++
++# Documents to append as an appendix to all manuals.
++#
++# texinfo_appendices = []
++
++# If false, no module index is generated.
++#
++# texinfo_domain_indices = True
++
++# How to display URL addresses: 'footnote', 'no', or 'inline'.
++#
++# texinfo_show_urls = 'footnote'
++
++# If true, do not generate a @detailmenu in the "Top" node's menu.
++#
++# texinfo_no_detailmenu = False

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/72d736a6/doc/en/docs/installation.md
----------------------------------------------------------------------
diff --cc doc/en/docs/installation.md
index 0000000,8ab617f..5d3c8a2
mode 000000,100755..100755
--- a/doc/en/docs/installation.md
+++ b/doc/en/docs/installation.md
@@@ -1,0 -1,69 +1,226 @@@
 -# Building SINGA from source
 -
 -## Dependencies
 -
 -### Required
 -* Google Protobuf (>=2.5)
 -* BLAS (tested with OpenBLAS >=0.2.10)
 -* CUDA (tested with 6.5, 7.0 and 7.5)
 -* CUDNN (v4 and v5)
 -* cmake (>=2.6)
 -
 -Users must install the above mandatory libraries.
 -Currently CUDA and CUDNN are also mandatory, but it would become optional later.
 -
 -### Optional
 -* Glog
 -* OpenCV (tested with 2.4.8)
 -* LMDB (tested with 0.9)
 -
 -
 -## Instructions
 -
 -Please clone the newest code from [Github](https://github.com/apache/incubator-singa) and execute the following commands,
 -
 -
 -    $ git clone https://github.com/apache/incubator-singa.git
 -    $ cd incubator-singa/
 -    # switch to dev branch
 -    $ git checkout dev
 -
 -
 -If you use CUDA, then [CNMeM](https://github.com/NVIDIA/cnmem) is necessary,
 -which could be downloaded as
 -
 -    $ git submodule init
 -    $ git submodule update
 -
 -
 -### Linux OS
 -
 -GCC (>=4.8.1) is required to compile SINGA on Linux OS.
 -In SINGA_ROOT, execute the following commands for compiling SINGA,
 -
 -    $ mkdir build && cd build
 -    # generate Makefile for compilation
 -    $ cmake ..
 -    # compile SINGA
 -    $ make
 -
 -Note that if you are using CUDNN, you need to let cmake know the paths to CUDNN,
 -
 -    $ export CMAKE_INCLUDE_PATH=<path to cudnn>/include:$CMAKE_INCLUDE_PATH
 -    $ export CMAKE_LIBRARY_PATH=<path to cudnn>/lib64:$CMAKE_LIBRARY_PATH
 -
 -You can use `ccmake ..` to configure the compilation options including using
 -LMDB, GLOG, etc.
 -
 -After compiling SINGA, you can run the unit tests by
 -
 -    $ ./bin/test_singa
 -
 -You can see all the testing cases with testing results. If SINGA passes all
 -tests, then you have successfully installed SINGA. Please proceed to try the examples!
 -
 -
 -### MacOS
 -
 -
 -### Windows
++# Installation
++
++## Dependencies
++
++### Required
++* google protobuf (>=2.5,<3)
++* blas (tested with openblas >=0.2.10)
++* cmake (>=2.6)
++
++
++### Optional
++* glog
++* opencv (tested with 2.4.8)
++* lmdb (tested with 0.9)
++* cuda (tested with 6.5, 7.0 and 7.5)
++* cudnn (v4 and v5)
++
++PySINGA has additional dependencies
++
++* python(==2.7)
++* pip(>=1.5)
++* swig(>=3.0)
++* numpy(>=1.11.0)
++* openblas (>=0.2.10)
++
++Users are encouraged to install CUDA and [cudnn](https://developer.nvidia.com/cudnn) for running SINGA on GPUs to
++get better performance.
++Most of the dependent libraries could be installed via package managers like
++apt-get or homebrew.
++
++    # for ubuntu users, tested on 14.04
++    sudo apt-get install libprotobuf-dev libopencv-dev protobuf-compiler libgoogle-glog-dev liblmdb-dev python2.7-dev python-pip python-numpy
++
++    # for Mac OS users
++    brew install -vd glog lmdb
++    brew tap homebrew/science
++    brew install opencv
++    brew install openblas
++    brew tap homebrew/python
++    brew install python
++    brew install numpy  --with-openblas
++
++
++## Install PySINGA
++
++### From wheel
++
++After installing the dependencies for SINGA and PySINGA, please download the correct binary:
++
++    # Ubuntu/Linux 64-bit, CPU only, Python 2.7, Protobuf 2.5
++    $ export SINGA_WHEEL_URL=http://comp.nus.edu.sg/~dbsystem/singa/assets/file/pb2.5/singa-1.0.0-cp27-none-linux_x86_64.whl
++
++    # Ubuntu/Linux 64-bit, CPU only, Python 2.7, Protobuf 2.6
++    $ export SINGA_WHEEL_URL=http://comp.nus.edu.sg/~dbsystem/singa/assets/file/pb2.6/singa-1.0.0-cp27-none-linux_x86_64.whl
++
++    # Ubuntu/Linux 64-bit, GPU enabled, Python 2.7, Protobuf 2.5, CUDA toolkit 7.5 and CuDNN v5
++    $ export SINGA_WHEEL_URL=http://comp.nus.edu.sg/~dbsystem/singa/assets/file/pb2.5-cuda7.5-cudnn5/singa-1.0.0-cp27-none-linux_x86_64.whl
++
++    # Ubuntu/Linux 64-bit, GPU enabled, Python 2.7, Protobuf 2.6, CUDA toolkit 7.5 and CuDNN v5
++    $ export SINGA_WHEEL_URL=http://comp.nus.edu.sg/~dbsystem/singa/assets/file/pb2.6-cuda7.5-cudnn5/singa-1.0.0-cp27-none-linux_x86_64.whl
++
++Then, run the following command
++
++    $ sudo pip install --upgrade $SINGA_WHEEL_URL
++
++If you do not have sudo rights, you can run `pip install` in a python virtual environment.
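++For example, a sketch of that route (assuming `virtualenv` is already installed; the
++environment name is arbitrary):
++
++    $ virtualenv ~/singa-env
++    $ source ~/singa-env/bin/activate
++    $ pip install --upgrade $SINGA_WHEEL_URL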
++
++
++### From source
++
++Please compile SINGA from source (see the next section) with the 'USE_PYTHON' option on,
++and then run the following commands,
++
++    # under the build directory
++    $ cd python
++    $ sudo pip install .
++
++If you are using a virtual environment, you can omit `sudo`.
++
++Developers can build the wheel file via
++
++    # under the build directory
++    $ cd python
++    $ python setup.py bdist_wheel
++
++
++The generated wheel file is under the "dist" directory.
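++
++The wheel can then be installed like any other, e.g. (the exact file name depends
++on the SINGA version and platform):
++
++    $ pip install dist/singa-1.0.0-cp27-none-linux_x86_64.whl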
++
++
++## Build SINGA from source
++
++Please clone the newest code from [Github](https://github.com/apache/incubator-singa) and execute the following commands,
++
++    $ git clone https://github.com/apache/incubator-singa.git
++    $ cd incubator-singa/
++
++If you use CUDA, then [CNMeM](https://github.com/NVIDIA/cnmem) is necessary,
++which could be downloaded as
++
++    $ git submodule init
++    $ git submodule update
++
++
++### Linux & MacOS
++
++GCC (>=4.8.1) is required to compile SINGA on Linux.
++For Mac OS users, you can use either GCC or Clang.
++
++In SINGA_ROOT, execute the following commands for compiling SINGA,
++
++    $ mkdir build && cd build
++    $ cmake ..
++    $ make
++    $ make install
++
++Note that if you are using CUDNN and it is not installed under the system default
++folders, you need to let cmake know the paths to CUDNN,
++
++    $ export CMAKE_INCLUDE_PATH=<path to cudnn>/include:$CMAKE_INCLUDE_PATH
++    $ export CMAKE_LIBRARY_PATH=<path to cudnn>/lib64:$CMAKE_LIBRARY_PATH
++
++You can use `ccmake ..` to configure the compilation options, including
++generating the python binding and changing the installation folder.
++If the dependent libraries are not in the system default paths, you need to export
++the following environment variables
++
++    export CMAKE_INCLUDE_PATH=<path to your header file folder>
++    export CMAKE_LIBRARY_PATH=<path to your lib file folder>
++
++After compiling SINGA, you can run the unit tests by
++
++    $ ./bin/test_singa
++
++You can see all the testing cases with testing results. If SINGA passes all
++tests, then you have successfully installed SINGA. Please proceed to try the examples!
++
++
++### Windows
++To be added.
++
++
++## FAQ
++
++* Q: Running `cmake ..` fails because it cannot find the dependent libraries.
++
++    A: If you haven't installed the libraries, please install them. If you installed
++    the libraries in a folder that is outside of the system folder, e.g. /usr/local,
++    please export the following variables
++
++        export CMAKE_INCLUDE_PATH=<path to your header file folder>
++        export CMAKE_LIBRARY_PATH=<path to your lib file folder>
++
++
++* Q: Error from `make`, e.g., at the linking phase
++
++    A: If your libraries are in folders other than the system default paths, you need
++    to export the following variables
++
++        $ export LIBRARY_PATH=<path to your lib file folder>
++        $ export LD_LIBRARY_PATH=<path to your lib file folder>
++
++
++* Q: Error from header files, e.g. 'cblas.h no such file or directory exists'
++
++    A: You need to add the folder containing cblas.h to CPLUS_INCLUDE_PATH,
++    e.g.,
++
++        $ export CPLUS_INCLUDE_PATH=/opt/OpenBLAS/include:$CPLUS_INCLUDE_PATH
++
++* Q: While compiling SINGA, I get the error `SSE2 instruction set not enabled`
++
++    A: You can try the following command:
++
++        $ make CFLAGS='-msse2' CXXFLAGS='-msse2'
++
++* Q: I get `ImportError: cannot import name enum_type_wrapper` from google.protobuf.internal when I try to import .py files.
++
++    A: You need to install the python binding of protobuf, which could be installed via
++
++        $ sudo apt-get install python-protobuf
++
++    or from source
++
++        $ cd /PROTOBUF/SOURCE/FOLDER
++        $ cd python
++        $ python setup.py build
++        $ python setup.py install
++
++* Q: When I build OpenBLAS from source, I am told that I need a Fortran compiler.
++
++    A: You can compile OpenBLAS by
++
++        $ make ONLY_CBLAS=1
++
++    or install it using
++
++        $ sudo apt-get install libopenblas-dev
++
++* Q: When I build protocol buffer, it reports that GLIBC++_3.4.20 is not found in /usr/lib64/libstdc++.so.6.
++
++    A: This means the linker found libstdc++.so.6 but that library
++    belongs to an older version of GCC than was used to compile and link the
++    program. The program depends on code defined in
++    the newer libstdc++ that belongs to the newer version of GCC, so the linker
++    must be told how to find the newer libstdc++ shared library.
++    The simplest way to fix this is to find the correct libstdc++ and export it to
++    LD_LIBRARY_PATH. For example, if GLIBC++_3.4.20 is listed in the output of the
++    following command,
++
++        $ strings /usr/local/lib64/libstdc++.so.6|grep GLIBC++
++
++    then you just set your environment variable as
++
++        $ export LD_LIBRARY_PATH=/usr/local/lib64:$LD_LIBRARY_PATH
++
++* Q: When I build glog, it reports "src/logging_unittest.cc:83:20: error: 'gflags' is not a namespace-name"
++
++    A: It may be that you installed gflags with a different namespace (e.g., "google"), so glog cannot find the 'gflags' namespace.
++    Since gflags is not required to build glog, you can change the configure.ac file to ignore gflags:
++
++        1. cd to glog src directory
++        2. change line 125 of configure.ac  to "AC_CHECK_LIB(gflags, main, ac_cv_have_libgflags=0, ac_cv_have_libgflags=0)"
++        3. autoreconf
++
++    After this, you can build glog again.
++

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/72d736a6/src/python/singa/device.py
----------------------------------------------------------------------
diff --cc src/python/singa/device.py
index 65824c2,eff6783..2d93823
--- a/src/python/singa/device.py
+++ b/src/python/singa/device.py
@@@ -114,9 -114,6 +114,10 @@@ def create_cuda_gpu_on(device_id)
      return devices[0]
  
  
 +default_device = singa.Platform.GetDefaultDevice()
 +
 +
  def get_default_device():
      '''Get the default host device which is a CppCPU device'''
 -    return singa.Platform.GetDefaultDevice()
 +    return default_device
++
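
A minimal usage sketch of this change (assuming the PySINGA package is importable;
repeated calls now return the same cached CppCPU instance):

    from singa import device

    dev1 = device.get_default_device()
    dev2 = device.get_default_device()
    assert dev1 is dev2  # both refer to the module-level default_device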



[20/22] incubator-singa git commit: SINGA-237 New documentation files for SINGA v1.0

Posted by wa...@apache.org.
SINGA-237 New documentation files for SINGA v1.0

Update the layer identifier. If the implementation is transparent to devices,
then it has an extra identifier 'singa' besides the specific identifiers,
i.e., 'singacpp', 'singacl' and 'singacuda'.
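
For example, a hypothetical sketch against the PySINGA layer API (the Dropout
arguments shown here are assumptions for illustration):

    from singa import layer

    # device-transparent implementation, resolved via the extra 'singa' identifier
    layer.engine = 'singa'
    drop = layer.Dropout('drop1', p=0.5, input_sample_shape=(64,))

    # device-specific implementation, e.g. backed by cuDNN
    layer.engine = 'cudnn'
    drop_gpu = layer.Dropout('drop2', p=0.5, input_sample_shape=(64,))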


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/c2173b30
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/c2173b30
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/c2173b30

Branch: refs/heads/dev
Commit: c2173b3097a6f38ff2a44f48cc250219ad41b8d4
Parents: 5d20d35
Author: Wei Wang <wa...@comp.nus.edu.sg>
Authored: Mon Aug 15 20:46:24 2016 +0800
Committer: Wei Wang <wa...@comp.nus.edu.sg>
Committed: Mon Aug 15 21:04:32 2016 +0800

----------------------------------------------------------------------
 src/model/layer/convolution.cc |  1 -
 src/model/layer/pooling.cc     |  1 -
 src/python/singa/layer.py      | 16 ++++++++++------
 3 files changed, 10 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/c2173b30/src/model/layer/convolution.cc
----------------------------------------------------------------------
diff --git a/src/model/layer/convolution.cc b/src/model/layer/convolution.cc
index 0d1751d..52e9d93 100644
--- a/src/model/layer/convolution.cc
+++ b/src/model/layer/convolution.cc
@@ -23,7 +23,6 @@
 namespace singa {
 using std::vector;
 
-RegisterLayerClass(singa_convolution, Convolution);
 RegisterLayerClass(singacpp_convolution, Convolution);
 void Convolution::Setup(const Shape &in_sample, const LayerConf &conf) {
   Layer::Setup(in_sample, conf);

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/c2173b30/src/model/layer/pooling.cc
----------------------------------------------------------------------
diff --git a/src/model/layer/pooling.cc b/src/model/layer/pooling.cc
index 23969da..a18f9de 100644
--- a/src/model/layer/pooling.cc
+++ b/src/model/layer/pooling.cc
@@ -20,7 +20,6 @@
 #include "singa/model/layer.h"
 namespace singa {
 
-RegisterLayerClass(singa_pooling, Pooling);
 RegisterLayerClass(singacpp_pooling, Pooling);
 void Pooling::Setup(const Shape& in_sample, const LayerConf& conf) {
   Layer::Setup(in_sample, conf);

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/c2173b30/src/python/singa/layer.py
----------------------------------------------------------------------
diff --git a/src/python/singa/layer.py b/src/python/singa/layer.py
index a9f3826..86ba836 100644
--- a/src/python/singa/layer.py
+++ b/src/python/singa/layer.py
@@ -56,7 +56,8 @@ For example, CudnnConvolution layer is identified by 'cudnn_convolution';
 Some layers' implementations use only Tensor functions, therefore they are
 transparent to the underlying devices. For these layers, they would have
 multiple identifiers, e.g., singacpp_dropout, singacuda_dropout and
-singacl_dropout are all for the Dropout layer.
+singacl_dropout are all for the Dropout layer. In addition, such a layer has an
+extra identifier 'singa', i.e. 'singa_dropout' also stands for the Dropout layer.
 
 engine is case insensitive. Each python layer would create the correct specific
 layer using the engine attribute.
@@ -439,7 +440,8 @@ class BatchNormalization(Layer):
         self.param_specs.append(_construct_param_specs_from_dict(beta_specs))
         self.param_specs.append(_construct_param_specs_from_dict(mean_specs))
         self.param_specs.append(_construct_param_specs_from_dict(var_specs))
-        _check_engine(engine, ['cudnn', 'singacpp', 'singacuda', 'singacl'])
+        _check_engine(engine, ['cudnn', 'singa', 'singacpp', 'singacuda',
+                               'singacl'])
         self.layer = _create_layer(engine, 'BatchNorm')
         if input_sample_shape is not None:
             self.setup(input_sample_shape)
@@ -466,7 +468,8 @@ class LRN(Layer):
         # TODO(wangwei) enable mode = 'within_channel'
         assert mode == 'cross_channel', 'only support mode="across_channel"'
         conf.norm_region = model_pb2.LRNConf.ACROSS_CHANNELS
-        _check_engine(engine, ['cudnn', 'singacpp', 'singacuda', 'singacl'])
+        _check_engine(engine, ['cudnn', 'singa', 'singacpp', 'singacuda',
+                               'singacl'])
         self.layer = _create_layer(engine, 'LRN')
         if input_sample_shape is not None:
             self.setup(input_sample_shape)
@@ -555,7 +558,8 @@ class Dropout(Layer):
         # 'cudnn' works for v>=5.0
         #  if engine.lower() == 'cudnn':
         #      engine = 'cuda'
-        _check_engine(engine, ['cudnn', 'singacpp', 'singacuda', 'singacl'])
+        _check_engine(engine, ['cudnn', 'singa', 'singacpp', 'singacuda',
+                               'singacl'])
         self.layer = _create_layer(engine, 'Dropout')
         if input_sample_shape is not None:
             self.setup(input_sample_shape)
@@ -590,7 +594,8 @@ class Softmax(Layer):
         super(Softmax, self).__init__(name)
         # conf = self.conf.softmax_conf
         # conf.axis = axis
-        _check_engine(engine, ['cudnn', 'singacpp', 'singacl', 'singacuda'])
+        _check_engine(engine, ['cudnn', 'singa', 'singacpp', 'singacl',
+                               'singacuda'])
         self.layer = _create_layer(engine, 'Softmax')
         if input_sample_shape is not None:
             self.setup(input_sample_shape)
@@ -820,7 +825,6 @@ def _construct_param_specs_from_dict(specs):
         a ParamSpec object
     """
     conf = model_pb2.ParamSpec()
-    print 'convert', specs
     if 'name' in specs:
         conf.name = specs['name']
     if 'lr_mult' in specs:


[10/22] incubator-singa git commit: SINGA-237 New documentation files for SINGA v1.0

Posted by wa...@apache.org.
SINGA-237 New documentation files for SINGA v1.0

Update installation.md.
Update link flags for MacOS.


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/30731ee4
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/30731ee4
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/30731ee4

Branch: refs/heads/dev
Commit: 30731ee412f85c80f2d8ce81671eaa7a84d53524
Parents: d3a57cf
Author: xiezl <xi...@comp.nus.edu.sg>
Authored: Mon Aug 15 16:42:25 2016 +0800
Committer: xiezl <xi...@comp.nus.edu.sg>
Committed: Mon Aug 15 16:42:25 2016 +0800

----------------------------------------------------------------------
 doc/docs/installation.md | 20 +++++++++++---------
 test/CMakeLists.txt      |  5 ++++-
 2 files changed, 15 insertions(+), 10 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/30731ee4/doc/docs/installation.md
----------------------------------------------------------------------
diff --git a/doc/docs/installation.md b/doc/docs/installation.md
index 8ab617f..4cf4ea7 100755
--- a/doc/docs/installation.md
+++ b/doc/docs/installation.md
@@ -36,24 +36,29 @@ which could be downloaded as
     $ git submodule update
 
 
-### Linux OS
+### Linux & MacOS 
+
+GCC (>=4.8.1) is required to compile SINGA on Linux.
+On MacOS, you can follow the same steps with the gcc compiler.
+Alternatively, you can also install SINGA with the clang compiler by following the
+commands in this section.
 
-GCC (>=4.8.1) is required to compile SINGA on Linux OS.
 In SINGA_ROOT, execute the following commands for compiling SINGA,
 
     $ mkdir build && cd build
-    # generate Makefile for compilation
     $ cmake ..
-    # compile SINGA
     $ make
 
-Note that if you are using CUDNN, you need to let cmake know the paths to CUDNN,
+Note that if you are using CUDNN and it is not installed under the system default
+folders, you need to let cmake know the paths to CUDNN,
 
     $ export CMAKE_INCLUDE_PATH=<path to cudnn>/include:$CMAKE_INCLUDE_PATH
     $ export CMAKE_LIBRARY_PATH=<path to cudnn>/lib64:$CMAKE_LIBRARY_PATH
 
 You can use `ccmake ..` to configure the compilation options including using
-LMDB, GLOG, etc.
+LMDB, GLOG, etc. In addition, you can set the proper search paths for the
+dependent libraries.
 
 After compiling SINGA, you can run the unit tests by
 
@@ -63,7 +68,4 @@ You can see all the testing cases with testing results. If SINGA passes all
 tests, then you have successfully installed SINGA. Please proceed to try the examples!
 
 
-### MacOS
-
-
 ### Windows

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/30731ee4/test/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index f196928..6e7dd84 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -23,5 +23,8 @@ ADD_DEPENDENCIES(test_singa singa_core singa_utils)
 #MESSAGE(STATUS "link libs" ${singa_linker_libs})
 TARGET_LINK_LIBRARIES(test_singa gtest singa_core singa_utils singa_model
     singa_io proto protobuf ${SINGA_LINKER_LIBS})
-SET_TARGET_PROPERTIES(test_singa PROPERTIES LINK_FLAGS "${LINK_FLAGS} -pthread ")
+IF(UNIX AND (NOT APPLE))
+    LIST(APPEND LINK_FLAGS "-pthread")
+ENDIF()
+SET_TARGET_PROPERTIES(test_singa PROPERTIES LINK_FLAGS "${LINK_FLAGS}")
 


[07/22] incubator-singa git commit: SINGA-237 New documentation files for SINGA v1.0

Posted by wa...@apache.org.
SINGA-237 New documentation files for SINGA v1.0

Updated the comments in the Python files so that Sphinx autodoc can generate the Python API docs.

Fixed a bug in the optimizer which ignored the momentum value.
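
A hypothetical usage sketch of the affected API (the constructor arguments below
are assumptions for illustration; the real signatures are in
src/python/singa/optimizer.py):

    from singa import optimizer

    # momentum is now honored when computing parameter updates
    sgd = optimizer.SGD(momentum=0.9, weight_decay=1e-4)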


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/33992c90
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/33992c90
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/33992c90

Branch: refs/heads/dev
Commit: 33992c90191021451c9286ad28ad6140b80a9bd9
Parents: bc822cd
Author: Wei Wang <wa...@comp.nus.edu.sg>
Authored: Fri Aug 12 14:45:41 2016 +0800
Committer: Wei Wang <wa...@gmail.com>
Committed: Sun Aug 14 23:46:17 2016 +0800

----------------------------------------------------------------------
 cmake/Dependencies.cmake        |   5 +-
 doc/conf.py                     |  14 +-
 doc/docs/device.rst             |  29 +-
 doc/docs/index.rst              |   6 +
 doc/docs/initializer.rst        |  12 +
 doc/docs/layer.rst              |  14 +
 doc/docs/loss.rst               |   7 +
 doc/docs/metric.rst             |   8 +
 doc/docs/optimizer.rst          |  11 +
 doc/docs/tensor.md              |   7 -
 doc/docs/tensor.rst             |  30 ++
 doc/docs/utils.rst              |   6 +
 doc/index.rst                   |  28 +-
 examples/index.rst              |   6 +
 src/python/singa/device.py      |  31 ++
 src/python/singa/initializer.py |  86 ++++-
 src/python/singa/layer.py       | 417 ++++++++++++++----------
 src/python/singa/loss.py        | 105 +++++-
 src/python/singa/metric.py      |  49 ++-
 src/python/singa/optimizer.py   | 286 ++++++++--------
 src/python/singa/tensor.py      | 608 ++++++++++++++++++++++++++++++-----
 21 files changed, 1332 insertions(+), 433 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/33992c90/cmake/Dependencies.cmake
----------------------------------------------------------------------
diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake
index eb729db..c03c81e 100644
--- a/cmake/Dependencies.cmake
+++ b/cmake/Dependencies.cmake
@@ -61,12 +61,13 @@ IF(USE_OPENCV)
     MESSAGE(STATUS "Found OpenCV_${OpenCV_VERSION} at ${OpenCV_INCLUDE_DIRS}")
     INCLUDE_DIRECTORIES(SYSTEM ${OpenCV_INCLUDE_DIRS})
     LIST(APPEND SINGA_LINKER_LIBS ${OpenCV_LIBRARIES})
-ENDIF()    
+ENDIF()
 
 #LIST(APPEND SINGA_LINKER_LIBS "/home/wangwei/local/lib/libopenblas.so")
 #MESSAGE(STATUS "link lib : " ${SINGA_LINKER_LIBS})
 
 IF(USE_PYTHON)
-    FIND_PACKAGE(PythonLibs REQUIRED)
+    FIND_PACKAGE(PythonLibs 2.7 REQUIRED)
+    FIND_PACKAGE(PythonInterp 2.7 REQUIRED)
     FIND_PACKAGE(SWIG 3.0 REQUIRED)
 ENDIF()

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/33992c90/doc/conf.py
----------------------------------------------------------------------
diff --git a/doc/conf.py b/doc/conf.py
index 20ba51a..9f52d16 100755
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -19,7 +19,8 @@
 import os
 import sys
 sys.path.insert(0, os.path.abspath('.'))
-sys.path.insert(1, '../src/python/singa/')
+sys.path.insert(1, os.path.abspath('../build/python'))
+#autodoc_mock_imports = ['singa.device', 'singa.tensor', 'singa.layer']
 
 # -- General configuration ------------------------------------------------
 from recommonmark.parser import CommonMarkParser
@@ -35,9 +36,8 @@ source_parsers = {
 # Add any Sphinx extension module names here, as strings. They can be
 # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
 # ones.
-extensions = [
-'sphinx.ext.autodoc'
-]
+extensions = ['sphinx.ext.autodoc', 'sphinx.ext.napoleon']
+napoleon_google_docstring = True
 
 # Add any paths that contain templates here, relative to this directory.
 templates_path = ['_templates']
@@ -50,7 +50,7 @@ source_suffix = ['.rst', '.md']
 
 # The encoding of source files.
 #
-# source_encoding = 'utf-8-sig'
+source_encoding = 'utf-8-sig'
 
 # The master toctree document.
 master_doc = 'index'
@@ -150,7 +150,7 @@ html_theme = 'sphinx_rtd_theme'
 # The name of an image file (relative to this directory) to place at the top
 # of the sidebar.
 #
-html_logo = '/singa.png'
+html_logo = 'image/singa.png'
 
 # The name of an image file (relative to this directory) to use as a favicon of
 # the docs.  This file should be a Windows icon file (.ico) being 16x16 or 32x32
@@ -203,7 +203,7 @@ html_static_path = ['_static']
 
 # If true, links to the reST sources are added to the pages.
 #
-html_show_sourcelink = False
+# html_show_sourcelink = True
 
 # If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
 #

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/33992c90/doc/docs/device.rst
----------------------------------------------------------------------
diff --git a/doc/docs/device.rst b/doc/docs/device.rst
index aa5defb..53faf48 100644
--- a/doc/docs/device.rst
+++ b/doc/docs/device.rst
@@ -2,7 +2,10 @@ Device
 =======
 
 
-The Device abstract represent a hardware device with memory and compuation units.
+The Device abstraction represents any hardware device with memory and computation units.
+All `Tensor operations <tensor.html>`_ are scheduled by the resident device for execution.
+Tensor memory is also managed by the device's memory manager. Therefore, optimization
+of memory and execution is implemented in the Device class.
 
 Specific devices
 ----------------
 Currently, SINGA has three Device implementations,
 3. OpenclGPU for a GPU card which runs OpenCL code
 
 
-Create devices
----------------
-
 Python API
-~~~~~~~~~~
-
-.. autofunction:: device.create_cuda_gpus
-
-.. autofunction:: device.create_cuda_gpus_on
-
-.. autofunction:: device.create_cuda_gpu_on
-
-.. autofunction:: device.get_default_device
+----------
 
+.. automodule:: singa.device
+   :members: create_cuda_gpus, create_cuda_gpus_on, create_cuda_gpu_on, get_default_device
 
-The following code shows how to create devices,
 
-.. code:: python
+The following code provides examples of creating devices::
 
    from singa import device
    cuda = device.create_cuda_gpu_on(0)  # use GPU card of ID 0
@@ -39,9 +32,5 @@ The following code shows how to create devices,
    ary2 = device.create_cuda_gpus_on([0,2])  # create 2 devices on ID 0 and 2
 
 
-
 CPP API
-~~~~~~~
-
-
-
+---------

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/33992c90/doc/docs/index.rst
----------------------------------------------------------------------
diff --git a/doc/docs/index.rst b/doc/docs/index.rst
index 8a74976..2294054 100644
--- a/doc/docs/index.rst
+++ b/doc/docs/index.rst
@@ -6,4 +6,10 @@ English
    installation
    software_stack
    device
+   tensor
+   layer
+   initializer
+   loss
+   metric
+   optimizer
    examples

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/33992c90/doc/docs/initializer.rst
----------------------------------------------------------------------
diff --git a/doc/docs/initializer.rst b/doc/docs/initializer.rst
new file mode 100644
index 0000000..a190702
--- /dev/null
+++ b/doc/docs/initializer.rst
@@ -0,0 +1,12 @@
+Initializer
+===========
+
+Python API
+----------
+
+.. automodule:: singa.initializer
+   :members:
+   :member-order: bysource
+
+CPP API
+--------

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/33992c90/doc/docs/layer.rst
----------------------------------------------------------------------
diff --git a/doc/docs/layer.rst b/doc/docs/layer.rst
new file mode 100644
index 0000000..62ef3c3
--- /dev/null
+++ b/doc/docs/layer.rst
@@ -0,0 +1,14 @@
+Layer
+======
+
+Python API
+-----------
+.. automodule:: singa.layer
+   :members:
+   :member-order: bysource
+   :show-inheritance:
+   :undoc-members:
+
+
+CPP API
+--------

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/33992c90/doc/docs/loss.rst
----------------------------------------------------------------------
diff --git a/doc/docs/loss.rst b/doc/docs/loss.rst
new file mode 100644
index 0000000..27872dd
--- /dev/null
+++ b/doc/docs/loss.rst
@@ -0,0 +1,7 @@
+Loss
+=========
+
+
+.. automodule:: singa.loss
+   :members:
+   :show-inheritance:

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/33992c90/doc/docs/metric.rst
----------------------------------------------------------------------
diff --git a/doc/docs/metric.rst b/doc/docs/metric.rst
new file mode 100644
index 0000000..35fa24e
--- /dev/null
+++ b/doc/docs/metric.rst
@@ -0,0 +1,8 @@
+Metric
+=========
+
+
+.. automodule:: singa.metric
+   :members:
+   :show-inheritance:
+   :member-order: bysource

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/33992c90/doc/docs/optimizer.rst
----------------------------------------------------------------------
diff --git a/doc/docs/optimizer.rst b/doc/docs/optimizer.rst
new file mode 100644
index 0000000..486c01e
--- /dev/null
+++ b/doc/docs/optimizer.rst
@@ -0,0 +1,11 @@
+Optimizer
+=========
+
+
+.. automodule:: singa.optimizer
+   :members:
+   :member-order: bysource
+   :show-inheritance:
+   :undoc-members:
+
+

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/33992c90/doc/docs/tensor.md
----------------------------------------------------------------------
diff --git a/doc/docs/tensor.md b/doc/docs/tensor.md
deleted file mode 100644
index eaf8362..0000000
--- a/doc/docs/tensor.md
+++ /dev/null
@@ -1,7 +0,0 @@
-# Tensor
-
-
-##
-
-
-##

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/33992c90/doc/docs/tensor.rst
----------------------------------------------------------------------
diff --git a/doc/docs/tensor.rst b/doc/docs/tensor.rst
new file mode 100644
index 0000000..ff6142e
--- /dev/null
+++ b/doc/docs/tensor.rst
@@ -0,0 +1,30 @@
+Tensor
+========
+
+Each Tensor instance is a multi-dimensional array allocated on a specific
+Device instance. Tensor instances store variables and provide
+linear algebra operations over different types of hardware devices without
+requiring user awareness of the backend. Note that users need to make sure
+the tensor operands are allocated on the same device, except for copy functions.
+
+
+Tensor implementation
+---------------------
+
+SINGA has three different sets of implementations of Tensor functions, one for each
+type of Device.
+
+* 'tensor_math_cpp.h' implements operations using Cpp (with CBLAS) for CppCPU devices.
+* 'tensor_math_cuda.h' implements operations using Cuda (with cuBLAS) for CudaGPU devices.
+* 'tensor_math_opencl.h' implements operations using OpenCL for OpenclGPU devices.
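+
+For example, a short sketch (assuming the PySINGA API documented below; the method
+names here are assumptions for illustration)::
+
+    from singa import tensor, device
+
+    host = device.get_default_device()
+    x = tensor.Tensor((2, 3), host)  # allocated on the host CppCPU device
+    x.gaussian(0.0, 1.0)             # executed by the resident device's backend
+    y = x.clone()                    # y resides on the same device as x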
+
+Python API
+----------
+
+
+.. automodule:: singa.tensor
+   :members:
+
+
+CPP API
+---------

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/33992c90/doc/docs/utils.rst
----------------------------------------------------------------------
diff --git a/doc/docs/utils.rst b/doc/docs/utils.rst
new file mode 100644
index 0000000..5306719
--- /dev/null
+++ b/doc/docs/utils.rst
@@ -0,0 +1,6 @@
+Misc.
+=========
+
+
+.. automodule:: singa.utils
+   :members:

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/33992c90/doc/index.rst
----------------------------------------------------------------------
diff --git a/doc/index.rst b/doc/index.rst
index ec727b1..50c65d7 100755
--- a/doc/index.rst
+++ b/doc/index.rst
@@ -7,9 +7,9 @@ Welcome to Apache Singa
 =======================
 
 Recent News
-===========
+-----------
 
-* The **third release** is now available, 20 April, 2016. `Download SINGA v0.3.0 <downloads.html>`_ 
+* The **third release** is now available, 20 April, 2016. `Download SINGA v0.3.0 <downloads.html>`_
 
 * The **second release** is now available, 14 Jan, 2016. `Download SINGA v0.2.0 <downloads.html>`_.
 
@@ -34,7 +34,7 @@ Recent News
 * SINGA has been accepted by `Apache Incubator <http://incubator.apache.org/>`_, 17 March, 2015.
 
 Getting Started
-===============
+---------------
 * The `Introduction <docs/overview.html>`_ page gives an overview of SINGA.
 
 * The `Installation <docs/installation.html>`_ guide describes details on downloading and installing SINGA.
@@ -42,7 +42,7 @@ Getting Started
 * Please follow the `Quick Start <docs/quick-start.html>`_ guide to run simple applications on SINGA.
 
 Documentation
-=============
+-------------
 
 * Documentations are listed `here <docs.html>`_.
 
@@ -51,8 +51,8 @@ Documentation
 * Research publication list is available `here <http://www.comp.nus.edu.sg/~dbsystem/singa/research/publication/>`_.
 
 How to contribute
-=================
-  
+----------------------
+
 * Please subscribe to our development mailing list dev-subscribe@singa.incubator.apache.org.
 
 * If you find any issues using SINGA, please report it to the `Issue Tracker <https://issues.apache.org/jira/browse/singa>`_.
@@ -62,17 +62,17 @@ How to contribute
 More details on contributing to SINGA are described `here <develop/how-contribute.html>`_.
 
 Citing SINGA
-============
+------------
 
 Please cite the following two papers if you use SINGA in your research:
 
 * B. C. Ooi, K.-L. Tan, S. Wang, W. Wang, Q. Cai, G. Chen, J. Gao, Z. Luo, A. K. H. Tung, Y. Wang, Z. Xie, M. Zhang, and K. Zheng. `SINGA: A distributed deep learning platform <http://www.comp.nus.edu.sg/~ooibc/singaopen-mm15.pdf>`_. ACM Multimedia (Open Source Software Competition) 2015 (`BibTex <http://www.comp.nus.edu.sg/~dbsystem/singa//assets/file/bib-oss.txt>`_).
 
-* W. Wang, G. Chen, T. T. A. Dinh, B. C. Ooi, K.-L.Tan, J. Gao, and S. Wang. `SINGA: putting deep learning in the hands of multimedia users <http://www.comp.nus.edu.sg/~ooibc/singa-mm15.pdf>`_. ACM Multimedia 2015 (`BibTex <http://www.comp.nus.edu.sg/~dbsystem/singa//assets/file/bib-singa.txt>`_, `Slides <files/mm2015.ppt>`_). 
+* W. Wang, G. Chen, T. T. A. Dinh, B. C. Ooi, K.-L.Tan, J. Gao, and S. Wang. `SINGA: putting deep learning in the hands of multimedia users <http://www.comp.nus.edu.sg/~ooibc/singa-mm15.pdf>`_. ACM Multimedia 2015 (`BibTex <http://www.comp.nus.edu.sg/~dbsystem/singa//assets/file/bib-singa.txt>`_, `Slides <files/mm2015.ppt>`_).
 
 .. toctree::
    :hidden:
-   
+
    downloads
    docs
 
@@ -85,25 +85,25 @@ Please cite the following two papers if you use SINGA in your research:
    develop/how-contribute
    develop/contribute-code
    develop/contribute-docs
-   
+
 .. toctree::
    :hidden:
    :maxdepth: 2
    :caption: Community
-   
+
    community/source-repository
    community/mail-lists
    community/issue-tracking
    community/team-list
-   
+
 
 
 License
-=======
+----------
 SINGA is released under `Apache License Version 2.0 <http://www.apache.org/licenses/LICENSE-2.0>`_.
 
 Disclaimers
-===========
+-----------
 
 Apache SINGA is an effort undergoing incubation at The Apache Software Foundation (ASF), sponsored by the Apache Incubator. Incubation is required of all newly accepted projects until a further review indicates that the infrastructure, communications, and decision making process have stabilized in a manner consistent with other successful ASF projects. While incubation status is not necessarily a reflection of the completeness or stability of the code, it does indicate that the project has yet to be fully endorsed by the ASF.
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/33992c90/examples/index.rst
----------------------------------------------------------------------
diff --git a/examples/index.rst b/examples/index.rst
new file mode 100644
index 0000000..d6faf5d
--- /dev/null
+++ b/examples/index.rst
@@ -0,0 +1,6 @@
+.. toctree::
+
+   char-rnn/README
+   imagenet/README
+
+

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/33992c90/src/python/singa/device.py
----------------------------------------------------------------------
diff --git a/src/python/singa/device.py b/src/python/singa/device.py
index aff3587..eff6783 100644
--- a/src/python/singa/device.py
+++ b/src/python/singa/device.py
@@ -68,21 +68,52 @@ def device_query(id, verbose=False):
 
 
 def create_cuda_gpus(num):
+    '''Create a list of CudaGPU devices.
+
+    Args:
+        num (int): number of devices to create.
+    Returns:
+        a list of swig converted CudaGPU devices.
+    '''
+
     return singa.Platform.CreateCudaGPUs(num)
 
 
 def create_cuda_gpu():
+    '''Create a single CudaGPU device.
+
+    Returns:
+        a swig converted CudaGPU device.
+    '''
+
     return singa.Platform.CreateCudaGPUs(1)[0]
 
 
 def create_cuda_gpus_on(device_ids):
+    '''Create a list of CudaGPU devices.
+
+    Args:
+        device_ids (list): a list of GPU card IDs.
+
+    Returns:
+        a list of swig converted CudaGPU devices.
+    '''
     return singa.Platform.CreateCudaGPUsOn(device_ids)
 
 
 def create_cuda_gpu_on(device_id):
+    '''Create a CudaGPU device on the given device ID.
+
+    Args:
+        device_id (int): GPU card ID.
+
+    Returns:
+        a swig converted CudaGPU device.
+    '''
     devices = create_cuda_gpus_on([device_id])
     return devices[0]
 
 
 def get_default_device():
+    '''Get the default host device, which is a CppCPU device.'''
     return singa.Platform.GetDefaultDevice()
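
For quick reference, a minimal usage sketch of the device helpers above
(assuming a CUDA-enabled build; the create_cuda_gpu* functions would fail on a
CPU-only build)::

    from singa import device

    host = device.get_default_device()         # the default CppCPU device
    gpu = device.create_cuda_gpu()             # the first CUDA GPU
    gpu0 = device.create_cuda_gpu_on(0)        # the GPU with card ID 0
    gpus = device.create_cuda_gpus_on([0, 1])  # one swig device per card ID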

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/33992c90/src/python/singa/initializer.py
----------------------------------------------------------------------
diff --git a/src/python/singa/initializer.py b/src/python/singa/initializer.py
index 15caed3..277fd2f 100644
--- a/src/python/singa/initializer.py
+++ b/src/python/singa/initializer.py
@@ -15,29 +15,113 @@
 # specific language governing permissions and limitations
 # under the License.
 # =============================================================================
-"""Popular initialization methods for parameter values (Tensor ojects)"""
+'''Popular initialization methods for parameter values (Tensor objects).
+
+Example usages::
+
+    from singa import tensor
+    from singa import initializer
+
+    x = tensor.Tensor((3, 5))
+    initializer.xavier(x)
+'''
 
 import math
 
 
+'''
+TODO(wangwei) update the uniform and gaussian initializers
+
+def uniform(t, fan_in=0, fan_out=0):
+    typically, for conv layer weight: fan_in = nb_channel * kh * kw,
+    fan_out = nb_filter * kh * kw
+    for dense layer weight, fan_in = input_feature_length,
+    fan_out = output_feature_length
+    # Ref: [Bengio and Glorot 2010]: Understanding the difficulty of
+    training deep feedforward neural networks.
+
+    assert fan_in > 0 or fan_out > 0, \
+        'fan_in and fan_out cannot be 0 at the same time'
+    avg = 1
+    if fan_in * fan_out == 0:
+      avg = 2
+    x = math.sqrt(3.0 * avg / (fan_in + fan_out))
+    t.uniform(-x, x)
+
+
+def gaussian(t, fan_in=0, fan_out=0):
+    typically, for conv layer weight: fan_in = nb_channel * kh * kw,
+    fan_out = nb_filter * kh * kw
+    for dense layer weight, fan_in = input_feature_length,
+    fan_out = output_feature_length
+
+    Ref Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun: Delving Deep into
+    Rectifiers: Surpassing Human-Level Performance on ImageNet Classification
+
+    assert fan_in > 0 or fan_out > 0, \
+        'fan_in and fan_out cannot be 0 at the same time'
+    avg = 1
+    if fan_in * fan_out == 0:
+      avg = 2
+    std = math.sqrt(2.0 * avg / (fan_in + fan_out))
+    t.gaussian(0, std)
+'''
+
+
 def uniform(t, low=0, high=1):
+    '''Initialize the parameter values following a uniform distribution.
+
+    Args:
+        t (Tensor): the parameter tensor
+        low (float): lower bound
+        high (float): upper bound
+    '''
     t.uniform(low, high)
 
 
 def gaussian(t, mean=0, std=0.01):
+    '''Initialize the parameter values following a Gaussian distribution.
+
+    Args:
+        t (Tensor): the parameter tensor
+        mean (float): mean of the distribution
+        std (float): standard deviation
+    '''
     t.gaussian(mean, std)
 
 
 def xavier(t):
+    '''Initialize the matrix parameter following a uniform distribution over
+    [-sqrt(6/(fan_in + fan_out)), sqrt(6/(fan_in + fan_out))].
+
+    Args:
+        t (Tensor): the parameter tensor
+    '''
+
     scale = math.sqrt(6.0 / (t.shape[0] + t.shape[1]))
     t.uniform(-scale, scale)
 
 
 def glorot(t):
+    '''Initialize the matrix parameter following a Gaussian distribution with
+    mean = 0 and std = sqrt(2.0 / (nb_row + nb_col)).
+
+    Args:
+        t (Tensor): the parameter tensor
+    '''
     scale = math.sqrt(2.0 / (t.shape[0] + t.shape[1]))
     t.gaussian(0, 1)
     t *= scale
 
 
 def msra(t):
+    '''Initialize the matrix parameter following a Gaussian distribution with
+    mean = 0, std = math.sqrt(2.0 / nb_row).
+
+    Ref [He, Zhang, Ren and Sun 2015]: Specifically accounts for ReLU
+    nonlinearities.
+
+    Args:
+        t (Tensor): the parameter tensor
+    '''
     t.gaussian(0, math.sqrt(2.0 / t.shape[0]))
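
A short sketch applying the initializers above to parameter tensors (the
shapes are arbitrary examples; each call overwrites the previous values)::

    from singa import tensor
    from singa import initializer

    w = tensor.Tensor((100, 200))   # matrix with fan_in=100, fan_out=200
    initializer.xavier(w)           # uniform over [-sqrt(6/300), sqrt(6/300)]
    initializer.glorot(w)           # gaussian with std = sqrt(2/300)
    initializer.msra(w)             # gaussian with std = sqrt(2/100)

    b = tensor.Tensor((1, 200))
    initializer.uniform(b, low=0, high=0.1)
    initializer.gaussian(b, mean=0, std=0.01)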

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/33992c90/src/python/singa/layer.py
----------------------------------------------------------------------
diff --git a/src/python/singa/layer.py b/src/python/singa/layer.py
index c8c8c05..0759716 100644
--- a/src/python/singa/layer.py
+++ b/src/python/singa/layer.py
@@ -14,7 +14,30 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # =============================================================================
-""" Python layers which wraps the C++ layers by providing easy to construct APIs
+""" Python layers wrap the C++ layers to provide simpler construction APIs.
+
+Example usages::
+
+    from singa import layer
+    from singa import tensor
+    from singa import device
+    from singa.model_pb2 import kTrain
+
+    layer.engine = 'cudnn'  # to use cudnn layers
+    dev = device.create_cuda_gpu()
+
+    # create a convolution layer
+    conv = layer.Conv2D('conv', 32, 3, 1, pad=1, input_sample_shape=(3, 32, 32))
+    conv.to_device(dev)  # move the layer data onto a CudaGPU device
+    x = tensor.Tensor((2, 3, 32, 32), dev)  # a mini-batch of 2 samples
+    x.uniform(-1, 1)
+    y = conv.forward(kTrain, x)
+
+    dy = tensor.Tensor()
+    dy.reset_like(y)
+    dy.set_value(0.1)
+    # dp is a list of tensors for parameter gradients
+    dx, dp = conv.backward(kTrain, dy)
 """
 
 from sets import Set
@@ -22,23 +45,37 @@ from . import singa_wrap
 from .proto import model_pb2
 import tensor
 
-# engine could be 'cudnn', 'singa', which is used to create layers.
-# e.g., CudnnConvolution layer is identified by 'cudnn_convolution'
-# Convolution layer is identified by 'singa_convolution'
-# engine is case insensitive
+
 engine = 'cudnn'
+'''engine is the prefix of the layer identifier.
+
+The value could be one of [**'cudnn', 'singacpp', 'singacuda', 'singacl'**], for
+layers implemented using the cudnn library, Cpp, Cuda and OpenCL respectively.
+For example, the CudnnConvolution layer is identified by 'cudnn_convolution',
+and 'singacpp_convolution' identifies the Convolution layer.
+Some layers are implemented using only Tensor functions; therefore they are
+transparent to the underlying devices. Such layers have multiple identifiers,
+e.g., singacpp_dropout, singacuda_dropout and singacl_dropout all refer to the
+Dropout layer.
+
+engine is case insensitive. Each Python layer creates the correct specific
+layer according to the engine attribute.
+'''
 
 
 class Layer(object):
-    """Base Python layer class.
+    '''Base Python layer class.
 
-    Usages:
-        1.  construct layer without input_sample_shapes, goto 2;
-            construct layer with input_sample_shapes, goto 3;
+    Typically, the life cycle of a layer instance includes:
+        1. construct the layer without input_sample_shapes, then go to step 2;
+           or construct it with input_sample_shapes, then go to step 3;
         2. call setup to create the parameters and setup other meta fields
         3. call forward or access layer members
         4. call backward and get parameters for update
-    """
+
+    Args:
+        name (str): layer name
+    '''
 
     def __init__(self, name, **kwargs):
         self.layer = None  # layer converted by swig
@@ -49,20 +86,24 @@ class Layer(object):
         self.has_setup = False
 
     def param_names(self):
+        '''
+        Returns:
+            a list of strings, one for the name of one parameter Tensor
+        '''
         names = []
         for x in self.param_specs:
             names.append(x['name'])
         return names
 
     def setup(self, in_shapes):
-        """Call the C++ setup function to create params and set some meta data.
+        '''Call the C++ setup function to create params and set some meta data.
 
         Args:
             in_shapes: if the layer accepts a single input Tensor, in_shapes is
                 a single tuple specifying the input Tensor shape; if the layer
                 accepts multiple input Tensor (e.g., the concatenation layer),
-                in_shapes is a tuple of tuples, each for one input Tensor shape
-        """
+                in_shapes is a tuple of tuples, each for one input Tensor shape
+        '''
         if self.has_setup:
             return
         self.layer.Setup(list(in_shapes),
@@ -70,54 +111,92 @@ class Layer(object):
         self.has_setup = True
 
     def get_output_sample_shape(self):
+        '''Called after setup to get the shape of the output sample(s).
+
+        Returns:
+            a tuple for a single output Tensor or a list of tuples if this layer
+            has multiple outputs
+        '''
         assert self.has_setup, \
             'Must call setup() before get_output_sample_shape()'
         return self.layer.GetOutputSampleShape()
 
     def param_values(self):
-        """Return param value tensors.
+        '''Return param value tensors.
 
-        Do not store these tensors as layer members because cpp Tensor could be
-        moved onto diff devices due to the change of layer device. However, the
-        py tensors would not update its internal cpp tensor automatically.
-        """
+        Parameter tensors are not stored as layer members because the cpp
+        Tensor could be moved onto a different device when the layer device
+        changes, which would leave the py tensors inconsistent.
+
+        Returns:
+            a list of tensors, one for each parameter
+        '''
         return tensor.from_raw_tensors(self.layer.param_values())
 
-    def forward(self, flag, input):
+    def forward(self, flag, x):
         '''Forward propagate through this layer.
 
         Args:
-            flag, kTrain or kEval
-            input, an input tensor
+            flag (int): kTrain or kEval
+            x (Tensor or list<Tensor>): an input tensor if the layer is
+                connected from a single layer; a list of tensors if the layer
+                is connected from multiple layers.
 
         Return:
-            a tensor for the transformed feature
+            a tensor if the layer is connected to a single layer; a list of
+            tensors if the layer is connected to multiple layers;
         '''
         assert self.has_setup, 'Must call setup() before forward()'
-        assert isinstance(input, tensor.Tensor), 'input must be py Tensor'
-        y = self.layer.Forward(flag, input.singa_tensor)
-        return tensor.from_raw_tensor(y)
+        if type(x) == list:
+            xs = []
+            for t in x:
+                xs.append(t.singa_tensor)
+        else:
+            assert isinstance(x, tensor.Tensor), \
+                'input must be a Tensor or a list of Tensor'
+            xs = x.singa_tensor
+        y = self.layer.Forward(flag, xs)
+        if type(y) == list:
+            return tensor.from_raw_tensors(y)
+        else:
+            return tensor.from_raw_tensor(y)
 
-    def backward(self, flag, grad):
-        '''Backward propagate through this layer.
+    def backward(self, flag, dy):
+        '''Backward propagate gradients through this layer.
 
         Args:
-            flag, for future use.
-            grad, gradient of the returned values of the forward function.
-
+            flag (int): for future use.
+            dy (Tensor or list<Tensor>): the gradient(s) of y w.r.t. the
+                objective loss
         Return:
-            <dx, <dp1, dp2..>>, dx is the gradient of the input of the
-            forward function, dpi is the gradient of the i-th parameter
+            <dx, <dp1, dp2..>>, dx is a (list of) tensor(s) for the gradient
+            of x, dpi is the gradient of the i-th parameter
         '''
-        assert isinstance(grad, tensor.Tensor), 'grad must be py Tensor'
-        ret = self.layer.Backward(flag, grad.singa_tensor)
-        return tensor.from_raw_tensor(ret[0]), tensor.from_raw_tensors(ret[1])
+        if type(dy) == list:
+            dys = []
+            for t in dy:
+                dys.append(t.singa_tensor)
+        else:
+            assert isinstance(dy, tensor.Tensor), \
+                'the input must be a Tensor or a set of Tensor'
+            dys = dy.singa_tensor
+        ret = self.layer.Backward(flag, dys)
+        if type(ret[0]) == list:
+            dxs = tensor.from_raw_tensors(ret[0])
+        else:
+            dxs = tensor.from_raw_tensor(ret[0])
+        return dxs, tensor.from_raw_tensors(ret[1])
 
     def to_device(self, device):
+        '''Move layer state tensors onto the given device.
+
+        Args:
+            device: swig converted device, created using singa.device
+        '''
         self.layer.ToDevice(device)
 
     def as_type(self, dtype):
-        self.layer.AsType(dtype)
+        pass
 
     def __copy__(self):
         pass
@@ -127,43 +206,42 @@ class Layer(object):
 
 
 class Conv2D(Layer):
+    """Construct a layer for 2D convolution.
 
+    Args:
+        nb_kernels (int): num of kernels, i.e., the channels of the output Tensor
+        kernel: an integer or a pair of integers for kernel height and width
+        stride: an integer or a pair of integers for stride height and width
+        border_mode (string): padding mode, case-insensitive,
+            'valid' -> padding is 0 for height and width
+            'same' -> padding is half of the kernel size (floor); the kernel
+            size must be an odd number.
+        cudnn_prefer (string): the preferred algorithm for cudnn convolution
+            which could be 'fatest', 'autotune', 'limited_workspace' and
+            'no_workspace'
+        data_format (string): either 'NCHW' or 'NHWC'
+        use_bias (bool): True or False
+        pad: an integer or a pair of integers for padding height and width
+        W_specs (dict): used to specify the weight matrix specs, fields
+            include,
+            'name' for parameter name
+            'lr_mult' for learning rate multiplier
+            'decay_mult' for weight decay multiplier
+            'init' for init method, which could be 'gaussian', 'uniform',
+            'xavier' and ''
+            'std', 'mean', 'high', 'low' for corresponding init methods
+            TODO(wangwei) 'clamp' for gradient constraint, value is scalar
+            'regularizer' for regularization, currently support 'l2'
+        b_specs (dict): hyper-parameters for bias vector, similar as W_specs
+        name (string): layer name.
+        input_sample_shape: 3d tuple for the shape of the input Tensor
+            without the batchsize, e.g., (channel, height, width) or
+            (height, width, channel)
+    """
     def __init__(self, name, nb_kernels, kernel=3, stride=1, border_mode='same',
                  cudnn_prefer='fatest', data_format='NCHW',
                  use_bias=True, W_specs=None, b_specs=None,
                  pad=None, input_sample_shape=None):
-        """Construct a layer for 2D convolution.
-
-        Args:
-            nb_kernels (int): num of the channels (kernels) of the input Tensor
-            kernel: an integer or a pair of integers for kernel height and width
-            stride: an integer or a pair of integers for stride height and width
-            border_mode (string): padding mode, case in-sensitive,
-                'valid' -> padding is 0 for height and width
-                'same' -> padding is half of the kernel (floor),
-                    the kernel must be odd number.
-            cudnn_prefer (string): the preferred algorithm for cudnn convolution
-                which could be 'fatest', 'autotune', 'limited_workspace' and
-                'no_workspace'
-            data_format (string): either 'NCHW' or 'NHWC'
-            use_bias (bool): True or False
-            pad: an integer or a pair of integers for padding height and width
-            W_specs (dict): used to specify the weight matrix specs, fields
-                include,
-                'name' for parameter name
-                'lr_mult' for learning rate multiplier
-                'decay_mult' for weight decay multiplier
-                'init' for init method, which could be 'gaussian', 'uniform',
-                'xavier' and ''
-                'std', 'mean', 'high', 'low' for corresponding init methods
-                TODO(wangwei) 'clamp' for gradient constraint, value is scalar
-                'regularizer' for regularization, currently support 'l2'
-            b_specs (dict): hyper-parameters for bias vector, similar as W_specs
-            name (string): layer name.
-            input_sample_shape: 3d tuple for the shape of the input Tensor
-                without the batchsize, e.g., (channel, height, width) or
-                (height, width, channel)
-        """
         super(Conv2D, self).__init__(name)
         assert data_format == 'NCHW', 'Not supported data format: %s ' \
             'only "NCHW" is enabled currently' % (data_format)
@@ -195,19 +273,19 @@ class Conv2D(Layer):
 
 
 class Conv1D(Conv2D):
+    """Construct a layer for 1D convolution.
+
+    Most of the args are the same as those for Conv2D except that kernel,
+    stride and pad are scalars instead of tuples.
+    input_sample_shape is a tuple with a single value for the input feature
+    length
+    """
 
     def __init__(self, name, nb_kernels, kernel=3, stride=1,
                  border_mode='same', cudnn_prefer='fatest',
                  use_bias=True, W_specs={'init': 'Xavier'},
                  b_specs={'init': 'Constant', 'value': 0}, pad=None,
                  input_sample_shape=None):
-        """Construct a layer for 1D convolution.
-
-        Most of the args are the same as those for Conv2D except the kernel,
-        stride, pad, which is a scalar instead of a tuple.
-        input_sample_shape is a tuple with a single value for the input feature
-        length
-        """
         pad = None
         if pad is not None:
             pad = (0, pad)
@@ -227,7 +305,15 @@ class Conv1D(Conv2D):
 
 
 class Pooling2D(Layer):
+    '''2D pooling layer providing max/avg pooling.
+
+    All args are the same as those for Conv2D, except the following one
 
+    Args:
+        mode: pooling type, model_pb2.PoolingConf.MAX or
+            model_pb2.PoolingConf.AVE
+
+    '''
     def __init__(self, name, mode, kernel=3, stride=2, border_mode='same',
                  pad=None, data_format='NCHW', input_sample_shape=None):
         super(Pooling2D, self).__init__(name)
@@ -312,28 +398,26 @@ class AvgPooling1D(AvgPooling2D):
 
 
 class BatchNormalization(Layer):
-    # TODO(wangwei) add mode and epsilon arguments
+    """Batch-normalization.
 
+    Args:
+        momentum (float): for running average mean and variance.
+        beta_specs (dict): dictionary includes the fields for the beta
+            param:
+            'name' for parameter name
+            'lr_mult' for learning rate multiplier
+            'decay_mult' for weight decay multiplier
+            'init' for init method, which could be 'gaussian', 'uniform',
+            'xavier' and ''
+            'std', 'mean', 'high', 'low' for corresponding init methods
+            'clamp' for gradient constraint, value is scalar
+            'regularizer' for regularization, currently support 'l2'
+        gamma_specs (dict): similar to beta_specs, but for the gamma param.
+        name (string): layer name
+        input_sample_shape (tuple): with at least one integer
+    """
     def __init__(self, name, momentum=0.9,
                  beta_specs=None, gamma_specs=None, input_sample_shape=None):
-        """Batch-normalization.
-
-        Args:
-            momentum (float): for running average mean and variance.
-            beta_specs (dict): dictionary includes the fields for the beta
-                param:
-                'name' for parameter name
-                'lr_mult' for learning rate multiplier
-                'decay_mult' for weight decay multiplier
-                'init' for init method, which could be 'gaussian', 'uniform',
-                'xavier' and ''
-                'std', 'mean', 'high', 'low' for corresponding init methods
-                'clamp' for gradient constraint, value is scalar
-                'regularizer' for regularization, currently support 'l2'
-            gamma_specs (dict): similar to beta_specs, but for the gamma param.
-            name (string): layer name
-            input_sample_shape (tuple): with at least one integer
-        """
         super(BatchNormalization, self).__init__(name)
         conf = self.conf.batchnorm_conf
         conf.factor = momentum
@@ -362,16 +446,17 @@ class BatchNormalization(Layer):
 
 
 class LRN(Layer):
+    """Local response normalization.
+
+    Args:
+        size (int): the number of channels to be crossed for
+            normalization.
+        mode (string): 'cross_channel'
+        input_sample_shape (tuple): 3d tuple, (channel, height, width)
+    """
+
     def __init__(self, name, size=5, alpha=1, beta=0.75, mode='cross_channel',
                  k=1, input_sample_shape=None):
-        """Local response normalization.
-
-        Args:
-            size (int): # of channels to be crossed
-                normalization.
-            mode (string): 'cross_channel'
-            input_sample_shape (tuple): 3d tuple, (channel, height, width)
-        """
         super(LRN, self).__init__(name)
         conf = self.conf.lrn_conf
         conf.local_size = size
@@ -388,29 +473,28 @@ class LRN(Layer):
 
 
 class Dense(Layer):
+    """Apply linear/affine transformation, also called inner-product or
+    fully connected layer.
 
+    Args:
+        num_output (int): output feature length.
+        use_bias (bool): add a bias vector or not to the transformed feature
+        W_specs (dict): specs for the weight matrix
+            'name' for parameter name
+            'lr_mult' for learning rate multiplier
+            'decay_mult' for weight decay multiplier
+            'init' for init method, which could be 'gaussian', 'uniform',
+            'xavier' and ''
+            'std', 'mean', 'high', 'low' for corresponding init methods
+            'clamp' for gradient constraint, value is scalar
+            'regularizer' for regularization, currently support 'l2'
+        b_specs (dict): specs for the bias vector, same fields as W_specs.
+        W_transpose (bool): if true, output=x*W.T+b;
+        input_sample_shape (tuple): input feature length
+    """
     def __init__(self, name, num_output, use_bias=True,
                  W_specs=None, b_specs=None,
                  W_transpose=True, input_sample_shape=None):
-        """Apply linear/affine transformation, also called inner-product or
-        fully connected layer.
-
-        Args:
-            num_output (int): output feature length.
-            use_bias (bool): add a bias vector or not to the transformed feature
-            W_specs (dict): specs for the weight matrix
-                'name' for parameter name
-                'lr_mult' for learning rate multiplier
-                'decay_mult' for weight decay multiplier
-                'init' for init method, which could be 'gaussian', 'uniform',
-                'xavier' and ''
-                'std', 'mean', 'high', 'low' for corresponding init methods
-                'clamp' for gradient constraint, value is scalar
-                'regularizer' for regularization, currently support 'l2'
-            b_specs (dict): specs for the bias vector, same fields as W_specs.
-            W_transpose (bool): if true, output=x*W.T+b;
-            input_sample_shape (tuple): input feature length
-        """
         super(Dense, self).__init__(name)
         conf = self.conf.dense_conf
         conf.num_output = num_output
@@ -435,14 +519,14 @@ class Dense(Layer):
 
 
 class Dropout(Layer):
+    """Droput layer.
 
-    def __init__(self, name, p=0.5, input_sample_shape=None):
-        """Droput layer.
+    Args:
+        p (float): probability for dropping out the element, i.e., set to 0
+        name (string): layer name
+    """
 
-        Args:
-            p (float): probability for dropping out the element, i.e., set to 0
-            name (string): layer name
-        """
+    def __init__(self, name, p=0.5, input_sample_shape=None):
         super(Dropout, self).__init__(name)
         conf = self.conf.dropout_conf
         conf.dropout_ratio = p
@@ -456,15 +540,14 @@ class Dropout(Layer):
 
 
 class Activation(Layer):
+    """Activation layers.
 
+    Args:
+        name (string): layer name
+        mode (string): 'relu', 'sigmoid', or 'tanh'
+        input_sample_shape (tuple): shape of a single sample
+    """
     def __init__(self, name, mode='relu', input_sample_shape=None):
-        """Activation layers.
-
-        Args:
-            name (string): layer name
-            mode (string): 'relu', 'sigmoid', or 'tanh'
-            input_sample_shape (tuple): shape of a single sample
-        """
         super(Activation, self).__init__(name)
         self.conf.type = (engine + '_' + mode).lower()
         _check_engine(engine, ['cudnn', 'singa'])
@@ -474,15 +557,14 @@ class Activation(Layer):
 
 
 class Softmax(Layer):
+    """Apply softmax.
 
+    Args:
+        axis (int): reshape the input as a matrix with the dimension
+            [0,axis) as the row, the [axis, -1) as the column.
+        input_sample_shape (tuple): shape of a single sample
+    """
     def __init__(self, name, axis=1, input_sample_shape=None):
-        """Apply softmax.
-
-        Args:
-            axis (int): reshape the input as a matrix with the dimension
-                [0,axis) as the row, the [axis, -1) as the column.
-            input_sample_shape (tuple): shape of a single sample
-        """
         super(Softmax, self).__init__(name)
         # conf = self.conf.softmax_conf
         # conf.axis = axis
@@ -493,14 +575,14 @@ class Softmax(Layer):
 
 
 class Flatten(Layer):
+    """Reshape the input tensor into a matrix.
 
+    Args:
+        axis (int): reshape the input as a matrix with the dimension
+            [0,axis) as the row, the [axis, -1) as the column.
+        input_sample_shape (tuple): shape for a single sample
+    """
     def __init__(self, name, axis=1, input_sample_shape=None):
-        """Reshape the input tensor into a matrix.
-        Args:
-            axis (int): reshape the input as a matrix with the dimension
-                [0,axis) as the row, the [axis, -1) as the column.
-            input_sample_shape (tuple): shape for a single sample
-        """
         super(Flatten, self).__init__(name)
         conf = self.conf.flatten_conf
         conf.axis = axis
@@ -511,26 +593,27 @@ class Flatten(Layer):
 
 
 class RNN(Layer):
+    '''Recurrent layer with 4 types of units, namely lstm, gru, tanh and relu.
+
+    Args:
+        hidden_size: hidden feature size, the same for all stacks of layers.
+        rnn_mode: decides the rnn unit, which could be one of 'lstm', 'gru',
+            'tanh' and 'relu', refer to cudnn manual for each mode.
+        num_stacks: num of stacked rnn layers. It is different from the
+            unrolled sequence length.
+        input_mode: 'linear' converts the input feature x by a linear
+            transformation to get a feature vector of size hidden_size;
+            'skip' does nothing but requires the input feature size to equal
+            hidden_size
+        bidirectional: True for bidirectional RNN
+        param_specs: config for initializing the RNN parameters.
+        input_sample_shape: includes a single integer for the input sample
+            feature size.
+    '''
+
     def __init__(self, name, hidden_size, rnn_mode='lstm', dropout=0.0,
                  num_stacks=1, input_mode='linear', bidirectional=False,
                  param_specs=None, input_sample_shape=None):
-        '''Wrapper for singa::RNN class.
-
-        Args:
-            hidden_size, hidden feature size, the same for all stacks of layers.
-            rnn_mode, decides the rnn unit, which could be one of 'lstm', 'gru',
-                'tanh' and 'relu', refer to cudnn manual for each mode.
-            num_stacks, num of stacks of rnn layers. It is different to the
-                unrolling seqence length.
-            input_mode, 'linear' convert the input feature x by by a linear
-                transformation to get a feature vector of size hidden_size;
-                'skip' does nothing but requires the input feature size equals
-                hidden_size
-            bidirection, True for bidirectional RNN
-            param_specs, config for initializing the RNN parameters.
-            input_sample_shape, includes a single integer for the input sample
-                feature size.
-        '''
         super(RNN, self).__init__(name)
         conf = self.conf.rnn_conf
         assert hidden_size > 0, 'Hidden feature size must > 0'
@@ -605,7 +688,7 @@ class RNN(Layer):
 
         Returns:
             <dx1, dx2, ... dxn, dhx, dcx>, where dxi is the gradient tensor for
-            the i-th input, its shape is (batch_size,
+                the i-th input, its shape is (batch_size,
                 input_feature_length). dhx is the gradient for the initial
                 hidden state. dcx is the gradient for the initial cell state,
                 which is valid only for lstm.
@@ -741,5 +824,7 @@ def _construct_param_specs_from_dict(specs):
 
 
 def get_layer_list():
-    """ Return a list of strings reprensenting the all supported layers"""
+    """ Return a list of strings which include the identifiers (tags) of all
+    supported layers
+    """
     return singa_wrap.GetRegisteredLayers()
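
A sketch of the layer life cycle described in the Layer docstring, using a
Dense layer on the default host device (the engine value, shapes and values
here are illustrative assumptions)::

    from singa import layer
    from singa import tensor
    from singa.proto import model_pb2

    layer.engine = 'singacpp'      # use the Cpp implementation
    dense = layer.Dense('fc', 10)  # constructed without input_sample_shape,
    dense.setup((20,))             # so setup() must be called explicitly

    x = tensor.Tensor((4, 20))     # a mini-batch of 4 samples
    x.uniform(-1, 1)
    y = dense.forward(model_pb2.kTrain, x)

    dy = tensor.Tensor()
    dy.reset_like(y)
    dy.set_value(0.1)
    dx, dparams = dense.backward(model_pb2.kTrain, dy)  # dparams = [dW, db]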

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/33992c90/src/python/singa/loss.py
----------------------------------------------------------------------
diff --git a/src/python/singa/loss.py b/src/python/singa/loss.py
index acfb813..c88290b 100644
--- a/src/python/singa/loss.py
+++ b/src/python/singa/loss.py
@@ -15,32 +15,127 @@
 # specific language governing permissions and limitations
 # under the License.
 # =============================================================================
-""" Python wrappers for optimizers implemented by C++."""
+
+'''
+The loss module includes a set of training loss implementations. Some are
+converted from the C++ implementation, and the rest are implemented directly
+using python Tensor.
+
+Example usage::
+
+    import numpy as np
+    from singa import tensor
+    from singa import loss
+    from singa.proto import model_pb2
+
+    x = tensor.Tensor((3, 5))
+    x.uniform(0, 1)  # randomly generate the prediction activation
+    y = tensor.from_numpy(np.array([0, 1, 3], dtype=np.int))  # set the truth
+
+    f = loss.SoftmaxCrossEntropy()
+    l = f.forward(model_pb2.kTrain, x, y)  # l is tensor with 3 loss values
+    g = f.backward()  # g is a tensor containing all gradients of x w.r.t l
+'''
+
 
 from . import singa_wrap as singa
 import tensor
 
 
 class Loss(object):
+    '''Base loss class.
+
+    Subclasses that wrap the C++ loss classes can use the inherited forward,
+    backward, and evaluate functions of this base class. Other subclasses need
+    to override these functions.
+    '''
 
     def __init__(self):
         self.swig_loss = None
 
     def forward(self, flag, x, y):
-        """Return a tensor of floats, one per sample"""
+        '''Compute the loss values.
+
+        Args:
+            flag (int): kTrain or kEval. If it is kTrain, then the backward
+                function must be called before calling forward again.
+            x (Tensor): the prediction Tensor
+            y (Tensor): the ground truth Tensor; x.shape[0] must equal y.shape[0]
+
+        Returns:
+            a tensor of floats for the loss values, one per sample
+        '''
         return tensor.from_raw_tensor(
             self.swig_loss.Forward(flag, x.singa_tensor, y.singa_tensor))
 
     def backward(self):
-        """Return the grad of x w.r.t. the loss obj"""
+        '''
+        Returns:
+            the grad of x w.r.t. the loss
+        '''
         return tensor.from_raw_tensor(self.swig_loss.Backward())
 
-    def evaluate(self, flag, x, y):
-        """Return the averaged loss for all samples in x"""
+    def evaluate(self, flag, x, y):  # TODO(wangwei) remove flag
+        '''
+        Args:
+            flag (int): must be kEval, to be removed
+            x (Tensor): the prediction Tensor
+            y (Tensor): the ground truth Tensor
+
+        Returns:
+            the averaged loss for all samples in x.
+        '''
         return self.swig_loss.Evaluate(flag, x.singa_tensor, y.singa_tensor)
 
 
 class SoftmaxCrossEntropy(Loss):
+    '''This loss function is a combination of SoftMax and Cross-Entropy loss.
+
+    It converts the inputs via the SoftMax function and then
+    computes the cross-entropy loss against the ground truth values.
+    '''
 
     def __init__(self):
         self.swig_loss = singa.SoftmaxCrossEntropy()
+
+
+class SquaredError(Loss):
+    '''This loss evaluates the squared error between the prediction and the
+    truth values.
+
+    It is implemented using Python Tensor operations.
+    '''
+    def __init__(self):
+        super(SquaredError, self).__init__()
+        self.err = None
+
+    def forward(self, flag, x, y):
+        '''Compute the error as 0.5 * ||x-y||^2.
+
+        Args:
+            flag (int): kTrain or kEval; if kTrain, then the backward must be
+                called before calling forward again.
+            x (Tensor): the prediction Tensor
+            y (Tensor): the ground truth Tensor, which has the same shape
+                as x
+
+        Returns:
+            a Tensor with one error value per sample
+        '''
+        self.err = x - y
+        return 0.5 * tensor.squared(self.err)
+
+    def backward(self):
+        '''Compute the gradient of x w.r.t the error.
+
+        Returns:
+            x - y
+        '''
+        return self.err
+
+    def evaluate(self, flag, x, y):
+        '''Compute the averaged error.
+
+        Returns:
+            a float value as the averaged error
+        '''
+        return tensor.sum(0.5 * tensor.squared(x - y)) / x.size()
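
A matching sketch for the new SquaredError loss; unlike SoftmaxCrossEntropy,
the ground truth y must have the same shape as the prediction x::

    from singa import tensor
    from singa import loss
    from singa.proto import model_pb2

    x = tensor.Tensor((3, 5))
    x.uniform(0, 1)
    y = tensor.Tensor((3, 5))
    y.set_value(0.5)

    f = loss.SquaredError()
    l = f.forward(model_pb2.kTrain, x, y)    # per-sample 0.5 * ||x - y||^2
    g = f.backward()                         # x - y
    avg = f.evaluate(model_pb2.kEval, x, y)  # averaged over all samples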

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/33992c90/src/python/singa/metric.py
----------------------------------------------------------------------
diff --git a/src/python/singa/metric.py b/src/python/singa/metric.py
index 31b6892..3a5750d 100644
--- a/src/python/singa/metric.py
+++ b/src/python/singa/metric.py
@@ -15,28 +15,71 @@
 # specific language governing permissions and limitations
 # under the License.
 # =============================================================================
-""" Python wrappers for optimizers implemented by C++."""
+'''This module includes a set of metric classes for evaluating the model's
+performance. The specific metric classes could be converted from the C++
+implementation or implemented directly in Python.
+
+
+Example usage::
+
+    import numpy as np
+    from singa import tensor
+    from singa import metric
+
+    x = tensor.Tensor((3, 5))
+    x.uniform(0, 1)  # randomly generate the prediction activation
+    x = tensor.SoftMax(x)  # normalize the prediction into probabilities
+    y = tensor.from_numpy(np.array([0, 1, 3], dtype=np.int))  # set the truth
+
+    f = metric.Accuracy()
+    acc = f.evaluate(x, y)  # averaged accuracy over all 3 samples in x
+
+'''
 
 from . import singa_wrap as singa
 import tensor
 
 
 class Metric(object):
+    '''Base metric class.
+
+    Subclasses that wrap the C++ metric classes can use the inherited forward
+    and evaluate functions of this base class. Other subclasses need
+    to override these functions. Users need to feed in the **predictions** and
+    ground truth to get the metric values.
+    '''
 
     def __init__(self):
         self.swig_metric = None
 
     def forward(self, x, y):
-        """Return a tensor of floats, one per sample"""
+        '''Compute the metric for each sample.
+
+        Args:
+            x (Tensor): predictions, one row per sample
+            y (Tensor): ground truth values, one row per sample
+
+        Returns:
+            a tensor of floats, one per sample
+        '''
         return tensor.from_raw_tensor(
             self.swig_metric.Forward(x.singa_tensor, y.singa_tensor))
 
     def evaluate(self, x, y):
-        """Return the averaged metric for all samples in x"""
+        '''Compute the averaged metric over all samples.
+
+        Args:
+            x (Tensor): predictions, one row per sample
+            y (Tensor): ground truth values, one row per sample
+        Returns:
+            a float value for the averaged metric
+        '''
         return self.swig_metric.Evaluate(x.singa_tensor, y.singa_tensor)
 
 
 class Accuracy(Metric):
+    '''Compute the top-one accuracy for single-label prediction tasks.
 
+    It calls the C++ functions to do the calculation.
+    '''
     def __init__(self):
         self.swig_metric = singa.Accuracy()
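
As the Metric base class notes, metrics without a C++ backend can override
forward and evaluate directly. A hypothetical mean-absolute-error metric,
sketched with numpy and the conversion helpers shown in the tensor module
docstring (the tensors must reside on the host)::

    import numpy as np
    from singa import metric
    from singa import tensor

    class MAE(metric.Metric):
        '''Hypothetical metric, not part of this commit.'''

        def __init__(self):
            self.swig_metric = None  # implemented in pure python

        def forward(self, x, y):
            # per-sample mean absolute error
            err = np.abs(x.to_numpy() - y.to_numpy()).mean(axis=1)
            return tensor.from_numpy(err.astype(np.float32))

        def evaluate(self, x, y):
            # averaged over all samples
            return np.abs(x.to_numpy() - y.to_numpy()).mean()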

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/33992c90/src/python/singa/optimizer.py
----------------------------------------------------------------------
diff --git a/src/python/singa/optimizer.py b/src/python/singa/optimizer.py
index aa6bdd1..a964f16 100644
--- a/src/python/singa/optimizer.py
+++ b/src/python/singa/optimizer.py
@@ -15,7 +15,22 @@
 # specific language governing permissions and limitations
 # under the License.
 # =============================================================================
-""" Python wrappers for optimizers implemented by C++."""
+'''This module includes a set of optimizers for updating model parameters.
+
+Example usage::
+
+  from singa import optimizer
+  from singa import tensor
+
+  sgd = optimizer.SGD(lr=0.01, momentum=0.9, decay=1e-4)
+  p = tensor.Tensor((3,5))
+  p.uniform(-1, 1)
+  g = tensor.Tensor((3,5))
+  g.gaussian(0, 0.01)
+
+  sgd.apply(1, g, p, 'param')  # use the global lr=0.01 for epoch 1
+  sgd.apply_with_lr(2, 0.03, g, p, 'param')  # use lr=0.03 for epoch 2
+'''
 
 from . import singa_wrap as singa
 import tensor
@@ -23,53 +38,44 @@ from proto import model_pb2
 
 
 class Optimizer(object):
-    """Base python optimizer.
-
-    Usages:
-        1. construct the optimizer
-        2. (optional) register each parameter with its specs.
-        3. use the optimizer to update parameter values given parameter
-            gradients and other optional info
-    """
-
+    '''The base python optimizer class.
+
+    Typically, an optimizer is used as follows:
+
+    1. construct the optimizer
+    2. (optional) register each parameter with its specs.
+    3. use the optimizer to update parameter values given parameter
+        gradients and other optional info
+
+    The subclasses should override the apply_with_lr function to do the real
+    parameter update.
+
+    Args:
+        lr (float): a constant for the learning rate, mutually exclusive with
+            'lr_gen'.
+        momentum (float): a constant for the momentum value
+        decay (float): the coefficient for L2 regularizer, which is mutually
+            exclusive with 'regularizer'.
+        lr_gen (function): a function that returns the learning rate given
+            the current training step/epoch. It is mutually exclusive with lr.
+            If both are not set, the apply_with_lr function should be used for
+            param updating.
+        regularizer: an instance of Regularizer or RegularizerConf; If set,
+            regularization would be applied in apply_with_lr().
+            Users can also do regularization outside.
+        constraint: an instance of Constraint or ConstraintConf; If set,
+            constraint would be applied inside apply_with_lr(). Users can
+            also do regularization outside.
+    '''
     def __init__(self, lr=None, momentum=None, decay=None, lr_gen=None,
-                 momentum_gen=None, regularizer=None, constraint=None):
-        """Constructor.
-
-        Args:
-            lr: a constant or a function that generates learning rate given a
-                step, which is mutually exclusive with 'lr_gen'.
-            momentum: a constant or a function that generates the momentum value
-                given a step.
-            decay (float): the coefficent for L2 regularizer, which is mutually
-                exclusive with 'regularizer'.
-            lr_gen (function): a function returns the learning rate given
-                the current training step. It is mutually exclusive with lr. If
-                both are not set, the apply_with_lr function should be used for
-                param updating.
-            momentum_gen (function): a function returns the momentum value given
-                the current training step. It is mutually exclusive with
-                momentum.
-            regularizer: an instance of Regularizer or RegularizerConf; If set,
-                regularization would be applied in apply_with_lr().
-                Users can also do regularization outside.
-            constraint: an instance of Constraint or ConstraintConf; If set,
-                constraint would be applied inside apply_with_lr(). Users can
-                also do regularization outside.
-        """
+                 regularizer=None, constraint=None):
         if lr is not None:
             assert lr_gen is None, 'Cannot set lr and lr_gen at the same time'
 
-            def lr_gen(step):
+            def lr_gen(epoch):
                 return lr
         self.lr_gen = lr_gen
-        if momentum is not None:
-            assert momentum_gen is None, 'Cannot set momentum and momentum_gen'\
-                ' at the same time'
-
-            def momentum_gen(step):
-                return momentum
-        self.momentum_gen = momentum_gen
+        self.momentum = momentum
         if decay is not None:
             assert regularizer is None, \
                 'Cannot set decay and regularizer at the same time'
@@ -94,14 +100,16 @@ class Optimizer(object):
         self.learning_rate_multiplier = {}
 
     def register(self, name, specs):
-        """Register the param specs, including creating regularizer and
+        '''Register the param specs, including creating regularizer and
         constraint per param object. Param specific regularizer and constraint
         have higher priority than the global ones.
 
         Args:
             name (str): parameter name
-            specs (ParamSpec): protobuf obj
-        """
+            specs (ParamSpec): protobuf obj, including regularizer and
+                constraint, multipliers for learning rate and weight decay.
+
+        '''
 	assert type(specs) == model_pb2.ParamSpec, \
 		'specs should be model_pb2.ParamSpec instance'
         if specs.HasField('regularizer'):
@@ -113,8 +121,8 @@ class Optimizer(object):
         if specs.decay_mult != 1:
             self.decay_multiplier[name] = specs.decay_mult
 
-    def apply_regularizer_constraint(self, value, grad, name=None, step=None):
-        """Apply regularization and constraint if available.
+    def apply_regularizer_constraint(self, value, grad, name=None, epoch=None):
+        '''Apply regularization and constraint if available.
 
         If there are both global regularizer (constraint) and param specific
         regularizer (constraint), it would use the param specific one.
@@ -123,46 +131,48 @@ class Optimizer(object):
             value (Tensor): parameter value Tensor
             grad (Tensor): parameter gradient Tensor
             name (string): to get parameter specific regularizer or constraint
-            step (int): some regularizer or constraint would use step
+            epoch (int): some regularizer or constraint would use epoch
 
-        Return:
+        Returns:
             the updated gradient Tensor
-        """
+        '''
         if name is not None and name in self.constraints:
-            self.constraints[name].apply(value, grad, step)
+            self.constraints[name].apply(value, grad, epoch)
         elif self.constraint is not None:
-            self.constraint.apply(step, value, grad)
+            self.constraint.apply(epoch, value, grad)
 
         if name is not None and name in self.regularizers:
-            self.regularizers[name].apply(value, grad, step)
+            self.regularizers[name].apply(value, grad, epoch)
         elif self.regularizer is not None:
-            self.regularizer.apply(step, value, grad)
+            self.regularizer.apply(epoch, value, grad)
         return grad
 
-    def apply_with_lr(self, step, lr, grad, value, name=None):
-        """Do update with given learning rate.
+    def apply_with_lr(self, epoch, lr, grad, value, name=None):
+        '''Do update with given learning rate.
 
         The subclass optimizer must override this function.
+
         Args:
-            step (int): training step (could be iteration or epoch)
+            epoch (int): training epoch (could be iteration or epoch)
             lr (float): learning rate
             grad (Tensor): parameter gradient
             value (Tensor): parameter value
             name (string): parameter name to retrieve parameter specific
                 updating rules (including regularizer and constraint)
 
-        Return:
+        Returns:
             updated parameter value
-        """
+        '''
         assert False, 'This is the base function, pls call the subclass func'
         return value
 
-    def apply(self, step, grad, value, name=None):
-        """Do update assume the learning rate generator is set.
+    def apply(self, epoch, grad, value, name=None):
+        '''Do update assuming the learning rate generator is set.
 
         The subclass optimizer does not need to override this function.
+
         Args:
-            step (int): training step (could be iteration or epoch)
+            epoch (int): training epoch (could be iteration or epoch)
             grad (Tensor): parameter gradient
             value (Tensor): parameter value
             name (string): parameter name to retrieve parameter specific
@@ -170,98 +180,109 @@ class Optimizer(object):
 
         Return:
             updated parameter value
-        """
-
+        '''
         assert self.lr_gen is not None, 'Learning rate generator is not set.'\
             'Either set the lr_gen in constructor or call apply_with_lr'
-        lr = self.lr_gen(step)
-        return self.apply_with_lr(step, lr, grad, value, name)
+        lr = self.lr_gen(epoch)
+        return self.apply_with_lr(epoch, lr, grad, value, name)
 
 
 class SGD(Optimizer):
+    '''The vanilla Stochastic Gradient Descent algorithm with momentum.
 
-    def __init__(self, lr=None, momentum=None, decay=None, **kwargs):
-        """The vallina Stochasitc Gradient Descent algorithm.
+    See the base Optimizer for all arguments.
+    '''
 
-        See the base Optimizer for all arguments.
-        """
-        super(SGD, self).__init__(lr, momentum, decay)
+    def __init__(self, lr=None, momentum=None, decay=None, lr_gen=None,
+                 regularizer=None, constraint=None):
+        super(SGD, self).__init__(lr, momentum, decay, lr_gen, regularizer,
+                                  constraint)
         conf = model_pb2.OptimizerConf()
-        conf.momentum = momentum
+        if self.momentum is not None:
+            conf.momentum = self.momentum
+        conf.type = 'sgd'
         self.opt = singa.CreateOptimizer('SGD')
         self.opt.Setup(conf.SerializeToString())
 
-    def apply_with_lr(self, step, lr, grad, value, name):
-        self.apply_regularizer_constraint(step, value, grad, name)
-        self.opt.Apply(step, lr, name, grad.singa_tensor, value.singa_tensor)
+    def apply_with_lr(self, epoch, lr, grad, value, name):
+        grad = self.apply_regularizer_constraint(value, grad, name, epoch)
+        self.opt.Apply(epoch, lr, name, grad.singa_tensor, value.singa_tensor)
         return value
 
 
 class Nesterov(Optimizer):
+    '''The SGD with Nesterov momentum.
 
-    def __init__(self, lr=None, momentum=0.9, decay=None, **kwargs):
-        """The SGD with Nesterov momentum
+    See the base Optimizer for all arguments.
+    '''
 
-        See the base Optimizer for all arguments.
-        """
-        super(Nesterov, self).__init__(lr, momentum, decay, kwargs)
+    def __init__(self, lr=None, momentum=0.9, decay=None, lr_gen=None,
+                 regularizer=None, constraint=None):
+        super(Nesterov, self).__init__(lr, momentum, decay, lr_gen, regularizer,
+                                       constraint)
         conf = model_pb2.OptimizerConf()
+        conf.momentum = momentum
+        conf.type = 'nesterov'
         self.opt = singa.CreateOptimizer('Nesterov')
         self.opt.Setup(conf.SerializeToString())
 
-    def apply_with_lr(self, step, lr, grad, value, name):
-        self.apply_regularizer_constraint(step, value, grad, name)
-        self.opt.Apply(step, lr, name, grad.singa_tensor, value.singa_tensor)
+    def apply_with_lr(self, epoch, lr, grad, value, name):
+        grad = self.apply_regularizer_constraint(value, grad, name, epoch)
+        self.opt.Apply(epoch, lr, name, grad.singa_tensor, value.singa_tensor)
         return value
 
 
 class AdaGrad(Optimizer):
+    '''AdaGrad optimizer.
 
-    def __init__(self, epsilon=1e-8, lr=None, decay=None, **kwargs):
-        """AdaGrad optimizer.
+    See the base Optimizer for all constructor args.
 
-        See the base Optimizer for all constructor args.
-        Args:
-            epsilon (float): small number for preventing numeric error.
-        """
-        super(RMSProp, self).__init__(lr, decay, **kwargs)
+    Args:
+        epsilon (float): small number for preventing numeric error.
+    '''
+    def __init__(self, epsilon=1e-8, lr=None, decay=None, lr_gen=None,
+                 regularizer=None, constraint=None):
+        super(AdaGrad, self).__init__(lr=lr, decay=decay, lr_gen=lr_gen,
+                                      regularizer=regularizer,
+                                      constraint=constraint)
         conf = model_pb2.OptimizerConf()
         conf.delta = epsilon
+        conf.type = 'adagrad'
         self.opt = singa.CreateOptimizer('AdaGrad')
         self.opt.Setup(conf.SerializeToString())
 
-    def apply_with_lr(self, step, lr, grad, value, name):
-        grad = self.apply_regularizer_constraint(step, value, grad, name)
-        self.opt.Apply(step, lr,  name, grad.singa_tensor, value.singa_tensor)
+    def apply_with_lr(self, epoch, lr, grad, value, name):
+        grad = self.apply_regularizer_constraint(value, grad, name, epoch)
+        self.opt.Apply(epoch, lr,  name, grad.singa_tensor, value.singa_tensor)
         return value
 
 
 class RMSProp(Optimizer):
+    '''RMSProp optimizer.
 
-    def __init__(self, rho=0.9, epsilon=1e-8, lr=None, decay=None, **kwargs):
-        """RMSProp optimizer.
+    See the base Optimizer for all constructor args.
 
-        See the base Optimizer for all constructor args.
-        Args:
-            rho (float): float within [0, 1]
-            epsilon (float): small value for preventing numeric error
-        """
-        super(RMSProp, self).__init__(lr, decay, kwargs)
+    Args:
+        rho (float): float within [0, 1]
+        epsilon (float): small value for preventing numeric error
+    '''
+
+    def __init__(self, rho=0.9, epsilon=1e-8, lr=None, decay=None, lr_gen=None,
+                 regularizer=None, constraint=None):
+        super(RMSProp, self).__init__(lr=lr, decay=decay, lr_gen=lr_gen,
+                                      regularizer=regularizer,
+                                      constraint=constraint)
         conf = model_pb2.OptimizerConf()
         conf.rho = rho
         conf.delta = epsilon
         self.opt = singa.CreateOptimizer('RMSProp')
         self.opt.Setup(conf.SerializeToString())
 
-    def apply_with_lr(self, step, lr, grad, value, name):
-        grad = self.apply_regularizer_constraint(step, value, grad, name)
-        self.opt.Apply(step, lr,  name, grad.singa_tensor, value.singa_tensor)
+    def apply_with_lr(self, epoch, lr, grad, value, name):
+        grad = self.apply_regularizer_constraint(value, grad, name, epoch)
+        self.opt.Apply(epoch, lr,  name, grad.singa_tensor, value.singa_tensor)
         return value
 
 
 class Regularizer(object):
-    """Base Python regularizer for parameter gradients.
-    """
+    '''Base Python regularizer for parameter gradients.'''
 
     def apply(self, value, grad):
         assert False, 'Not Implemented. Call the subclass function.'
@@ -269,34 +290,32 @@ class Regularizer(object):
 
 
 class CppRegularizer(Regularizer):
-    """Wrapper for regularizer implemented using C++.
-    """
+    '''Wrapper for regularizer implemented using C++.
 
-    def __init__(self, conf):
-        """Constructor.
+    Args:
+        conf (RegularizerConf): protobuf message for the configuration.
+    '''
 
-        Args:
-            conf (RegularizerConf): protobuf message for the configuration.
-        """
+    def __init__(self, conf):
         self.reg = singa.CreateRegularizer(conf.type)
         self.reg.Setup(conf.SerializeToString())
 
-    def apply(self, step, value, grad):
-        self.reg.Apply(step, value.singa_tensor, grad.singa_tensor)
+    def apply(self, epoch, value, grad):
+        self.reg.Apply(epoch, value.singa_tensor, grad.singa_tensor)
         return grad
 
 
 class L2Regularizer(Regularizer):
-    """L2 regularization"""
+    '''L2 regularization
+
+    Args:
+        coefficient (float): regularization coefficient.
+    '''
 
     def __init__(self, coefficient):
-        """
-        Args:
-            coefficient (float): regularization coefficient.
-        """
         self.coefficient = coefficient
 
-    def apply(self, step, value, grad, coefficient=None):
+    def apply(self, epoch, value, grad, coefficient=None):
         if coefficient is None:
             assert self.coefficient is not None, 'Must set the coefficient'
             coefficient = self.coefficient
@@ -305,39 +324,34 @@ class L2Regularizer(Regularizer):
 
 
 class Constraint(object):
-    """Base Python constraint class for paramter gradients.
-    """
+    '''Base Python constraint class for parameter gradients'''
 
-    def apply(self, step, value, grad):
+    def apply(self, epoch, value, grad):
         return grad
 
 
 class CppConstraint(Constraint):
-    """Wrapper for constraints implemented using C++.
-    """
+    '''Wrapper for constraints implemented using C++.
 
+    Args:
+        conf (ConstraintConf): protobuf message for the configuration.
+    '''
     def __init__(self, conf):
-        """Constructor.
-
-        Args:
-            conf (ConstraintConf): protobuf message for the configuration.
-        """
         self.constraint = singa.CreateConstraint(conf.type)
         self.constraint.Setup(conf.SerializeToString())
 
-    def apply(self, step, value, grad):
-        self.constraint.Apply(step, value.singa_tensor, grad.singa_tensor)
+    def apply(self, epoch, value, grad):
+        self.constraint.Apply(epoch, value.singa_tensor, grad.singa_tensor)
         return grad
 
 
 class L2Constraint(Constraint):
-    """Rescale the gradient to make the L2 norm <= a given threshold.
-    """
+    '''Rescale the gradient to make the L2 norm <= a given threshold'''
 
     def __init__(self, threshold=None):
         self.threshold = threshold
 
-    def apply(self, step, value, grad, threshold=None):
+    def apply(self, epoch, value, grad, threshold=None):
         if threshold is None:
             assert self.threshold is not None, 'Must set the threshold'
             threshold = self.threshold
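
To illustrate the epoch-based lr_gen and parameter registration described
above (the schedule and spec values are arbitrary examples)::

    from singa import optimizer
    from singa import tensor
    from singa.proto import model_pb2

    # decay the learning rate by 10x every 100 epochs
    sgd = optimizer.SGD(momentum=0.9,
                        lr_gen=lambda epoch: 0.1 * (0.1 ** (epoch // 100)))

    specs = model_pb2.ParamSpec()
    specs.name = 'w'
    specs.lr_mult = 2.0  # parameter-specific learning rate multiplier
    sgd.register('w', specs)

    w = tensor.Tensor((3, 5))
    w.uniform(-0.1, 0.1)
    g = tensor.Tensor((3, 5))
    g.gaussian(0, 0.01)
    w = sgd.apply(0, g, w, 'w')  # uses lr_gen(0) = 0.1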


[06/22] incubator-singa git commit: SINGA-237 New documentation files for SINGA v1.0

Posted by wa...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/33992c90/src/python/singa/tensor.py
----------------------------------------------------------------------
diff --git a/src/python/singa/tensor.py b/src/python/singa/tensor.py
index ed651e9..e2572d3 100644
--- a/src/python/singa/tensor.py
+++ b/src/python/singa/tensor.py
@@ -16,23 +16,63 @@
 # under the License.
 # =============================================================================
 """
-This script includes Tensor class and its methods for python users
-to call singa::Tensor and its methods
+Example usage::
+
+    from singa import tensor
+    from singa import device
+    import numpy as np
+
+    # create a tensor with shape (2,3), default CppCPU device and float32
+    x = tensor.Tensor((2,3))
+    x.set_value(0.4)
+
+    # create a tensor from a numpy array
+    y = tensor.from_numpy(np.ones((3, 3), dtype=np.float32))
+    y.uniform(-1, 1)
+
+    z = tensor.mult(x, y)  # gemm -> z of shape (2, 3)
+
+    x += z # element-wise addition
+
+    dev = device.create_cuda_gpu()
+    x.to_device(dev)  # move the data to a gpu device
+
+    r = tensor.relu(x)
+
+    r.to_host()  # move the data back to host cpu
+    s = tensor.to_numpy(r)  # tensor -> numpy array, r must be on cpu
+
+
+There are two sets of tensor functions:
+
+Tensor member functions
+    which would change the internal state of the Tensor instance.
+Tensor module functions
+    which accept Tensor instances as arguments and return Tensor instances.
+
+Every Tensor instance must be initialized before reading data from it.
 """
 
 import numpy as np
+from functools import reduce
 from .proto import core_pb2
 from . import singa_wrap as singa
-from functools import reduce
+import device
 
 
 class Tensor(object):
-    ''' Class and member functions for singa::Tensor
+    '''Create a Python Tensor, which wraps a swig-converted Tensor from the C++ Tensor.
+
+    The three arguments are three attributes of the Tensor.
+
+    Args:
+        shape (list<int>): a list of integers for the tensor shape. If shape is
+            not specified, the created tensor is called a dummy tensor.
+        device: a swig-converted Device instance created using the device module. If it
+            is None, then the default host device would be used.
+        dtype: data type. Currently, most operations only accept kFloat32.
     '''
 
     def __init__(self, shape=None, device=None, dtype=core_pb2.kFloat32):
-        ''' shape = (tuple)
-        '''
         if shape is None:
             # call constructor of singa::Tensor
             self.singa_tensor = singa.Tensor()
@@ -48,125 +88,230 @@ class Tensor(object):
             self.device = device
             self.dtype = dtype
 
-    def copy_from_numpy(self, np_array, offset=0):
-        ''' this method stores the values of numpy array into tensor data
-            from the position of offset
-        '''
-        assert np_array.size == self.size(), 'tensor shape should be the same'
-        if not np_array.ndim == 1:
-            np_array = np_array.flatten()
-        dt = np_array.dtype
-        if dt == np.float32:
-            self.singa_tensor.floatCopyDataFromHostPtr(np_array)
-        elif dt == np.int or dt == np.int32:
-            self.singa_tensor.intCopyDataFromHostPtr(np_array)
-        else:
-            print 'Not implemented yet for ', dt
-
-    # deprecated, access the member data_type directly
-    def data_type(self):
-        return self.singa_tensor.data_type()
-
-    # deprecated, access the member shape directly
-    def shape(self, axis=None):
-        if axis is None:
-            return self.singa_tensor.shape()
-        else:
-            return self.singa_tensor.shape(axis)
-
     def ndim(self):
+        '''
+        Returns:
+            the number of dimensions of the tensor.
+        '''
         return self.singa_tensor.nDim()
 
-    def is_transpose(self):  # TODO(wangwei) make transpose a member
+    def is_transpose(self):
+        '''
+        Returns:
+            True if the internal data is transposed; otherwise False.
+        '''
         return self.singa_tensor.transpose()
 
     def size(self):  # TODO(wangwei) compute size
+        '''
+        Returns:
+            the number of elements of the tensor.
+        '''
         return self.singa_tensor.Size()
 
     def memsize(self):
+        '''
+        Returns:
+            the number of Bytes allocated for this tensor.
+        '''
         return self.singa_tensor.MemSize()
 
     def reshape(self, shape):
+        '''Change the tensor shape.
+
+        Args:
+            shape (list<int>): new shape, which should have the same volume as
+                the original shape.
+        '''
         assert product(self.shape) == product(shape), \
-               'product of shape should be equal'
+            'product of shape should be equal'
         self.shape = shape
-        self.singa_tensor.Reshape(_tuple_to_vector(shape))
+        self.singa_tensor.Reshape(list(shape))
 
     def reset_like(self, t):
+        '''Reset the shape, dtype and device as the given tensor.
+
+        Args:
+            t (Tensor)
+        '''
         self.singa_tensor.ResetLike(t.singa_tensor)
 
+    '''
     def as_type(self, dtype):
+        Change the data type.
+
+        Args:
+            dtype:
         self.singa_tensor.AsType(dtype)
+    '''
 
     def to_device(self, device):
+        '''Move the tensor data onto a given device.
+
+        Args:
+            device: a swig Device converted from CudaGPU or CppCPU or OpenclGPU
+        '''
         self.singa_tensor.ToDevice(device)
 
     def to_host(self):
+        '''Move the tensor data onto the default host CppCPU device.
+        '''
         self.singa_tensor.ToHost()
 
     def l2(self):
+        '''
+        Returns:
+            the L2 norm.
+        '''
         return self.singa_tensor.L2()
 
     def l1(self):
+        '''
+        Returns:
+            the L1 norm.
+        '''
         return self.singa_tensor.L1()
 
     def set_value(self, x):
+        '''Set all elements of the tensor to be the given value.
+
+        Args:
+            x (float): the value to be set to all elements.
+        '''
         # assert type(x) == float, 'set value only accepts float input'
         # if isinstance(x, float):
         self.singa_tensor.floatSetValue(x)
 
+    def copy_from_numpy(self, np_array, offset=0):
+        ''' Copy the data from the numpy array.
+
+        Args:
+            np_array: source numpy array
+            offset (int): destination offset
+        '''
+        assert np_array.size == self.size(), 'tensor shape should be the same'
+        if not np_array.ndim == 1:
+            np_array = np_array.flatten()
+        dt = np_array.dtype
+        if dt == np.float32:
+            self.singa_tensor.floatCopyDataFromHostPtr(np_array)
+        elif dt == np.int or dt == np.int32:
+            self.singa_tensor.intCopyDataFromHostPtr(np_array)
+        else:
+            print 'Not implemented yet for ', dt
+
     def copy_data(self, t):
+        '''Copy data from other Tensor instance.
+
+        Args:
+            t (Tensor): source Tensor.
+        '''
+        assert type(t) == Tensor, 't must be a singa Tensor instance'
         self.singa_tensor.CopyData(t.singa_tensor)
 
     def clone(self):
-        ''' it does deep copy
-            call singa::Tensor::Clone()
+        '''
+        Returns:
+            a new Tensor which does deep copy of this tensor
         '''
         return _call_singa_func(self.singa_tensor.Clone)
 
-    def transpose(self):
-        ''' shallow copy, negate the transpose field
-            call singa::Tensor::T()
+    def T(self):
+        ''' shallow copy, negate the transpose field.
+
+        Returns:
+            a new Tensor which shares the underlying data memory (shallow copy)
+            but is marked as a transposed version of this tensor.
         '''
         return _call_singa_func(self.singa_tensor.T)
 
+    '''
     def copy(self):
-        ''' shallow copy
+        shallow copy
             call copy constructor of singa::Tensor
-        '''
         return _call_singa_func(singa.Tensor, self.singa_tensor)
+    '''
 
     def deepcopy(self):
-        ''' deep copy
-            call singa::Tensor::Clone()
+        '''Same as clone().
+
+        Returns:
+            a new Tensor
         '''
         return self.clone()
 
     def bernoulli(self, p):
+        '''Sample 0/1 for each element according to the given probability.
+
+        Args:
+            p (float): with probability p, each element is sampled as 1.
+        '''
         singa.floatBernoulli(float(p), self.singa_tensor)
 
     def gaussian(self, mean, std):
+        '''Generate a value for each element following a Gaussian distribution.
+
+        Args:
+            mean (float): mean of the distribution
+            std (float): standard deviation of the distribution
+        '''
         singa.floatGaussian(float(mean), float(std), self.singa_tensor)
 
     def uniform(self, low, high):
+        '''Generate a value for each element following a uniform distribution.
+
+        Args:
+            low (float): the lower bound
+            high (float): the upper bound
+        '''
         singa.floatUniform(float(low), float(high), self.singa_tensor)
 
     def add_column(self, v):
+        '''Add a tensor to each column of this tensor.
+
+        Args:
+            v (Tensor): a Tensor to be added as a column to this tensor.
+        '''
         singa.AddColumn(v.singa_tensor, self.singa_tensor)
 
     def add_row(self, v):
+        '''Add a tensor to each row of this tensor.
+
+        Args:
+            v (Tensor): a Tensor to be added as a row to this tensor.
+        '''
         singa.AddRow(v.singa_tensor, self.singa_tensor)
 
     def div_column(self, v):
+        '''Divide each column of this tensor by v.
+
+        Args:
+            v (Tensor): 1d tensor of the same length as a column of self.
+        '''
         singa.DivColumn(v.singa_tensor, self.singa_tensor)
 
     def div_row(self, v):
+        '''Divide each row of this tensor by v.
+
+        Args:
+            v (Tensor): 1d tensor of the same length as a row of self.
+        '''
         singa.DivRow(v.singa_tensor, self.singa_tensor)
 
     def mult_column(self, v):
+        '''Multiply each column of this tensor by v element-wise.
+
+        Args:
+            v (Tensor): 1d tensor of the same length as a column of self.
+        '''
         singa.MultColumn(v.singa_tensor, self.singa_tensor)
 
     def mult_row(self, v):
+        '''Multiply each row of this tensor by v element-wise.
+
+        Args:
+            v (Tensor): 1d tensor of the same length as a row of self.
+        '''
         singa.MultRow(v.singa_tensor, self.singa_tensor)
 
     '''
@@ -174,6 +319,11 @@ class Tensor(object):
     '''
 
     def __iadd__(self, x):
+        ''' inplace element-wise addition with a tensor or a float value.
+
+        Args:
+            x (float or Tensor):
+        '''
         if isinstance(x, Tensor):
             self.singa_tensor += x.singa_tensor
         else:
@@ -181,6 +331,12 @@ class Tensor(object):
         return self
 
     def __isub__(self, x):
+        ''' inplace element-wise subtraction with a tensor or a float value.
+
+        Args:
+            x (float or Tensor):
+        '''
+
         if isinstance(x, Tensor):
             self.singa_tensor -= x.singa_tensor
         else:
@@ -188,6 +344,11 @@ class Tensor(object):
         return self
 
     def __imul__(self, x):
+        ''' inplace element-wise multiplication with a tensor or a float value.
+
+        Args:
+            x (float or Tensor):
+        '''
         if isinstance(x, Tensor):
             self.singa_tensor *= x.singa_tensor
         else:
@@ -195,6 +356,11 @@ class Tensor(object):
         return self
 
     def __idiv__(self, x):
+        ''' inplace element-wise division by a tensor or a float value.
+
+        Args:
+            x (float or Tensor):
+        '''
         if isinstance(x, Tensor):
             self.singa_tensor /= x.singa_tensor
         else:
@@ -284,29 +450,72 @@ def product(shape):
 
 
 def sizeof(dtype):
+    '''
+    Returns:
+        the number of bytes of the given SINGA data type defined in core.proto
+    '''
     return singa.SizeOf(dtype)
 
 
 def reshape(t, s):
+    '''Reshape the input tensor with the given shape.
+
+    Args:
+        t (Tensor): the tensor to be changed
+        s (list<int>): the new shape, which should have the same volume as the
+            old shape.
+
+    Returns:
+        the new Tensor
+    '''
     return _call_singa_func(singa.Reshape, t.singa_tensor, s)
 
 
 def copy_data_to_from(dst, src, size, dst_offset=0, src_offset=0):
+    '''Copy the data between two Tensor instances which could be on different
+    devices.
+
+    Args:
+        dst (Tensor): destination Tensor
+        src (Tensor): source Tensor
+        size (int) : number of elements to copy
+        dst_offset (int): offset in terms of elements to the start of dst
+        src_offset (int): offset in terms of elements to the start of src
+    '''
     singa.CopyDataToFrom(dst.singa_tensor, src.singa_tensor, size,
                          dst_offset, src_offset)
 
 
 def from_numpy(np_array):
+    '''Create a Tensor instance with the shape, dtype and values from the numpy
+    array.
+
+    Args:
+        np_array: the numpy array.
+
+    Returns:
+        A Tensor instance allocated on the default CppCPU device.
+    '''
     ret = Tensor(np_array.shape)
     ret.copy_from_numpy(np_array)
     return ret
 
 
 def to_numpy(t):
-    ''' this method gets the values of tensor data and
-        returns it as numpy array
-        TODO(wangwei) clone t to host
+    '''Convert the tensor into a numpy array.
+
+    Since numpy array is allocated on CPU devices, the input Tensor instance
+    must be on the default CppCPU device.
+
+    Args:
+        t (Tensor): a Tensor on the default CppCPU device.
+
+    Returns:
+        a numpy array
     '''
+    assert t.device == device.get_default_device() or t.device is None, \
+        'Please move the tensor onto the default host device'
+
     if t.dtype == core_pb2.kFloat32:
         np_array = t.singa_tensor.floatGetValue(int(t.size()))
     elif t.dtype == core_pb2.kInt:
@@ -317,34 +526,96 @@ def to_numpy(t):
 
 
 def abs(t):
+    '''
+    Args:
+        t (Tensor): input Tensor
+
+    Returns:
+        a new Tensor whose element y = abs(x), x is an element of t
+    '''
     return _call_singa_func(singa.Abs, t.singa_tensor)
 
 
 def exp(t):
+    '''
+    Args:
+        t (Tensor): input Tensor
+
+    Returns:
+        a new Tensor whose element y = exp(x), x is an element of t
+    '''
     return _call_singa_func(singa.Exp, t.singa_tensor)
 
 
 def log(t):
+    '''
+    Args:
+        t (Tensor): input Tensor
+
+    Returns:
+        a new Tensor whose element y = log(x), x is an element of t
+    '''
     return _call_singa_func(singa.Log, t.singa_tensor)
 
 
 def relu(t):
+    '''
+    Args:
+        t (Tensor): input Tensor
+
+    Returns:
+        a new Tensor whose element y = x if x >0; otherwise 0; x is an element
+        of t
+    '''
     return _call_singa_func(singa.ReLU, t.singa_tensor)
 
 
 def sigmoid(t):
+    '''
+    Args:
+        t (Tensor): input Tensor
+
+    Returns:
+        a new Tensor whose element y = sigmoid(x); x is an element of t
+    '''
     return _call_singa_func(singa.Sigmoid, t.singa_tensor)
 
 
 def square(t):
+    '''
+    Args:
+        t (Tensor): input Tensor
+
+    Returns:
+        a new Tensor whose element y = x * x, x is an element of t
+    '''
     return _call_singa_func(singa.Square, t.singa_tensor)
 
 
 def tanh(t):
+    '''
+    Args:
+        t (Tensor): input Tensor
+
+    Returns:
+        a new Tensor whose element y = tanh(x), x is an element of t
+    '''
     return _call_singa_func(singa.Tanh, t.singa_tensor)
 
 
 def sum(t, axis=None):
+    '''Sum elements of the input tensor along the given axis.
+
+    Args:
+        t (Tensor): input Tensor
+        axis (int, optional): if None, the summation is done over all elements;
+            if axis is provided, then it is calculated along the given axis,
+            e.g. 0 -- sum each column; 1 -- sum each row.
+
+    Returns:
+        a float value as the sum of all elements, or a new Tensor
+    '''
+
     if axis is None:
         return singa.floatSum(t.singa_tensor)
     else:
@@ -352,6 +623,17 @@ def sum(t, axis=None):
 
 
 def pow(t, x, out=None):
+    '''
+    Args:
+        t (Tensor): input tensor
+        x (float or Tensor): y[i] = t[i]^x if x is a float value; otherwise,
+            y[i]= t[i]^x[i] if x is a tensor.
+        out (None or Tensor): if None, a new Tensor would be constructed to
+            store the result; otherwise, the result is put into out.
+
+    Returns:
+        the result tensor.
+    '''
     if out is None:
         if isinstance(x, Tensor):
             return _call_singa_func(singa.Pow, t.singa_tensor, x.singa_tensor)
@@ -365,7 +647,17 @@ def pow(t, x, out=None):
         return out
 
 
-def average(t, axis=0):
+def average(t, axis=None):
+    '''
+    Args:
+        t (Tensor): input Tensor
+        axis (int, optional): if None, average all elements; otherwise average
+            along the given dimension. 0 for averaging each column; 1 for
+            averaging each row.
+
+    Returns:
+        a float value if axis is None; otherwise, a new Tensor for the result.
+    '''
     if t.ndim() > 1:
         return _call_singa_func(singa.Average, t.singa_tensor, axis)
     else:
@@ -373,6 +665,15 @@ def average(t, axis=0):
 
 
 def softmax(t, out=None):
+    '''Apply SoftMax for each row of the Tensor.
+
+    Args:
+        t (Tensor): the input 1d or 2d tensor
+        out (Tensor, optional): if not None, it is used to store the result
+
+    Returns:
+        the result Tensor
+    '''
     if out is None:
         return _call_singa_func(singa.SoftMax, t.singa_tensor)
     else:
@@ -381,22 +682,73 @@ def softmax(t, out=None):
 
 
 def lt(t, x):
+    '''Element-wise comparison for t < x.
+
+    Args:
+        t (Tensor): left hand side operand
+        x (Tensor or float): right hand side operand
+
+    Returns:
+        a Tensor with each element being t[i] < x ? 1.0f:0.0f,
+        or t[i] < x[i] ? 1.0f:0.0f
+    '''
     return t < x
 
 
 def le(t, x):
+    '''Element-wise comparison for t <= x.
+
+    Args:
+        t (Tensor): left hand side operand
+        x (Tensor or float): right hand side operand
+
+    Returns:
+        a Tensor with each element being t[i] <= x ? 1.0f:0.0f,
+        or t[i] <= x[i] ? 1.0f:0.0f
+    '''
     return t <= x
 
 
 def gt(t, x):
+    '''Element-wise comparison for t > x.
+
+    Args:
+        t (Tensor): left hand side operand
+        x (Tensor or float): right hand side operand
+
+    Returns:
+        a Tensor with each element being t[i] > x ? 1.0f:0.0f,
+        or t[i] > x[i] ? 1.0f:0.0f
+    '''
     return t > x
 
 
 def ge(t, x):
+    '''Element-wise comparison for t >= x.
+
+    Args:
+        t (Tensor): left hand side operand
+        x (Tensor or float): right hand side operand
+
+    Returns:
+        a Tensor with each element being t[i] >= x ? 1.0f:0.0f,
+        or t[i] >= x[i] ? 1.0f:0.0f
+    '''
     return t >= x
 
 
 def add(lhs, rhs, ret=None):
+    '''Element-wise addition.
+
+    Args:
+        lhs (Tensor)
+        rhs (Tensor)
+        ret (Tensor, optional): if not None, the result is stored in it;
+            otherwise, a new Tensor would be created for the result.
+
+    Returns:
+        the result Tensor
+    '''
     if ret is None:
         # call Tensor.__add__()
         return lhs + rhs
@@ -409,6 +761,17 @@ def add(lhs, rhs, ret=None):
 
 
 def sub(lhs, rhs, ret=None):
+    '''Element-wise subtraction.
+
+    Args:
+        lhs (Tensor)
+        rhs (Tensor)
+        ret (Tensor, optional): if not None, the result is stored in it;
+            otherwise, a new Tensor would be created for the result.
+
+    Returns:
+        the result Tensor
+    '''
     if ret is None:
         # call Tensor.__sub__()
         return lhs - rhs
@@ -421,6 +784,18 @@ def sub(lhs, rhs, ret=None):
 
 
 def eltwise_mult(lhs, rhs, ret=None):
+    '''Element-wise multiplication.
+
+    Args:
+        lhs (Tensor)
+        rhs (Tensor)
+        ret (Tensor, optional): if not None, the result is stored in it;
+            otherwise, a new Tensor would be created for the result.
+
+    Returns:
+        the result Tensor
+    '''
+
     if ret is None:
         # call Tensor.__mul__()
         return lhs * rhs
@@ -435,8 +810,21 @@ def eltwise_mult(lhs, rhs, ret=None):
 
 
 def mult(A, B, C=None, alpha=1.0, beta=0.0):
-    '''
+    '''Do matrix-matrix or matrix-vector multiplication.
+
     This function returns C = alpha * A * B + beta * C
+
+    Args:
+        A (Tensor): 2d Tensor
+        B (Tensor): If B is a 1d Tensor, GEMV would be invoked for matrix-vector
+            multiplication; otherwise GEMM would be invoked.
+        C (Tensor, optional): for storing the result; If None, a new Tensor
+            would be created.
+        alpha (float)
+        beta (float)
+
+    Returns:
+        the result Tensor
     '''
     if C is None:
         return _call_singa_func(singa.Mult, A.singa_tensor, B.singa_tensor)
@@ -447,6 +835,17 @@ def mult(A, B, C=None, alpha=1.0, beta=0.0):
 
 
 def div(lhs, rhs, ret=None):
+    '''Element-wise division.
+
+    Args:
+        lhs (Tensor)
+        rhs (Tensor)
+        ret (Tensor, optional): if not None, the result is stored in it;
+            otherwise, a new Tensor would be created for the result.
+
+    Returns:
+        the result Tensor
+    '''
     if ret is None:
         # call Tensor.__div__()
         return lhs / rhs
@@ -459,51 +858,125 @@ def div(lhs, rhs, ret=None):
 
 
 def axpy(alpha, x, y):
-    if isinstance(alpha, float):
-        singa.floatAxpy(alpha, x.singa_tensor, y.singa_tensor)
+    '''Element-wise operation for y += alpha * x.
+
+    Args:
+        alpha (float)
+        x (Tensor)
+        y (Tensor)
+
+    Returns:
+        y
+    '''
+    singa.floatAxpy(float(alpha), x.singa_tensor, y.singa_tensor)
     return y
 
 
 def bernoulli(p, t):
-    if isinstance(p, float):
-        singa.floatBernoulli(p, t.singa_tensor)
+    '''Generate a binary value for each element of t.
+
+    Args:
+        p (float): each element is 1 with probability p, and 0 with probability 1 - p
+        t (Tensor): the results are put into t
+
+    Returns:
+        t
+    '''
+    singa.floatBernoulli(float(p), t.singa_tensor)
     return t
 
 
 def gaussian(mean, std, t):
-    if isinstance(mean, float):
-        singa.floatGaussian(mean, std, t.singa_tensor)
+    '''Generate values following a Gaussian distribution.
+
+    Args:
+        mean (float): the mean of the Gaussian distribution.
+        std (float): the standard deviation of the Gaussian distribution.
+        t (Tensor): the results are put into t
+
+    Returns:
+        t
+    '''
+    singa.floatGaussian(float(mean), float(std), t.singa_tensor)
     return t
 
 
 def uniform(low, high, t):
-    if isinstance(low, float):
-        singa.floatUniform(low, high, t.singa_tensor)
+    '''Generate values following a Uniform distribution.
+
+    Args:
+        low (float): the lower bound
+        high (float): the upper bound
+        t (Tensor): the results are put into t
+
+    Returns:
+        t
+    '''
+    singa.floatUniform(float(low), float(high), t.singa_tensor)
     return t
 
 
 def add_column(alpha, v, beta, M):
-    singa.floatAddColumn(alpha, beta, v.singa_tensor, M.singa_tensor)
+    '''Add v to each column of M.
+
+    Denote each column of M as m, m = alpha * v + beta * m
+
+    Args:
+        alpha (float)
+        v (Tensor)
+        beta (float)
+        M (Tensor): 2d tensor
+    Returns:
+        M
+    '''
+    singa.floatAddColumn(float(alpha), float(beta), v.singa_tensor,
+                         M.singa_tensor)
     return M
 
 
 def add_row(alpha, v, beta, M):
+    '''Add v to each row of M.
+
+    Denote each row of M as m, m = alpha * v + beta * m
+
+    Args:
+        alpha (float)
+        v (Tensor)
+        beta (float)
+        M (Tensor): 2d tensor
+    Returns:
+        M
+    '''
     singa.floatAddRow(alpha, beta, v.singa_tensor, M.singa_tensor)
     return M
 
 
 def sum_columns(M):
+    '''Sum all columns into a single column.
+
+    Args:
+        M (Tensor): the input 2d tensor.
+
+    Returns:
+        a new Tensor as the resulted column.
+    '''
     assert M.ndim() == 2, 'M.nDim() is supposed to be 2'
-    nb_col = M.shape(0)
-    ret = Tensor((nb_col, 1))
+    ret = Tensor((M.shape[0], 1))
     singa.SumColumns(M.singa_tensor, ret.singa_tensor)
     return ret
 
 
 def sum_rows(M):
+    '''Sum all rows into a single row.
+
+    Args:
+        M (Tensor): the input 2d tensor.
+
+    Returns:
+        a new Tensor as the resulted row.
+    '''
     assert M.ndim() == 2, 'M.nDim() is supposed to be 2'
-    nb_row = M.shape(1)
-    ret = Tensor((1, nb_row))
+    ret = Tensor((1, M.shape[1]))
     singa.SumRows(M.singa_tensor, ret.singa_tensor)
     return ret
 
@@ -512,15 +985,6 @@ def sum_rows(M):
 '''
 
 
-def _tuple_to_vector(tshape):
-    ''' this function converts tuple to std::vector<int>
-    '''
-    vs = singa.Shape(len(tshape))
-    for i in range(len(tshape)):
-        vs[i] = tshape[i]
-    return vs
-
-
 def _call_singa_func(_singa_func, *args):
     ''' this function calls singa global functions that returns Tensor
         and create new python Tensor instance
@@ -528,7 +992,7 @@ def _call_singa_func(_singa_func, *args):
     '''
     new_t = Tensor()
     new_t.singa_tensor = _singa_func(*args)
-    new_t.shape = new_t.singa_tensor.shape()
+    new_t.shape = tuple(new_t.singa_tensor.shape())
     new_t.device = new_t.singa_tensor.device()
     new_t.dtype = new_t.singa_tensor.data_type()
     return new_t


[14/22] incubator-singa git commit: Fixed the bug leading to weird accuracy (nan), which was caused by forgetting to average the gradient over the whole mini-batch. That is why we need a lower learning rate and could not use momentum. Update the lr in opt

Posted by wa...@apache.org.
Fixed the bug leading to weird accuracy (nan), which was caused by forgetting
to average the gradient over the whole mini-batch. That is why we needed a lower
learning rate and could not use momentum.
Update the lr in optimizer.py to multiply it by the per-parameter multiplier.
Fix the bug from mis-setting the pooling type in alexnet.py (max --> avg).
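
The core of the fix divides the loss gradient by the batch size in net.py
before back-propagation. A toy numpy sketch of why this matters (illustrative
only, not the SINGA code):

    import numpy as np

    batch_size = 100
    per_example = np.random.randn(batch_size, 10).astype(np.float32)

    summed = per_example.sum(axis=0)    # old behaviour: grows with batch size
    averaged = summed / batch_size      # fixed behaviour: batch-size invariant

    # With the summed gradient, the learning rate had to shrink roughly by
    # 1/batch_size to keep the step size sane, hence the tiny lr and no momentum.
    lr = 0.001
    print np.linalg.norm(lr * summed), np.linalg.norm(lr * averaged)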


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/6d4539ee
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/6d4539ee
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/6d4539ee

Branch: refs/heads/dev
Commit: 6d4539eed2ae200a3a904a70cb789fc1b39d0f38
Parents: 1db2784
Author: Wei Wang <wa...@comp.nus.edu.sg>
Authored: Mon Aug 15 13:13:19 2016 +0800
Committer: Wei Wang <wa...@gmail.com>
Committed: Mon Aug 15 20:16:30 2016 +0800

----------------------------------------------------------------------
 examples/cifar10/alexnet.cc   |  11 +-
 examples/cifar10/alexnet.py   |  13 +-
 examples/cifar10/train.py     |  19 ++-
 src/model/feed_forward_net.cc |   6 +-
 src/model/optimizer/sgd.cc    |   4 +-
 src/python/singa/__init__.py  | 240 -------------------------------------
 src/python/singa/layer.py     |  15 +--
 src/python/singa/net.py       |   8 +-
 src/python/singa/optimizer.py |  36 ++++--
 src/python/singa/tensor.py    |   8 +-
 10 files changed, 68 insertions(+), 292 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6d4539ee/examples/cifar10/alexnet.cc
----------------------------------------------------------------------
diff --git a/examples/cifar10/alexnet.cc b/examples/cifar10/alexnet.cc
index e1363e4..8051d1b 100644
--- a/examples/cifar10/alexnet.cc
+++ b/examples/cifar10/alexnet.cc
@@ -134,7 +134,7 @@ FeedForwardNet CreateNet() {
   return net;
 }
 
-void Train(float lr, int num_epoch, string data_dir) {
+void Train(int num_epoch, string data_dir) {
   Cifar10 data(data_dir);
   Tensor train_x, train_y, test_x, test_y;
   {
@@ -161,11 +161,11 @@ void Train(float lr, int num_epoch, string data_dir) {
   auto net = CreateNet();
   SGD sgd;
   OptimizerConf opt_conf;
-  opt_conf.set_momentum(0.9);
+  // opt_conf.set_momentum(0.9);
   auto reg = opt_conf.mutable_regularizer();
   reg->set_coefficient(0.004);
   sgd.Setup(opt_conf);
-  sgd.SetLearningRateGenerator([lr](int step) {
+  sgd.SetLearningRateGenerator([](int step) {
     if (step <= 120)
       return 0.001;
     else if (step <= 130)
@@ -193,14 +193,11 @@ int main(int argc, char **argv) {
   int pos = singa::ArgPos(argc, argv, "-epoch");
   int nEpoch = 1;
   if (pos != -1) nEpoch = atoi(argv[pos + 1]);
-  pos = singa::ArgPos(argc, argv, "-lr");
-  float lr = 0.001;
-  if (pos != -1) lr = atof(argv[pos + 1]);
   pos = singa::ArgPos(argc, argv, "-data");
   string data = "cifar-10-batches-bin";
   if (pos != -1) data = argv[pos + 1];
 
   LOG(INFO) << "Start training";
-  singa::Train(lr, nEpoch, data);
+  singa::Train(nEpoch, data);
   LOG(INFO) << "End training";
 }

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6d4539ee/examples/cifar10/alexnet.py
----------------------------------------------------------------------
diff --git a/examples/cifar10/alexnet.py b/examples/cifar10/alexnet.py
index ddad1d5..dae129f 100644
--- a/examples/cifar10/alexnet.py
+++ b/examples/cifar10/alexnet.py
@@ -20,9 +20,6 @@ Following the same setting for hyper-parameters and data pre-processing, the fin
 validation accuracy would be about 82%.
 '''
 
-import sys
-import os
-
 # sys.path.append(os.path.join(os.path.dirname(__file__), '../../build/python'))
 from singa import layer
 from singa import initializer
@@ -39,18 +36,18 @@ def create_net(use_cpu=False):
     W0_specs = {'init': 'gaussian', 'mean': 0, 'std': 0.0001}
     W1_specs = {'init': 'gaussian', 'mean': 0, 'std': 0.01}
     W2_specs = {'init': 'gaussian', 'mean': 0, 'std': 0.01, 'decay_mult': 250}
-    b_specs = {'init': 'constant', 'value': 0, 'lt_mult': 2}
+    b_specs = {'init': 'constant', 'value': 0, 'lr_mult': 2, 'decay_mult': 0}
     net.add(layer.Conv2D('conv1', 32, 5, 1, W_specs=W0_specs.copy(), b_specs=b_specs.copy(), pad=2, input_sample_shape=(3,32,32,)))
     net.add(layer.MaxPooling2D('pool1', 3, 2, pad=1))
     net.add(layer.Activation('relu1'))
-    net.add(layer.LRN(name='lrn1'))
+    net.add(layer.LRN(name='lrn1', size=3, alpha=5e-5))
     net.add(layer.Conv2D('conv2', 32, 5, 1, W_specs=W1_specs.copy(), b_specs=b_specs.copy(), pad=2))
     net.add(layer.Activation('relu2'))
-    net.add(layer.MaxPooling2D('pool2', 3, 2,  pad=1))
-    net.add(layer.LRN('lrn2'))
+    net.add(layer.AvgPooling2D('pool2', 3, 2,  pad=1))
+    net.add(layer.LRN('lrn2', size=3, alpha=5e-5))
     net.add(layer.Conv2D('conv3', 64, 5, 1, W_specs=W1_specs.copy(), b_specs=b_specs.copy(), pad=2))
     net.add(layer.Activation('relu3'))
-    net.add(layer.MaxPooling2D('pool3', 3, 2, pad=1))
+    net.add(layer.AvgPooling2D('pool3', 3, 2, pad=1))
     net.add(layer.Flatten('flat'))
     net.add(layer.Dense('dense', 10, W_specs=W2_specs.copy(), b_specs=b_specs.copy()))
     for (p, specs) in zip(net.param_values(), net.param_specs()):

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6d4539ee/examples/cifar10/train.py
----------------------------------------------------------------------
diff --git a/examples/cifar10/train.py b/examples/cifar10/train.py
index de03750..2091ee5 100644
--- a/examples/cifar10/train.py
+++ b/examples/cifar10/train.py
@@ -22,7 +22,6 @@ includes 1 label & 3072 pixels.  3072 pixels are 3 channels of a 32x32 image
 import cPickle
 import numpy as np
 import os
-import sys
 import argparse
 
 # sys.path.append(os.path.join(os.path.dirname(__file__), '../../build/python'))
@@ -84,7 +83,7 @@ def normalize_for_alexnet(train_x, test_x):
 
 
 def vgg_lr(epoch):
-    return 0.01 / float(1 << ((epoch / 30)))
+    return 0.1 / float(1 << ((epoch / 25)))
 
 
 def alexnet_lr(epoch):
@@ -92,7 +91,7 @@ def alexnet_lr(epoch):
         return 0.001
     elif epoch < 130:
         return 0.0001
-    elif epoch < 140:
+    else:
         return 0.00001
 
 
@@ -107,8 +106,8 @@ def train(data, net, max_epoch, get_lr, weight_decay, batch_size=100,
         dev = device.create_cuda_gpu()
 
     net.to_device(dev)
-    opt = optimizer.SGD(momentum=0.9, weight_decay=weight_decay)
-    for (p, specs) in zip(net.param_values(), net.param_specs()):
+    opt = optimizer.SGD(momentum=0.9, decay=weight_decay)
+    for (p, specs) in zip(net.param_names(), net.param_specs()):
         opt.register(p, specs)
 
     tx = tensor.Tensor((batch_size, 3, 32, 32), dev)
@@ -129,13 +128,13 @@ def train(data, net, max_epoch, get_lr, weight_decay, batch_size=100,
             grads, (l, a) = net.train(tx, ty)
             loss += l
             acc += a
-            for (s, p, g) in zip(net.param_specs(), net.param_values(), grads):
-                opt.apply_with_lr(epoch, get_lr(epoch), g, p, str(s.name))
+            for (s, p, g) in zip(net.param_names(), net.param_values(), grads):
+                opt.apply_with_lr(epoch, get_lr(epoch), g, p, str(s))
             # update progress bar
             utils.update_progress(b * 1.0 / num_train_batch,
                                   'training loss = %f, accuracy = %f' % (l, a))
-        info = '\ntraining loss = %f, training accuracy = %f' \
-            % (loss / num_train_batch, acc / num_train_batch)
+        info = '\ntraining loss = %f, training accuracy = %f, lr = %f' \
+            % (loss / num_train_batch, acc / num_train_batch, get_lr(epoch))
         print info
 
         loss, acc = 0.0, 0.0
@@ -167,7 +166,7 @@ if __name__ == '__main__':
     if args.model == 'alexnet':
         train_x, test_x = normalize_for_alexnet(train_x, test_x)
         net = alexnet.create_net(args.use_cpu)
-        train((train_x, train_y, test_x, test_y), net, 140, alexnet_lr, 0.004,
+        train((train_x, train_y, test_x, test_y), net, 160, alexnet_lr, 0.004,
               use_cpu=args.use_cpu)
     else:
         train_x, test_x = normalize_for_vgg(train_x, test_x)

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6d4539ee/src/model/feed_forward_net.cc
----------------------------------------------------------------------
diff --git a/src/model/feed_forward_net.cc b/src/model/feed_forward_net.cc
index 514d6e2..3875430 100644
--- a/src/model/feed_forward_net.cc
+++ b/src/model/feed_forward_net.cc
@@ -206,8 +206,8 @@ const std::pair<float, float> FeedForwardNet::TrainOnBatch(int epoch,
 
 const Tensor FeedForwardNet::Forward(int flag, const Tensor& data) {
   Tensor input = data, output;
+  // LOG(INFO) << data.L1();
   for (auto layer : layers_) {
-    //    LOG(INFO) << layer->name() << ": " << input.L1();
     output = layer->Forward(flag, input);
     // LOG(INFO) << layer->name() << ": " << output.L2();
     input = output;
@@ -220,13 +220,13 @@ const vector<Tensor> FeedForwardNet::Backward(int flag, const Tensor& grad) {
   std::stack<Tensor> buf;
   Tensor tmp = grad;
   for (int i = layers_.size() - 1; i >= 0; i--) {
-    //   LOG(INFO) << layers_.at(i)->name() << " : " << tmp.L1();
+    // LOG(INFO) << layers_.at(i)->name() << " : " << tmp.L1();
     auto ret = layers_.at(i)->Backward(flag, tmp);
     tmp = ret.first;
     if (ret.second.size()) {
       for (int k = ret.second.size() - 1; k >= 0; k--) {
         buf.push(ret.second[k]);
-        //       LOG(INFO) <<  "      " << buf.top().L1();
+        // LOG(INFO) <<  "      " << buf.top().L1();
       }
     }
   }

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6d4539ee/src/model/optimizer/sgd.cc
----------------------------------------------------------------------
diff --git a/src/model/optimizer/sgd.cc b/src/model/optimizer/sgd.cc
index d78d5b8..ac453cd 100644
--- a/src/model/optimizer/sgd.cc
+++ b/src/model/optimizer/sgd.cc
@@ -33,6 +33,7 @@ void SGD::Setup(const OptimizerConf& conf) {
 // value = value - history
 void SGD::Apply(int step, float lr, const string& name, const Tensor& grad,
                 Tensor& value) {
+  // LOG(INFO) << "param " << name  << " lr = " << lr << " grad = " << grad.L1() << " value = " << value.L1();
   if (momentum_generator_) {
     float mom = momentum_generator_(step);
     if (mom != 0) {
@@ -46,9 +47,8 @@ void SGD::Apply(int step, float lr, const string& name, const Tensor& grad,
       value -= history;
       return;
     }
-  } else {
-    Axpy(-lr, grad, &value);
   }
+  Axpy(-lr, grad, &value);
 }
 }  // namespace singa
 #endif  // SRC_MODEL_OPTIMIZER_SGD_H_
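
The control flow above now falls through to the plain update whenever momentum
is absent or zero. In pseudo-numpy the rule reads (a sketch of the same logic,
not the C++ code):

    import numpy as np

    def sgd_apply(value, grad, lr, history=None, momentum=0.0):
        # with momentum: history = momentum * history + lr * grad; value -= history
        # otherwise:     value -= lr * grad
        if momentum != 0.0 and history is not None:
            history *= momentum
            history += lr * grad
            value -= history
        else:
            value -= lr * grad
        return value, history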

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6d4539ee/src/python/singa/__init__.py
----------------------------------------------------------------------
diff --git a/src/python/singa/__init__.py b/src/python/singa/__init__.py
index f14c8c5..e69de29 100644
--- a/src/python/singa/__init__.py
+++ b/src/python/singa/__init__.py
@@ -1,240 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-# =============================================================================
-
-'''
-This script is the main entrance for user to run singa inside a model workspace
-
-To use this script, user sudo install these dependencies: flask pillow and protobuf
-'''
-
-import sys, glob, os, random, shutil, time
-from flask import Flask, request, redirect, url_for
-import numpy as np
-import ConfigParser
-import urllib, traceback
-
-
-from argparse import ArgumentParser
-from argparse import RawDescriptionHelpFormatter
-sys.path.append(os.getcwd())
-
-__all__ = []
-__version__ = 0.1
-__date__ = '2016-07-20'
-__updated__ = '2016-07-20'
-__shortdesc__ = '''
-welcome to singa
-'''
-
-app = Flask(__name__)
-config = ConfigParser.RawConfigParser()
-service = {}
-data_path = "data_"
-parameter_path = "parameter_"
-
-debug = False
-
-class CLIError(Exception):
-    '''Generic exception to raise and log different fatal errors.'''
-    def __init__(self, msg):
-        super(CLIError).__init__(type(self))
-        self.msg = "E: %s" % msg
-    def __str__(self):
-        return self.msg
-    def __unicode__(self):
-        return self.msg
-
-def main(argv=None): # IGNORE:C0111
-    '''Command line options.'''
-
-    from . import device
-
-    if argv is None:
-        argv = sys.argv
-    else:
-        sys.argv.extend(argv)
-
-    program_name = os.path.basename(sys.argv[0])
-    program_version = "v%s" % __version__
-    program_build_date = str(__updated__)
-    program_version_message = '%%(prog)s %s (%s)' % (program_version, program_build_date)
-    program_shortdesc = __shortdesc__
-    program_license = '''%s
-
-  Created by dbsystem group on %s.
-  Copyright 2016 NUS School of Computing. All rights reserved.
-
-  Licensed under the Apache License 2.0
-  http://www.apache.org/licenses/LICENSE-2.0
-
-  Distributed on an "AS IS" basis without warranties
-  or conditions of any kind, either express or implied.
-
-USAGE
-''' % (program_shortdesc, str(__date__))
-
-    global debug
-
-    try:
-        # Setup argument parser
-        parser = ArgumentParser(description=program_license, formatter_class=RawDescriptionHelpFormatter)
-        parser.add_argument("-p", "--port", dest="port", default=5000, help="the port to listen to, default is 5000")
-        parser.add_argument("-param", "--parameter", dest="parameter",  help="the parameter file path to be loaded")
-        parser.add_argument("-D", "--debug", dest="debug", action="store_true", help="whether need to debug")
-        parser.add_argument("-R", "--reload", dest="reload_data", action="store_true", help="whether need to reload data")
-        parser.add_argument("-C", "--cpu", dest="use_cpu", action="store_true", help="Using cpu or not, default is using gpu")
-        parser.add_argument("-m", "--mode", dest="mode", choices=['train','test','serve'], default='serve', help="On Which mode (train,test,serve) to run singa")
-        parser.add_argument('-V', '--version', action='version', version=program_version_message)
-
-        # Process arguments
-        args = parser.parse_args()
-
-        port = args.port
-        parameter_file = args.parameter
-        mode = args.mode
-        need_reload = args.reload_data
-        use_cpu = args.use_cpu
-        debug = args.debug
-
-        #prepare data files
-        config.read('file.cfg')
-        file_prepare(need_reload)
-
-
-        import network as net
-        model = net.create()
-
-        #load parameter
-        parameter_file=get_parameter(parameter_file)
-
-        if parameter_file:
-            print "load parameter file: %s" % parameter_file
-            model.load(parameter_file)
-
-        if use_cpu:
-            raise CLIError("Currently cpu is not support!")
-        else:
-            print "runing with gpu"
-            d = device.create_cuda_gpu()
-
-        model.to_device(d)
-
-        if mode == "serve":
-            print "runing singa in serve mode, listen to  port: %s " % port
-            global service
-            from serve import Service
-            service =Service(model,d)
-
-            app.debug = debug
-            app.run(host='0.0.0.0', port= port)
-        elif mode == "train":
-            print "runing singa in train mode"
-            global trainer
-            from train import Trainer
-            trainer= Trainer(model,d)
-            if not parameter_file:
-                trainer.initialize()
-            trainer.train()
-        else:
-            raise CLIError("Currently only serve mode is surpported!")
-        return 0
-    except KeyboardInterrupt:
-        ### handle keyboard interrupt ###
-        return 0
-    except Exception, e:
-        if debug:
-            traceback.print_exc()
-            raise(e)
-        indent = len(program_name) * " "
-        sys.stderr.write(program_name + ": " + str(e) + "\n")
-        sys.stderr.write(indent + "  for help use --help \n\n")
-        return 2
-
-def file_prepare(reload_data=False):
-    '''
-        download all files and generate data.py
-    '''
-    if not reload_data and os.path.exists("data_.py"):
-        return
-
-    print "download file"
-    #clean data
-    shutil.rmtree("data_.py",ignore_errors=True)
-    shutil.rmtree("data_",ignore_errors=True)
-
-    data_py=open("data_.py",'w')
-    data_py.write("#%s" % "This file is Generated by SINGA, please don't edit\n\n")
-    if config.has_section("data"):
-        file_list = config.items("data")
-        #download files
-        for f in file_list:
-            name,path=download_file(f[0],f[1],data_path)
-            data_py.write("%s=\"%s\"\n" % (name,path))
-
-    data_py.flush()
-    data_py.close()
-
-    if config.has_section("parameter"):
-        parameter_list = config.items("parameter")
-        for p in parameter_list:
-            download_file(p[0],p[1],parameter_path)
-
-def download_file(name,path,dest):
-    '''
-    download one file to dest
-    '''
-    if not os.path.exists(dest):
-        os.makedirs(dest)
-    if (path.startswith('http')):
-        file_name = path.split('/')[-1]
-        target = os.path.join(dest,file_name)
-        urllib.urlretrieve(path,target)
-    return name,target
-
-
-def get_parameter(file_name=None):
-    '''
-    get the paticular file name or get the last parameter file
-    '''
-    if not os.path.exists(parameter_path):
-        os.makedirs(parameter_path)
-        return
-
-    if file_name:
-	return os.path.join(parameter_path,file_name)
-
-    parameter_list = [ os.path.join(parameter_path,f) for f in os.listdir(parameter_path)]
-    if len(parameter_list)==0:
-        return
-    parameter_list.sort()
-
-    return parameter_list[-1]
-
-@app.route("/")
-def index():
-    return "Hello SINGA User!"
-
-@app.route('/predict', methods=['POST'])
-def predict():
-    if request.method == 'POST':
-        try:
-            response=service.serve(request)
-        except Exception as e:
-            return e
-        return response
-    return "error, should be post request"

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6d4539ee/src/python/singa/layer.py
----------------------------------------------------------------------
diff --git a/src/python/singa/layer.py b/src/python/singa/layer.py
index c8c8c05..1e9caeb 100644
--- a/src/python/singa/layer.py
+++ b/src/python/singa/layer.py
@@ -362,8 +362,8 @@ class BatchNormalization(Layer):
 
 
 class LRN(Layer):
-    def __init__(self, name, size=5, alpha=1, beta=0.75, mode='cross_channel',
-                 k=1, input_sample_shape=None):
+    def __init__(self, name, size=5, alpha=1e-4, beta=0.75,
+                 mode='cross_channel', k=1, input_sample_shape=None):
         """Local response normalization.
 
         Args:
@@ -391,7 +391,7 @@ class Dense(Layer):
 
     def __init__(self, name, num_output, use_bias=True,
                  W_specs=None, b_specs=None,
-                 W_transpose=True, input_sample_shape=None):
+                 W_transpose=False, input_sample_shape=None):
         """Apply linear/affine transformation, also called inner-product or
         fully connected layer.
 
@@ -424,10 +424,10 @@ class Dense(Layer):
             W_specs['name'] = name + '_weight'
         if 'name' not in b_specs:
             b_specs['name'] = name + '_bias'
-        self.conf.param.extend([_construct_param_specs_from_dict(W_specs)])
-        self.param_specs.append(_construct_param_specs_from_dict(W_specs))
-        self.conf.param.extend([_construct_param_specs_from_dict(b_specs)])
-        self.param_specs.append(_construct_param_specs_from_dict(b_specs))
+        wspecs = _construct_param_specs_from_dict(W_specs)
+        bspecs = _construct_param_specs_from_dict(b_specs)
+        self.conf.param.extend([wspecs, bspecs])
+        self.param_specs.extend([wspecs, bspecs])
         # dense layer is transparent to engine.
         self.layer = _create_layer('singa', 'Dense')
         if input_sample_shape is not None:
@@ -712,6 +712,7 @@ def _construct_param_specs_from_dict(specs):
         a ParamSpec object
     """
     conf = model_pb2.ParamSpec()
+    print 'convert', specs
     if 'name' in specs:
         conf.name = specs['name']
     if 'lr_mult' in specs:

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6d4539ee/src/python/singa/net.py
----------------------------------------------------------------------
diff --git a/src/python/singa/net.py b/src/python/singa/net.py
index f040378..3a1732c 100644
--- a/src/python/singa/net.py
+++ b/src/python/singa/net.py
@@ -95,16 +95,22 @@ class FeedForwardNet(object):
         # print x.l1()
         for lyr in self.layers:
             x = lyr.forward(flag, x)
-        #    print lyr.name, x.l1()
+            # print lyr.name, x.l1()
         return x
 
     def backward(self):
         grad = self.loss.backward()
+        if len(grad.shape) > 1:
+            grad /= grad.shape[0]  # average across the batch
+        # print 'grad', grad.l1()
         pgrads = []
         for lyr in reversed(self.layers):
             grad, _pgrads = lyr.backward(kTrain, grad)
+            # disp = '%f ' % grad.l1()
             for g in reversed(_pgrads):
                 pgrads.append(g)
+                # disp = disp + ', %f ' % g.l1()
+            # print disp
         return reversed(pgrads)
 
     def save(self, f):

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6d4539ee/src/python/singa/optimizer.py
----------------------------------------------------------------------
diff --git a/src/python/singa/optimizer.py b/src/python/singa/optimizer.py
index aa6bdd1..32f03d4 100644
--- a/src/python/singa/optimizer.py
+++ b/src/python/singa/optimizer.py
@@ -102,16 +102,19 @@ class Optimizer(object):
             name (str): parameter name
             specs (ParamSpec): protobuf obj
         """
-	assert type(specs) == model_pb2.ParamSpec, \
-		'specs should be model_pb2.ParamSpec instance'
+        assert type(specs) == model_pb2.ParamSpec, \
+            'specs should be model_pb2.ParamSpec instance'
         if specs.HasField('regularizer'):
             self.regularizers[name] = CppRegularizer(specs.regularizer)
+        elif specs.decay_mult != 1:
+            self.regularizers[name] = L2Regularizer(
+                specs.decay_mult * self.regularizer.coefficient)
+
         if specs.HasField('constraint'):
             self.constraints[name] = CppConstraint(specs.constraint)
+
         if specs.lr_mult != 1:
             self.learning_rate_multiplier[name] = specs.lr_mult
-        if specs.decay_mult != 1:
-            self.decay_multiplier[name] = specs.decay_mult
 
     def apply_regularizer_constraint(self, value, grad, name=None, step=None):
         """Apply regularization and constraint if available.
@@ -129,12 +132,12 @@ class Optimizer(object):
             the updated gradient Tensor
         """
         if name is not None and name in self.constraints:
-            self.constraints[name].apply(value, grad, step)
+            self.constraints[name].apply(step, value, grad)
         elif self.constraint is not None:
             self.constraint.apply(step, value, grad)
 
         if name is not None and name in self.regularizers:
-            self.regularizers[name].apply(value, grad, step)
+            self.regularizers[name].apply(step, value, grad)
         elif self.regularizer is not None:
             self.regularizer.apply(step, value, grad)
         return grad
@@ -175,24 +178,29 @@ class Optimizer(object):
         assert self.lr_gen is not None, 'Learning rate generator is not set.'\
             'Either set the lr_gen in constructor or call apply_with_lr'
         lr = self.lr_gen(step)
+        if name is not None and name in self.learning_rate_multiplier:
+            lr = lr * self.learning_rate_multiplier[name]
         return self.apply_with_lr(step, lr, grad, value, name)
 
 
 class SGD(Optimizer):
 
-    def __init__(self, lr=None, momentum=None, decay=None, **kwargs):
+    def __init__(self, lr=None, momentum=None, decay=None):
         """The vallina Stochasitc Gradient Descent algorithm.
 
         See the base Optimizer for all arguments.
         """
         super(SGD, self).__init__(lr, momentum, decay)
         conf = model_pb2.OptimizerConf()
-        conf.momentum = momentum
+        if momentum is not None:
+            conf.momentum = momentum
         self.opt = singa.CreateOptimizer('SGD')
         self.opt.Setup(conf.SerializeToString())
 
     def apply_with_lr(self, step, lr, grad, value, name):
-        self.apply_regularizer_constraint(step, value, grad, name)
+        self.apply_regularizer_constraint(value, grad, name, step)
+        if name is not None and name in self.learning_rate_multiplier:
+            lr = lr * self.learning_rate_multiplier[name]
         self.opt.Apply(step, lr, name, grad.singa_tensor, value.singa_tensor)
         return value
 
@@ -206,6 +214,8 @@ class Nesterov(Optimizer):
         """
         super(Nesterov, self).__init__(lr, momentum, decay, kwargs)
         conf = model_pb2.OptimizerConf()
+        if momentum is not None:
+            conf.momentum = momentum
         self.opt = singa.CreateOptimizer('Nesterov')
         self.opt.Setup(conf.SerializeToString())
 
@@ -232,6 +242,8 @@ class AdaGrad(Optimizer):
 
     def apply_with_lr(self, step, lr, grad, value, name):
         grad = self.apply_regularizer_constraint(step, value, grad, name)
+        if name is not None and name in self.learning_rate_multiplier:
+            lr = lr * self.learning_rate_multiplier[name]
         self.opt.Apply(step, lr,  name, grad.singa_tensor, value.singa_tensor)
         return value
 
@@ -255,6 +267,8 @@ class RMSProp(Optimizer):
 
     def apply_with_lr(self, step, lr, grad, value, name):
         grad = self.apply_regularizer_constraint(step, value, grad, name)
+        if name is not None and name in self.learning_rate_multiplier:
+            lr = lr * self.learning_rate_multiplier[name]
         self.opt.Apply(step, lr,  name, grad.singa_tensor, value.singa_tensor)
         return value
 
@@ -300,7 +314,9 @@ class L2Regularizer(Regularizer):
         if coefficient is None:
             assert self.coefficient is not None, 'Must set the coefficient'
             coefficient = self.coefficient
-        tensor.axpy(coefficient, value, grad)
+        # print coefficient, value.l1(), grad.l1()
+        if coefficient != 0:
+            tensor.axpy(coefficient, value, grad)
         return grad
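
With this change, a ParamSpec whose decay_mult differs from 1 installs a
per-parameter L2Regularizer, and lr_mult scales the learning rate inside
apply_with_lr. A hedged usage sketch (the proto import path follows tensor.py
and the parameter name is illustrative):

    from singa import optimizer
    from singa.proto import model_pb2

    opt = optimizer.SGD(momentum=0.9, decay=0.004)

    spec = model_pb2.ParamSpec()
    spec.name = 'dense_bias'
    spec.lr_mult = 2.0     # the bias learns twice as fast
    spec.decay_mult = 0.0  # and is excluded from weight decay
    opt.register('dense_bias', spec)

    # later, per mini-batch:
    # opt.apply_with_lr(epoch, lr, grad, value, 'dense_bias')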
 
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6d4539ee/src/python/singa/tensor.py
----------------------------------------------------------------------
diff --git a/src/python/singa/tensor.py b/src/python/singa/tensor.py
index ed651e9..1d04cdf 100644
--- a/src/python/singa/tensor.py
+++ b/src/python/singa/tensor.py
@@ -177,28 +177,28 @@ class Tensor(object):
         if isinstance(x, Tensor):
             self.singa_tensor += x.singa_tensor
         else:
-            self.singa_tensor += x
+            self.singa_tensor += float(x)
         return self
 
     def __isub__(self, x):
         if isinstance(x, Tensor):
             self.singa_tensor -= x.singa_tensor
         else:
-            self.singa_tensor -= x
+            self.singa_tensor -= float(x)
         return self
 
     def __imul__(self, x):
         if isinstance(x, Tensor):
             self.singa_tensor *= x.singa_tensor
         else:
-            self.singa_tensor *= x
+            self.singa_tensor *= float(x)
         return self
 
     def __idiv__(self, x):
         if isinstance(x, Tensor):
             self.singa_tensor /= x.singa_tensor
         else:
-            self.singa_tensor /= x
+            self.singa_tensor /= float(x)
         return self
 
     '''


[18/22] incubator-singa git commit: SINGA-223 Use Sphinx to create the website.

Posted by wa...@apache.org.
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/31ae6bd4/doc/en/community/mail-lists.rst
----------------------------------------------------------------------
diff --git a/doc/en/community/mail-lists.rst b/doc/en/community/mail-lists.rst
new file mode 100644
index 0000000..02b39de
--- /dev/null
+++ b/doc/en/community/mail-lists.rst
@@ -0,0 +1,10 @@
+Project Mailing Lists
+=====================
+
+These are the mailing lists that have been established for this project. For each list, there is a subscribe, unsubscribe, and an archive link.
+
+.. csv-table:: Mailing Lists
+	:header: "Name", "Post", "Subscribe", "Unsubscribe", "Archive"
+
+        "Development", "dev@singa.incubator.apache.org", "`Subscribe <ma...@singa.incubator.apache.org>`_", "`Unsubscribe <ma...@singa.incubator.apache.org.>`_", "`mail-archives.apache.org <http://mail-archives.apache.org/mod_mbox/singa-dev/>`_"
+        "Commits", "commits@singa.incubator.apache.org", "`Subscribe <ma...@singa.incubator.apache.org>`_", "`Unsubscribe <ma...@singa.incubator.apache.org>`_", "`mail-archives.apache.org  <http://mail-archives.apache.org/mod_mbox/singa-commits/>`_"

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/31ae6bd4/doc/en/community/source-repository.md
----------------------------------------------------------------------
diff --git a/doc/en/community/source-repository.md b/doc/en/community/source-repository.md
new file mode 100644
index 0000000..8864629
--- /dev/null
+++ b/doc/en/community/source-repository.md
@@ -0,0 +1,22 @@
+# Source Repository
+
+___
+
+This project uses [Git](http://git-scm.com/) to manage its source code. Instructions on Git use can be found at [http://git-scm.com/documentation](http://git-scm.com/documentation).
+
+## Web Access
+
+The following is a link to the online source repository.
+
+* [https://git-wip-us.apache.org/repos/asf?p=incubator-singa.git;a=summary](https://git-wip-us.apache.org/repos/asf?p=incubator-singa.git;a=summary)
+
+
+## Upstream for committers
+
+Committers need to set the upstream endpoint to the Apache git (not github) repo address, e.g.,
+
+    $ git remote add asf https://git-wip-us.apache.org/repos/asf/incubator-singa.git
+
+Then you (committer) can push your code in this way,
+
+    $ git push asf <local-branch>:<remote-branch>

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/31ae6bd4/doc/en/community/team-list.rst
----------------------------------------------------------------------
diff --git a/doc/en/community/team-list.rst b/doc/en/community/team-list.rst
new file mode 100644
index 0000000..a677aff
--- /dev/null
+++ b/doc/en/community/team-list.rst
@@ -0,0 +1,64 @@
+The SINGA Team
+==============
+
+A successful project requires many people to play many roles. Some members write code or documentation, while others are valuable as testers, submitting patches and suggestions.
+
+Mentors
+-------
+
+==================   ============
+Name                 Email
+==================   ============
+Daniel Dai           daijy@apache.org
+Ted Dunning          tdunning@apache.org
+Alan Gates           gates@apache.org
+Thejas Nair          thejas@apache.org
+==================   ============
+
+Developers
+----------
+
++-------------------+--------------------------------+----------------------------------------------+
+| Name              | Email                          | Organization                                 |
++-------------------+--------------------------------+----------------------------------------------+
+| Gang Chen         | cg@zju.edu.cn                  | Zhejiang University                          |
++-------------------+--------------------------------+----------------------------------------------+
+| Haibo Chen        | hzchenhaibo@corp.netease.com   | NetEase                                      |
++-------------------+--------------------------------+----------------------------------------------+
+| Anh Dinh          | dinhtta@apache.org             | National University of Singapore             |
++-------------------+--------------------------------+----------------------------------------------+
+| Jinyang Gao       | jinyang@apache.org             | National University of Singapore             |
++-------------------+--------------------------------+----------------------------------------------+
+| Xing Ji           | jixin@comp.nus.edu.sg          | National University of Singapore             |
++-------------------+--------------------------------+----------------------------------------------+
+| Chonho Lee        | chonho@gmail.com               | National University of Singapore             |
++-------------------+--------------------------------+----------------------------------------------+
+| Zhaojing Luo      | zhaojing@apache.org            | National University of Singapore             |
++-------------------+--------------------------------+----------------------------------------------+
+| Beng Chin Ooi     | ooibc@comp.nus.edu.sg          | National University of Singapore             |
++-------------------+--------------------------------+----------------------------------------------+
+| Kian-Lee Tan      | tankl@apache.org               | National University of Singapore             |
++-------------------+--------------------------------+----------------------------------------------+
+| Anthony K. H. Tung| atung@comp.nus.edu.sg          | National University of Singapore             |
++-------------------+--------------------------------+----------------------------------------------+
+| Ji Wang           | wangji@comp.nus.edu.sg         | National University of Singapore             |
++-------------------+--------------------------------+----------------------------------------------+
+| Sheng Wang        | wangsh@apache.org              | National University of Singapore             |
++-------------------+--------------------------------+----------------------------------------------+
+| Wei Wang          | wangwei@apache.org             | National University of Singapore             |
++-------------------+--------------------------------+----------------------------------------------+
+| Yuan Wang         | wangyuan@corp.netease.com      | NetEase                                      |
++-------------------+--------------------------------+----------------------------------------------+
+| Wenfeng Wu        | wuwf@comp.nus.edu.sg           | National University of Singapore             |
++-------------------+--------------------------------+----------------------------------------------+
+| Zhongle Xie       | zhongle@apache.org             | National University of Singapore             |
++-------------------+--------------------------------+----------------------------------------------+
+| Meihui Zhang      | meihui_zhang@sutd.edu.sg       | Singapore University of Technology and Design|
++-------------------+--------------------------------+----------------------------------------------+
+| Kaiping Zheng     | kaiping@apache.org             | National University of Singapore             |
++-------------------+--------------------------------+----------------------------------------------+
+| Ming Zhong        | hzzhongming15@corp.netease.com | Zhejiang University                          |
++-------------------+--------------------------------+----------------------------------------------+
+
+
+

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/31ae6bd4/doc/en/conf.py
----------------------------------------------------------------------
diff --git a/doc/en/conf.py b/doc/en/conf.py
new file mode 100755
index 0000000..332a0d1
--- /dev/null
+++ b/doc/en/conf.py
@@ -0,0 +1,339 @@
+# -*- coding: utf-8 -*-
+#
+# incubator-singa documentation build configuration file, created by
+# sphinx-quickstart on Sat Jul  9 20:36:57 2016.
+#
+# This file is execfile()d with the current directory set to its
+# containing dir.
+#
+# Note that not all possible configuration values are present in this
+# autogenerated file.
+#
+# All configuration values have a default; values that are commented out
+# serve to show the default.
+
+# If extensions (or modules to document with autodoc) are in another directory,
+# add these directories to sys.path here. If the directory is relative to the
+# documentation root, use os.path.abspath to make it absolute, like shown here.
+#
+import os
+import sys
+sys.path.insert(0, os.path.abspath('.'))
+sys.path.insert(1, os.path.abspath('../build/python'))
+
+# -- General configuration ------------------------------------------------
+from recommonmark.parser import CommonMarkParser
+
+source_parsers = {
+    '.md': CommonMarkParser,
+}
+
+# If your documentation needs a minimal Sphinx version, state it here.
+#
+# needs_sphinx = '1.0'
+
+# Add any Sphinx extension module names here, as strings. They can be
+# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
+# ones.
+extensions = ['sphinx.ext.autodoc', 'sphinx.ext.napoleon']
+napoleon_google_docstring = True
+
+# Add any paths that contain templates here, relative to this directory.
+templates_path = ['_templates']
+
+# The suffix(es) of source filenames.
+# You can specify multiple suffix as a list of string:
+#
+# source_suffix = ['.rst', '.md']
+source_suffix = ['.rst', '.md']
+
+# The encoding of source files.
+#
+source_encoding = 'utf-8-sig'
+
+# The master toctree document.
+master_doc = 'index'
+
+# General information about the project.
+project = u'incubator-singa'
+copyright = u'2016 The Apache Software Foundation. All rights reserved. Apache Singa, Apache, the Apache feather logo, and the Apache Singa project logos are trademarks of The Apache Software Foundation. All other marks mentioned may be trademarks or registered trademarks of their respective owners.'
+author = u'moaz'
+
+# The version info for the project you're documenting, acts as replacement for
+# |version| and |release|, also used in various other places throughout the
+# built documents.
+#
+# The short X.Y version.
+version = u'1.0.0'
+# The full version, including alpha/beta/rc tags.
+release = u'1.0.0'
+
+# The language for content autogenerated by Sphinx. Refer to documentation
+# for a list of supported languages.
+#
+# This is also used if you do content translation via gettext catalogs.
+# Usually you set "language" from the command line for these cases.
+language = None
+
+# There are two options for replacing |today|: either, you set today to some
+# non-false value, then it is used:
+#
+# today = ''
+#
+# Else, today_fmt is used as the format for a strftime call.
+#
+# today_fmt = '%B %d, %Y'
+
+# List of patterns, relative to source directory, that match files and
+# directories to ignore when looking for source files.
+# This patterns also effect to html_static_path and html_extra_path
+exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
+
+# The reST default role (used for this markup: `text`) to use for all
+# documents.
+#
+# default_role = None
+
+# If true, '()' will be appended to :func: etc. cross-reference text.
+#
+# add_function_parentheses = True
+
+# If true, the current module name will be prepended to all description
+# unit titles (such as .. function::).
+#
+# add_module_names = True
+
+# If true, sectionauthor and moduleauthor directives will be shown in the
+# output. They are ignored by default.
+#
+# show_authors = False
+
+# The name of the Pygments (syntax highlighting) style to use.
+pygments_style = 'sphinx'
+
+# A list of ignored prefixes for module index sorting.
+# modindex_common_prefix = []
+
+# If true, keep warnings as "system message" paragraphs in the built documents.
+# keep_warnings = False
+
+# If true, `todo` and `todoList` produce output, else they produce nothing.
+todo_include_todos = False
+
+
+# -- Options for HTML output ----------------------------------------------
+
+# The theme to use for HTML and HTML Help pages.  See the documentation for
+# a list of builtin themes.
+#
+html_theme = 'sphinx_rtd_theme'
+
+# Theme options are theme-specific and customize the look and feel of a theme
+# further.  For a list of options available for each theme, see the
+# documentation.
+#
+# html_theme_options = {}
+
+# Add any paths that contain custom themes here, relative to this directory.
+# html_theme_path = []
+
+# The name for this set of Sphinx documents.
+# "<project> v<release> documentation" by default.
+#
+# html_title = u'Singa v1.0.0'
+
+# A shorter title for the navigation bar.  Default is the same as html_title.
+#
+# html_short_title = None
+
+# The name of an image file (relative to this directory) to place at the top
+# of the sidebar.
+#
+html_logo = 'image/singa.png'
+
+# The name of an image file (relative to this directory) to use as a favicon of
+# the docs.  This file should be a Windows icon file (.ico) being 16x16 or 32x32
+# pixels large.
+#
+# html_favicon = None
+
+# Add any paths that contain custom static files (such as style sheets) here,
+# relative to this directory. They are copied after the builtin static files,
+# so a file named "default.css" will overwrite the builtin "default.css".
+html_static_path = ['../_static']
+
+# Add any extra paths that contain custom files (such as robots.txt or
+# .htaccess) here, relative to this directory. These files are copied
+# directly to the root of the documentation.
+#
+# html_extra_path = []
+
+# If not None, a 'Last updated on:' timestamp is inserted at every page
+# bottom, using the given strftime format.
+# The empty string is equivalent to '%b %d, %Y'.
+#
+# html_last_updated_fmt = None
+
+# If true, SmartyPants will be used to convert quotes and dashes to
+# typographically correct entities.
+#
+# html_use_smartypants = True
+
+# Custom sidebar templates, maps document names to template names.
+#
+# html_sidebars = {}
+
+# Additional templates that should be rendered to pages, maps page names to
+# template names.
+#
+# html_additional_pages = {}
+
+# If false, no module index is generated.
+#
+# html_domain_indices = True
+
+# If false, no index is generated.
+#
+# html_use_index = True
+
+# If true, the index is split into individual pages for each letter.
+#
+# html_split_index = False
+
+# If true, links to the reST sources are added to the pages.
+#
+html_show_sourcelink = False
+
+# If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
+#
+# html_show_sphinx = True
+
+# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
+#
+# html_show_copyright = True
+
+# If true, an OpenSearch description file will be output, and all pages will
+# contain a <link> tag referring to it.  The value of this option must be the
+# base URL from which the finished HTML is served.
+#
+# html_use_opensearch = ''
+
+# This is the file name suffix for HTML files (e.g. ".xhtml").
+# html_file_suffix = None
+
+# Language to be used for generating the HTML full-text search index.
+# Sphinx supports the following languages:
+#   'da', 'de', 'en', 'es', 'fi', 'fr', 'hu', 'it', 'ja'
+#   'nl', 'no', 'pt', 'ro', 'ru', 'sv', 'tr', 'zh'
+#
+# html_search_language = 'en'
+
+# A dictionary with options for the search language support, empty by default.
+# 'ja' uses this config value.
+# 'zh' user can custom change `jieba` dictionary path.
+#
+# html_search_options = {'type': 'default'}
+
+# The name of a javascript file (relative to the configuration directory) that
+# implements a search results scorer. If empty, the default will be used.
+#
+# html_search_scorer = 'scorer.js'
+
+# Output file base name for HTML help builder.
+htmlhelp_basename = 'Singadoc'
+
+# -- Options for LaTeX output ---------------------------------------------
+
+latex_elements = {
+     # The paper size ('letterpaper' or 'a4paper').
+     #
+     # 'papersize': 'letterpaper',
+
+     # The font size ('10pt', '11pt' or '12pt').
+     #
+     # 'pointsize': '10pt',
+
+     # Additional stuff for the LaTeX preamble.
+     #
+     # 'preamble': '',
+
+     # Latex figure (float) alignment
+     #
+     # 'figure_align': 'htbp',
+}
+
+# Grouping the document tree into LaTeX files. List of tuples
+# (source start file, target name, title,
+#  author, documentclass [howto, manual, or own class]).
+latex_documents = [
+    (master_doc, 'incubator-singa.tex', u'incubator-singa Documentation',
+     u'moaz', 'manual'),
+]
+
+# The name of an image file (relative to this directory) to place at the top of
+# the title page.
+#
+# latex_logo = None
+
+# For "manual" documents, if this is true, then toplevel headings are parts,
+# not chapters.
+#
+# latex_use_parts = False
+
+# If true, show page references after internal links.
+#
+# latex_show_pagerefs = False
+
+# If true, show URL addresses after external links.
+#
+# latex_show_urls = False
+
+# Documents to append as an appendix to all manuals.
+#
+# latex_appendices = []
+
+# If false, no module index is generated.
+#
+# latex_domain_indices = True
+
+
+# -- Options for manual page output ---------------------------------------
+
+# One entry per manual page. List of tuples
+# (source start file, name, description, authors, manual section).
+man_pages = [
+    (master_doc, 'incubator-singa', u'incubator-singa Documentation',
+     [author], 1)
+]
+
+# If true, show URL addresses after external links.
+#
+# man_show_urls = False
+
+
+# -- Options for Texinfo output -------------------------------------------
+
+# Grouping the document tree into Texinfo files. List of tuples
+# (source start file, target name, title, author,
+#  dir menu entry, description, category)
+texinfo_documents = [
+    (master_doc, 'incubator-singa', u'incubator-singa Documentation',
+     author, 'incubator-singa', 'One line description of project.',
+     'Miscellaneous'),
+]
+
+# Documents to append as an appendix to all manuals.
+#
+# texinfo_appendices = []
+
+# If false, no module index is generated.
+#
+# texinfo_domain_indices = True
+
+# How to display URL addresses: 'footnote', 'no', or 'inline'.
+#
+# texinfo_show_urls = 'footnote'
+
+# If true, do not generate a @detailmenu in the "Top" node's menu.
+#
+# texinfo_no_detailmenu = False

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/31ae6bd4/doc/en/develop/contribute-code.md
----------------------------------------------------------------------
diff --git a/doc/en/develop/contribute-code.md b/doc/en/develop/contribute-code.md
new file mode 100644
index 0000000..98e5aee
--- /dev/null
+++ b/doc/en/develop/contribute-code.md
@@ -0,0 +1,60 @@
+## How to Contribute Code
+
+_____
+
+### Coding Style
+
+The SINGA codebase follows the [Google C++ Style Guide](http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml).
+
+To check if your code follows the style, you can use the provided cpplint tool:
+    
+    $ ./tool/cpplint.py YOUR_FILE
+
+
+### JIRA format
+
+Like other Apache projects, SINGA uses JIRA to track bugs, improvements and
+other high-level discussions (e.g., system design and features).  Github pull requests are
+used for implementation discussions, e.g., code review and code merge.
+
+* Provide a descriptive Title.
+* Write a detailed Description. For bug reports, this should ideally include a
+  short reproduction of the problem. For new features, it may include a design
+  document.
+* Set [required fields](https://cwiki.apache.org/confluence/display/SPARK/Contributing+to+Spark#ContributingtoSpark-JIRA)
+
+### Pull Request
+
+The workflow is
+
+* Fork the [SINGA Github repository](https://github.com/apache/incubator-singa) to
+your own Github account.
+
+* Clone your fork, create a new branch (e.g., feature-foo or fixbug-foo),
+ work on it. After finishing your job,
+ [rebase](https://git-scm.com/book/en/v2/Git-Branching-Rebasing) it to the
+ current latest master and push commits to your own Github account (the new
+ branch).
+
+* Open a pull request against the master branch of apache/incubator-singa.
+The PR title should be of the form SINGA-xxxx Title, where
+SINGA-xxxx is the relevant JIRA number, and Title may be the JIRA's title or a
+more specific title describing the PR itself, for example, "SINGA-6 Implement thread-safe singleton". Detailed description can be copied from the JIRA.
+Consider identifying committers or other contributors who have worked on the
+code being changed. Find the file(s) in Github and click "Blame" to see a
+line-by-line annotation of who changed the code last.  You can add @username in
+the PR description to ping them immediately.
+Please state that the contribution is your original work and that you license
+the work to the project under the project's open source license. Further commits (e.g., bug fix)
+to your new branch will be added to this pull request automatically by Github.
+
+* Wait for one committer to review the patch. If there are no conflicts, the committers will merge it with
+the master branch. The merge should (a) not use rebase, (b) disable fast-forward merge, and (c) check the
+commit message format and test the code/feature.
+
+* If there are too many small commits, you will be told to squash them into fewer meaningful
+commits. If your commit message does not follow the format (i.e., SINGA-xxxx), you will be told to
+reword your commit message. Both changes can be done using interactive git rebase. Once you
+get the commits corrected, push them to your own GitHub account again. Your pull request
+will be automatically updated. For details, please refer to
+[Rebase Pull Requests](https://github.com/edx/edx-platform/wiki/How-to-Rebase-a-Pull-Request). A sketch of this flow is given below.
\ No newline at end of file
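
As a concrete illustration of the squash-and-reword flow described above (assuming your fork's remote is named `origin`, the Apache repo's remote is `upstream`, and your branch is `fixbug-foo`; these names are hypothetical):

    # fetch the latest master and replay your branch on top of it
    $ git fetch upstream
    $ git rebase -i upstream/master   # squash/reword commits interactively
    # force-push the rewritten branch; the pull request updates automatically
    $ git push -f origin fixbug-foo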

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/31ae6bd4/doc/en/develop/contribute-docs.md
----------------------------------------------------------------------
diff --git a/doc/en/develop/contribute-docs.md b/doc/en/develop/contribute-docs.md
new file mode 100644
index 0000000..5e21a0f
--- /dev/null
+++ b/doc/en/develop/contribute-docs.md
@@ -0,0 +1,28 @@
+# How to Contribute Documentation
+
+___
+
+
+## Website
+This document gives step-by-step instructions for deploying the [SINGA website](http://singa.incubator.apache.org).
+
+The SINGA website is built by [Sphinx](http://www.sphinx-doc.org) 1.4.4 from a source tree stored in git: https://github.com/apache/incubator-singa/tree/master/doc.
+
+To install Sphinx on Ubuntu:
+
+    $ apt-get install python-sphinx
+
+To install the markdown support for Sphinx:
+
+    $ pip install recommonmark
+
+You can build the website by executing the following command from the doc folder:
+
+    $ make html
+
+The procedure for contributing documentation is the same as [contributing code](contribute-code.html).
+
+
+## CPP API
+
+To generate docs, run "doxygen" from the doc folder (Doxygen >= 1.8 recommended)
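
For example (assuming a Doxyfile is present in the doc folder):

    $ cd doc
    $ doxygen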

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/31ae6bd4/doc/en/develop/how-contribute.md
----------------------------------------------------------------------
diff --git a/doc/en/develop/how-contribute.md b/doc/en/develop/how-contribute.md
new file mode 100644
index 0000000..8687b5a
--- /dev/null
+++ b/doc/en/develop/how-contribute.md
@@ -0,0 +1,11 @@
+# How to Contribute to SINGA
+
+___
+
+As with any open source project, there are several ways you can help:
+
+* Join the [mailing list](../community/mail-lists.html) and answer other users' questions.
+* [Build Singa](../quick-start.html) for yourself, in order to fix bugs.
+* Report bugs, feature requests and other issues in the [issue tracking](../community/issue-tracking.html) application.
+* Check SINGA's [development schedule](schedule.html) and [contribute code](contribute-code.html) by providing patches.
+* [Help with the documentation](contribute-docs.html) by updating webpages that are lacking or unclear.

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/31ae6bd4/doc/en/develop/schedule.rst
----------------------------------------------------------------------
diff --git a/doc/en/develop/schedule.rst b/doc/en/develop/schedule.rst
new file mode 100644
index 0000000..2afe54f
--- /dev/null
+++ b/doc/en/develop/schedule.rst
@@ -0,0 +1,40 @@
+Development Schedule
+====================
+
+.. csv-table::
+	:header: "Release", "Module", "Feature", "Status"
+
+	" 0.1 Sep 2015     "," Neural Network          "," Feed forward neural network, including CNN, MLP                                                                 "," done  "
+	"                  ","                         "," RBM-like model, including RBM                                                                                   "," done   "
+	"                  ","                         "," Recurrent neural network, including standard RNN                                                                "," done   "
+	"                  ","  Architecture           "," One worker group on single node (with data partition)                                                           "," done   "
+	"                  ","                         "," Multi worker groups on single node using [Hogwild](http://www.eecs.berkeley.edu/~brecht/papers/hogwildTR.pdf)      ","done"
+	"                  ","                         "," Distributed Hogwild","done"
+	"                  ","                         "," Multi groups across nodes, like [Downpour](http://papers.nips.cc/paper/4687-large-scale-distributed-deep-networks) ","done"
+	"                  ","                         "," All-Reduce training architecture like [DeepImage](http://arxiv.org/abs/1501.02876) ","done"
+	"                  ","                         "," Load-balance among servers "," done"
+	"                  ","  Failure recovery       "," Checkpoint and restore ","done"
+	"                  ","  Tools                  "," Installation with GNU auto tools"," done"
+	"0.2 Jan 2016      "," Neural Network          "," Feed forward neural network, including AlexNet, cuDNN layers, etc."," done "
+	"                  ","                         "," Recurrent neural network, including GRULayer and BPTT","done "
+	"                  ","                         "," Model partition and hybrid partition","done"
+	"      		   "," Tools                   "," Integration with Mesos for resource management","done"
+	"         	   ","                         "," Prepare Docker images for deployment","done"
+	"              	   ","                         "," Visualization of neural net and debug information ","done"
+	"                  "," Binding                 "," Python binding for major components ","done"
+	"                  "," GPU                     "," Single node with multiple GPUs ","done"
+	"0.3 April 2016    "," GPU                     "," Multiple nodes, each with multiple GPUs","done"
+	"                  ","                         "," Heterogeneous training using both GPU and CPU [CcT](http://arxiv.org/abs/1504.04343)","done"
+	"                  ","                         "," Support cuDNN v4 "," done"
+	"                  "," Installation            "," Remove dependency on ZeroMQ, CZMQ, Zookeeper for single node training","done"
+	"                  "," Updater                 "," Add new SGD updaters including Adam, AdamMax and AdaDelta","done"
+	"                  "," Binding                 "," Enhance Python binding for training","done"
+	"1.0 July 2016     "," Programming abstraction ","Tensor with linear algebra, neural net and random operations "," "
+	"                  ","                         ","Updater for distributed parameter updating ",""
+	"                  "," Optimization            "," Execution and memory optimization",""
+	"                  "," Hardware                "," Use Cuda and Cudnn for Nvidia GPU",""
+	"                  ","                         "," Use OpenCL for AMD GPU or other devices",""
+	"                  "," Cross-platform          "," To extend from Linux to MacOS and Windows",""
+	"                  "," Examples                "," Speech recognition example",""
+	"                  ","                         ","Large image models, e.g., [GoogLeNet](http://arxiv.org/abs/1409.4842), [VGG](https://arxiv.org/pdf/1409.1556.pdf) and [Residual Net](http://arxiv.org/abs/1512.03385)",""
+	"     "," Rafiki                  "," Deep learning as a service "," "

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/31ae6bd4/doc/en/docs.rst
----------------------------------------------------------------------
diff --git a/doc/en/docs.rst b/doc/en/docs.rst
new file mode 100644
index 0000000..400b12a
--- /dev/null
+++ b/doc/en/docs.rst
@@ -0,0 +1,6 @@
+Documentation
+=============
+
+.. toctree::
+   docs/index
+   docs/zh/index

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/31ae6bd4/doc/en/docs/cnn.md
----------------------------------------------------------------------
diff --git a/doc/en/docs/cnn.md b/doc/en/docs/cnn.md
new file mode 100755
index 0000000..21ef1f7
--- /dev/null
+++ b/doc/en/docs/cnn.md
@@ -0,0 +1,141 @@
+# Quickstart - Cifar10 example
+A convolutional neural network (CNN) is a type of feed-forward artificial neural network widely used for image classification. In this example, we will use a deep CNN model to classify images from the [CIFAR10 dataset](http://www.cs.toronto.edu/~kriz/cifar.html).
+
+## Running instructions for CPP version
+Please refer to the [Installation](installation.html) page for how to install SINGA. Currently, this CNN example requires cuDNN; hence both CUDA and cuDNN should be installed and SINGA should be compiled with CUDA and cuDNN enabled.
+
+The Cifar10 dataset could be downloaded by running
+
+    # switch to cifar10 directory
+    $ cd ../examples/cifar10
+    # download data for CPP version
+    $ python download_data.py bin
+
+'bin' is for downloading the binary version of the Cifar10 data.
+
+During downloading, you should see the detailed output like
+
+     Downloading CIFAR10 from http://www.cs.toronto.edu/~kriz/cifar-10-binary.tar.gz
+     The tar file does exist. Extracting it now..
+     Finished!
+
+Now that you have prepared the data for this Cifar10 example, the final step is to execute the `run.sh` script,
+
+    # in SINGA_ROOT/examples/cifar10/
+    $ ./run.sh
+
+You should see detailed output as follows: first the data files are read in order and the statistics of the training and testing data are shown, then the neural net structure is printed with some parameter information, and finally the performance details during the training and validation process are reported. The number of epochs can be specified in the `run.sh` file.
+
+    Start training
+    Reading file cifar-10-batches-bin/data_batch_1.bin
+    Reading file cifar-10-batches-bin/data_batch_2.bin
+    Reading file cifar-10-batches-bin/data_batch_3.bin
+    Reading file cifar-10-batches-bin/data_batch_4.bin
+    Reading file cifar-10-batches-bin/data_batch_5.bin
+    Reading file cifar-10-batches-bin/test_batch.bin
+    Training samples = 50000, Test samples = 10000
+    conv1(32, 32, 32, )
+    pool1(32, 16, 16, )
+    relu1(32, 16, 16, )
+    lrn1(32, 16, 16, )
+    conv2(32, 16, 16, )
+    relu2(32, 16, 16, )
+    pool2(32, 8, 8, )
+    lrn2(32, 8, 8, )
+    conv3(64, 8, 8, )
+    relu3(64, 8, 8, )
+    pool3(64, 4, 4, )
+    flat(1024, )
+    ip(10, )
+    conv1_weight : 8.09309e-05
+    conv1_bias : 0
+    conv2_weight : 0.00797731
+    conv2_bias : 0
+    conv3_weight : 0.00795888
+    conv3_bias : 0
+    ip_weight : 0.00798683
+    ip_bias : 0
+    Messages will be appended to an existed file: train_perf
+    Messages will be appended to an existed file: val_perf
+    Epoch 0, training loss = 1.828369, accuracy = 0.329420, lr = 0.001000
+    Epoch 0, val loss = 1.561823, metric = 0.420600
+    Epoch 1, training loss = 1.465898, accuracy = 0.469940, lr = 0.001000
+    Epoch 1, val loss = 1.361778, metric = 0.513300
+    Epoch 2, training loss = 1.320708, accuracy = 0.529000, lr = 0.001000
+    Epoch 2, val loss = 1.242080, metric = 0.549100
+    Epoch 3, training loss = 1.213776, accuracy = 0.571620, lr = 0.001000
+    Epoch 3, val loss = 1.175346, metric = 0.582000
+
+The training details are stored in `train_perf` file in the same directory and the validation details in `val_perf` file.
+
+
+## Running instructions for Python version
+To run the CNN example in the Python version, we need to compile SINGA with the Python binding,
+
+    $ mkdir build && cd build
+    $ cmake -DUSE_PYTHON=ON ..
+    $ make
+
+Now download the Cifar10 dataset,
+
+    # switch to cifar10 directory
+    $ cd ../examples/cifar10
+    # download data for Python version
+    $ python download_data.py py
+
+During downloading, you should see the detailed output like
+
+     Downloading CIFAR10 from http://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
+     The tar file does exist. Extracting it now..
+     Finished!
+
+Then execute the `train.py` script to train the model,
+
+    $ python train.py
+
+You should see output as follows, including the details of the neural net structure with some parameter information, the reading of the data files, and the performance details during the training and testing process.
+
+    (32L, 32L, 32L)
+    (32L, 16L, 16L)
+    (32L, 16L, 16L)
+    (32L, 16L, 16L)
+    (32L, 16L, 16L)
+    (32L, 16L, 16L)
+    (32L, 8L, 8L)
+    (32L, 8L, 8L)
+    (64L, 8L, 8L)
+    (64L, 8L, 8L)
+    (64L, 4L, 4L)
+    (1024L,)
+    Start intialization............
+    conv1_weight gaussian 7.938460476e-05
+    conv1_bias constant 0.0
+    conv2_weight gaussian 0.00793507322669
+    conv2_bias constant 0.0
+    conv3_weight gaussian 0.00799657031894
+    conv3_bias constant 0.0
+    dense_weight gaussian 0.00804364029318
+    dense_bias constant 0.0
+    Loading data ..................
+    Loading data file cifar-10-batches-py/data_batch_1
+    Loading data file cifar-10-batches-py/data_batch_2
+    Loading data file cifar-10-batches-py/data_batch_3
+    Loading data file cifar-10-batches-py/data_batch_4
+    Loading data file cifar-10-batches-py/data_batch_5
+    Loading data file cifar-10-batches-py/test_batch
+    Epoch 0
+    training loss = 1.881866, training accuracy = 0.306360 accuracy = 0.420000
+    test loss = 1.602577, test accuracy = 0.412200
+    Epoch 1
+    training loss = 1.536011, training accuracy = 0.441940 accuracy = 0.500000
+    test loss = 1.378170, test accuracy = 0.507600
+    Epoch 2
+    training loss = 1.333137, training accuracy = 0.519960 accuracy = 0.520000
+    test loss = 1.272205, test accuracy = 0.540600
+    Epoch 3
+    training loss = 1.185212, training accuracy = 0.574120 accuracy = 0.540000
+    test loss = 1.211573, test accuracy = 0.567600
+
+This script calls `alexnet.py` to build the AlexNet model. After training finishes, SINGA saves the model parameters into a checkpoint file `model.bin` in the same directory. We can then use this `model.bin` file for prediction.
+
+    $ python predict.py

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/31ae6bd4/doc/en/docs/device.rst
----------------------------------------------------------------------
diff --git a/doc/en/docs/device.rst b/doc/en/docs/device.rst
new file mode 100644
index 0000000..e79d87a
--- /dev/null
+++ b/doc/en/docs/device.rst
@@ -0,0 +1,38 @@
+Device
+=======
+
+
+The Device abstraction represents any hardware device with memory and computation units.
+All `Tensor operations <tensor.html>`_ are scheduled by the resident device for execution.
+Tensor memory is also managed by the device's memory manager. Therefore, optimizations
+of memory and execution are implemented in the Device class.
+
+Specific devices
+----------------
+Currently, SINGA has three Device implementations,
+
+1. CudaGPU for an Nvidia GPU card which runs Cuda code
+2. CppCPU for a CPU which runs Cpp code
+3. OpenclGPU for a GPU card which runs OpenCL code
+
+
+Python API
+----------
+
+.. automodule:: singa.device
+   :members: create_cuda_gpus, create_cuda_gpus_on, get_default_device
+
+
+The following code provides examples of creating devices,
+
+.. code:: python
+
+   from singa import device
+   cuda = device.create_cuda_gpu_on(0)  # use GPU card of ID 0
+   host = device.get_default_device()  # get the default host device (a CppCPU)
+   ary1 = device.create_cuda_gpus(2)  # create 2 devices, starting from ID 0
+   ary2 = device.create_cuda_gpus([0,2])  # create 2 devices on ID 0 and 2
+
+
+CPP API
+---------

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/31ae6bd4/doc/en/docs/index.rst
----------------------------------------------------------------------
diff --git a/doc/en/docs/index.rst b/doc/en/docs/index.rst
new file mode 100644
index 0000000..93315de
--- /dev/null
+++ b/doc/en/docs/index.rst
@@ -0,0 +1,10 @@
+English
+=======
+
+.. toctree::
+
+   installation
+   software_stack
+   device
+   tensor
+   examples/index

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/31ae6bd4/doc/en/docs/installation.md
----------------------------------------------------------------------
diff --git a/doc/en/docs/installation.md b/doc/en/docs/installation.md
new file mode 100755
index 0000000..8ab617f
--- /dev/null
+++ b/doc/en/docs/installation.md
@@ -0,0 +1,69 @@
+# Building SINGA from source
+
+## Dependencies
+
+### Required
+* Google Protobuf (>=2.5)
+* BLAS (tested with OpenBLAS >=0.2.10)
+* CUDA (tested with 6.5, 7.0 and 7.5)
+* CUDNN (v4 and v5)
+* cmake (>=2.6)
+
+Users must install the above mandatory libraries.
+Currently CUDA and CUDNN are also mandatory, but they will become optional later.
+
+### Optional
+* Glog
+* OpenCV (tested with 2.4.8)
+* LMDB (tested with 0.9)
+
+
+## Instructions
+
+Please clone the newest code from [Github](https://github.com/apache/incubator-singa) and execute the following commands,
+
+
+    $ git clone https://github.com/apache/incubator-singa.git
+    $ cd incubator-singa/
+    # switch to dev branch
+    $ git checkout dev
+
+
+If you use CUDA, then [CNMeM](https://github.com/NVIDIA/cnmem) is necessary,
+which can be downloaded as follows,
+
+    $ git submodule init
+    $ git submodule update
+
+
+### Linux OS
+
+GCC (>=4.8.1) is required to compile SINGA on Linux OS.
+In SINGA_ROOT, execute the following commands to compile SINGA,
+
+    $ mkdir build && cd build
+    # generate Makefile for compilation
+    $ cmake ..
+    # compile SINGA
+    $ make
+
+Note that if you are using CUDNN, you need to let cmake know the paths to CUDNN,
+
+    $ export CMAKE_INCLUDE_PATH=<path to cudnn>/include:$CMAKE_INCLUDE_PATH
+    $ export CMAKE_LIBRARY_PATH=<path to cudnn>/lib64:$CMAKE_LIBRARY_PATH
+
+You can use `ccmake ..` to configure the compilation options including using
+LMDB, GLOG, etc.
+
+After compiling SINGA, you can run the unit tests by
+
+    $ ./bin/test_singa
+
+You will see all the test cases together with their results. If SINGA passes all
+tests, then you have successfully installed SINGA. Please proceed to try the examples!
+
+
+### MacOS
+
+
+### Windows

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/31ae6bd4/doc/en/docs/neural-net.md
----------------------------------------------------------------------
diff --git a/doc/en/docs/neural-net.md b/doc/en/docs/neural-net.md
new file mode 100644
index 0000000..c10baf8
--- /dev/null
+++ b/doc/en/docs/neural-net.md
@@ -0,0 +1,327 @@
+# Neural Net
+
+---
+
+`NeuralNet` in SINGA represents an instance of user's neural net model. As the
+neural net typically consists of a set of layers, `NeuralNet` comprises
+a set of unidirectionally connected [Layer](layer.html)s.
+This page describes how to convert a user's neural net into
+the configuration of `NeuralNet`.
+
+<img src="../_static/images/model-category.png" align="center" width="200px"/>
+<span><strong>Figure 1 - Categorization of popular deep learning models.</strong></span>
+
+## Net structure configuration
+
+Users configure the `NeuralNet` by listing all layers of the neural net and
+specifying each layer's source layer names. Popular deep learning models can be
+categorized as Figure 1. The subsequent sections give details for each
+category.
+
+### Feed-forward models
+
+<div align = "left">
+<img src="../_static/images/mlp-net.png" align="center" width="200px"/>
+<span><strong>Figure 2 - Net structure of a MLP model.</strong></span>
+</div>
+
+Feed-forward models, e.g., CNN and MLP, are easy to configure as their layer
+connections are directed and contain no cycles. The
+configuration for the MLP model shown in Figure 2 is as follows,
+
+    net {
+      layer {
+        name : 'data'
+        type : kData
+      }
+      layer {
+        name : 'image'
+        type : kImage
+        srclayer: 'data'
+      }
+      layer {
+        name : 'label'
+        type : kLabel
+        srclayer: 'data'
+      }
+      layer {
+        name : 'hidden'
+        type : kHidden
+        srclayer: 'image'
+      }
+      layer {
+        name : 'softmax'
+        type : kSoftmaxLoss
+        srclayer: 'hidden'
+        srclayer: 'label'
+      }
+    }
+
+### Energy models
+
+<img src="../_static/images/rbm-rnn.png" align="center" width="500px"/>
+<span><strong>Figure 3 - Convert connections in RBM and RNN.</strong></span>
+
+
+For energy models including RBM, DBM,
+etc., their connections are undirected (i.e., Category B). To represent these models using
+`NeuralNet`, users can simply replace each connection with two directed
+connections, as shown in Figure 3a. In other words, for each pair of connected layers, their source
+layer field should include each other's name.
+The full [RBM example](rbm.html) has
+detailed neural net configuration for a RBM model, which looks like
+
+    net {
+      layer {
+        name : "vis"
+        type : kVisLayer
+        param {
+          name : "w1"
+        }
+        srclayer: "hid"
+      }
+      layer {
+        name : "hid"
+        type : kHidLayer
+        param {
+          name : "w2"
+          share_from: "w1"
+        }
+        srclayer: "vis"
+      }
+    }
+
+### RNN models
+
+For recurrent neural networks (RNN), users can remove the recurrent connections
+by unrolling the recurrent layer.  For example, in Figure 3b, the original
+layer is unrolled into a new layer with 4 internal layers. In this way, the
+model is like a normal feed-forward model, thus can be configured similarly.
+The [RNN example](rnn.html) has a full neural net
+configuration for a RNN model.
+
+
+## Configuration for multiple nets
+
+Typically, a training job includes three neural nets for
+training, validation and test phases respectively. The three neural nets share most
+layers except the data layer, loss layer, output layer, etc.  To avoid
+redundant configurations for the shared layers, users can use the `exclude`
+field to filter a layer out of a neural net, e.g., the following layer will be
+filtered out when creating the testing `NeuralNet`.
+
+
+    layer {
+      ...
+      exclude : kTest # filter this layer for creating test net
+    }
+
+
+
+## Neural net partitioning
+
+A neural net can be partitioned in different ways to distribute the training
+over multiple workers.
+
+### Batch and feature dimension
+
+<img src="../_static/images/partition_fc.png" align="center" width="400px"/>
+<span><strong>Figure 4 - Partitioning of a fully connected layer.</strong></span>
+
+
+Every layer's feature blob is considered a matrix whose rows are feature
+vectors. Thus, one layer can be split on two dimensions. Partitioning on
+dimension 0 (also called batch dimension) slices the feature matrix by rows.
+For instance, if the mini-batch size is 256 and the layer is partitioned into 2
+sub-layers, each sub-layer would have 128 feature vectors in its feature blob.
+Partitioning on this dimension has no effect on the parameters, as every
+[Param](param.html) object is replicated in the sub-layers. Partitioning on dimension
+1 (also called feature dimension) slices the feature matrix by columns. For
+example, suppose the original feature vector has 50 units, after partitioning
+into 2 sub-layers, each sub-layer would have 25 units. This partitioning may
+result in [Param](param.html) object being split, as shown in
+Figure 4. Both the bias vector and weight matrix are
+partitioned into two sub-layers.
+
+
+### Partitioning configuration
+
+There are 4 partitioning schemes, whose configurations are given below,
+
+  1. Partitioning each single layer into sub-layers on the batch dimension (see
+  below). It is enabled by configuring the partition dimension of the layer to
+  0, e.g.,
+
+          # with other fields omitted
+          layer {
+            partition_dim: 0
+          }
+
+  2. Partitioning each single layer into sub-layers on the feature dimension (see
+  below).  It is enabled by configuring the partition dimension of the layer to
+  1, e.g.,
+
+          # with other fields omitted
+          layer {
+            partition_dim: 1
+          }
+
+  3. Partitioning all layers into different subsets. It is enabled by
+  configuring the location ID of a layer, e.g.,
+
+          # with other fields omitted
+          layer {
+            location: 1
+          }
+          layer {
+            location: 0
+          }
+
+
+  4. Hybrid partitioning of strategy 1, 2 and 3. The hybrid partitioning is
+  useful for large models. An example application is to implement the
+  [idea proposed by Alex](http://arxiv.org/abs/1404.5997).
+  Hybrid partitioning is configured like,
+
+          # with other fields omitted
+          layer {
+            location: 1
+          }
+          layer {
+            location: 0
+          }
+          layer {
+            partition_dim: 0
+            location: 0
+          }
+          layer {
+            partition_dim: 1
+            location: 0
+          }
+
+Currently SINGA supports strategy-2 well. Other partitioning strategies are
+under test and will be released in a later version.
+
+## Parameter sharing
+
+Parameters can be shared in two cases,
+
+  * sharing parameters among layers via user configuration. For example, the
+  visible layer and hidden layer of a RBM share the weight matrix, which is configured through
+  the `share_from` field as shown in the above RBM configuration. The
+  configurations must be the same (except name) for shared parameters.
+
+  * due to neural net partitioning, some `Param` objects are replicated into
+  different workers, e.g., partitioning one layer on batch dimension. These
+  workers share parameter values. SINGA controls this kind of parameter
+  sharing automatically, users do not need to do any configuration.
+
+  * the `NeuralNet` for training and testing (and validation) share most layers
+  , thus share `Param` values.
+
+If the shared `Param` instances reside in the same process (possibly in different
+threads), they use the same chunk of memory space for their values. But they
+have separate memory spaces for their gradients. In fact, their
+gradients will be averaged by the stub or server.
+
+## Advanced user guide
+
+### Creation
+
+    static NeuralNet* NeuralNet::Create(const NetProto& np, Phase phase, int num);
+
+The above function creates a `NeuralNet` for a given phase, and returns a
+pointer to the `NeuralNet` instance. The phase is in {kTrain,
+kValidation, kTest}. `num` is used for net partitioning which indicates the
+number of partitions.  Typically, a training job includes three neural nets for
+training, validation and test phases respectively. The three neural nets share most
+layers except the data layer, loss layer, output layer, etc. The `Create`
+function takes in the full net configuration including layers for training,
+validation and test.  It removes layers for phases other than the specified
+phase based on the `exclude` field in
+[layer configuration](layer.html):
+
+    layer {
+      ...
+      exclude : kTest # filter this layer for creating test net
+    }
+
+The filtered net configuration is passed to the constructor of `NeuralNet`:
+
+    NeuralNet::NeuralNet(NetProto netproto, int npartitions);
+
+The constructor first creates a graph representing the net structure in
+
+    Graph* NeuralNet::CreateGraph(const NetProto& netproto, int npartitions);
+
+Next, it creates a layer for each node and connects layers if their nodes are
+connected.
+
+    void NeuralNet::CreateNetFromGraph(Graph* graph, int npartitions);
+
+Since the `NeuralNet` instance may be shared among multiple workers, the
+`Create` function returns a pointer to the `NeuralNet` instance.
+
+### Parameter sharing
+
+ `Param` sharing
+is enabled by first sharing the Param configuration (in `NeuralNet::Create`)
+to create two similar (e.g., the same shape) Param objects, and then calling
+(in `NeuralNet::CreateNetFromGraph`),
+
+    void Param::ShareFrom(const Param& from);
+
+It is also possible to share `Param`s of two nets, e.g., sharing parameters of
+the training net and the test net,
+
+    void NeuralNet::ShareParamsFrom(NeuralNet* other);
+
+It will call `Param::ShareFrom` for each Param object.
+
+### Access functions
+`NeuralNet` provides a couple of access functions to get the layers and params
+of the net:
+
+    const std::vector<Layer*>& layers() const;
+    const std::vector<Param*>& params() const ;
+    Layer* name2layer(string name) const;
+    Param* paramid2param(int id) const;
+
+
+### Partitioning
+
+
+#### Implementation
+
+SINGA partitions the neural net in `CreateGraph` function, which creates one
+node for each (partitioned) layer. For example, if one layer's partition
+dimension is 0 or 1, then it creates `npartitions` nodes for it; if the
+partition dimension is -1, a single node is created, i.e., no partitioning.
+Each node is assigned a partition (or location) ID. If the original layer is
+configured with a location ID, then the ID is assigned to each newly created node.
+These nodes are connected according to the connections of the original layers.
+Some connection layers will be added automatically.
+For instance, if two connected sub-layers are located at two
+different workers, then a pair of bridge layers is inserted to transfer the
+feature (and gradient) blob between them. When two layers are partitioned on
+different dimensions, a concatenation layer which concatenates feature rows (or
+columns) and a slice layer which slices feature rows (or columns) would be
+inserted. These connection layers help make the network communication and
+synchronization transparent to the users.
+
+#### Dispatching partitions to workers
+
+Each (partitioned) layer is assigned a location ID, based on which it is dispatched to one
+worker. Particularly, the pointer to the `NeuralNet` instance is passed
+to every worker within the same group, but each worker only computes over the
+layers that have the same partition (or location) ID as the worker's ID.  When
+every worker computes the gradients of the entire model parameters
+(strategy-2), we refer to this process as data parallelism.  When different
+workers compute the gradients of different parameters (strategy-3 or
+strategy-1), we call this process model parallelism.  The hybrid partitioning
+leads to hybrid parallelism where some workers compute the gradients of the
+same subset of model parameters while other workers compute on different model
+parameters.  For example, to implement hybrid parallelism for the
+[DCNN model](http://arxiv.org/abs/1404.5997), we set `partition_dim = 0` for
+lower layers and `partition_dim = 1` for higher layers.
+
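
To make the feature-dimension partitioning of Figure 4 concrete, here is a minimal numpy sketch (illustrative only, not the SINGA implementation): the 50 output units of a fully connected layer are split into two sub-layers of 25 units each, and the weight matrix and bias are partitioned accordingly.

    import numpy as np

    x = np.random.rand(256, 100)      # mini-batch of 256 input vectors
    W = np.random.rand(100, 50)       # weight matrix of the layer
    b = np.zeros(50)                  # bias, one entry per output unit

    # partition_dim = 1: split the 50 output units into two sub-layers
    W1, W2 = np.split(W, 2, axis=1)   # each is 100x25
    b1, b2 = np.split(b, 2)           # each has 25 entries

    y1 = x.dot(W1) + b1               # 256x25, computed by one worker
    y2 = x.dot(W2) + b2               # 256x25, computed by the other
    y = np.concatenate([y1, y2], 1)   # equals the unpartitioned output
    assert np.allclose(y, x.dot(W) + b)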

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/31ae6bd4/doc/en/docs/overview.rst
----------------------------------------------------------------------
diff --git a/doc/en/docs/overview.rst b/doc/en/docs/overview.rst
new file mode 100644
index 0000000..18ad62b
--- /dev/null
+++ b/doc/en/docs/overview.rst
@@ -0,0 +1,99 @@
+Introduction
+==============
+
+
+SINGA is a general distributed deep learning platform for training big deep
+learning models over large datasets. It is designed with an intuitive
+programming model based on the layer abstraction. A variety
+of popular deep learning models are supported, namely feed-forward models including
+convolutional neural networks (CNN), energy models like restricted Boltzmann
+machine (RBM), and recurrent neural networks (RNN). Many built-in layers are
+provided for users. SINGA architecture is
+sufficiently flexible to run synchronous, asynchronous and hybrid training
+frameworks.  SINGA
+also supports different neural net partitioning schemes to parallelize the
+training of large models, namely partitioning on batch dimension, feature
+dimension or hybrid partitioning.
+
+
+Goals
+-----
+
+As a distributed system, the first goal of SINGA is to have good scalability. In other
+words, SINGA is expected to reduce the total training time to achieve certain
+accuracy with more computing resources (i.e., machines).
+
+
+The second goal is to make SINGA easy to use.
+It is non-trivial for programmers to develop and train models with deep and
+complex model structures.  Distributed training further increases the burden of
+programmers, e.g., data and model partitioning, and network communication.  Hence it is essential to
+provide an easy to use programming model so that users can implement their deep
+learning models/algorithms without much awareness of the underlying distributed
+platform.
+
+Principles
+----------
+
+Scalability is a challenging research problem for distributed deep learning
+training. SINGA provides a general architecture to exploit the scalability of
+different training frameworks. Synchronous training frameworks improve the
+efficiency of one training iteration, and
+asynchronous training frameworks improve the convergence rate. Given a fixed budget
+(e.g., cluster size), users can run a hybrid framework that maximizes the
+scalability by trading off between efficiency and convergence rate.
+
+SINGA comes with a programming model designed based on the layer abstraction, which
+is intuitive for deep learning models.  A variety of
+popular deep learning models can be expressed and trained using this programming model.
+
+System overview
+---------------
+
+.. figure:: /image/sgd.png
+
+            Figure 1 - SGD flow
+
+Training a deep learning model means finding the optimal parameters involved in
+the transformation functions that generate good features for specific tasks.
+The goodness of a set of parameters is measured by a loss function, e.g.,
+`Cross-Entropy Loss <https://en.wikipedia.org/wiki/Cross_entropy>`_ . Since the
+loss functions are usually non-linear and non-convex, it is difficult to get a
+closed form solution. Typically, people use the stochastic gradient descent
+(SGD) algorithm, which randomly
+initializes the parameters and then iteratively updates them to reduce the loss
+as shown in Figure 1.
+
+.. figure:: /image/overview.png
+
+           Figure 2 - SINGA overview
+
+SGD is used in SINGA to train
+parameters of deep learning models. The training workload is distributed over
+worker and server units as shown in Figure 2. In each
+iteration, every worker calls *TrainOneBatch* function to compute
+parameter gradients. *TrainOneBatch* takes a *NeuralNet* object
+representing the neural net, and visits layers of the *NeuralNet* in
+certain order. The resultant gradients are sent to the local stub that
+aggregates the requests and forwards them to corresponding servers for
+updating. Servers reply to workers with the updated parameters for the next
+iteration.
+
+
+Job submission
+--------------
+
+To submit a job in SINGA (i.e., training a deep learning model),
+users pass the job configuration to SINGA driver in the
+`main function <programming-guide.html>`_ . The job configuration
+specifies the four major components in Figure 2,
+
+  * a `NeuralNet <neural-net.html>`_ describing the neural net structure with the detailed layer setting and their connections;
+  * a `TrainOneBatch <train-one-batch.html>`_  algorithm which is tailored for different model categories;
+  * an `Updater <updater.html>`_  defining the protocol for updating parameters at the server side;
+  * a `Cluster Topology <distributed-training.html>`_ specifying the distributed architecture of workers and servers.
+
+This process is like the job submission in Hadoop, where users configure their
+jobs in the main function to set the mapper, reducer, etc.
+In Hadoop, users can configure their jobs with their own (or built-in) mapper and reducer; in SINGA, users
+can configure their jobs with their own (or built-in) layer, updater, etc.
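
The SGD loop of Figure 1 can be summarized in a few lines of illustrative Python (hypothetical helper names, not the SINGA API):

    def sgd(params, grad_fn, lr=0.01, steps=100):
        """Plain SGD: repeatedly step against the gradient of the loss."""
        for _ in range(steps):
            grads = grad_fn(params)
            params = [p - lr * g for p, g in zip(params, grads)]
        return params

    # minimize f(p) = p^2, whose gradient is 2p; converges towards 0
    print(sgd([4.0], lambda ps: [2 * p for p in ps], lr=0.1, steps=50))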

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/31ae6bd4/doc/en/docs/software_stack.md
----------------------------------------------------------------------
diff --git a/doc/en/docs/software_stack.md b/doc/en/docs/software_stack.md
new file mode 100644
index 0000000..c60b6a5
--- /dev/null
+++ b/doc/en/docs/software_stack.md
@@ -0,0 +1,99 @@
+# Software Stack
+
+SINGA's software stack includes three major components, namely, core, IO and
+model. Figure 1 illustrates these components together with the hardware.
+The core component provides memory management and tensor operations;
+IO has classes for reading (and writing) data from (to) disk and network; the
+model component provides data structures and algorithms for machine learning models,
+e.g., layers for neural network models, optimizers/initializer/metric/loss for
+general machine learning models.
+
+
+<img src="../_static/images/singav1-sw.png" align="center" width="500px"/>
+<br/>
+<span><strong>Figure 1 - SINGA V1 software stack.</strong></span>
+
+## Core
+
+[Tensor](tensor.html) and [Device](device.html) are two core abstractions in SINGA. Tensor class represents a
+multi-dimensional array, which stores model variables and provides linear algebra
+operations for machine learning
+algorithms, including matrix multiplication and random functions. Each tensor
+instance (i.e. a tensor) is allocated on a Device instance.
+Each Device instance (i.e. a device) is created against one hardware device,
+e.g. a GPU card or a CPU core. Each device manages the memory of its tensors and executes
+tensor operations on its execution units, e.g. CPU threads or CUDA streams.
+
+Depending on the hardware and the programming language, SINGA has implemented
+the following specific device classes:
+
+* **CudaGPU** represents an Nvidia GPU card. The execution units are the CUDA streams.
+* **CppCPU** represents a normal CPU. The execution units are the CPU threads.
+* **OpenclGPU** represents a normal GPU card from either Nvidia or AMD.
+  The execution units are the CommandQueues. Given that OpenCL is compatible with
+  many hardware devices, e.g. FPGA and ARM, the OpenclGPU has the potential to be
+  extended for other devices.
+
+Each type of device uses a different programming language to implement the
+kernel functions for the tensor operations:
+
+* CppMath (tensor_math_cpp.h) implements the tensor operations using Cpp for CppCPU
+* CudaMath (tensor_math_cuda.h) implements the tensor operations using CUDA for CudaGPU
+* OpenclMath (tensor_math_opencl.h) implements the tensor operations using OpenCL for OpenclGPU
+
+In addition, different types of data, such as float32 and float16, could be supported by adding
+the corresponding tensor functions.
+
+Typically, users create a device instance and use it to create multiple
+tensor instances. When users call a Tensor function, it invokes the
+corresponding implementation (CppMath/CudaMath/OpenclMath) automatically. In
+other words, the implementation of Tensor operations is transparent to users,
+as the sketch below illustrates.
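+
+For example, the following minimal sketch (assuming the `singa` Python package
+has been built and installed) creates two tensors on a device and runs an
+operation that is dispatched to that device's math implementation:
+
+    from singa import device, tensor
+
+    dev = device.create_cuda_gpu()   # or device.get_default_device() for CppCPU
+    a = tensor.Tensor((2, 3), dev)   # allocated on dev
+    b = tensor.Tensor((2, 3), dev)
+    a.gaussian(0.0, 0.1)             # filled in place on dev
+    b.set_value(1.0)
+    c = a + b                        # executed by CudaMath transparently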
+
+Most machine learning algorithms can be expressed using (dense or sparse) tensors.
+Therefore, the Tensor abstraction enables SINGA to run a wide range of models,
+including deep learning models and traditional machine learning models.
+
+The Tensor and Device abstractions are extensible to support a wide range of hardware devices
+via different programming languages. A new hardware device is supported by
+adding a new Device subclass and the corresponding implementation of the Tensor
+operations (xxxMath).
+
+Optimizations for speed and memory are implemented inside Device, which
+manages both operation execution and memory allocation/release. More optimization details
+are described on the [Device page](device.html).
+
+
+## Model
+
+On top of the Tensor and Device abstractions, SINGA provides some higher level
+classes for machine learning modules; a sketch of how they cooperate follows the list.
+
+* [Layer](layer.html) and its subclasses are specific to neural networks. Every layer provides
+  functions for forward propagating features and for backward propagating gradients w.r.t. the training loss.
+  Layers wrap the complex operations so that users can easily create neural nets
+  by connecting a set of layers.
+
+* [Initializer](initializer.html) and its subclasses provide various methods for initializing
+  model parameters (stored in Tensor instances), e.g., following uniform or Gaussian distributions.
+
+* [Loss](loss.html) and its subclasses define the training objective (loss) functions.
+  Each loss implements both the function to compute the loss value and the function to compute
+  the gradient of the prediction w.r.t. the objective. Example loss functions include squared error and cross entropy.
+
+* [Metric](metric.html) and its subclasses provide the function to measure the
+  performance of the model, e.g., the accuracy.
+
+* [Optimizer](optimizer.html) and its subclasses implement the methods for updating
+  model parameter values using parameter gradients, including SGD, AdaGrad, RMSProp, etc.
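+
+To see how these classes cooperate, the following minimal sketch runs one
+training step without a Layer. It is an illustration only: it assumes a built
+`singa` package, labels encoded as class indices, and the `apply_with_lr`
+signature used by the bundled examples:
+
+    from singa import tensor, loss, optimizer
+    from singa.proto import model_pb2
+
+    x = tensor.Tensor((4, 10))     # predictions for a batch of 4 samples
+    y = tensor.Tensor((4,))        # ground-truth labels
+    x.gaussian(0.0, 1.0)
+    y.set_value(0.0)
+
+    lossfun = loss.SoftmaxCrossEntropy()
+    lvalue = lossfun.forward(model_pb2.kTrain, x, y)  # per-sample loss values
+    grad = lossfun.backward()                         # gradient w.r.t. x
+
+    opt = optimizer.SGD(momentum=0.9)
+    opt.apply_with_lr(0, 0.01, grad, x, 'x')          # update x in place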
+
+
+## IO
+
+The IO module consists of classes for data loading, data preprocessing and message passing.
+
+* Reader and its subclasses load string records from disk files.
+* Writer and its subclasses write string records to disk files.
+* Encoder and its subclasses encode Tensor instances into string records.
+* Decoder and its subclasses decode string records into Tensor instances.
+* Endpoint represents a communication endpoint and provides functions for passing messages between processes.
+* Message represents a communication message between Endpoint instances. It carries both meta data and payload.

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/31ae6bd4/doc/en/docs/tensor.rst
----------------------------------------------------------------------
diff --git a/doc/en/docs/tensor.rst b/doc/en/docs/tensor.rst
new file mode 100644
index 0000000..87d26ea
--- /dev/null
+++ b/doc/en/docs/tensor.rst
@@ -0,0 +1,54 @@
+Tensor
+========
+
+Each Tensor instance is a multi-dimensional array allocated on a specific
+Device instance. Tensor instances store variables and provide linear algebra
+operations over different types of hardware devices, without requiring users
+to be aware of the underlying device. Note that, except for copy functions,
+users must ensure that the tensor operands are allocated on the same device.
+
+
+Tensor implementation
+---------------------
+
+SINGA has three different sets of implementations of Tensor functions, one for each
+type of Device.
+
+* 'tensor_math_cpp.h' implements the operations using Cpp (with CBLAS) for CppCPU devices.
+* 'tensor_math_cuda.h' implements the operations using CUDA (with cuBLAS) for CudaGPU devices.
+* 'tensor_math_opencl.h' implements the operations using OpenCL for OpenclGPU devices.
+
+Python API
+----------
+
+There are two sets of tensor functions:
+
+1. Tensor member functions, which change the internal state of the Tensor instance.
+2. tensor module functions, which accept Tensor instances as arguments and return
+new Tensor instances.
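+
+For example (a minimal sketch, assuming a built ``singa`` package)::
+
+   from singa import tensor
+
+   t = tensor.Tensor((2, 2))
+   t.set_value(1.0)       # member function, changes t in place
+   s = tensor.log(t)      # module function, returns a new Tensor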
+
+
+Create Tensor instances
+~~~~~~~~~~~~~~~~~~~~~~~
+
+.. autoclass:: singa.tensor.Tensor
+
+
+Tensor instances can be constructed from a Numpy array,
+
+.. automodule:: singa.tensor
+   :members: from_numpy
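+
+For example (a minimal sketch; ``to_numpy`` below is the module's helper for
+the reverse copy)::
+
+   import numpy as np
+   from singa import tensor
+
+   a = np.asarray([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32)
+   t = tensor.from_numpy(a)    # copy the values into a new Tensor
+   b = tensor.to_numpy(t)      # copy the values back into a numpy array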
+
+
+Set Tensor values
+~~~~~~~~~~~~~~~~~
+
+
+
+
+
+
+
+
+
+
+

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/31ae6bd4/doc/en/downloads.md
----------------------------------------------------------------------
diff --git a/doc/en/downloads.md b/doc/en/downloads.md
new file mode 100644
index 0000000..31e7274
--- /dev/null
+++ b/doc/en/downloads.md
@@ -0,0 +1,67 @@
+## Download SINGA
+---
+
+* Latest code: please clone the dev branch from [Github](https://github.com/apache/incubator-singa)
+
+* v0.3.0 (20 April 2016):
+    * [Apache SINGA 0.3.0](http://www.apache.org/dyn/closer.cgi/incubator/singa/0.3.0/apache-singa-incubating-0.3.0.tar.gz)
+      [\[MD5\]](https://dist.apache.org/repos/dist/release/incubator/singa/0.3.0/apache-singa-incubating-0.3.0.tar.gz.md5)
+      [\[KEYS\]](https://dist.apache.org/repos/dist/release/incubator/singa/0.3.0/KEYS)
+    * [Release Notes 0.3.0](releases/RELEASE_NOTES_0.3.0.html)
+    * New features and major updates,
+        * [Training on GPU cluster](v0.3.0/gpu.html) enables training of deep learning models over a GPU cluster.
+        * [Python wrapper improvement](v0.3.0/python.html) makes it easy to configure the job, including neural net and SGD algorithm.
+        * [New SGD updaters](v0.3.0/updater.html) are added, including Adam, AdaDelta and AdaMax.
+        * [Installation](v0.3.0/installation.html) has fewer dependent libraries for single node training.
+        * Heterogeneous training with CPU and GPU.
+        * Support cuDNN V4.
+        * Data prefetching.
+        * Various bug fixes.
+
+
+
+* v0.2.0 (14 January 2016):
+    * [Apache SINGA 0.2.0](http://www.apache.org/dyn/closer.cgi/incubator/singa/0.2.0/apache-singa-incubating-0.2.0.tar.gz)
+      [\[MD5\]](https://archive.apache.org/dist/incubator/singa/0.2.0/apache-singa-incubating-0.2.0.tar.gz.md5)
+      [\[KEYS\]](https://archive.apache.org/dist/incubator/singa/0.2.0/KEYS)
+    * [Release Notes 0.2.0](releases/RELEASE_NOTES_0.2.0.html)
+    * New features and major updates,
+        * [Training on GPU](v0.2.0/gpu.html) enables training of complex models on a single node with multiple GPU cards.
+        * [Hybrid neural net partitioning](v0.2.0/hybrid.html) supports data and model parallelism at the same time.
+        * [Python wrapper](v0.2.0/python.html) makes it easy to configure the job, including neural net and SGD algorithm.
+        * [RNN model and BPTT algorithm](v0.2.0/general-rnn.html) are implemented to support applications based on RNN models, e.g., GRU.
+        * [Cloud software integration](v0.2.0/distributed-training.html) includes Mesos, Docker and HDFS.
+        * Visualization of neural net structure and layer information, which is helpful for debugging.
+        * Linear algebra functions and random functions against Blobs and raw data pointers.
+        * New layers, including SoftmaxLayer, ArgSortLayer, DummyLayer, RNN layers and cuDNN layers.
+        * Update Layer class to carry multiple data/grad Blobs.
+        * Extract features and test performance for new data by loading previously trained model parameters.
+        * Add Store class for IO operations.
+
+
+* v0.1.0 (8 October 2015):
+    * [Apache SINGA 0.1.0](http://www.apache.org/dyn/closer.cgi/incubator/singa/apache-singa-incubating-0.1.0.tar.gz)
+      [\[MD5\]](https://archive.apache.org/dist/incubator/singa/apache-singa-incubating-0.1.0.tar.gz.md5)
+      [\[KEYS\]](https://archive.apache.org/dist/incubator/singa/KEYS)
+    * [Amazon EC2 image](https://console.aws.amazon.com/ec2/v2/home?region=ap-southeast-1#LaunchInstanceWizard:ami=ami-b41001e6)
+    * [Release Notes 0.1.0](releases/RELEASE_NOTES_0.1.0.html)
+    * Major features include,
+        * Installation using GNU build utility
+        * Scripts for job management with zookeeper
+        * Programming model based on NeuralNet and Layer abstractions.
+        * System architecture based on Worker, Server and Stub.
+        * Training models from three different model categories, namely, feed-forward models, energy models and RNN models.
+        * Synchronous and asynchronous distributed training frameworks using CPU
+        * Checkpoint and restore
+        * Unit test using gtest
+
+**Disclaimer**
+
+Apache SINGA is an effort undergoing incubation at The Apache Software
+Foundation (ASF), sponsored by the Apache Incubator PMC. Incubation is
+required of all newly accepted projects until a further review indicates that
+the infrastructure, communications, and decision making process have stabilized
+in a manner consistent with other successful ASF projects. While incubation
+status is not necessarily a reflection of the completeness or stability of the
+code, it does indicate that the project has yet to be fully endorsed by the
+ASF.

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/31ae6bd4/doc/en/index.rst
----------------------------------------------------------------------
diff --git a/doc/en/index.rst b/doc/en/index.rst
new file mode 100755
index 0000000..50c65d7
--- /dev/null
+++ b/doc/en/index.rst
@@ -0,0 +1,109 @@
+.. Singa documentation master file, created by
+   sphinx-quickstart on Sat Jul  9 20:36:57 2016.
+   You can adapt this file completely to your liking, but it should at least
+   contain the root `toctree` directive.
+
+Welcome to Apache Singa
+=======================
+
+Recent News
+-----------
+
+* The **third release** is now available, 20 April, 2016. `Download SINGA v0.3.0 <downloads.html>`_
+
+* The **second release** is now available, 14 Jan, 2016. `Download SINGA v0.2.0 <downloads.html>`_.
+
+* SINGA will be presented at `Strata+Hadoop <http://strataconf.com/big-data-conference-sg-2015/public/schedule/detail/45123>`_ on 2 Dec, 2015
+
+* SINGA was presented at `ACM Multimedia <http://www.acmmm.org/2015/at-a-glance/>`_ Best Paper session and Open Source Software Competition session, 26-30 Oct, 2015 (`Slides <files/mm2015.ppt>`_)
+
+* The **first release** is now available, 8 Oct, 2015. `Download SINGA v0.1.0 <downloads.html>`_.
+
+* SINGA was presented at the `workshop on deep learning <http://www.comp.nus.edu.sg/~dbsystem/singa/workshop>`_ held on 16 Sep, 2015.
+
+* SINGA was presented at `BOSS <http://boss.dima.tu-berlin.de/>`_ of `VLDB 2015 <http://www.vldb.org/2015/>`_ at Hawaii, 4 Sep, 2015. (slides: `overview <files/singa-vldb-boss.pptx>`_, `basic <files/basic-user-guide.pptx>`_, `advanced <files/advanced-user-guide.pptx>`_)
+
+* SINGA was presented at `ADSC/I2R Deep Learning Workshop <http://adsc.illinois.edu/contact-us>`_, 25 Aug, 2015.
+
+* A tutorial on SINGA was given at VLDB summer school at Tsinghua University,  25-31 July, 2015.
+
+* A half day tutorial on SINGA was given at I2R, 29 June, 2015.
+
+* SINGA was presented at `DanaC <http://danac.org/>`_ of `SIGMOD 2015 <http://www.sigmod2015.org/index.shtml>`_ at Melbourne, 31 May - 4 June, 2015.
+
+* SINGA has been accepted by `Apache Incubator <http://incubator.apache.org/>`_, 17 March, 2015.
+
+Getting Started
+---------------
+* The `Introduction <docs/overview.html>`_ page gives an overview of SINGA.
+
+* The `Installation <docs/installation.html>`_ guide describes details on downloading and installing SINGA.
+
+* Please follow the `Quick Start <docs/quick-start.html>`_ guide to run simple applications on SINGA.
+
+Documentation
+-------------
+
+* Documentation is listed `here <docs.html>`_.
+
+* Code API can be found `here <api/index.html>`_.
+
+* Research publication list is available `here <http://www.comp.nus.edu.sg/~dbsystem/singa/research/publication/>`_.
+
+How to contribute
+----------------------
+
+* Please subscribe to our development mailing list dev-subscribe@singa.incubator.apache.org.
+
+* If you find any issues using SINGA, please report it to the `Issue Tracker <https://issues.apache.org/jira/browse/singa>`_.
+
+* You can also contact `SINGA committers <community.html>`_ directly.
+
+More details on contributing to SINGA are described `here <develop/how-contribute.html>`_.
+
+Citing SINGA
+------------
+
+Please cite the following two papers if you use SINGA in your research:
+
+* B. C. Ooi, K.-L. Tan, S. Wang, W. Wang, Q. Cai, G. Chen, J. Gao, Z. Luo, A. K. H. Tung, Y. Wang, Z. Xie, M. Zhang, and K. Zheng. `SINGA: A distributed deep learning platform <http://www.comp.nus.edu.sg/~ooibc/singaopen-mm15.pdf>`_. ACM Multimedia (Open Source Software Competition) 2015 (`BibTex <http://www.comp.nus.edu.sg/~dbsystem/singa//assets/file/bib-oss.txt>`_).
+
+* W. Wang, G. Chen, T. T. A. Dinh, B. C. Ooi, K.-L.Tan, J. Gao, and S. Wang. `SINGA: putting deep learning in the hands of multimedia users <http://www.comp.nus.edu.sg/~ooibc/singa-mm15.pdf>`_. ACM Multimedia 2015 (`BibTex <http://www.comp.nus.edu.sg/~dbsystem/singa//assets/file/bib-singa.txt>`_, `Slides <files/mm2015.ppt>`_).
+
+.. toctree::
+   :hidden:
+
+   downloads
+   docs
+
+.. toctree::
+   :hidden:
+   :maxdepth: 2
+   :caption: Development
+
+   develop/schedule
+   develop/how-contribute
+   develop/contribute-code
+   develop/contribute-docs
+
+.. toctree::
+   :hidden:
+   :maxdepth: 2
+   :caption: Community
+
+   community/source-repository
+   community/mail-lists
+   community/issue-tracking
+   community/team-list
+
+
+
+License
+----------
+SINGA is released under `Apache License Version 2.0 <http://www.apache.org/licenses/LICENSE-2.0>`_.
+
+Disclaimers
+-----------
+
+Apache SINGA is an effort undergoing incubation at The Apache Software Foundation (ASF), sponsored by the Apache Incubator. Incubation is required of all newly accepted projects until a further review indicates that the infrastructure, communications, and decision making process have stabilized in a manner consistent with other successful ASF projects. While incubation status is not necessarily a reflection of the completeness or stability of the code, it does indicate that the project has yet to be fully endorsed by the ASF.
+

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/31ae6bd4/doc/index.rst
----------------------------------------------------------------------
diff --git a/doc/index.rst b/doc/index.rst
deleted file mode 100755
index 50c65d7..0000000
--- a/doc/index.rst
+++ /dev/null
@@ -1,109 +0,0 @@
-.. Singa documentation master file, created by
-   sphinx-quickstart on Sat Jul  9 20:36:57 2016.
-   You can adapt this file completely to your liking, but it should at least
-   contain the root `toctree` directive.
-
-Welcome to Apache Singa
-=======================
-
-Recent News
------------
-
-* The **third release** is now available, 20 April, 2016. `Download SINGA v0.3.0 <downloads.html>`_
-
-* The **second release** is now available, 14 Jan, 2016. `Download SINGA v0.2.0 <downloads.html>`_.
-
-* SINGA will be presented at `Strata+Hadoop <http://strataconf.com/big-data-conference-sg-2015/public/schedule/detail/45123>`_ on 2 Dec, 2015
-
-* SINGA was presented at `ACM Multimedia <http://www.acmmm.org/2015/at-a-glance/>`_ Best Paper session and Open Source Software Competition session, 26-30 Oct, 2015 (`Slides <files/mm2015.ppt>`_)
-
-* The **first release** is now available, 8 Oct, 2015. `Download SINGA v0.1.0 <downloads.html>`_.
-
-* SINGA was presented at `workshop on deep learning <http://www.comp.nus.edu.sg/~dbsystem/singa/workshop>`_  held on 16 Sep, 2015
-
-* SINGA was presented at `BOSS <http://boss.dima.tu-berlin.de/>`_ of `VLDB 2015 <http://www.vldb.org/2015/>`_ at Hawaii, 4 Sep, 2015. (slides: `overview <files/singa-vldb-boss.pptx>`_, `basic <files/basic-user-guide.pptx>`_, `advanced <files/advanced-user-guide.pptx>`_)
-
-* SINGA was presented at `ADSC/I2R Deep Learning Workshop <http://adsc.illinois.edu/contact-us>`_, 25 Aug, 2015.
-
-* A tutorial on SINGA was given at VLDB summer school at Tsinghua University,  25-31 July, 2015.
-
-* A half day tutorial on SINGA was given at I2R, 29 June, 2015.
-
-* SINGA was presented at `DanaC <http://danac.org/>`_ of `SIGMOD 2015 <http://www.sigmod2015.org/index.shtml>`_ at Melbourne, 31 May - 4 June, 2015.
-
-* SINGA has been accepted by `Apache Incubator <http://incubator.apache.org/>`_, 17 March, 2015.
-
-Getting Started
----------------
-* The `Introduction <docs/overview.html>`_ page gives an overview of SINGA.
-
-* The `Installation <docs/installation.html>`_ guide describes details on downloading and installing SINGA.
-
-* Please follow the `Quick Start <docs/quick-start.html>`_ guide to run simple applications on SINGA.
-
-Documentation
--------------
-
-* Documentations are listed `here <docs.html>`_.
-
-* Code API can be found `here <api/index.html>`_.
-
-* Research publication list is available `here <http://www.comp.nus.edu.sg/~dbsystem/singa/research/publication/>`_.
-
-How to contribute
-----------------------
-
-* Please subscribe to our development mailing list dev-subscribe@singa.incubator.apache.org.
-
-* If you find any issues using SINGA, please report it to the `Issue Tracker <https://issues.apache.org/jira/browse/singa>`_.
-
-* You can also contact with `SINGA committers <community.html>`_ directly.
-
-More details on contributing to SINGA is described `here <develop/how-contribute.html>`_ .
-
-Citing SINGA
-------------
-
-Please cite the following two papers if you use SINGA in your research:
-
-* B. C. Ooi, K.-L. Tan, S. Wang, W. Wang, Q. Cai, G. Chen, J. Gao, Z. Luo, A. K. H. Tung, Y. Wang, Z. Xie, M. Zhang, and K. Zheng. `SINGA: A distributed deep learning platform <http://www.comp.nus.edu.sg/~ooibc/singaopen-mm15.pdf>`_. ACM Multimedia (Open Source Software Competition) 2015 (`BibTex <http://www.comp.nus.edu.sg/~dbsystem/singa//assets/file/bib-oss.txt>`_).
-
-* W. Wang, G. Chen, T. T. A. Dinh, B. C. Ooi, K.-L.Tan, J. Gao, and S. Wang. `SINGA: putting deep learning in the hands of multimedia users <http://www.comp.nus.edu.sg/~ooibc/singa-mm15.pdf>`_. ACM Multimedia 2015 (`BibTex <http://www.comp.nus.edu.sg/~dbsystem/singa//assets/file/bib-singa.txt>`_, `Slides <files/mm2015.ppt>`_).
-
-.. toctree::
-   :hidden:
-
-   downloads
-   docs
-
-.. toctree::
-   :hidden:
-   :maxdepth: 2
-   :caption: Development
-
-   develop/schedule
-   develop/how-contribute
-   develop/contribute-code
-   develop/contribute-docs
-
-.. toctree::
-   :hidden:
-   :maxdepth: 2
-   :caption: Community
-
-   community/source-repository
-   community/mail-lists
-   community/issue-tracking
-   community/team-list
-
-
-
-License
-----------
-SINGA is released under `Apache License Version 2.0 <http://www.apache.org/licenses/LICENSE-2.0>`_.
-
-Disclaimers
------------
-
-Apache SINGA is an effort undergoing incubation at The Apache Software Foundation (ASF), sponsored by the Apache Incubator. Incubation is required of all newly accepted projects until a further review indicates that the infrastructure, communications, and decision making process have stabilized in a manner consistent with other successful ASF projects. While incubation status is not necessarily a reflection of the completeness or stability of the code, it does indicate that the project has yet to be fully endorsed by the ASF.
-

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/31ae6bd4/doc/make.bat
----------------------------------------------------------------------
diff --git a/doc/make.bat b/doc/make.bat
deleted file mode 100644
index 624a328..0000000
--- a/doc/make.bat
+++ /dev/null
@@ -1,281 +0,0 @@
-@ECHO OFF
-
-REM Command file for Sphinx documentation
-
-if "%SPHINXBUILD%" == "" (
-	set SPHINXBUILD=sphinx-build
-)
-set BUILDDIR=_build
-set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% .
-set I18NSPHINXOPTS=%SPHINXOPTS% .
-if NOT "%PAPER%" == "" (
-	set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS%
-	set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS%
-)
-
-if "%1" == "" goto help
-
-if "%1" == "help" (
-	:help
-	echo.Please use `make ^<target^>` where ^<target^> is one of
-	echo.  html       to make standalone HTML files
-	echo.  dirhtml    to make HTML files named index.html in directories
-	echo.  singlehtml to make a single large HTML file
-	echo.  pickle     to make pickle files
-	echo.  json       to make JSON files
-	echo.  htmlhelp   to make HTML files and a HTML help project
-	echo.  qthelp     to make HTML files and a qthelp project
-	echo.  devhelp    to make HTML files and a Devhelp project
-	echo.  epub       to make an epub
-	echo.  epub3      to make an epub3
-	echo.  latex      to make LaTeX files, you can set PAPER=a4 or PAPER=letter
-	echo.  text       to make text files
-	echo.  man        to make manual pages
-	echo.  texinfo    to make Texinfo files
-	echo.  gettext    to make PO message catalogs
-	echo.  changes    to make an overview over all changed/added/deprecated items
-	echo.  xml        to make Docutils-native XML files
-	echo.  pseudoxml  to make pseudoxml-XML files for display purposes
-	echo.  linkcheck  to check all external links for integrity
-	echo.  doctest    to run all doctests embedded in the documentation if enabled
-	echo.  coverage   to run coverage check of the documentation if enabled
-	echo.  dummy      to check syntax errors of document sources
-	goto end
-)
-
-if "%1" == "clean" (
-	for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i
-	del /q /s %BUILDDIR%\*
-	goto end
-)
-
-
-REM Check if sphinx-build is available and fallback to Python version if any
-%SPHINXBUILD% 1>NUL 2>NUL
-if errorlevel 9009 goto sphinx_python
-goto sphinx_ok
-
-:sphinx_python
-
-set SPHINXBUILD=python -m sphinx.__init__
-%SPHINXBUILD% 2> nul
-if errorlevel 9009 (
-	echo.
-	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
-	echo.installed, then set the SPHINXBUILD environment variable to point
-	echo.to the full path of the 'sphinx-build' executable. Alternatively you
-	echo.may add the Sphinx directory to PATH.
-	echo.
-	echo.If you don't have Sphinx installed, grab it from
-	echo.http://sphinx-doc.org/
-	exit /b 1
-)
-
-:sphinx_ok
-
-
-if "%1" == "html" (
-	%SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html
-	if errorlevel 1 exit /b 1
-	echo.
-	echo.Build finished. The HTML pages are in %BUILDDIR%/html.
-	goto end
-)
-
-if "%1" == "dirhtml" (
-	%SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml
-	if errorlevel 1 exit /b 1
-	echo.
-	echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml.
-	goto end
-)
-
-if "%1" == "singlehtml" (
-	%SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml
-	if errorlevel 1 exit /b 1
-	echo.
-	echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml.
-	goto end
-)
-
-if "%1" == "pickle" (
-	%SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle
-	if errorlevel 1 exit /b 1
-	echo.
-	echo.Build finished; now you can process the pickle files.
-	goto end
-)
-
-if "%1" == "json" (
-	%SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json
-	if errorlevel 1 exit /b 1
-	echo.
-	echo.Build finished; now you can process the JSON files.
-	goto end
-)
-
-if "%1" == "htmlhelp" (
-	%SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp
-	if errorlevel 1 exit /b 1
-	echo.
-	echo.Build finished; now you can run HTML Help Workshop with the ^
-.hhp project file in %BUILDDIR%/htmlhelp.
-	goto end
-)
-
-if "%1" == "qthelp" (
-	%SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp
-	if errorlevel 1 exit /b 1
-	echo.
-	echo.Build finished; now you can run "qcollectiongenerator" with the ^
-.qhcp project file in %BUILDDIR%/qthelp, like this:
-	echo.^> qcollectiongenerator %BUILDDIR%\qthelp\Singa.qhcp
-	echo.To view the help file:
-	echo.^> assistant -collectionFile %BUILDDIR%\qthelp\Singa.ghc
-	goto end
-)
-
-if "%1" == "devhelp" (
-	%SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp
-	if errorlevel 1 exit /b 1
-	echo.
-	echo.Build finished.
-	goto end
-)
-
-if "%1" == "epub" (
-	%SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub
-	if errorlevel 1 exit /b 1
-	echo.
-	echo.Build finished. The epub file is in %BUILDDIR%/epub.
-	goto end
-)
-
-if "%1" == "epub3" (
-	%SPHINXBUILD% -b epub3 %ALLSPHINXOPTS% %BUILDDIR%/epub3
-	if errorlevel 1 exit /b 1
-	echo.
-	echo.Build finished. The epub3 file is in %BUILDDIR%/epub3.
-	goto end
-)
-
-if "%1" == "latex" (
-	%SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
-	if errorlevel 1 exit /b 1
-	echo.
-	echo.Build finished; the LaTeX files are in %BUILDDIR%/latex.
-	goto end
-)
-
-if "%1" == "latexpdf" (
-	%SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
-	cd %BUILDDIR%/latex
-	make all-pdf
-	cd %~dp0
-	echo.
-	echo.Build finished; the PDF files are in %BUILDDIR%/latex.
-	goto end
-)
-
-if "%1" == "latexpdfja" (
-	%SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
-	cd %BUILDDIR%/latex
-	make all-pdf-ja
-	cd %~dp0
-	echo.
-	echo.Build finished; the PDF files are in %BUILDDIR%/latex.
-	goto end
-)
-
-if "%1" == "text" (
-	%SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text
-	if errorlevel 1 exit /b 1
-	echo.
-	echo.Build finished. The text files are in %BUILDDIR%/text.
-	goto end
-)
-
-if "%1" == "man" (
-	%SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man
-	if errorlevel 1 exit /b 1
-	echo.
-	echo.Build finished. The manual pages are in %BUILDDIR%/man.
-	goto end
-)
-
-if "%1" == "texinfo" (
-	%SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo
-	if errorlevel 1 exit /b 1
-	echo.
-	echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo.
-	goto end
-)
-
-if "%1" == "gettext" (
-	%SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale
-	if errorlevel 1 exit /b 1
-	echo.
-	echo.Build finished. The message catalogs are in %BUILDDIR%/locale.
-	goto end
-)
-
-if "%1" == "changes" (
-	%SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes
-	if errorlevel 1 exit /b 1
-	echo.
-	echo.The overview file is in %BUILDDIR%/changes.
-	goto end
-)
-
-if "%1" == "linkcheck" (
-	%SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck
-	if errorlevel 1 exit /b 1
-	echo.
-	echo.Link check complete; look for any errors in the above output ^
-or in %BUILDDIR%/linkcheck/output.txt.
-	goto end
-)
-
-if "%1" == "doctest" (
-	%SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest
-	if errorlevel 1 exit /b 1
-	echo.
-	echo.Testing of doctests in the sources finished, look at the ^
-results in %BUILDDIR%/doctest/output.txt.
-	goto end
-)
-
-if "%1" == "coverage" (
-	%SPHINXBUILD% -b coverage %ALLSPHINXOPTS% %BUILDDIR%/coverage
-	if errorlevel 1 exit /b 1
-	echo.
-	echo.Testing of coverage in the sources finished, look at the ^
-results in %BUILDDIR%/coverage/python.txt.
-	goto end
-)
-
-if "%1" == "xml" (
-	%SPHINXBUILD% -b xml %ALLSPHINXOPTS% %BUILDDIR%/xml
-	if errorlevel 1 exit /b 1
-	echo.
-	echo.Build finished. The XML files are in %BUILDDIR%/xml.
-	goto end
-)
-
-if "%1" == "pseudoxml" (
-	%SPHINXBUILD% -b pseudoxml %ALLSPHINXOPTS% %BUILDDIR%/pseudoxml
-	if errorlevel 1 exit /b 1
-	echo.
-	echo.Build finished. The pseudo-XML files are in %BUILDDIR%/pseudoxml.
-	goto end
-)
-
-if "%1" == "dummy" (
-	%SPHINXBUILD% -b dummy %ALLSPHINXOPTS% %BUILDDIR%/dummy
-	if errorlevel 1 exit /b 1
-	echo.
-	echo.Build finished. Dummy builder generates no files.
-	goto end
-)
-
-:end


[22/22] incubator-singa git commit: Minor updates to pass tests and run examples

Posted by wa...@apache.org.
Minor updates to pass tests and run examples


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/0a764257
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/0a764257
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/0a764257

Branch: refs/heads/dev
Commit: 0a7642576cb0df87c6f08ff00227658c0e03f69f
Parents: 72d736a
Author: Wei Wang <wa...@comp.nus.edu.sg>
Authored: Mon Aug 15 21:40:55 2016 +0800
Committer: Wei Wang <wa...@gmail.com>
Committed: Tue Aug 16 00:12:27 2016 +0800

----------------------------------------------------------------------
 doc/Makefile                           |  7 +-
 doc/en/conf.py                         |  2 +-
 doc/en/docs.rst                        |  1 -
 doc/en/docs/device.rst                 |  4 +-
 doc/en/docs/index.rst                  |  9 ++-
 doc/en/docs/initializer.rst            | 12 ++++
 doc/en/docs/layer.rst                  | 14 ++++
 doc/en/docs/loss.rst                   |  7 ++
 doc/en/docs/metric.rst                 |  8 +++
 doc/en/docs/optimizer.rst              | 11 ++++
 doc/en/docs/overview.rst               | 99 -----------------------------
 doc/en/docs/tensor.rst                 | 30 +--------
 doc/en/docs/utils.rst                  |  6 ++
 doc/en/downloads.md                    |  1 -
 doc/en/index.rst                       |  9 +--
 doc/en/releases/RELEASE_NOTES_0.1.0.md | 99 +++++++++++++++++++++++++++++
 doc/en/releases/RELEASE_NOTES_0.2.0.md | 84 ++++++++++++++++++++++++
 doc/en/releases/RELEASE_NOTES_0.3.0.md | 37 +++++++++++
 doc/zh/conf.py                         |  2 +-
 examples/char-rnn/train.py             |  7 +-
 examples/cifar10/README.md             | 69 ++++++++++++++++++++
 examples/cifar10/alexnet.cc            |  2 +-
 examples/cifar10/train.py              |  2 +-
 examples/mnist/train.py                |  1 -
 src/python/singa/optimizer.py          |  2 +
 test/CMakeLists.txt                    |  2 +-
 26 files changed, 373 insertions(+), 154 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0a764257/doc/Makefile
----------------------------------------------------------------------
diff --git a/doc/Makefile b/doc/Makefile
index b5282b7..f02595b 100644
--- a/doc/Makefile
+++ b/doc/Makefile
@@ -26,14 +26,9 @@ clean:
 
 .PHONY: html
 html:
-<<<<<<< HEAD
-	cp -rf ../examples docs/
-	$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
-=======
 	cp -rf ../examples en/docs/
-	$(SPHINXBUILD) -b html  -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) en $(BUILDDIR)/html
+	$(SPHINXBUILD) -b html  -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) en $(BUILDDIR)/html/en
 	$(SPHINXBUILD) -b html  -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) zh $(BUILDDIR)/html/zh
->>>>>>> v1doc
 	@echo
 	@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0a764257/doc/en/conf.py
----------------------------------------------------------------------
diff --git a/doc/en/conf.py b/doc/en/conf.py
index 36080d9..46a48f6 100755
--- a/doc/en/conf.py
+++ b/doc/en/conf.py
@@ -19,7 +19,7 @@
 import os
 import sys
 sys.path.insert(0, os.path.abspath('.'))
-sys.path.insert(1, os.path.abspath('../build/python'))
+sys.path.insert(1, os.path.abspath('../../build/python'))
 
 # -- General configuration ------------------------------------------------
 from recommonmark.parser import CommonMarkParser

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0a764257/doc/en/docs.rst
----------------------------------------------------------------------
diff --git a/doc/en/docs.rst b/doc/en/docs.rst
index 400b12a..c1b143b 100644
--- a/doc/en/docs.rst
+++ b/doc/en/docs.rst
@@ -3,4 +3,3 @@ Documentation
 
 .. toctree::
    docs/index
-   docs/zh/index

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0a764257/doc/en/docs/device.rst
----------------------------------------------------------------------
diff --git a/doc/en/docs/device.rst b/doc/en/docs/device.rst
index e79d87a..53faf48 100644
--- a/doc/en/docs/device.rst
+++ b/doc/en/docs/device.rst
@@ -23,9 +23,7 @@ Python API
    :members: create_cuda_gpus, create_cuda_gpus_on, get_default_device
 
 
-The following code provides examples of creating devices,
-
-.. code:: python
+The following code provides examples of creating devices::
 
    from singa import device
    cuda = device.create_cuda_gpu_on(0)  # use GPU card of ID 0

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0a764257/doc/en/docs/index.rst
----------------------------------------------------------------------
diff --git a/doc/en/docs/index.rst b/doc/en/docs/index.rst
index 93315de..a2ea540 100644
--- a/doc/en/docs/index.rst
+++ b/doc/en/docs/index.rst
@@ -1,5 +1,5 @@
-English
-=======
+Documentation
+=============
 
 .. toctree::
 
@@ -7,4 +7,9 @@ English
    software_stack
    device
    tensor
+   layer
+   initializer
+   loss
+   metric
+   optimizer
    examples/index

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0a764257/doc/en/docs/initializer.rst
----------------------------------------------------------------------
diff --git a/doc/en/docs/initializer.rst b/doc/en/docs/initializer.rst
new file mode 100644
index 0000000..f334497
--- /dev/null
+++ b/doc/en/docs/initializer.rst
@@ -0,0 +1,12 @@
+Initializer
+===========
+
+Python API
+----------
+
+.. automodule:: singa.initializer
+   :members: uniform, gaussian
+   :member-order: bysource
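+
+A minimal usage sketch (``uniform(t, fan_in, fan_out)`` mirrors the call in the
+bundled char-rnn example; ``gaussian`` is assumed to take the same arguments)::
+
+   from singa import tensor, initializer
+
+   w = tensor.Tensor((3, 4))
+   initializer.uniform(w, w.shape[0], w.shape[1])  # scaled by fan-in/fan-out
+   b = tensor.Tensor((4,))
+   initializer.gaussian(b, 0, b.shape[0])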
+
+CPP API
+--------

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0a764257/doc/en/docs/layer.rst
----------------------------------------------------------------------
diff --git a/doc/en/docs/layer.rst b/doc/en/docs/layer.rst
new file mode 100644
index 0000000..62ef3c3
--- /dev/null
+++ b/doc/en/docs/layer.rst
@@ -0,0 +1,14 @@
+Layer
+======
+
+Python API
+-----------
+.. automodule:: singa.layer
+   :members:
+   :member-order: bysource
+   :show-inheritance:
+   :undoc-members:
+
+
+CPP API
+--------

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0a764257/doc/en/docs/loss.rst
----------------------------------------------------------------------
diff --git a/doc/en/docs/loss.rst b/doc/en/docs/loss.rst
new file mode 100644
index 0000000..27872dd
--- /dev/null
+++ b/doc/en/docs/loss.rst
@@ -0,0 +1,7 @@
+Loss
+=========
+
+
+.. automodule:: singa.loss
+   :members:
+   :show-inheritance:
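+
+A minimal usage sketch (assuming labels are encoded as class indices, as in the
+bundled examples)::
+
+   from singa import tensor, loss
+   from singa.proto import model_pb2
+
+   lossfun = loss.SoftmaxCrossEntropy()
+   x = tensor.Tensor((4, 10))   # predictions for a batch of 4 samples
+   y = tensor.Tensor((4,))      # ground-truth labels
+   x.gaussian(0.0, 1.0)
+   y.set_value(0.0)
+   l = lossfun.forward(model_pb2.kTrain, x, y)   # per-sample loss values
+   g = lossfun.backward()                        # gradient w.r.t. x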

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0a764257/doc/en/docs/metric.rst
----------------------------------------------------------------------
diff --git a/doc/en/docs/metric.rst b/doc/en/docs/metric.rst
new file mode 100644
index 0000000..35fa24e
--- /dev/null
+++ b/doc/en/docs/metric.rst
@@ -0,0 +1,8 @@
+Metric
+=========
+
+
+.. automodule:: singa.metric
+   :members:
+   :show-inheritance:
+   :member-order: bysource
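+
+A minimal usage sketch (assuming ``Accuracy`` compares predicted probabilities
+against integer class labels)::
+
+   from singa import tensor, metric
+
+   x = tensor.Tensor((4, 10))   # predicted probabilities for 4 samples
+   y = tensor.Tensor((4,))      # ground-truth labels
+   x.uniform(0, 1)
+   y.set_value(0.0)
+   m = metric.Accuracy()
+   acc = m.evaluate(x, y)       # averaged accuracy over the batch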

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0a764257/doc/en/docs/optimizer.rst
----------------------------------------------------------------------
diff --git a/doc/en/docs/optimizer.rst b/doc/en/docs/optimizer.rst
new file mode 100644
index 0000000..486c01e
--- /dev/null
+++ b/doc/en/docs/optimizer.rst
@@ -0,0 +1,11 @@
+Optimizer
+=========
+
+
+.. automodule:: singa.optimizer
+   :members:
+   :member-order: bysource
+   :show-inheritance:
+   :undoc-members:
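+
+A minimal usage sketch (``apply_with_lr`` follows the usage in the bundled
+examples; each call updates one parameter tensor in place)::
+
+   from singa import tensor, optimizer
+
+   opt = optimizer.SGD(momentum=0.9)
+   p = tensor.Tensor((3, 5))    # a parameter tensor
+   g = tensor.Tensor((3, 5))    # its gradient
+   p.set_value(0.1)
+   g.gaussian(0.0, 0.01)
+   opt.apply_with_lr(0, 0.01, g, p, 'p')   # epoch, lr, grad, value, name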
+
+

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0a764257/doc/en/docs/overview.rst
----------------------------------------------------------------------
diff --git a/doc/en/docs/overview.rst b/doc/en/docs/overview.rst
deleted file mode 100644
index 18ad62b..0000000
--- a/doc/en/docs/overview.rst
+++ /dev/null
@@ -1,99 +0,0 @@
-Introduction
-==============
-
-
-SINGA is a general distributed deep learning platform for training big deep
-learning models over large datasets. It is designed with an intuitive
-programming model based on the layer abstraction. A variety
-of popular deep learning models are supported, namely feed-forward models including
-convolutional neural networks (CNN), energy models like restricted Boltzmann
-machine (RBM), and recurrent neural networks (RNN). Many built-in layers are
-provided for users. SINGA architecture is
-sufficiently flexible to run synchronous, asynchronous and hybrid training
-frameworks.  SINGA
-also supports different neural net partitioning schemes to parallelize the
-training of large models, namely partitioning on batch dimension, feature
-dimension or hybrid partitioning.
-
-
-Goals
------
-
-As a distributed system, the first goal of SINGA is to have good scalability. In other
-words, SINGA is expected to reduce the total training time to achieve certain
-accuracy with more computing resources (i.e., machines).
-
-
-The second goal is to make SINGA easy to use.
-It is non-trivial for programmers to develop and train models with deep and
-complex model structures.  Distributed training further increases the burden of
-programmers, e.g., data and model partitioning, and network communication.  Hence it is essential to
-provide an easy to use programming model so that users can implement their deep
-learning models/algorithms without much awareness of the underlying distributed
-platform.
-
-Principles
-----------
-
-Scalability is a challenging research problem for distributed deep learning
-training. SINGA provides a general architecture to exploit the scalability of
-different training frameworks. Synchronous training frameworks improve the
-efficiency of one training iteration, and
-asynchronous training frameworks improve the convergence rate. Given a fixed budget
-(e.g., cluster size), users can run a hybrid framework that maximizes the
-scalability by trading off between efficiency and convergence rate.
-
-SINGA comes with a programming model designed based on the layer abstraction, which
-is intuitive for deep learning models.  A variety of
-popular deep learning models can be expressed and trained using this programming model.
-
-System overview
----------------
-
-.. figure:: /image/sgd.png
-
-            Figure 1 - SGD flow
-
-Training a deep learning model is to find the optimal parameters involved in
-the transformation functions that generate good features for specific tasks.
-The goodness of a set of parameters is measured by a loss function, e.g.,
-`Cross-Entropy Loss <https://en.wikipedia.org/wiki/Cross_entropy>`_ . Since the
-loss functions are usually non-linear and non-convex, it is difficult to get a
-closed form solution. Typically, people use the stochastic gradient descent
-(SGD) algorithm, which randomly
-initializes the parameters and then iteratively updates them to reduce the loss
-as shown in Figure 1.
-
-.. figure:: /image/overview.png
-
-           Figure 2 - SINGA overview
-
-SGD is used in SINGA to train
-parameters of deep learning models. The training workload is distributed over
-worker and server units as shown in Figure 2. In each
-iteration, every worker calls *TrainOneBatch* function to compute
-parameter gradients. *TrainOneBatch* takes a *NeuralNet* object
-representing the neural net, and visits layers of the *NeuralNet* in
-certain order. The resultant gradients are sent to the local stub that
-aggregates the requests and forwards them to corresponding servers for
-updating. Servers reply to workers with the updated parameters for the next
-iteration.
-
-
-Job submission
---------------
-
-To submit a job in SINGA (i.e., training a deep learning model),
-users pass the job configuration to SINGA driver in the
-`main function <programming-guide.html>`_ . The job configuration
-specifies the four major components in Figure 2,
-
-  * a `NeuralNet <neural-net.html>`_ describing the neural net structure with the detailed layer setting and their connections;
-  * a `TrainOneBatch <train-one-batch.html>`_  algorithm which is tailored for different model categories;
-  * an `Updater <updater.html>`_  defining the protocol for updating parameters at the server side;
-  * a `Cluster Topology <distributed-training.html>`_ specifying the distributed architecture of workers and servers.
-
-This process is like the job submission in Hadoop, where users configure their
-jobs in the main function to set the mapper, reducer, etc.
-In Hadoop, users can configure their jobs with their own (or built-in) mapper and reducer; in SINGA, users
-can configure their jobs with their own (or built-in) layer, updater, etc.

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0a764257/doc/en/docs/tensor.rst
----------------------------------------------------------------------
diff --git a/doc/en/docs/tensor.rst b/doc/en/docs/tensor.rst
index 87d26ea..ff6142e 100644
--- a/doc/en/docs/tensor.rst
+++ b/doc/en/docs/tensor.rst
@@ -21,34 +21,10 @@ type of Device.
 Python API
 ----------
 
-There are two set of tensor functions,
-1. Tensor member functions, which would change the internal state of the Tensor instance.
-2. tensor module functions, which accepts Tensor instances as arguments and return
-Tensor instances.
-
-
-Create Tensor instances
-~~~~~~~~~~~~~~~~~~~~~~~
-
-.. autoclass:: singa.tensor.Tensor
-
-
-Tensor instances can be constructed from Numpy array,
 
 .. automodule:: singa.tensor
-   :members: from_numpy
-
-
-Set Tensor values
-~~~~~~~~~~~~~~~~~
-
-
-
-
-
-
-
-
-
+   :members:
 
 
+CPP API
+---------

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0a764257/doc/en/docs/utils.rst
----------------------------------------------------------------------
diff --git a/doc/en/docs/utils.rst b/doc/en/docs/utils.rst
new file mode 100644
index 0000000..5306719
--- /dev/null
+++ b/doc/en/docs/utils.rst
@@ -0,0 +1,6 @@
+Misc.
+=========
+
+
+.. automodule:: singa.utils
+   :members:

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0a764257/doc/en/downloads.md
----------------------------------------------------------------------
diff --git a/doc/en/downloads.md b/doc/en/downloads.md
index 31e7274..fe0c30a 100644
--- a/doc/en/downloads.md
+++ b/doc/en/downloads.md
@@ -1,5 +1,4 @@
 ## Download SINGA
----
 
 * Latest code: please clone the dev branch from [Github](https://github.com/apache/incubator-singa)
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0a764257/doc/en/index.rst
----------------------------------------------------------------------
diff --git a/doc/en/index.rst b/doc/en/index.rst
index 50c65d7..1bbbe9a 100755
--- a/doc/en/index.rst
+++ b/doc/en/index.rst
@@ -2,7 +2,6 @@
    sphinx-quickstart on Sat Jul  9 20:36:57 2016.
    You can adapt this file completely to your liking, but it should at least
    contain the root `toctree` directive.
-
 Welcome to Apache Singa
 =======================
 
@@ -35,19 +34,17 @@ Recent News
 
 Getting Started
 ---------------
-* The `Introduction <docs/overview.html>`_ page gives an overview of SINGA.
+* The `Software stack <docs/software_stack.html>`_ page gives an overview of SINGA.
 
 * The `Installation <docs/installation.html>`_ guide describes details on downloading and installing SINGA.
 
-* Please follow the `Quick Start <docs/quick-start.html>`_ guide to run simple applications on SINGA.
+* Please follow the `Examples <docs/examples/index.html>`_ guide to run simple applications on SINGA.
 
 Documentation
 -------------
 
 * Documentations are listed `here <docs.html>`_.
 
-* Code API can be found `here <api/index.html>`_.
-
 * Research publication list is available `here <http://www.comp.nus.edu.sg/~dbsystem/singa/research/publication/>`_.
 
 How to contribute
@@ -74,7 +71,7 @@ Please cite the following two papers if you use SINGA in your research:
    :hidden:
 
    downloads
-   docs
+   docs/index
 
 .. toctree::
    :hidden:

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0a764257/doc/en/releases/RELEASE_NOTES_0.1.0.md
----------------------------------------------------------------------
diff --git a/doc/en/releases/RELEASE_NOTES_0.1.0.md b/doc/en/releases/RELEASE_NOTES_0.1.0.md
new file mode 100644
index 0000000..2674d90
--- /dev/null
+++ b/doc/en/releases/RELEASE_NOTES_0.1.0.md
@@ -0,0 +1,99 @@
+# singa-incubating-0.1.0 Release Notes
+
+---
+
+SINGA is a general distributed deep learning platform for training big deep learning models over large datasets. It is
+designed with an intuitive programming model based on the layer abstraction. SINGA supports a wide variety of popular
+deep learning models.
+
+This release includes following features:
+
+  * Job management
+    * [SINGA-3](https://issues.apache.org/jira/browse/SINGA-3)  Use Zookeeper to check stopping (finish) time of the system
+    * [SINGA-16](https://issues.apache.org/jira/browse/SINGA-16)  Runtime Process id Management
+    * [SINGA-25](https://issues.apache.org/jira/browse/SINGA-25)  Setup glog output path
+    * [SINGA-26](https://issues.apache.org/jira/browse/SINGA-26)  Run distributed training in a single command
+    * [SINGA-30](https://issues.apache.org/jira/browse/SINGA-30)  Enhance easy-to-use feature and support concurrent jobs
+    * [SINGA-33](https://issues.apache.org/jira/browse/SINGA-33)  Automatically launch a number of processes in the cluster
+    * [SINGA-34](https://issues.apache.org/jira/browse/SINGA-34)  Support external zookeeper service
+    * [SINGA-38](https://issues.apache.org/jira/browse/SINGA-38)  Support concurrent jobs
+    * [SINGA-39](https://issues.apache.org/jira/browse/SINGA-39)  Avoid ssh in scripts for single node environment
+    * [SINGA-43](https://issues.apache.org/jira/browse/SINGA-43)  Remove Job-related output from workspace
+    * [SINGA-56](https://issues.apache.org/jira/browse/SINGA-56)  No automatic launching of zookeeper service
+    * [SINGA-73](https://issues.apache.org/jira/browse/SINGA-73)  Refine the selection of available hosts from host list
+
+
+  * Installation with GNU Auto tool
+    * [SINGA-4](https://issues.apache.org/jira/browse/SINGA-4)  Refine thirdparty-dependency installation
+    * [SINGA-13](https://issues.apache.org/jira/browse/SINGA-13)  Separate intermediate files of compilation from source files
+    * [SINGA-17](https://issues.apache.org/jira/browse/SINGA-17)  Add root permission within thirdparty/install.
+    * [SINGA-27](https://issues.apache.org/jira/browse/SINGA-27)  Generate python modules for proto objects
+    * [SINGA-53](https://issues.apache.org/jira/browse/SINGA-53)  Add lmdb compiling options
+    * [SINGA-62](https://issues.apache.org/jira/browse/SINGA-62)  Remove building scrips and auxiliary files
+    * [SINGA-67](https://issues.apache.org/jira/browse/SINGA-67)  Add singatest into build targets
+
+
+  * Distributed training
+    * [SINGA-7](https://issues.apache.org/jira/browse/SINGA-7)  Implement shared memory Hogwild algorithm
+    * [SINGA-8](https://issues.apache.org/jira/browse/SINGA-8)  Implement distributed Hogwild
+    * [SINGA-19](https://issues.apache.org/jira/browse/SINGA-19)  Slice large Param objects for load-balance
+    * [SINGA-29](https://issues.apache.org/jira/browse/SINGA-29)  Update NeuralNet class to enable layer partition type customization
+    * [SINGA-24](https://issues.apache.org/jira/browse/SINGA-24)  Implement Downpour training framework
+    * [SINGA-32](https://issues.apache.org/jira/browse/SINGA-32)  Implement AllReduce training framework
+    * [SINGA-57](https://issues.apache.org/jira/browse/SINGA-57)  Improve Distributed Hogwild
+
+
+  * Training algorithms for different model categories
+    * [SINGA-9](https://issues.apache.org/jira/browse/SINGA-9)  Add Support for Restricted Boltzmann Machine (RBM) model
+    * [SINGA-10](https://issues.apache.org/jira/browse/SINGA-10)  Add Support for Recurrent Neural Networks (RNN)
+
+
+  * Checkpoint and restore
+    * [SINGA-12](https://issues.apache.org/jira/browse/SINGA-12)  Support Checkpoint and Restore
+
+
+  * Unit test
+    * [SINGA-64](https://issues.apache.org/jira/browse/SINGA-64)  Add the test module for utils/common
+
+
+  * Programming model
+    * [SINGA-36](https://issues.apache.org/jira/browse/SINGA-36)  Refactor job configuration, driver program and scripts
+    * [SINGA-37](https://issues.apache.org/jira/browse/SINGA-37)  Enable users to set parameter sharing in model configuration
+    * [SINGA-54](https://issues.apache.org/jira/browse/SINGA-54)  Refactor job configuration to move fields in ModelProto out
+    * [SINGA-55](https://issues.apache.org/jira/browse/SINGA-55)  Refactor main.cc and singa.h
+    * [SINGA-61](https://issues.apache.org/jira/browse/SINGA-61)  Support user defined classes
+    * [SINGA-65](https://issues.apache.org/jira/browse/SINGA-65)  Add an example of writing user-defined layers
+
+
+  * Other features
+    * [SINGA-6](https://issues.apache.org/jira/browse/SINGA-6)  Implement thread-safe singleton
+    * [SINGA-18](https://issues.apache.org/jira/browse/SINGA-18)  Update API for displaying performance metric
+    * [SINGA-77](https://issues.apache.org/jira/browse/SINGA-77)  Integrate with Apache RAT
+
+
+Some bugs are fixed during the development of this release
+
+  * [SINGA-2](https://issues.apache.org/jira/browse/SINGA-2) Check failed: zsock_connect
+  * [SINGA-5](https://issues.apache.org/jira/browse/SINGA-5) Server early terminate when zookeeper singa folder is not initially empty
+  * [SINGA-15](https://issues.apache.org/jira/browse/SINGA-15) Fix a bug in the ConnectStub function, which gets stuck when connecting layer_dealer_
+  * [SINGA-22](https://issues.apache.org/jira/browse/SINGA-22) Cannot find openblas library when it is installed in default path
+  * [SINGA-23](https://issues.apache.org/jira/browse/SINGA-23) Libtool version mismatch error.
+  * [SINGA-28](https://issues.apache.org/jira/browse/SINGA-28) Fix a bug from topology sort of Graph
+  * [SINGA-42](https://issues.apache.org/jira/browse/SINGA-42) Issue when loading checkpoints
+  * [SINGA-44](https://issues.apache.org/jira/browse/SINGA-44) Fix a bug when resetting metric values
+  * [SINGA-46](https://issues.apache.org/jira/browse/SINGA-46) Fix a bug in updater.cc to scale the gradients
+  * [SINGA-47](https://issues.apache.org/jira/browse/SINGA-47) Fix a bug in data layers that leads to out-of-memory when group size is too large
+  * [SINGA-48](https://issues.apache.org/jira/browse/SINGA-48) Fix a bug in trainer.cc that assigns the same NeuralNet instance to workers from diff groups
+  * [SINGA-49](https://issues.apache.org/jira/browse/SINGA-49) Fix a bug in HandlePutMsg func that sets param fields to invalid values
+  * [SINGA-66](https://issues.apache.org/jira/browse/SINGA-66) Fix bugs in Worker::RunOneBatch function and ClusterProto
+  * [SINGA-79](https://issues.apache.org/jira/browse/SINGA-79) Fix bug in singatool that can not parse -conf flag
+
+
+Features planned for the next release
+
+  * [SINGA-11](https://issues.apache.org/jira/browse/SINGA-11) Start SINGA using Mesos
+  * [SINGA-31](https://issues.apache.org/jira/browse/SINGA-31) Extend Blob to support xpu (cpu or gpu)
+  * [SINGA-35](https://issues.apache.org/jira/browse/SINGA-35) Add random number generators
+  * [SINGA-40](https://issues.apache.org/jira/browse/SINGA-40) Support sparse Param update
+  * [SINGA-41](https://issues.apache.org/jira/browse/SINGA-41) Support single node single GPU training
+

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0a764257/doc/en/releases/RELEASE_NOTES_0.2.0.md
----------------------------------------------------------------------
diff --git a/doc/en/releases/RELEASE_NOTES_0.2.0.md b/doc/en/releases/RELEASE_NOTES_0.2.0.md
new file mode 100644
index 0000000..38f498a
--- /dev/null
+++ b/doc/en/releases/RELEASE_NOTES_0.2.0.md
@@ -0,0 +1,84 @@
+# singa-incubating-0.2.0 Release Notes
+
+---
+
+SINGA is a general distributed deep learning platform for training big deep
+learning models over large datasets. It is designed with an intuitive
+programming model based on the layer abstraction. SINGA supports a wide variety
+of popular deep learning models.
+
+This release includes the following **major features**:
+
+* [Training on GPU](../docs/gpu.html) enables training of complex models on a single node with multiple GPU cards.
+* [Hybrid neural net partitioning](../docs/hybrid.html) supports data and model parallelism at the same time.
+* [Python wrapper](../docs/python.html) makes it easy to configure the job, including neural net and SGD algorithm.
+* [RNN model and BPTT algorithm](../docs/general-rnn.html) are implemented to support applications based on RNN models, e.g., GRU.
+* [Cloud software integration](../docs/distributed-training.html) includes Mesos, Docker and HDFS.
+
+
+**More details** are listed as follows,
+
+  * Programming model
+    * [SINGA-80] New Blob Level and Address Level Math Operation Interface
+    * [SINGA-82] Refactor input layers using data store abstraction
+    * [SINGA-87] Replace exclude field to include field for layer configuration
+    * [SINGA-110] Add Layer member datavec_ and gradvec_
+    * [SINGA-120] Implemented GRU and BPTT (BPTTWorker)
+
+
+  * Neuralnet layers
+    * [SINGA-91] Add SoftmaxLayer and ArgSortLayer
+    * [SINGA-106] Add dummy layer for test purpose
+    * [SINGA-120] Implemented GRU and BPTT (GRULayer and OneHotLayer)
+
+
+  * GPU training support
+    * [SINGA-100] Implement layers using CUDNN for GPU training
+    * [SINGA-104] Add Context Class
+    * [SINGA-105] Update GNU make files for compiling CUDA related code
+    * [SINGA-98] Add Support for AlexNet ImageNet Classification Model
+
+
+  * Model/Hybrid partition
+    * [SINGA-109] Refine bridge layers
+    * [SINGA-111] Add slice, concate and split layers
+    * [SINGA-113] Model/Hybrid Partition Support
+
+
+  * Python binding
+    * [SINGA-108] Add Python wrapper to singa
+
+
+  * Predict-only mode
+    * [SINGA-85] Add functions for extracting features and test new data
+
+
+  * Integrate with third-party tools
+    * [SINGA-11] Start SINGA on Apache Mesos
+    * [SINGA-78] Use Doxygen to generate documentation
+    * [SINGA-89] Add Docker support
+
+
+  * Unit test
+    * [SINGA-95] Add make test after building
+
+
+  * Other improvements
+    * [SINGA-84] Header Files Rearrange
+    * [SINGA-93] Remove the asterisk in the log tcp://169.254.12.152:*:49152
+    * [SINGA-94] Move call to google::InitGoogleLogging() from Driver::Init() to main()
+    * [SINGA-96] Add Momentum to Cifar10 Example
+    * [SINGA-101] Add ll (ls -l) command in .bashrc file when using docker
+    * [SINGA-114] Remove short logs in tmp directory
+    * [SINGA-115] Print layer debug information in the neural net graph file
+    * [SINGA-118] Make protobuf LayerType field id easy to assign
+    * [SINGA-97] Add HDFS Store
+
+
+  * Bugs fixed
+    * [SINGA-85] Fix compilation errors in examples
+    * [SINGA-90] Miscellaneous trivial bug fixes
+    * [SINGA-107] Error from loading pre-trained params for training stacked RBMs
+    * [SINGA-116] Fix a bug in InnerProductLayer caused by weight matrix sharing
+
+

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0a764257/doc/en/releases/RELEASE_NOTES_0.3.0.md
----------------------------------------------------------------------
diff --git a/doc/en/releases/RELEASE_NOTES_0.3.0.md b/doc/en/releases/RELEASE_NOTES_0.3.0.md
new file mode 100644
index 0000000..c169e12
--- /dev/null
+++ b/doc/en/releases/RELEASE_NOTES_0.3.0.md
@@ -0,0 +1,37 @@
+# singa-incubating-0.3.0 Release Notes
+
+---
+
+SINGA is a general distributed deep learning platform for training big deep
+learning models over large datasets. It is designed with an intuitive
+programming model based on the layer abstraction. SINGA supports a wide variety
+of popular deep learning models.
+
+This release includes following features:
+
+  * GPU Support
+    * [SINGA-131] Implement and optimize hybrid training using both CPU and GPU
+    * [SINGA-136] Support cuDNN v4
+    * [SINGA-134] Extend SINGA to run over a GPU cluster
+    * [SINGA-157] Change the priority of the cudnn library and install libsingagpu.so
+
+  * Remove Dependences
+    * [SINGA-156] Remove the dependency on ZMQ for single process training
+    * [SINGA-155] Remove zookeeper for single-process training
+
+  * Python Binding
+    * [SINGA-126] Python Binding for Interactive Training
+
+  * Other Improvements
+    * [SINGA-80] New Blob Level and Address Level Math Operation Interface
+    * [SINGA-130] Data Prefetching
+    * [SINGA-145] New SGD based optimization Updaters: AdaDelta, Adam, AdamMax
+
+  * Bugs Fixed
+    * [SINGA-148] Race condition between Worker threads and Driver
+    * [SINGA-150] Mesos Docker container failed
+    * [SINGA-141] Undesired Hash collision when locating process id to worker…
+    * [SINGA-149] Docker build fail
+    * [SINGA-143] The compilation cannot detect libsingagpu.so file
+
+

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0a764257/doc/zh/conf.py
----------------------------------------------------------------------
diff --git a/doc/zh/conf.py b/doc/zh/conf.py
index 332a0d1..921a27a 100755
--- a/doc/zh/conf.py
+++ b/doc/zh/conf.py
@@ -19,7 +19,7 @@
 import os
 import sys
 sys.path.insert(0, os.path.abspath('.'))
-sys.path.insert(1, os.path.abspath('../build/python'))
+sys.path.insert(1, os.path.abspath('../../build/python'))
 
 # -- General configuration ------------------------------------------------
 from recommonmark.parser import CommonMarkParser

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0a764257/examples/char-rnn/train.py
----------------------------------------------------------------------
diff --git a/examples/char-rnn/train.py b/examples/char-rnn/train.py
index 83771c2..137df80 100644
--- a/examples/char-rnn/train.py
+++ b/examples/char-rnn/train.py
@@ -98,9 +98,9 @@ def get_lr(epoch):
 
 
 def train(data, max_epoch, hidden_size=100, seq_length=100, batch_size=16,
-          num_stacks=1, lr=0.001, dropout=0.5, model_path='model.bin'):
+          num_stacks=1, dropout=0.5, model_path='model.bin'):
     # SGD with L2 gradient normalization
-    opt = optimizer.SGD(constraint=optimizer.L2Constraint(5))
+    opt = optimizer.RMSProp(constraint=optimizer.L2Constraint(5))
     cuda = device.create_cuda_gpu()
     rnn = layer.LSTM(
         name='lstm',
@@ -126,7 +126,7 @@ def train(data, max_epoch, hidden_size=100, seq_length=100, batch_size=16,
     dense_b = dense.param_values()[1]
     print 'dense w ', dense_w.shape
     print 'dense b ', dense_b.shape
-    initializer.uniform(dense_w, dense_w.shape[0], dense_w.shape[1])
+    initializer.uniform(dense_w, dense_w.shape[0], 0)
     print 'dense weight l1 = %f' % (dense_w.l1())
     dense_b.set_value(0)
     print 'dense b l1 = %f' % (dense_b.l1())
@@ -154,6 +154,7 @@ def train(data, max_epoch, hidden_size=100, seq_length=100, batch_size=16,
                 lvalue = lossfun.forward(model_pb2.kTrain, act, label)
                 batch_loss += lvalue.l1()
                 grad = lossfun.backward()
+                grad /= batch_size
                 grad, gwb = dense.backward(model_pb2.kTrain, grad)
                 grads.append(grad)
                 g_dense_w += gwb[0]

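The new `grad /= batch_size` line averages the loss gradient over the batch before it is back-propagated through the dense layer, so the gradient scale no longer grows with the batch size. A minimal numpy sketch of the idea (illustrative only, assuming the loss backward returns the gradient summed over the batch):

    import numpy as np

    batch_size = 16
    # dummy per-sample gradients w.r.t. a 10-unit output layer
    per_sample = np.random.randn(batch_size, 10)
    summed = per_sample.sum(axis=0)   # what a sum-reduced loss would return
    averaged = summed / batch_size    # gradient scale independent of batch size
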
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0a764257/examples/cifar10/README.md
----------------------------------------------------------------------
diff --git a/examples/cifar10/README.md b/examples/cifar10/README.md
new file mode 100644
index 0000000..5333e6f
--- /dev/null
+++ b/examples/cifar10/README.md
@@ -0,0 +1,69 @@
+# Train CNN over Cifar-10
+
+
+Convolutional neural network (CNN) is a type of feed-forward artificial neural
+network widely used for image and video classification. In this example, we
+will train three deep CNN models to do image classification for the CIFAR-10 dataset:
+
+1. [AlexNet](https://code.google.com/p/cuda-convnet/source/browse/trunk/example-layers/layers-18pct.cfg),
+the best validation accuracy (without data augmentation) we achieved was about 82%.
+
+2. [VGGNet](http://torch.ch/blog/2015/07/30/cifar.html), the best validation accuracy (without data augmentation) we achieved was about 89%.
+3. [ResNet](https://github.com/facebook/fb.resnet.torch), the best validation accuracy (without data augmentation) we achieved was about 83%.
+
+
+## Instructions
+
+
+### SINGA installation
+
+Users can compile and install SINGA from source or install the Python version.
+The code can run on both CPU and GPU. For GPU training, CUDA and CUDNN (V4 or V5)
+are required. Please refer to the installation page for detailed instructions.
+
+
+
+### Training
+
+There are four training programs:
+
+1. train.py. The following command trains the VGG model using the Python
+version of the Cifar-10 dataset in the 'cifar-10-batches-py' folder.
+
+        python train.py vgg cifar-10-batches-py
+
+    To train other models, please replace 'vgg' with 'alexnet' or 'resnet'. By default
+    the training runs on a CudaGPU device; to run it on CppCPU, add an additional
+    argument
+
+        python train.py vgg cifar-10-batches-py  --use_cpu
+
+2. alexnet.cc. It trains the AlexNet model using the CPP APIs on a CudaGPU device,
+
+        run.sh
+
+3. alexnet-parallel.cc. It trains the AlexNet model using the CPP APIs on two CudaGPU devices.
+The two devices run synchronously to compute the gradients of the model parameters, which are
+averaged on the host CPU device and then applied to update the parameters.
+
+        run-parallel.sh
+
+4. vgg-parallel.cc. It trains the VGG model using the CPP APIs on two CudaGPU devices, similar to alexnet-parallel.cc.
+
+### Prediction
+
+predict.py includes the prediction function:
+
+        def predict(net, images, dev, topk=5)
+
+The net is created by loading the previously trained model; images consist of
+a numpy array of images (one row per image); dev is the device on which the
+prediction runs, e.g., a CudaGPU device or the host CppCPU device; the topk
+labels of each image would be returned.
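+
+As a quick illustration, a hypothetical invocation could look like the
+following sketch (the `load_net` helper and the image shape are assumptions
+for illustration; the actual net construction and loading code lives in
+predict.py):
+
+    import numpy as np
+    from singa import device
+
+    dev = device.create_cuda_gpu()      # or the host CppCPU device
+    net = load_net('model.bin', dev)    # hypothetical: rebuild the net and
+                                        # load the trained parameters
+    images = np.ones((2, 3, 32, 32), dtype=np.float32)  # dummy batch of two
+    top5 = predict(net, images, dev, topk=5)            # top-5 labels per image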
+
+
+
+
+
+
+

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0a764257/examples/cifar10/alexnet.cc
----------------------------------------------------------------------
diff --git a/examples/cifar10/alexnet.cc b/examples/cifar10/alexnet.cc
index 9e8a7d8..8a506d2 100644
--- a/examples/cifar10/alexnet.cc
+++ b/examples/cifar10/alexnet.cc
@@ -161,7 +161,7 @@ void Train(int num_epoch, string data_dir) {
   auto net = CreateNet();
   SGD sgd;
   OptimizerConf opt_conf;
-  // opt_conf.set_momentum(0.9);
+  opt_conf.set_momentum(0.9);
   auto reg = opt_conf.mutable_regularizer();
   reg->set_coefficient(0.004);
   sgd.Setup(opt_conf);

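Uncommenting `opt_conf.set_momentum(0.9)` enables momentum for the SGD updater. For reference, a generic momentum update works as in the sketch below (an illustrative pseudo-implementation in Python, not SINGA's actual kernel):

    import numpy as np

    def momentum_sgd_step(w, v, g, lr=0.001, mu=0.9):
        """One generic momentum-SGD step: v <- mu*v - lr*g; w <- w + v."""
        v = mu * v - lr * g
        return w + v, v

    w = np.zeros(4)
    v = np.zeros(4)
    g = np.ones(4)                     # dummy gradient
    w, v = momentum_sgd_step(w, v, g)  # parameters move along -g
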
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0a764257/examples/cifar10/train.py
----------------------------------------------------------------------
diff --git a/examples/cifar10/train.py b/examples/cifar10/train.py
index 2091ee5..8f596e5 100644
--- a/examples/cifar10/train.py
+++ b/examples/cifar10/train.py
@@ -106,7 +106,7 @@ def train(data, net, max_epoch, get_lr, weight_decay, batch_size=100,
         dev = device.create_cuda_gpu()
 
     net.to_device(dev)
-    opt = optimizer.SGD(momentum=0.9, decay=weight_decay)
+    opt = optimizer.SGD(momentum=0.9, weight_decay=weight_decay)
     for (p, specs) in zip(net.param_names(), net.param_specs()):
         opt.register(p, specs)
 

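The `decay` argument is renamed to `weight_decay`, presumably to match the keyword expected by the Python `optimizer.SGD` wrapper. Weight decay is standard L2 regularization, which adds `weight_decay * w` to each parameter's gradient; a minimal numpy sketch (illustrative only):

    import numpy as np

    w = np.array([1.0, -2.0])      # parameter values
    g = np.array([0.1, 0.1])       # raw gradient
    weight_decay = 0.004           # example coefficient
    g_reg = g + weight_decay * w   # decayed gradient fed to the SGD update
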
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0a764257/examples/mnist/train.py
----------------------------------------------------------------------
diff --git a/examples/mnist/train.py b/examples/mnist/train.py
index 55c7cbb..0a00358 100644
--- a/examples/mnist/train.py
+++ b/examples/mnist/train.py
@@ -70,7 +70,6 @@ def train(data_file, use_gpu, num_epoch=10, batch_size=100):
     print "num_train_batch = %d " % (num_train_batch)
     for epoch in range(num_epoch):
         trainerrorsum = 0.0
-        validerrorsum = 0.0
         print 'Epoch %d' % epoch
         for b in range(num_train_batch):
             # positive phase

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0a764257/src/python/singa/optimizer.py
----------------------------------------------------------------------
diff --git a/src/python/singa/optimizer.py b/src/python/singa/optimizer.py
index 74e6ade..00380e0 100644
--- a/src/python/singa/optimizer.py
+++ b/src/python/singa/optimizer.py
@@ -234,6 +234,8 @@ class Nesterov(Optimizer):
 
     def apply_with_lr(self, epoch, lr, grad, value, name):
         self.apply_regularizer_constraint(epoch, value, grad, name)
+        if name is not None and name in self.learning_rate_multiplier:
+            lr = lr * self.learning_rate_multiplier[name]
         self.opt.Apply(epoch, lr, name, grad.singa_tensor, value.singa_tensor)
         return value
 

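The added lines let `Nesterov.apply_with_lr` honor per-parameter learning-rate multipliers, matching the behavior of the other optimizers in this module. Conceptually, the scaling works as in this sketch (the parameter name and multiplier value are hypothetical):

    # hypothetical multiplier table, as it might be registered via param specs
    learning_rate_multiplier = {'conv1_weight': 2.0}

    def effective_lr(base_lr, name, multipliers):
        """Scale the base learning rate for parameters that carry a multiplier."""
        if name is not None and name in multipliers:
            return base_lr * multipliers[name]
        return base_lr

    print(effective_lr(0.01, 'conv1_weight', learning_rate_multiplier))  # 0.02
    print(effective_lr(0.01, 'conv1_bias', learning_rate_multiplier))    # 0.01
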
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0a764257/test/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 6c21034..593cfd6 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -22,7 +22,7 @@ ADD_EXECUTABLE(test_singa "gtest/gtest_main.cc" ${singa_test_source})
 ADD_DEPENDENCIES(test_singa singa_core singa_utils)
 #MESSAGE(STATUS "link libs" ${singa_linker_libs})
 TARGET_LINK_LIBRARIES(test_singa gtest singa_core singa_utils singa_model
-    singa_io proto protobuf ${SINGA_LINKER_LIBS})
+    singa_io singa_proto protobuf ${SINGA_LINKER_LIBS})
 IF(UNIX AND (NOT APPLE))
     LIST(APPEND LINK_FLAGS "-pthread")
 ENDIF()



[19/22] incubator-singa git commit: SINGA-223 Use Sphinx to create the website.

Posted by wa...@apache.org.
SINGA-223 Use Sphinx to create the website.


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/31ae6bd4
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/31ae6bd4
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/31ae6bd4

Branch: refs/heads/dev
Commit: 31ae6bd46bed53c2bccebaf691bfe18b6addb5e1
Parents: e963363
Author: Moaz Reyad <mo...@gmail.com>
Authored: Fri Aug 12 15:13:37 2016 +0800
Committer: Wei Wang <wa...@comp.nus.edu.sg>
Committed: Mon Aug 15 21:02:50 2016 +0800

----------------------------------------------------------------------
 doc/Makefile                          | 199 +----------------
 doc/_static/style.css                 |   3 +
 doc/_templates/layout.html            |  58 -----
 doc/build.sh                          |  33 ---
 doc/community/issue-tracking.md       |   9 -
 doc/community/mail-lists.rst          |  10 -
 doc/community/source-repository.md    |  22 --
 doc/community/team-list.rst           |  64 ------
 doc/conf.py                           | 339 -----------------------------
 doc/develop/contribute-code.md        |  60 -----
 doc/develop/contribute-docs.md        |  28 ---
 doc/develop/how-contribute.md         |  11 -
 doc/develop/schedule.rst              |  40 ----
 doc/docs.rst                          |   6 -
 doc/docs/cnn.md                       | 141 ------------
 doc/docs/device.rst                   |  38 ----
 doc/docs/examples.rst                 |   6 -
 doc/docs/index.rst                    |  10 -
 doc/docs/installation.md              |  69 ------
 doc/docs/neural-net.md                | 327 ----------------------------
 doc/docs/overview.rst                 |  99 ---------
 doc/docs/software_stack.md            |  99 ---------
 doc/docs/tensor.rst                   |  54 -----
 doc/docs/zh/index.md                  |   9 -
 doc/downloads.md                      |  67 ------
 doc/en/_templates/layout.html         |  61 ++++++
 doc/en/community/issue-tracking.md    |   9 +
 doc/en/community/mail-lists.rst       |  10 +
 doc/en/community/source-repository.md |  22 ++
 doc/en/community/team-list.rst        |  64 ++++++
 doc/en/conf.py                        | 339 +++++++++++++++++++++++++++++
 doc/en/develop/contribute-code.md     |  60 +++++
 doc/en/develop/contribute-docs.md     |  28 +++
 doc/en/develop/how-contribute.md      |  11 +
 doc/en/develop/schedule.rst           |  40 ++++
 doc/en/docs.rst                       |   6 +
 doc/en/docs/cnn.md                    | 141 ++++++++++++
 doc/en/docs/device.rst                |  38 ++++
 doc/en/docs/index.rst                 |  10 +
 doc/en/docs/installation.md           |  69 ++++++
 doc/en/docs/neural-net.md             | 327 ++++++++++++++++++++++++++++
 doc/en/docs/overview.rst              |  99 +++++++++
 doc/en/docs/software_stack.md         |  99 +++++++++
 doc/en/docs/tensor.rst                |  54 +++++
 doc/en/downloads.md                   |  67 ++++++
 doc/en/index.rst                      | 109 ++++++++++
 doc/index.rst                         | 109 ----------
 doc/make.bat                          | 281 ------------------------
 doc/zh/_templates/layout.html         |  61 ++++++
 doc/zh/conf.py                        | 339 +++++++++++++++++++++++++++++
 doc/zh/index.md                       |   9 +
 examples/index.rst                    |   3 +
 52 files changed, 2082 insertions(+), 2184 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/31ae6bd4/doc/Makefile
----------------------------------------------------------------------
diff --git a/doc/Makefile b/doc/Makefile
index c6eddf1..436a661 100644
--- a/doc/Makefile
+++ b/doc/Makefile
@@ -18,209 +18,18 @@ I18NSPHINXOPTS  = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
 help:
 	@echo "Please use \`make <target>' where <target> is one of"
 	@echo "  html       to make standalone HTML files"
-	@echo "  dirhtml    to make HTML files named index.html in directories"
-	@echo "  singlehtml to make a single large HTML file"
-	@echo "  pickle     to make pickle files"
-	@echo "  json       to make JSON files"
-	@echo "  htmlhelp   to make HTML files and a HTML help project"
-	@echo "  qthelp     to make HTML files and a qthelp project"
-	@echo "  applehelp  to make an Apple Help Book"
-	@echo "  devhelp    to make HTML files and a Devhelp project"
-	@echo "  epub       to make an epub"
-	@echo "  epub3      to make an epub3"
-	@echo "  latex      to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
-	@echo "  latexpdf   to make LaTeX files and run them through pdflatex"
-	@echo "  latexpdfja to make LaTeX files and run them through platex/dvipdfmx"
-	@echo "  text       to make text files"
-	@echo "  man        to make manual pages"
-	@echo "  texinfo    to make Texinfo files"
-	@echo "  info       to make Texinfo files and run them through makeinfo"
-	@echo "  gettext    to make PO message catalogs"
-	@echo "  changes    to make an overview of all changed/added/deprecated items"
-	@echo "  xml        to make Docutils-native XML files"
-	@echo "  pseudoxml  to make pseudoxml-XML files for display purposes"
-	@echo "  linkcheck  to check all external links for integrity"
-	@echo "  doctest    to run all doctests embedded in the documentation (if enabled)"
-	@echo "  coverage   to run coverage check of the documentation (if enabled)"
-	@echo "  dummy      to check syntax errors of document sources"
 
 .PHONY: clean
 clean:
 	rm -rf $(BUILDDIR)/*
+	rm -rf en/docs/examples
 
 .PHONY: html
 html:
-	cp -rf ../examples docs/
-	$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
+	cp -rf ../examples en/docs/
+	$(SPHINXBUILD) -b html  -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) en $(BUILDDIR)/html
+	$(SPHINXBUILD) -b html  -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) zh $(BUILDDIR)/html/zh
 	@echo
 	@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
 
-.PHONY: dirhtml
-dirhtml:
-	$(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
-	@echo
-	@echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."
-
-.PHONY: singlehtml
-singlehtml:
-	$(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
-	@echo
-	@echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."
-
-.PHONY: pickle
-pickle:
-	$(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
-	@echo
-	@echo "Build finished; now you can process the pickle files."
-
-.PHONY: json
-json:
-	$(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
-	@echo
-	@echo "Build finished; now you can process the JSON files."
-
-.PHONY: htmlhelp
-htmlhelp:
-	$(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
-	@echo
-	@echo "Build finished; now you can run HTML Help Workshop with the" \
-	      ".hhp project file in $(BUILDDIR)/htmlhelp."
-
-.PHONY: qthelp
-qthelp:
-	$(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
-	@echo
-	@echo "Build finished; now you can run "qcollectiongenerator" with the" \
-	      ".qhcp project file in $(BUILDDIR)/qthelp, like this:"
-	@echo "# qcollectiongenerator $(BUILDDIR)/qthelp/Singa.qhcp"
-	@echo "To view the help file:"
-	@echo "# assistant -collectionFile $(BUILDDIR)/qthelp/Singa.qhc"
-
-.PHONY: applehelp
-applehelp:
-	$(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp
-	@echo
-	@echo "Build finished. The help book is in $(BUILDDIR)/applehelp."
-	@echo "N.B. You won't be able to view it unless you put it in" \
-	      "~/Library/Documentation/Help or install it in your application" \
-	      "bundle."
-
-.PHONY: devhelp
-devhelp:
-	$(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
-	@echo
-	@echo "Build finished."
-	@echo "To view the help file:"
-	@echo "# mkdir -p $$HOME/.local/share/devhelp/Singa"
-	@echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/Singa"
-	@echo "# devhelp"
-
-.PHONY: epub
-epub:
-	$(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub
-	@echo
-	@echo "Build finished. The epub file is in $(BUILDDIR)/epub."
-
-.PHONY: epub3
-epub3:
-	$(SPHINXBUILD) -b epub3 $(ALLSPHINXOPTS) $(BUILDDIR)/epub3
-	@echo
-	@echo "Build finished. The epub3 file is in $(BUILDDIR)/epub3."
-
-.PHONY: latex
-latex:
-	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
-	@echo
-	@echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
-	@echo "Run \`make' in that directory to run these through (pdf)latex" \
-	      "(use \`make latexpdf' here to do that automatically)."
-
-.PHONY: latexpdf
-latexpdf:
-	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
-	@echo "Running LaTeX files through pdflatex..."
-	$(MAKE) -C $(BUILDDIR)/latex all-pdf
-	@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
 
-.PHONY: latexpdfja
-latexpdfja:
-	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
-	@echo "Running LaTeX files through platex and dvipdfmx..."
-	$(MAKE) -C $(BUILDDIR)/latex all-pdf-ja
-	@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
-
-.PHONY: text
-text:
-	$(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text
-	@echo
-	@echo "Build finished. The text files are in $(BUILDDIR)/text."
-
-.PHONY: man
-man:
-	$(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
-	@echo
-	@echo "Build finished. The manual pages are in $(BUILDDIR)/man."
-
-.PHONY: texinfo
-texinfo:
-	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
-	@echo
-	@echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
-	@echo "Run \`make' in that directory to run these through makeinfo" \
-	      "(use \`make info' here to do that automatically)."
-
-.PHONY: info
-info:
-	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
-	@echo "Running Texinfo files through makeinfo..."
-	make -C $(BUILDDIR)/texinfo info
-	@echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."
-
-.PHONY: gettext
-gettext:
-	$(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale
-	@echo
-	@echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."
-
-.PHONY: changes
-changes:
-	$(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
-	@echo
-	@echo "The overview file is in $(BUILDDIR)/changes."
-
-.PHONY: linkcheck
-linkcheck:
-	$(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
-	@echo
-	@echo "Link check complete; look for any errors in the above output " \
-	      "or in $(BUILDDIR)/linkcheck/output.txt."
-
-.PHONY: doctest
-doctest:
-	$(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
-	@echo "Testing of doctests in the sources finished, look at the " \
-	      "results in $(BUILDDIR)/doctest/output.txt."
-
-.PHONY: coverage
-coverage:
-	$(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage
-	@echo "Testing of coverage in the sources finished, look at the " \
-	      "results in $(BUILDDIR)/coverage/python.txt."
-
-.PHONY: xml
-xml:
-	$(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml
-	@echo
-	@echo "Build finished. The XML files are in $(BUILDDIR)/xml."
-
-.PHONY: pseudoxml
-pseudoxml:
-	$(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml
-	@echo
-	@echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml."
-
-.PHONY: dummy
-dummy:
-	$(SPHINXBUILD) -b dummy $(ALLSPHINXOPTS) $(BUILDDIR)/dummy
-	@echo
-	@echo "Build finished. Dummy builder generates no files."

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/31ae6bd4/doc/_static/style.css
----------------------------------------------------------------------
diff --git a/doc/_static/style.css b/doc/_static/style.css
new file mode 100644
index 0000000..b07bdb1
--- /dev/null
+++ b/doc/_static/style.css
@@ -0,0 +1,3 @@
+.wy-nav-content {
+    max-width: none;
+}

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/31ae6bd4/doc/_templates/layout.html
----------------------------------------------------------------------
diff --git a/doc/_templates/layout.html b/doc/_templates/layout.html
deleted file mode 100755
index 800a74d..0000000
--- a/doc/_templates/layout.html
+++ /dev/null
@@ -1,58 +0,0 @@
-{#
- Licensed to the Apache Software Foundation (ASF) under one
- or more contributor license agreements.  See the NOTICE file
- distributed with this work for additional information
- regarding copyright ownership.  The ASF licenses this file
- to you under the Apache License, Version 2.0 (the
- "License"); you may not use this file except in compliance
- with the License.  You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-#}
-{% extends "!layout.html" %}
-     
-{% block footer %}
-
-
-<div class="rst-versions shift-up" data-toggle="rst-versions" role="note" aria-label="versions">
-<img src= "{{pathto('_static/'+ 'apache.jpg' , 1) }}">  
- 
-  <span class="rst-current-version" data-toggle="rst-current-version">
-    <span class="fa fa-book"> incubator-singa </span>
-    v: {{ version }}
-    <span class="fa fa-caret-down"></span>
-  </span>
-  <div class="rst-other-versions">
-    <dl>
-      <dt>Languages</dt>
-      <dd><a href="/en/latest/">English</a></dd>
-      <dd><a href="/zh/latest/">\u4e2d\u6587</a></dd>	  
-	  <dd><a href="/jp/latest/">\u65e5\u672c\u8a9e</a></dd>
-	  <dd><a href="/kr/latest/">\ud55c\uad6d\uc5b4</a></dd>
-	  <dd><a href="/it/latest/">Italiano</a></dd>
-	  <dd><a href="/ar/latest/">\u0627\u0644\u0639\u0631\u0628\u064a\u0629</a></dd>
-    </dl>
-    <dl>
-      <dt>Versions</dt>
-      <dd><a href="/{{ language }}/latest/">latest</a></dd>
-      <dd><a href="/{{ language }}/0.3.0/">v0.3.0</a></dd>
-      <dd><a href="/{{ language }}/0.2.0/">v0.2.0</a></dd>
-	  <dd><a href="/{{ language }}/0.1.0/">v0.1.0</a></dd>
-    </dl>
-  </div>
-</div>
-
- <a href="https://github.com/apache/incubator-singa">
-    <img style="position: absolute; top: 0; right: 0; border: 0; z-index: 10000;"
-        src="https://s3.amazonaws.com/github/ribbons/forkme_right_orange_ff7600.png"
-        alt="Fork me on GitHub">
-</a>
-
-{{ super() }}
-{% endblock %}

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/31ae6bd4/doc/build.sh
----------------------------------------------------------------------
diff --git a/doc/build.sh b/doc/build.sh
deleted file mode 100755
index 3af6ec1..0000000
--- a/doc/build.sh
+++ /dev/null
@@ -1,33 +0,0 @@
-#!/bin/sh
-
-#
-#  Licensed to the Apache Software Foundation (ASF) under one
-#  or more contributor license agreements.  See the NOTICE file
-#  distributed with this work for additional information
-#  regarding copyright ownership.  The ASF licenses this file
-#  to you under the Apache License, Version 2.0 (the
-#  "License"); you may not use this file except in compliance
-#  with the License.  You may obtain a copy of the License at
-#
-#      http://www.apache.org/licenses/LICENSE-2.0
-#
-#  Unless required by applicable law or agreed to in writing, software
-#  distributed under the License is distributed on an "AS IS" BASIS,
-#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#  See the License for the specific language governing permissions and
-#  limitations under the License.
-
-echo "Building English version"
-make -e SPHINXOPTS="-D language='en'" html
-
-#echo "Building Chinese version"
-#make -e SPHINXOPTS="-D language='zh'" html
-
-#echo "Building Japanese version"
-#make -e SPHINXOPTS="-D language='jp'" html
-
-#echo "Building Italian version"
-#make -e SPHINXOPTS="-D language='it'" html 
-
-#echo "Building Arabic version"
-#make -e SPHINXOPTS="-D language='ar'" html

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/31ae6bd4/doc/community/issue-tracking.md
----------------------------------------------------------------------
diff --git a/doc/community/issue-tracking.md b/doc/community/issue-tracking.md
deleted file mode 100644
index 26b23dd..0000000
--- a/doc/community/issue-tracking.md
+++ /dev/null
@@ -1,9 +0,0 @@
-## Issue Tracking
-
-___
-
-SINGA uses [JIRA](https://www.atlassian.com/software/jira), a J2EE-based issue tracking and project management application.
-
-Issues, bugs, and feature requests should be submitted to the following issue tracking system for this project.
-
-* https://issues.apache.org/jira/browse/singa

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/31ae6bd4/doc/community/mail-lists.rst
----------------------------------------------------------------------
diff --git a/doc/community/mail-lists.rst b/doc/community/mail-lists.rst
deleted file mode 100644
index b1ba6f9..0000000
--- a/doc/community/mail-lists.rst
+++ /dev/null
@@ -1,10 +0,0 @@
-Project Mailing Lists
-=====================
-
-These are the mailing lists that have been established for this project. For each list, there is a subscribe, unsubscribe, and an archive link.
-
-.. csv-table:: Mailing Lists
-	:header: "Name", "Post", "Subscribe", "Unsubscribe", "Archive"
-
-        "Development", "dev@singa.incubator.apache.org", "[Subscribe](mailto:dev-subscribe@singa.incubator.apache.org)", "[Unsubscribe](mailto:dev-unsubscribe@singa.incubator.apache.org)", "[mail-archives.apache.org](http://mail-archives.apache.org/mod_mbox/singa-dev/)"
-        "Commits", "commits@singa.incubator.apache.org", "[Subscribe](mailto:commits-subscribe@singa.incubator.apache.org)", "[Unsubscribe](mailto:commits-unsubscribe@singa.incubator.apache.org)", "[mail-archives.apache.org](http://mail-archives.apache.org/mod_mbox/singa-commits/)"

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/31ae6bd4/doc/community/source-repository.md
----------------------------------------------------------------------
diff --git a/doc/community/source-repository.md b/doc/community/source-repository.md
deleted file mode 100644
index 8864629..0000000
--- a/doc/community/source-repository.md
+++ /dev/null
@@ -1,22 +0,0 @@
-# Source Repository
-
-___
-
-This project uses [Git](http://git-scm.com/) to manage its source code. Instructions on Git use can be found at [http://git-scm.com/documentation](http://git-scm.com/documentation).
-
-## Web Access
-
-The following is a link to the online source repository.
-
-* [https://git-wip-us.apache.org/repos/asf?p=incubator-singa.git;a=summary](https://git-wip-us.apache.org/repos/asf?p=incubator-singa.git;a=summary)
-
-
-## Upstream for committers
-
-Committers need to set the upstream endpoint to the Apache git (not github) repo address, e.g.,
-
-    $ git remote add asf https://git-wip-us.apache.org/repos/asf/incubator-singa.git
-
-Then you (committer) can push your code in this way,
-
-    $ git push asf <local-branch>:<remote-branch>

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/31ae6bd4/doc/community/team-list.rst
----------------------------------------------------------------------
diff --git a/doc/community/team-list.rst b/doc/community/team-list.rst
deleted file mode 100644
index a677aff..0000000
--- a/doc/community/team-list.rst
+++ /dev/null
@@ -1,64 +0,0 @@
-The SINGA Team
-==============
-
-A successful project requires many people to play many roles. Some members write code or documentation, while others are valuable as testers, submitting patches and suggestions.
-
-Mentors
--------
-
-==================   ============
-Name                 Email
-==================   ============
-Daniel Dai           daijy@apache.org
-Ted Dunning	     tdunning@apache.org
-Alan Gates	     gates@apache.org
-Thejas Nair	     thejas@apache.org
-==================   ============
-
-Developers
-----------
-
-+-------------------+--------------------------------+----------------------------------------------+
-| Name              |  Email                         |  Organization                                |
-+-------------------+--------------------------------+----------------------------------------------+
-|Gang Chen          |  cg@zju.edu.cn                 |   Zhejiang University                        |
-+-------------------+--------------------------------+----------------------------------------------+
-| Haibo Chen        | hzchenhaibo@corp.netease.com   |  NetEase                                     |
-+-------------------+--------------------------------+----------------------------------------------+
-| Anh Dinh	    |     dinhtta@apache.org	     |         National University of Singapore     |                       
-+-------------------+--------------------------------+----------------------------------------------+
-| Jinyang Gao	    |     jinyang@apache.org	     |         National University of Singapore	    |
-+-------------------+--------------------------------+----------------------------------------------+
-| Xing Ji	    |         jixin@comp.nus.edu.sg  |          National University of Singapore    |
-+-------------------+--------------------------------+----------------------------------------------+
-| Chonho Lee	    |  chonho@gmail.com              |   National University of Singapore           |
-+-------------------+--------------------------------+----------------------------------------------+
-| Zhaojing Luo	    | zhaojing@apache.org	     | National University of Singapore	            |
-+-------------------+--------------------------------+----------------------------------------------+
-| Beng Chin Ooi	    | ooibc@comp.nus.edu.sg          | National University of Singapore	            |
-+-------------------+--------------------------------+----------------------------------------------+
-| Kian-Lee Tan	    |    tankl@apache.org            | National University of Singapore	            |
-+-------------------+--------------------------------+----------------------------------------------+
-|Anthony K. H. Tung |  atung@comp.nus.edu.sg         |   National University of Singapore	    |
-+-------------------+--------------------------------+----------------------------------------------+
-| Ji Wang	    |         wangji@comp.nus.edu.sg |	      National University of Singapore	    |
-+-------------------+--------------------------------+----------------------------------------------+
-| Sheng Wang	    |    wangsh@apache.org           | National University of Singapore	            |
-+-------------------+--------------------------------+----------------------------------------------+
-| Wei Wang	    |    wangwei@apache.org	     |         National University of Singapore	    |
-+-------------------+--------------------------------+----------------------------------------------+
-| Yuan Wang         |  wangyuan@corp.netease.com     |   NetEase                                    |
-+-------------------+--------------------------------+----------------------------------------------+
-| Wenfeng Wu	    |     wuwf@comp.nus.edu.sg       |  National University of Singapore            |
-+-------------------+--------------------------------+----------------------------------------------+
-| Zhongle Xie	    |     zhongle@apache.org	     |        National University of Singapore      |
-+-------------------+--------------------------------+----------------------------------------------+
-| Meihui Zhang	    |     meihui_zhang@sutd.edu.sg   |Singapore University of Technology and Design |
-+-------------------+--------------------------------+----------------------------------------------+
-| Kaiping Zheng     |     kaiping@apache.org	     |         National University of Singapore	    |
-+-------------------+--------------------------------+----------------------------------------------+
-| Ming Zhong        | hzzhongming15@corp.netease.com |   Zhejiang University                        |
-+-------------------+--------------------------------+----------------------------------------------+
-
-
-

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/31ae6bd4/doc/conf.py
----------------------------------------------------------------------
diff --git a/doc/conf.py b/doc/conf.py
deleted file mode 100755
index 9d4480e..0000000
--- a/doc/conf.py
+++ /dev/null
@@ -1,339 +0,0 @@
-# -*- coding: utf-8 -*-
-#
-# incubator-singa documentation build configuration file, created by
-# sphinx-quickstart on Sat Jul  9 20:36:57 2016.
-#
-# This file is execfile()d with the current directory set to its
-# containing dir.
-#
-# Note that not all possible configuration values are present in this
-# autogenerated file.
-#
-# All configuration values have a default; values that are commented out
-# serve to show the default.
-
-# If extensions (or modules to document with autodoc) are in another directory,
-# add these directories to sys.path here. If the directory is relative to the
-# documentation root, use os.path.abspath to make it absolute, like shown here.
-#
-import os
-import sys
-sys.path.insert(0, os.path.abspath('.'))
-sys.path.insert(1, os.path.abspath('../build/python'))
-
-# -- General configuration ------------------------------------------------
-from recommonmark.parser import CommonMarkParser
-
-source_parsers = {
-    '.md': CommonMarkParser,
-}
-
-# If your documentation needs a minimal Sphinx version, state it here.
-#
-# needs_sphinx = '1.0'
-
-# Add any Sphinx extension module names here, as strings. They can be
-# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
-# ones.
-extensions = ['sphinx.ext.autodoc', 'sphinx.ext.napoleon']
-napoleon_google_docstring = True
-
-# Add any paths that contain templates here, relative to this directory.
-templates_path = ['_templates']
-
-# The suffix(es) of source filenames.
-# You can specify multiple suffix as a list of string:
-#
-# source_suffix = ['.rst', '.md']
-source_suffix = ['.rst', '.md']
-
-# The encoding of source files.
-#
-source_encoding = 'utf-8-sig'
-
-# The master toctree document.
-master_doc = 'index'
-
-# General information about the project.
-project = u'incubator-singa'
-copyright = u'2016 The Apache Software Foundation. All rights reserved. Apache Singa, Apache, the Apache feather logo, and the Apache Singa project logos are trademarks of The Apache Software Foundation. All other marks mentioned may be trademarks or registered trademarks of their respective owners.'
-author = u'moaz'
-
-# The version info for the project you're documenting, acts as replacement for
-# |version| and |release|, also used in various other places throughout the
-# built documents.
-#
-# The short X.Y version.
-version = u'1.0.0'
-# The full version, including alpha/beta/rc tags.
-release = u'1.0.0'
-
-# The language for content autogenerated by Sphinx. Refer to documentation
-# for a list of supported languages.
-#
-# This is also used if you do content translation via gettext catalogs.
-# Usually you set "language" from the command line for these cases.
-language = None
-
-# There are two options for replacing |today|: either, you set today to some
-# non-false value, then it is used:
-#
-# today = ''
-#
-# Else, today_fmt is used as the format for a strftime call.
-#
-# today_fmt = '%B %d, %Y'
-
-# List of patterns, relative to source directory, that match files and
-# directories to ignore when looking for source files.
-# This patterns also effect to html_static_path and html_extra_path
-exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
-
-# The reST default role (used for this markup: `text`) to use for all
-# documents.
-#
-# default_role = None
-
-# If true, '()' will be appended to :func: etc. cross-reference text.
-#
-# add_function_parentheses = True
-
-# If true, the current module name will be prepended to all description
-# unit titles (such as .. function::).
-#
-# add_module_names = True
-
-# If true, sectionauthor and moduleauthor directives will be shown in the
-# output. They are ignored by default.
-#
-# show_authors = False
-
-# The name of the Pygments (syntax highlighting) style to use.
-pygments_style = 'sphinx'
-
-# A list of ignored prefixes for module index sorting.
-# modindex_common_prefix = []
-
-# If true, keep warnings as "system message" paragraphs in the built documents.
-# keep_warnings = False
-
-# If true, `todo` and `todoList` produce output, else they produce nothing.
-todo_include_todos = False
-
-
-# -- Options for HTML output ----------------------------------------------
-
-# The theme to use for HTML and HTML Help pages.  See the documentation for
-# a list of builtin themes.
-#
-html_theme = 'sphinx_rtd_theme'
-
-# Theme options are theme-specific and customize the look and feel of a theme
-# further.  For a list of options available for each theme, see the
-# documentation.
-#
-# html_theme_options = {}
-
-# Add any paths that contain custom themes here, relative to this directory.
-# html_theme_path = []
-
-# The name for this set of Sphinx documents.
-# "<project> v<release> documentation" by default.
-#
-# html_title = u'Singa v1.0.0'
-
-# A shorter title for the navigation bar.  Default is the same as html_title.
-#
-# html_short_title = None
-
-# The name of an image file (relative to this directory) to place at the top
-# of the sidebar.
-#
-html_logo = 'image/singa.png'
-
-# The name of an image file (relative to this directory) to use as a favicon of
-# the docs.  This file should be a Windows icon file (.ico) being 16x16 or 32x32
-# pixels large.
-#
-# html_favicon = None
-
-# Add any paths that contain custom static files (such as style sheets) here,
-# relative to this directory. They are copied after the builtin static files,
-# so a file named "default.css" will overwrite the builtin "default.css".
-html_static_path = ['_static']
-
-# Add any extra paths that contain custom files (such as robots.txt or
-# .htaccess) here, relative to this directory. These files are copied
-# directly to the root of the documentation.
-#
-# html_extra_path = []
-
-# If not None, a 'Last updated on:' timestamp is inserted at every page
-# bottom, using the given strftime format.
-# The empty string is equivalent to '%b %d, %Y'.
-#
-# html_last_updated_fmt = None
-
-# If true, SmartyPants will be used to convert quotes and dashes to
-# typographically correct entities.
-#
-# html_use_smartypants = True
-
-# Custom sidebar templates, maps document names to template names.
-#
-# html_sidebars = {}
-
-# Additional templates that should be rendered to pages, maps page names to
-# template names.
-#
-# html_additional_pages = {}
-
-# If false, no module index is generated.
-#
-# html_domain_indices = True
-
-# If false, no index is generated.
-#
-# html_use_index = True
-
-# If true, the index is split into individual pages for each letter.
-#
-# html_split_index = False
-
-# If true, links to the reST sources are added to the pages.
-#
-# html_show_sourcelink = True
-
-# If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
-#
-# html_show_sphinx = True
-
-# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
-#
-# html_show_copyright = True
-
-# If true, an OpenSearch description file will be output, and all pages will
-# contain a <link> tag referring to it.  The value of this option must be the
-# base URL from which the finished HTML is served.
-#
-# html_use_opensearch = ''
-
-# This is the file name suffix for HTML files (e.g. ".xhtml").
-# html_file_suffix = None
-
-# Language to be used for generating the HTML full-text search index.
-# Sphinx supports the following languages:
-#   'da', 'de', 'en', 'es', 'fi', 'fr', 'hu', 'it', 'ja'
-#   'nl', 'no', 'pt', 'ro', 'ru', 'sv', 'tr', 'zh'
-#
-# html_search_language = 'en'
-
-# A dictionary with options for the search language support, empty by default.
-# 'ja' uses this config value.
-# 'zh' user can custom change `jieba` dictionary path.
-#
-# html_search_options = {'type': 'default'}
-
-# The name of a javascript file (relative to the configuration directory) that
-# implements a search results scorer. If empty, the default will be used.
-#
-# html_search_scorer = 'scorer.js'
-
-# Output file base name for HTML help builder.
-htmlhelp_basename = 'Singadoc'
-
-# -- Options for LaTeX output ---------------------------------------------
-
-latex_elements = {
-     # The paper size ('letterpaper' or 'a4paper').
-     #
-     # 'papersize': 'letterpaper',
-
-     # The font size ('10pt', '11pt' or '12pt').
-     #
-     # 'pointsize': '10pt',
-
-     # Additional stuff for the LaTeX preamble.
-     #
-     # 'preamble': '',
-
-     # Latex figure (float) alignment
-     #
-     # 'figure_align': 'htbp',
-}
-
-# Grouping the document tree into LaTeX files. List of tuples
-# (source start file, target name, title,
-#  author, documentclass [howto, manual, or own class]).
-latex_documents = [
-    (master_doc, 'incubator-singa.tex', u'incubator-singa Documentation',
-     u'moaz', 'manual'),
-]
-
-# The name of an image file (relative to this directory) to place at the top of
-# the title page.
-#
-# latex_logo = None
-
-# For "manual" documents, if this is true, then toplevel headings are parts,
-# not chapters.
-#
-# latex_use_parts = False
-
-# If true, show page references after internal links.
-#
-# latex_show_pagerefs = False
-
-# If true, show URL addresses after external links.
-#
-# latex_show_urls = False
-
-# Documents to append as an appendix to all manuals.
-#
-# latex_appendices = []
-
-# If false, no module index is generated.
-#
-# latex_domain_indices = True
-
-
-# -- Options for manual page output ---------------------------------------
-
-# One entry per manual page. List of tuples
-# (source start file, name, description, authors, manual section).
-man_pages = [
-    (master_doc, 'incubator-singa', u'incubator-singa Documentation',
-     [author], 1)
-]
-
-# If true, show URL addresses after external links.
-#
-# man_show_urls = False
-
-
-# -- Options for Texinfo output -------------------------------------------
-
-# Grouping the document tree into Texinfo files. List of tuples
-# (source start file, target name, title, author,
-#  dir menu entry, description, category)
-texinfo_documents = [
-    (master_doc, 'incubator-singa', u'incubator-singa Documentation',
-     author, 'incubator-singa', 'One line description of project.',
-     'Miscellaneous'),
-]
-
-# Documents to append as an appendix to all manuals.
-#
-# texinfo_appendices = []
-
-# If false, no module index is generated.
-#
-# texinfo_domain_indices = True
-
-# How to display URL addresses: 'footnote', 'no', or 'inline'.
-#
-# texinfo_show_urls = 'footnote'
-
-# If true, do not generate a @detailmenu in the "Top" node's menu.
-#
-# texinfo_no_detailmenu = False

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/31ae6bd4/doc/develop/contribute-code.md
----------------------------------------------------------------------
diff --git a/doc/develop/contribute-code.md b/doc/develop/contribute-code.md
deleted file mode 100644
index 98e5aee..0000000
--- a/doc/develop/contribute-code.md
+++ /dev/null
@@ -1,60 +0,0 @@
-## How to Contribute Code
-
-_____
-
-### Coding Style
-
-The SINGA codebase follows the [Google C++ Style Guide](http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml).
-
-To check if your code follows the style, you can use the provided cpplint tool:
-    
-    $ ./tool/cpplint.py YOUR_FILE
-
-
-### JIRA format
-
-Like other Apache projects, SINGA uses JIRA to track bugs, improvements and
-other high-level discussions (e.g., system design and features).  Github pull requests are
-used for implementation discussions, e.g., code review and code merge.
-
-* Provide a descriptive Title.
-* Write a detailed Description. For bug reports, this should ideally include a
-  short reproduction of the problem. For new features, it may include a design
-  document.
-* Set [required fields](https://cwiki.apache.org/confluence/display/SPARK/Contributing+to+Spark#ContributingtoSpark-JIRA)
-
-### Pull Request
-
-The work flow is
-
-* Fork the [SINGA Github repository](https://github.com/apache/incubator-singa) to
-your own Github account.
-
-* Clone your fork, create a new branch (e.g., feature-foo or fixbug-foo),
- work on it. After finishing your job,
- [rebase](https://git-scm.com/book/en/v2/Git-Branching-Rebasing) it to the
- current latest master and push commits to your own Github account (the new
- branch).
-
-* Open a pull request against the master branch of apache/incubator-singa.
-The PR title should be of the form SINGA-xxxx Title, where
-SINGA-xxxx is the relevant JIRA number, and Title may be the JIRA's title or a
-more specific title describing the PR itself, for example, "SINGA-6 Implement thread-safe singleton". Detailed description can be copied from the JIRA.
-Consider identifying committers or other contributors who have worked on the
-code being changed. Find the file(s) in Github and click "Blame" to see a
-line-by-line annotation of who changed the code last.  You can add @username in
-the PR description to ping them immediately.
-Please state that the contribution is your original work and that you license
-the work to the project under the project's open source license. Further commits (e.g., bug fix)
-to your new branch will be added to this pull request automatically by Github.
-
-* Wait for one committer to review the patch. If no conflicts, the committers will merge it with
-the master branch. The merge should a) not use rebase b) disable fast forward merge c) check the 
-commit message format and test the code/feature.
-
-* If there are too many small commit messages, you will be told to squash your commits into fewer meaningful
-commits. If your commit message does not follow the format (i.e., SINGA-xxxx), you will be told to
-reword your commit message. Both changes can be done using interactive git rebase. Once you
-get the commits corrected, push them to your own GitHub again. Your pull request 
-will be automatically updated. For details, please refer to 
-[Rebase Pull Requests](https://github.com/edx/edx-platform/wiki/How-to-Rebase-a-Pull-Request).
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/31ae6bd4/doc/develop/contribute-docs.md
----------------------------------------------------------------------
diff --git a/doc/develop/contribute-docs.md b/doc/develop/contribute-docs.md
deleted file mode 100644
index 5e21a0f..0000000
--- a/doc/develop/contribute-docs.md
+++ /dev/null
@@ -1,28 +0,0 @@
-# How to Contribute Documentation
-
-___
-
-
-## Website
-This document gives step-by-step instructions for deploying [Singa website](http://singa.incubator.apache.org).
-
-Singa website is built by [Sphinx](http://www.sphinx-doc.org) 1.4.4 from a source tree stored in git: https://github.com/apache/incubator-singa/tree/master/doc.
-
-To install Sphinx on Ubuntu:
-
-    $ apt-get install python-sphinx
-
-To install the markdown support for Sphinx:
-
-    $ pip install recommonmark
-
-You can build the website by executing the following command from the doc folder:
-
-    $ make html
-
-The procedure for contributing documentation is the same as [contributing code](contribute-code.html).
-
-
-## CPP API
-
-To generate docs, run "doxygen" from the doc folder (Doxygen >= 1.8 recommended)

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/31ae6bd4/doc/develop/how-contribute.md
----------------------------------------------------------------------
diff --git a/doc/develop/how-contribute.md b/doc/develop/how-contribute.md
deleted file mode 100644
index 8687b5a..0000000
--- a/doc/develop/how-contribute.md
+++ /dev/null
@@ -1,11 +0,0 @@
-# How to Contribute to SINGA
-
-___
-
-As with any open source project, there are several ways you can help:
-
-* Join the [mailing list](../community/mail-lists.html) and answer other user's questions.
-* [Build Singa](../quick-start.html) for yourself, in order to fix bugs.
-* Report bugs, feature requests and other issues in the [issue tracking](../community/issue-tracking.html) application.
-* Check SINGA's [development schedule](schedule.html) and [contribute code](contribute-code.html) by providing patches.
-* [Help with the documentation](contribute-docs.html) by updating webpages that are lacking or unclear.

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/31ae6bd4/doc/develop/schedule.rst
----------------------------------------------------------------------
diff --git a/doc/develop/schedule.rst b/doc/develop/schedule.rst
deleted file mode 100644
index 2afe54f..0000000
--- a/doc/develop/schedule.rst
+++ /dev/null
@@ -1,40 +0,0 @@
-Development Schedule
-====================
-
-.. csv-table::
-	:header: "Release", "Module", "Feature", "Status"
-
-	" 0.1 Sep 2015     "," Neural Network          "," Feed forward neural network, including CNN, MLP                                                                 "," done  "
-	"                  ","                         "," RBM-like model, including RBM                                                                                   "," done   "
-	"                  ","                         "," Recurrent neural network, including standard RNN                                                                "," done   "
-	"                  ","  Architecture           "," One worker group on single node (with data partition)                                                           "," done   "
-	"                  ","                         "," Multi worker groups on single node using [Hogwild](http://www.eecs.berkeley.edu/~brecht/papers/hogwildTR.pdf)      ","done"
-	"                  ","                         "," Distributed Hogwild","done"
-	"                  ","                         "," Multi groups across nodes, like [Downpour](http://papers.nips.cc/paper/4687-large-scale-distributed-deep-networks) ","done"
-	"                  ","                         "," All-Reduce training architecture like [DeepImage](http://arxiv.org/abs/1501.02876) ","done"
-	"                  ","                         "," Load-balance among servers "," done"
-	"                  ","  Failure recovery       "," Checkpoint and restore ","done"
-	"                  ","  Tools                  "," Installation with GNU auto tools"," done"
-	"0.2 Jan 2016      "," Neural Network          "," Feed forward neural network, including AlexNet, cuDNN layers, etc."," done "
-	"                  ","                         "," Recurrent neural network, including GRULayer and BPTT","done "
-	"                  ","                         "," Model partition and hybrid partition","done"
-	"      		   "," Tools                   "," Integration with Mesos for resource management","done"
-	"         	   ","                         "," Prepare Docker images for deployment","done"
-	"              	   ","                         "," Visualization of neural net and debug information ","done"
-	"                  "," Binding                 "," Python binding for major components ","done"
-	"                  "," GPU                     "," Single node with multiple GPUs ","done"
-	"0.3 April 2016    "," GPU                     "," Multiple nodes, each with multiple GPUs","done"
-	"                  ","                         "," Heterogeneous training using both GPU and CPU [CcT](http://arxiv.org/abs/1504.04343)","done"
-	"                  ","                         "," Support cuDNN v4 "," done"
-	"                  "," Installation            "," Remove dependency on ZeroMQ, CZMQ, Zookeeper for single node training","done"
-	"                  "," Updater                 "," Add new SGD updaters including Adam, AdamMax and AdaDelta","done"
-	"                  "," Binding                 "," Enhance Python binding for training","done"
-	"1.0 July 2016     "," Programming abstraction ","Tensor with linear algebra, neural net and random operations "," "
-	"                  ","                         ","Updater for distributed parameter updating ",""
-	"                  "," Optimization            "," Execution and memory optimization",""
-	"                  "," Hardware                "," Use Cuda and Cudnn for Nvidia GPU",""
-	"                  ","                         "," Use OpenCL for AMD GPU or other devices",""
-	"                  "," Cross-platform          "," To extend from Linux to MacOS and Windows",""
-	"                  "," Examples                "," Speech recognition example",""
-	"                  ","                         ","Large image models, e.g., [GoogLeNet](http://arxiv.org/abs/1409.4842), [VGG](https://arxiv.org/pdf/1409.1556.pdf) and [Residual Net](http://arxiv.org/abs/1512.03385)",""
-	"     "," Rafiki                  "," Deep learning as a service "," "

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/31ae6bd4/doc/docs.rst
----------------------------------------------------------------------
diff --git a/doc/docs.rst b/doc/docs.rst
deleted file mode 100644
index 400b12a..0000000
--- a/doc/docs.rst
+++ /dev/null
@@ -1,6 +0,0 @@
-Documentation
-=============
-
-.. toctree::
-   docs/index
-   docs/zh/index

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/31ae6bd4/doc/docs/cnn.md
----------------------------------------------------------------------
diff --git a/doc/docs/cnn.md b/doc/docs/cnn.md
deleted file mode 100755
index 21ef1f7..0000000
--- a/doc/docs/cnn.md
+++ /dev/null
@@ -1,141 +0,0 @@
-#Quickstart - Cifar10 example
-Convolution neural network (CNN) is a type of feed-forward artificial neural network widely used for image classification. In this example, we will use a deep CNN model to do image classification for the [CIFAR10 dataset](http://www.cs.toronto.edu/~kriz/cifar.html).
-
-## Running instructions for CPP version
-Please refer to the [Installation](installation.html) page for how to install SINGA. Currently, the CNN example requires CUDNN; hence both CUDA and CUDNN should be installed and SINGA should be compiled with CUDA and CUDNN support.
-
-The Cifar10 dataset could be downloaded by running
-
-    # switch to cifar10 directory
-    $ cd ../examples/cifar10
-    # download data for CPP version
-    $ python download_data.py bin
-
-'bin' is for downloading the binary version of the Cifar10 data.
-
-During downloading, you should see the detailed output like
-
-     Downloading CIFAR10 from http://www.cs.toronto.edu/~kriz/cifar-10-binary.tar.gz
-     The tar file does exist. Extracting it now..
-     Finished!
-
-Now that you have prepared the data for this Cifar10 example, the final step is to execute the `run.sh` script,
-
-    # in SINGA_ROOT/examples/cifar10/
-    $ ./run.sh
-
-You should see detailed output as follows: the program first reads the data files in order, shows the statistics of the training and testing data, then prints the details of the neural net structure with some parameter information, and finally reports the performance details during the training and validation process. The number of epochs can be specified in the `run.sh` file.
-
-    Start training
-    Reading file cifar-10-batches-bin/data_batch_1.bin
-    Reading file cifar-10-batches-bin/data_batch_2.bin
-    Reading file cifar-10-batches-bin/data_batch_3.bin
-    Reading file cifar-10-batches-bin/data_batch_4.bin
-    Reading file cifar-10-batches-bin/data_batch_5.bin
-    Reading file cifar-10-batches-bin/test_batch.bin
-    Training samples = 50000, Test samples = 10000
-    conv1(32, 32, 32, )
-    pool1(32, 16, 16, )
-    relu1(32, 16, 16, )
-    lrn1(32, 16, 16, )
-    conv2(32, 16, 16, )
-    relu2(32, 16, 16, )
-    pool2(32, 8, 8, )
-    lrn2(32, 8, 8, )
-    conv3(64, 8, 8, )
-    relu3(64, 8, 8, )
-    pool3(64, 4, 4, )
-    flat(1024, )
-    ip(10, )
-    conv1_weight : 8.09309e-05
-    conv1_bias : 0
-    conv2_weight : 0.00797731
-    conv2_bias : 0
-    conv3_weight : 0.00795888
-    conv3_bias : 0
-    ip_weight : 0.00798683
-    ip_bias : 0
-    Messages will be appended to an existed file: train_perf
-    Messages will be appended to an existed file: val_perf
-    Epoch 0, training loss = 1.828369, accuracy = 0.329420, lr = 0.001000
-    Epoch 0, val loss = 1.561823, metric = 0.420600
-    Epoch 1, training loss = 1.465898, accuracy = 0.469940, lr = 0.001000
-    Epoch 1, val loss = 1.361778, metric = 0.513300
-    Epoch 2, training loss = 1.320708, accuracy = 0.529000, lr = 0.001000
-    Epoch 2, val loss = 1.242080, metric = 0.549100
-    Epoch 3, training loss = 1.213776, accuracy = 0.571620, lr = 0.001000
-    Epoch 3, val loss = 1.175346, metric = 0.582000
-
-The training details are stored in the `train_perf` file in the same directory, and the validation details in the `val_perf` file.
-
-
-## Running instructions for Python version
-To run the CNN example with the Python API, we need to compile SINGA with the Python binding enabled,
-
-    $ mkdir build && cd build
-    $ cmake -DUSE_PYTHON=ON ..
-    $ make
-
-Now download the Cifar10 dataset,
-
-    # switch to cifar10 directory
-    $ cd ../examples/cifar10
-    # download data for Python version
-    $ python download_data.py py
-
-During downloading, you should see the detailed output like
-
-     Downloading CIFAR10 from http://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
-     The tar file does exist. Extracting it now..
-     Finished!
-
-Then execute the `train.py` script to build and train the model
-
-    $ python train.py
-
-You should see output as follows, including the neural net structure with some parameter information, the data files being read, and the performance details during the training and testing process.
-
-    (32L, 32L, 32L)
-    (32L, 16L, 16L)
-    (32L, 16L, 16L)
-    (32L, 16L, 16L)
-    (32L, 16L, 16L)
-    (32L, 16L, 16L)
-    (32L, 8L, 8L)
-    (32L, 8L, 8L)
-    (64L, 8L, 8L)
-    (64L, 8L, 8L)
-    (64L, 4L, 4L)
-    (1024L,)
-    Start intialization............
-    conv1_weight gaussian 7.938460476e-05
-    conv1_bias constant 0.0
-    conv2_weight gaussian 0.00793507322669
-    conv2_bias constant 0.0
-    conv3_weight gaussian 0.00799657031894
-    conv3_bias constant 0.0
-    dense_weight gaussian 0.00804364029318
-    dense_bias constant 0.0
-    Loading data ..................
-    Loading data file cifar-10-batches-py/data_batch_1
-    Loading data file cifar-10-batches-py/data_batch_2
-    Loading data file cifar-10-batches-py/data_batch_3
-    Loading data file cifar-10-batches-py/data_batch_4
-    Loading data file cifar-10-batches-py/data_batch_5
-    Loading data file cifar-10-batches-py/test_batch
-    Epoch 0
-    training loss = 1.881866, training accuracy = 0.306360 accuracy = 0.420000
-    test loss = 1.602577, test accuracy = 0.412200
-    Epoch 1
-    training loss = 1.536011, training accuracy = 0.441940 accuracy = 0.500000
-    test loss = 1.378170, test accuracy = 0.507600
-    Epoch 2
-    training loss = 1.333137, training accuracy = 0.519960 accuracy = 0.520000
-    test loss = 1.272205, test accuracy = 0.540600
-    Epoch 3
-    training loss = 1.185212, training accuracy = 0.574120 accuracy = 0.540000
-    test loss = 1.211573, test accuracy = 0.567600
-
-This script calls `alexnet.py` to build the AlexNet model. After the training is finished, SINGA will save the model parameters into a checkpoint file `model.bin` in the same directory. We can then use this `model.bin` file for prediction.
-
-    $ python predict.py

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/31ae6bd4/doc/docs/device.rst
----------------------------------------------------------------------
diff --git a/doc/docs/device.rst b/doc/docs/device.rst
deleted file mode 100644
index e79d87a..0000000
--- a/doc/docs/device.rst
+++ /dev/null
@@ -1,38 +0,0 @@
-Device
-=======
-
-
-The Device abstraction represents any hardware device with memory and computation units.
-All `Tensor operations <tensor.html>`_ are scheduled by the resident device for execution.
-Tensor memory is also managed by the device's memory manager. Therefore, optimization
-of memory and execution is implemented in the Device class.
-
-Specific devices
-----------------
-Currently, SINGA has three Device implementations,
-
-1. CudaGPU for an Nvidia GPU card which runs Cuda code
-2. CppCPU for a CPU which runs Cpp code
-3. OpenclGPU for a GPU card which runs OpenCL code
-
-
-Python API
-----------
-
-.. automodule:: singa.device
-   :members: create_cuda_gpus, create_cuda_gpus_on, get_default_device
-
-
-The following code provides examples of creating devices,
-
-.. code:: python
-
-   from singa import device
-   cuda = device.create_cuda_gpu_on(0)  # use GPU card of ID 0
-   host = device.get_default_device()  # get the default host device (a CppCPU)
-   ary1 = device.create_cuda_gpus(2)  # create 2 devices, starting from ID 0
-   ary2 = device.create_cuda_gpus([0,2])  # create 2 devices on ID 0 and 2
-
-
-CPP API
----------

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/31ae6bd4/doc/docs/examples.rst
----------------------------------------------------------------------
diff --git a/doc/docs/examples.rst b/doc/docs/examples.rst
deleted file mode 100644
index b0b2af8..0000000
--- a/doc/docs/examples.rst
+++ /dev/null
@@ -1,6 +0,0 @@
-Examples
-========
-
-.. toctree::
-
-   examples/index

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/31ae6bd4/doc/docs/index.rst
----------------------------------------------------------------------
diff --git a/doc/docs/index.rst b/doc/docs/index.rst
deleted file mode 100644
index 2f6352e..0000000
--- a/doc/docs/index.rst
+++ /dev/null
@@ -1,10 +0,0 @@
-English
-=======
-
-.. toctree::
-
-   installation
-   software_stack
-   device
-   tensor
-   examples

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/31ae6bd4/doc/docs/installation.md
----------------------------------------------------------------------
diff --git a/doc/docs/installation.md b/doc/docs/installation.md
deleted file mode 100755
index 8ab617f..0000000
--- a/doc/docs/installation.md
+++ /dev/null
@@ -1,69 +0,0 @@
-# Building SINGA from source
-
-## Dependencies
-
-### Required
-* Google Protobuf (>=2.5)
-* BLAS (tested with OpenBLAS >=0.2.10)
-* CUDA (tested with 6.5, 7.0 and 7.5)
-* CUDNN (v4 and v5)
-* cmake (>=2.6)
-
-Users must install the above mandatory libraries.
-Currently CUDA and CUDNN are also mandatory, but it would become optional later.
-
-### Optional
-* Glog
-* OpenCV (tested with 2.4.8)
-* LMDB (tested with 0.9)
-
-
-## Instructions
-
-Please clone the newest code from [Github](https://github.com/apache/incubator-singa) and execute the following commands,
-
-
-    $ git clone https://github.com/apache/incubator-singa.git
-    $ cd incubator-singa/
-    # switch to dev branch
-    $ git checkout dev
-
-
-If you use CUDA, then [CNMeM](https://github.com/NVIDIA/cnmem) is necessary,
-which could be downloaded as
-
-    $ git submodule init
-    $ git submodule update
-
-
-### Linux OS
-
-GCC (>=4.8.1) is required to compile SINGA on Linux OS.
-In SINGA_ROOT, execute the following commands for compiling SINGA,
-
-    $ mkdir build && cd build
-    # generate Makefile for compilation
-    $ cmake ..
-    # compile SINGA
-    $ make
-
-Note that if you are using CUDNN, you need to let cmake know the paths to CUDNN,
-
-    $ export CMAKE_INCLUDE_PATH=<path to cudnn>/include:$CMAKE_INCLUDE_PATH
-    $ export CMAKE_LIBRARY_PATH=<path to cudnn>/lib64:$CMAKE_LIBRARY_PATH
-
-You can use `ccmake ..` to configure the compilation options including using
-LMDB, GLOG, etc.
-
-After compiling SINGA, you can run the unit tests by
-
-    $ ./bin/test_singa
-
-You can see all the testing cases with testing results. If SINGA passes all
-tests, then you have successfully installed SINGA. Please proceed to try the examples!
-
-
-### MacOS
-
-
-### Windows

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/31ae6bd4/doc/docs/neural-net.md
----------------------------------------------------------------------
diff --git a/doc/docs/neural-net.md b/doc/docs/neural-net.md
deleted file mode 100644
index c10baf8..0000000
--- a/doc/docs/neural-net.md
+++ /dev/null
@@ -1,327 +0,0 @@
-# Neural Net
-
----
-
-`NeuralNet` in SINGA represents an instance of user's neural net model. As the
-neural net typically consists of a set of layers, `NeuralNet` comprises
-a set of unidirectionally connected [Layer](layer.html)s.
-This page describes how to convert a user's neural net into
-the configuration of `NeuralNet`.
-
-<img src="../_static/images/model-category.png" align="center" width="200px"/>
-<span><strong>Figure 1 - Categorization of popular deep learning models.</strong></span>
-
-## Net structure configuration
-
-Users configure the `NeuralNet` by listing all layers of the neural net and
-specifying each layer's source layer names. Popular deep learning models can be
-categorized as Figure 1. The subsequent sections give details for each
-category.
-
-### Feed-forward models
-
-<div align = "left">
-<img src="../_static/images/mlp-net.png" align="center" width="200px"/>
-<span><strong>Figure 2 - Net structure of a MLP model.</strong></span>
-</div>
-
-Feed-forward models, e.g., CNN and MLP, can be easily configured as their layer
-connections are directed and contain no cycles. The
-configuration for the MLP model shown in Figure 2 is as follows,
-
-    net {
-      layer {
-        name : 'data"
-        type : kData
-      }
-      layer {
-        name : 'image"
-        type : kImage
-        srclayer: 'data'
-      }
-      layer {
-        name : 'label"
-        type : kLabel
-        srclayer: 'data'
-      }
-      layer {
-        name : 'hidden"
-        type : kHidden
-        srclayer: 'image'
-      }
-      layer {
-        name : 'softmax"
-        type : kSoftmaxLoss
-        srclayer: 'hidden'
-        srclayer: 'label'
-      }
-    }
-
-### Energy models
-
-<img src="../_static/images/rbm-rnn.png" align="center" width="500px"/>
-<span><strong>Figure 3 - Convert connections in RBM and RNN.</strong></span>
-
-
-For energy models including RBM, DBM,
-etc., their connections are undirected (i.e., Category B). To represent these models using
-`NeuralNet`, users can simply replace each connection with two directed
-connections, as shown in Figure 3a. In other words, for each pair of connected layers, their source
-layer field should include each other's name.
-The full [RBM example](rbm.html) has
-a detailed neural net configuration for an RBM model, which looks like
-
-    net {
-      layer {
-        name : "vis"
-        type : kVisLayer
-        param {
-          name : "w1"
-        }
-        srclayer: "hid"
-      }
-      layer {
-        name : "hid"
-        type : kHidLayer
-        param {
-          name : "w2"
-          share_from: "w1"
-        }
-        srclayer: "vis"
-      }
-    }
-
-### RNN models
-
-For recurrent neural networks (RNN), users can remove the recurrent connections
-by unrolling the recurrent layer.  For example, in Figure 3b, the original
-layer is unrolled into a new layer with 4 internal layers. In this way, the
-model is like a normal feed-forward model, thus can be configured similarly.
-The [RNN example](rnn.html) has a full neural net
-configuration for a RNN model.
-
-
-## Configuration for multiple nets
-
-Typically, a training job includes three neural nets for
-training, validation and test phase respectively. The three neural nets share most
-layers except the data layer, loss layer or output layer, etc. To avoid
-redundant configurations for the shared layers, users can use the `exclude`
-field to filter a layer in the neural net, e.g., the following layer will be
-filtered when creating the testing `NeuralNet`.
-
-
-    layer {
-      ...
-      exclude : kTest # filter this layer for creating test net
-    }
-
-
-
-## Neural net partitioning
-
-A neural net can be partitioned in different ways to distribute the training
-over multiple workers.
-
-### Batch and feature dimension
-
-<img src="../_static/images/partition_fc.png" align="center" width="400px"/>
-<span><strong>Figure 4 - Partitioning of a fully connected layer.</strong></span>
-
-
-Every layer's feature blob is considered a matrix whose rows are feature
-vectors. Thus, one layer can be split on two dimensions. Partitioning on
-dimension 0 (also called batch dimension) slices the feature matrix by rows.
-For instance, if the mini-batch size is 256 and the layer is partitioned into 2
-sub-layers, each sub-layer would have 128 feature vectors in its feature blob.
-Partitioning on this dimension has no effect on the parameters, as every
-[Param](param.html) object is replicated in the sub-layers. Partitioning on dimension
-1 (also called feature dimension) slices the feature matrix by columns. For
-example, suppose the original feature vector has 50 units, after partitioning
-into 2 sub-layers, each sub-layer would have 25 units. This partitioning may
-result in the [Param](param.html) object being split, as shown in
-Figure 4. Both the bias vector and weight matrix are
-partitioned into two sub-layers.
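-
-The two slicing schemes can be illustrated with plain numpy (an illustrative
-sketch only; SINGA performs this partitioning internally over layer feature blobs),
-
-    import numpy as np
-
-    feat = np.zeros((256, 50), dtype=np.float32)  # mini-batch of 256 feature vectors, 50 units each
-    # dimension 0 (batch): two sub-layers, each with 128 feature vectors
-    batch_parts = np.split(feat, 2, axis=0)       # shapes (128, 50) and (128, 50)
-    # dimension 1 (feature): two sub-layers, each with 25 units
-    feature_parts = np.split(feat, 2, axis=1)     # shapes (256, 25) and (256, 25)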
-
-
-### Partitioning configuration
-
-There are 4 partitioning schemes, whose configurations are given below,
-
-  1. Partitioning each single layer into sub-layers on the batch dimension (see
-  below). It is enabled by configuring the partition dimension of the layer to
-  0, e.g.,
-
-          # with other fields omitted
-          layer {
-            partition_dim: 0
-          }
-
-  2. Partitioning each single layer into sub-layers on the feature dimension (see
-  below).  It is enabled by configuring the partition dimension of the layer to
-  1, e.g.,
-
-          # with other fields omitted
-          layer {
-            partition_dim: 1
-          }
-
-  3. Partitioning all layers into different subsets. It is enabled by
-  configuring the location ID of a layer, e.g.,
-
-          # with other fields omitted
-          layer {
-            location: 1
-          }
-          layer {
-            location: 0
-          }
-
-
-  4. Hybrid partitioning combining strategies 1, 2 and 3. Hybrid partitioning is
-  useful for large models. An example application is to implement the
-  [idea proposed by Alex](http://arxiv.org/abs/1404.5997).
-  Hybrid partitioning is configured like,
-
-          # with other fields omitted
-          layer {
-            location: 1
-          }
-          layer {
-            location: 0
-          }
-          layer {
-            partition_dim: 0
-            location: 0
-          }
-          layer {
-            partition_dim: 1
-            location: 0
-          }
-
-Currently SINGA supports strategy-2 well. The other partitioning strategies
-are under test and will be released in a later version.
-
-## Parameter sharing
-
-Parameters can be shared in the following cases,
-
-  * sharing parameters among layers via user configuration. For example, the
-  visible layer and hidden layer of an RBM share the weight matrix, which is configured through
-  the `share_from` field as shown in the above RBM configuration. The
-  configurations must be the same (except name) for shared parameters.
-
-  * due to neural net partitioning, some `Param` objects are replicated into
-  different workers, e.g., partitioning one layer on batch dimension. These
-  workers share parameter values. SINGA controls this kind of parameter
-  sharing automatically; users do not need to do any configuration.
-
-  * the `NeuralNet` instances for training and testing (and validation) share most
-  layers, and thus share `Param` values.
-
-If the shared `Param` instances reside in the same process (possibly in different
-threads), they use the same chunk of memory space for their values. But they
-have different memory spaces for their gradients. In fact, their
-gradients will be averaged by the stub or server.
-
-## Advanced user guide
-
-### Creation
-
-    static NeuralNet* NeuralNet::Create(const NetProto& np, Phase phase, int num);
-
-The above function creates a `NeuralNet` for a given phase, and returns a
-pointer to the `NeuralNet` instance. The phase is in {kTrain,
-kValidation, kTest}. `num` is used for net partitioning which indicates the
-number of partitions.  Typically, a training job includes three neural nets for
-training, validation and test phase respectively. The three neural nets share most
-layers except the data layer, loss layer or output layer, etc. The `Create`
-function takes in the full net configuration including layers for training,
-validation and test.  It removes layers for phases other than the specified
-phase based on the `exclude` field in
-[layer configuration](layer.html):
-
-    layer {
-      ...
-      exclude : kTest # filter this layer for creating test net
-    }
-
-The filtered net configuration is passed to the constructor of `NeuralNet`:
-
-    NeuralNet::NeuralNet(NetProto netproto, int npartitions);
-
-The constructor first creates a graph representing the net structure in
-
-    Graph* NeuralNet::CreateGraph(const NetProto& netproto, int npartitions);
-
-Next, it creates a layer for each node and connects layers if their nodes are
-connected.
-
-    void NeuralNet::CreateNetFromGraph(Graph* graph, int npartitions);
-
-Since the `NeuralNet` instance may be shared among multiple workers, the
-`Create` function returns a pointer to the `NeuralNet` instance.
-
-### Parameter sharing
-
-`Param` sharing
-is enabled by first sharing the Param configuration (in `NeuralNet::Create`)
-to create two similar (e.g., the same shape) Param objects, and then calling
-(in `NeuralNet::CreateNetFromGraph`),
-
-    void Param::ShareFrom(const Param& from);
-
-It is also possible to share `Param`s of two nets, e.g., sharing parameters of
-the training net and the test net,
-
-    void NeuralNet::ShareParamsFrom(NeuralNet* other);
-
-It will call `Param::ShareFrom` for each Param object.
-
-### Access functions
-`NeuralNet` provides a couple of access functions to get the layers and params
-of the net:
-
-    const std::vector<Layer*>& layers() const;
-    const std::vector<Param*>& params() const;
-    Layer* name2layer(string name) const;
-    Param* paramid2param(int id) const;
-
-
-### Partitioning
-
-
-#### Implementation
-
-SINGA partitions the neural net in the `CreateGraph` function, which creates one
-node for each (partitioned) layer. For example, if one layer's partition
-dimension is 0 or 1, then it creates `npartitions` nodes for it; if the
-partition dimension is -1, a single node is created, i.e., no partitioning.
-Each node is assigned a partition (or location) ID. If the original layer is
-configured with a location ID, then the ID is assigned to each newly created node.
-These nodes are connected according to the connections of the original layers.
-Some connection layers will be added automatically.
-For instance, if two connected sub-layers are located at two
-different workers, then a pair of bridge layers is inserted to transfer the
-feature (and gradient) blob between them. When two layers are partitioned on
-different dimensions, a concatenation layer which concatenates feature rows (or
-columns) and a slice layer which slices feature rows (or columns) would be
-inserted. These connection layers help make the network communication and
-synchronization transparent to the users.
-
-#### Dispatching partitions to workers
-
-Each (partitioned) layer is assigned a location ID, based on which it is dispatched to one
-worker. Particularly, the pointer to the `NeuralNet` instance is passed
-to every worker within the same group, but each worker only computes over the
-layers that have the same partition (or location) ID as the worker's ID.  When
-every worker computes the gradients of the entire model parameters
-(strategy-2), we refer to this process as data parallelism.  When different
-workers compute the gradients of different parameters (strategy-3 or
-strategy-1), we call this process model parallelism.  The hybrid partitioning
-leads to hybrid parallelism where some workers compute the gradients of the
-same subset of model parameters while other workers compute on different model
-parameters.  For example, to implement hybrid parallelism for the
-[DCNN model](http://arxiv.org/abs/1404.5997), we set `partition_dim = 0` for
-lower layers and `partition_dim = 1` for higher layers.
-

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/31ae6bd4/doc/docs/overview.rst
----------------------------------------------------------------------
diff --git a/doc/docs/overview.rst b/doc/docs/overview.rst
deleted file mode 100644
index 18ad62b..0000000
--- a/doc/docs/overview.rst
+++ /dev/null
@@ -1,99 +0,0 @@
-Introduction
-==============
-
-
-SINGA is a general distributed deep learning platform for training big deep
-learning models over large datasets. It is designed with an intuitive
-programming model based on the layer abstraction. A variety
-of popular deep learning models are supported, namely feed-forward models including
-convolutional neural networks (CNN), energy models like restricted Boltzmann
-machine (RBM), and recurrent neural networks (RNN). Many built-in layers are
-provided for users. SINGA architecture is
-sufficiently flexible to run synchronous, asynchronous and hybrid training
-frameworks.  SINGA
-also supports different neural net partitioning schemes to parallelize the
-training of large models, namely partitioning on batch dimension, feature
-dimension or hybrid partitioning.
-
-
-Goals
------
-
-As a distributed system, the first goal of SINGA is to have good scalability. In other
-words, SINGA is expected to reduce the total training time to achieve a certain
-accuracy when given more computing resources (i.e., machines).
-
-
-The second goal is to make SINGA easy to use.
-It is non-trivial for programmers to develop and train models with deep and
-complex model structures.  Distributed training further increases the burden of
-programmers, e.g., data and model partitioning, and network communication.  Hence it is essential to
-provide an easy to use programming model so that users can implement their deep
-learning models/algorithms without much awareness of the underlying distributed
-platform.
-
-Principles
-----------
-
-Scalability is a challenging research problem for distributed deep learning
-training. SINGA provides a general architecture to exploit the scalability of
-different training frameworks. Synchronous training frameworks improve the
-efficiency of one training iteration, and
-asynchronous training frameworks improve the convergence rate. Given a fixed budget
-(e.g., cluster size), users can run a hybrid framework that maximizes the
-scalability by trading off between efficiency and convergence rate.
-
-SINGA comes with a programming model designed based on the layer abstraction, which
-is intuitive for deep learning models.  A variety of
-popular deep learning models can be expressed and trained using this programming model.
-
-System overview
----------------
-
-.. figure:: /image/sgd.png
-
-            Figure 1 - SGD flow
-
-Training a deep learning model means finding the optimal parameters involved in
-the transformation functions that generate good features for specific tasks.
-The goodness of a set of parameters is measured by a loss function, e.g.,
-`Cross-Entropy Loss <https://en.wikipedia.org/wiki/Cross_entropy>`_ . Since the
-loss functions are usually non-linear and non-convex, it is difficult to get a
-closed form solution. Typically, people use the stochastic gradient descent
-(SGD) algorithm, which randomly
-initializes the parameters and then iteratively updates them to reduce the loss
-as shown in Figure 1.
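-
-A toy sketch of this SGD loop (illustrative only, not SINGA code; the loss here is
-a simple quadratic, so its gradient equals the parameter vector),
-
-.. code:: python
-
-   import numpy as np
-
-   np.random.seed(0)
-   w = np.random.randn(3)      # random initialization
-   lr = 0.1                    # learning rate
-   for step in range(100):
-       grad = w                # gradient of the toy loss f(w) = ||w||^2 / 2
-       w -= lr * grad          # SGD update reduces the loss
-   print(w)                    # close to the optimum at 0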
-
-.. figure:: /image/overview.png
-
-           Figure 2 - SINGA overview
-
-SGD is used in SINGA to train
-parameters of deep learning models. The training workload is distributed over
-worker and server units as shown in Figure 2. In each
-iteration, every worker calls *TrainOneBatch* function to compute
-parameter gradients. *TrainOneBatch* takes a *NeuralNet* object
-representing the neural net, and visits layers of the *NeuralNet* in
-certain order. The resultant gradients are sent to the local stub that
-aggregates the requests and forwards them to corresponding servers for
-updating. Servers reply to workers with the updated parameters for the next
-iteration.
-
-
-Job submission
---------------
-
-To submit a job in SINGA (i.e., training a deep learning model),
-users pass the job configuration to the SINGA driver in the
-`main function <programming-guide.html>`_ . The job configuration
-specifies the four major components in Figure 2,
-
-  * a `NeuralNet <neural-net.html>`_ describing the neural net structure with the detailed layer setting and their connections;
-  * a `TrainOneBatch <train-one-batch.html>`_  algorithm which is tailored for different model categories;
-  * an `Updater <updater.html>`_  defining the protocol for updating parameters at the server side;
-  * a `Cluster Topology <distributed-training.html>`_ specifying the distributed architecture of workers and servers.
-
-This process is like the job submission in Hadoop, where users configure their
-jobs in the main function to set the mapper, reducer, etc.
-In Hadoop, users can configure their jobs with their own (or built-in) mapper and reducer; in SINGA, users
-can configure their jobs with their own (or built-in) layer, updater, etc.

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/31ae6bd4/doc/docs/software_stack.md
----------------------------------------------------------------------
diff --git a/doc/docs/software_stack.md b/doc/docs/software_stack.md
deleted file mode 100644
index c60b6a5..0000000
--- a/doc/docs/software_stack.md
+++ /dev/null
@@ -1,99 +0,0 @@
-# Software Stack
-
-SINGA's software stack includes three major components, namely, core, IO and
-model. Figure 1 illustrates these components together with the hardware.
-The core component provides memory management and tensor operations;
-IO has classes for reading (and writing) data from (to) disk and network; the
-model component provides data structures and algorithms for machine learning models,
-e.g., layers for neural network models, optimizers/initializer/metric/loss for
-general machine learning models.
-
-
-<img src="../_static/images/singav1-sw.png" align="center" width="500px"/>
-<br/>
-<span><strong>Figure 1 - SINGA V1 software stack.</strong></span>
-
-## Core
-
-[Tensor](tensor.html) and [Device](device.html) are the two core abstractions in SINGA. The Tensor class represents a
-multi-dimensional array, which stores model variables and provides linear algebra
-operations for machine learning
-algorithms, including matrix multiplication and random functions. Each tensor
-instance (i.e. a tensor) is allocated on a Device instance.
-Each Device instance (i.e. a device) is created against one hardware device,
-e.g. a GPU card or a CPU core. Devices manage the memory of tensors and execute
-tensor operations on their execution units, e.g. CPU threads or CUDA streams.
-
-Depending on the hardware and the programming language, SINGA has implemented
-the following specific device classes:
-
-* **CudaGPU** represents an Nvidia GPU card. The execution units are the CUDA streams.
-* **CppCPU** represents a normal CPU. The execution units are the CPU threads.
-* **OpenclGPU** represents a normal GPU card from either Nvidia or AMD.
-  The execution units are the CommandQueues. Given that OpenCL is compatible with
-  many hardware devices, e.g. FPGA and ARM, the OpenclGPU has the potential to be
-  extended for other devices.
-
-Different types of devices use different programming languages to write the kernel
-functions for tensor operations,
-
-* CppMath (tensor_math_cpp.h) implements the tensor operations using Cpp for CppCPU
-* CudaMath (tensor_math_cuda.h) implements the tensor operations using CUDA for CudaGPU
-* OpenclMath (tensor_math_opencl.h) implements the tensor operations using OpenCL for OpenclGPU
-
-In addition, different types of data, such as float32 and float16, could be supported by adding
-the corresponding tensor functions.
-
-Typically, users would create a device instance and pass it to create multiple
-tensor instances. When users call the Tensor functions, these functions invoke
-the corresponding implementation (CppMath/CudaMath/OpenclMath) automatically. In
-other words, the implementation of Tensor operations is transparent to users.
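-
-A minimal sketch of this workflow using the Python API (the calls below appear
-elsewhere in this code base; treat the exact signatures as assumptions),
-
-    from singa import device, tensor
-
-    host = device.get_default_device()  # a CppCPU device
-    x = tensor.Tensor((3, 5))           # allocated on the default host device
-    x.uniform(-1, 1)                    # dispatched to the CppMath implementation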
-
-Most machine learning algorithms can be expressed using (dense or sparse) tensors.
-Therefore, with the Tensor abstraction, SINGA is able to run a wide range of models,
-including deep learning models and other traditional machine learning models.
-
-The Tensor and Device abstractions are extensible to support a wide range of hardware devices
-using different programming languages. A new hardware device can be supported by
-adding a new Device subclass and the corresponding implementation of the Tensor
-operations (xxxMath).
-
-Optimizations in terms of speed and memory could be implemented by Device, which
-manages both operation execution and memory malloc/free. More optimization details
-are described in the [Device page](device.html).
-
-
-## Model
-
-On top of the Tensor and Device abstractions, SINGA provides some higher level
-classes for machine learning modules.
-
-* [Layer](layer.html) and its subclasses are specific for neural networks. Every layer provides
-  functions for forward propagating features and backward propagating gradients w.r.t the training loss functions.
-  They wrap the complex layer operations so that users can easily create neural nets
-  by connecting a set of layers.
-
-* [Initializer](initializer.html) and its subclasses provide various methods for initializing
-  model parameters (stored in Tensor instances), e.g., following uniform or Gaussian distributions.
-
-* [Loss](loss.html) and its subclasses define the training objective loss functions.
-  They implement both the computation of the loss value and the computation of the gradient
-  of the prediction w.r.t the objective loss. Example loss functions include squared error and cross entropy.
-
-* [Metric](metric.html) and its subclasses provide the function to measure the
-  performance of the model, e.g., the accuracy.
-
-* [Optimizer](optimizer.html) and its subclasses implement the methods for updating
-  model parameter values using parameter gradients, including SGD, AdaGrad, RMSProp, etc.
-
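-A short sketch of the optimizer in action (the `SGD` constructor argument and the
-`apply_with_lr` signature below are assumptions based on the v1 Python API),
-
-    from singa import optimizer, tensor
-
-    sgd = optimizer.SGD(momentum=0.9)   # momentum is forwarded to the update rule
-    w = tensor.Tensor((3, 5))           # parameter values
-    w.uniform(-0.1, 0.1)
-    g = tensor.Tensor((3, 5))           # parameter gradients
-    g.uniform(-0.01, 0.01)
-    # one update step: epoch 0, learning rate 0.01, applied to parameter 'w'
-    sgd.apply_with_lr(0, 0.01, g, w, 'w')
-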
-
-## IO
-
-The IO module consists of classes for data loading, data preprocessing and message passing.
-
-* Reader and its subclasses load string records from disk files
-* Writer and its subclasses write string records to disk files
-* Encoder and its subclasses encode Tensor instances into string records
-* Decoder and its subclasses decode string records into Tensor instances
-* Endpoint represents a communication endpoint and provides functions for passing messages between processes.
-* Message represents a communication message between Endpoint instances. It carries both metadata and payload.

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/31ae6bd4/doc/docs/tensor.rst
----------------------------------------------------------------------
diff --git a/doc/docs/tensor.rst b/doc/docs/tensor.rst
deleted file mode 100644
index 87d26ea..0000000
--- a/doc/docs/tensor.rst
+++ /dev/null
@@ -1,54 +0,0 @@
-Tensor
-========
-
-Each Tensor instance is a multi-dimensional array allocated on a specific
-Device instance. Tensor instances store variables and provide
-linear algebra operations over different types of hardware devices without user
-awareness. Note that users need to make sure the tensor operands are
-allocated on the same device, except for copy functions.
-
-
-Tensor implementation
----------------------
-
-SINGA has three different sets of implementations of Tensor functions, one for each
-type of Device.
-
-* 'tensor_math_cpp.h' implements operations using Cpp (with CBLAS) for CppCPU devices.
-* 'tensor_math_cuda.h' implements operations using Cuda (with cuBLAS) for CudaGPU devices.
-* 'tensor_math_opencl.h' implements operations using OpenCL for OpenclGPU devices.
-
-Python API
-----------
-
-There are two sets of tensor functions,
-
-1. Tensor member functions, which change the internal state of the Tensor instance.
-2. tensor module functions, which accept Tensor instances as arguments and return
-   new Tensor instances.
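-
-For example (a sketch; only `from_numpy` and `uniform` are assumed here),
-
-.. code:: python
-
-   import numpy as np
-   from singa import tensor
-
-   x = tensor.from_numpy(np.zeros((3, 5), dtype=np.float32))  # module function, returns a Tensor
-   x.uniform(-1, 1)  # member function, changes x's internal state in place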
-
-
-Create Tensor instances
-~~~~~~~~~~~~~~~~~~~~~~~
-
-.. autoclass:: singa.tensor.Tensor
-
-
-Tensor instances can be constructed from Numpy array,
-
-.. automodule:: singa.tensor
-   :members: from_numpy
-
-
-Set Tensor values
-~~~~~~~~~~~~~~~~~
-
-
-
-
-
-
-
-
-
-
-

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/31ae6bd4/doc/docs/zh/index.md
----------------------------------------------------------------------
diff --git a/doc/docs/zh/index.md b/doc/docs/zh/index.md
deleted file mode 100644
index 4b49d5f..0000000
--- a/doc/docs/zh/index.md
+++ /dev/null
@@ -1,9 +0,0 @@
-SINGA 中文文档
-==============
-
-.. toctree::
-
-   overview
-   installation_source
-   programming-guide
-

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/31ae6bd4/doc/downloads.md
----------------------------------------------------------------------
diff --git a/doc/downloads.md b/doc/downloads.md
deleted file mode 100644
index 31e7274..0000000
--- a/doc/downloads.md
+++ /dev/null
@@ -1,67 +0,0 @@
-## Download SINGA
----
-
-* Latest code: please clone the dev branch from [Github](https://github.com/apache/incubator-singa)
-
-* v0.3.0 (20 April 2016):
-    * [Apache SINGA 0.3.0](http://www.apache.org/dyn/closer.cgi/incubator/singa/0.3.0/apache-singa-incubating-0.3.0.tar.gz)
-      [\[MD5\]](https://dist.apache.org/repos/dist/release/incubator/singa/0.3.0/apache-singa-incubating-0.3.0.tar.gz.md5)
-      [\[KEYS\]](https://dist.apache.org/repos/dist/release/incubator/singa/0.3.0/KEYS)
-    * [Release Notes 0.3.0](releases/RELEASE_NOTES_0.3.0.html)
-    * New features and major updates,
-        * [Training on GPU cluster](v0.3.0/gpu.html) enables training of deep learning models over a GPU cluster.
-        * [Python wrapper improvement](v0.3.0/python.html) makes it easy to configure the job, including neural net and SGD algorithm.
-        * [New SGD updaters](v0.3.0/updater.html) are added, including Adam, AdaDelta and AdaMax.
-        * [Installation](v0.3.0/installation.html) has fewer dependent libraries for single node training.
-        * Heterogeneous training with CPU and GPU.
-        * Support cuDNN V4.
-        * Data prefetching.
-        * Fix some bugs.
-
-
-
-* v0.2.0 (14 January 2016):
-    * [Apache SINGA 0.2.0](http://www.apache.org/dyn/closer.cgi/incubator/singa/0.2.0/apache-singa-incubating-0.2.0.tar.gz)
-      [\[MD5\]](https://archive.apache.org/dist/incubator/singa/0.2.0/apache-singa-incubating-0.2.0.tar.gz.md5)
-      [\[KEYS\]](https://archive.apache.org/dist/incubator/singa/0.2.0/KEYS)
-    * [Release Notes 0.2.0](releases/RELEASE_NOTES_0.2.0.html)
-    * New features and major updates,
-        * [Training on GPU](v0.2.0/gpu.html) enables training of complex models on a single node with multiple GPU cards.
-        * [Hybrid neural net partitioning](v0.2.0/hybrid.html) supports data and model parallelism at the same time.
-        * [Python wrapper](v0.2.0/python.html) makes it easy to configure the job, including neural net and SGD algorithm.
-        * [RNN model and BPTT algorithm](v0.2.0/general-rnn.html) are implemented to support applications based on RNN models, e.g., GRU.
-        * [Cloud software integration](v0.2.0/distributed-training.html) includes Mesos, Docker and HDFS.
-        * Visualization of neural net structure and layer information, which is helpful for debugging.
-        * Linear algebra functions and random functions against Blobs and raw data pointers.
-        * New layers, including SoftmaxLayer, ArgSortLayer, DummyLayer, RNN layers and cuDNN layers.
-        * Update Layer class to carry multiple data/grad Blobs.
-        * Extract features and test performance for new data by loading previously trained model parameters.
-        * Add Store class for IO operations.
-
-
-* v0.1.0 (8 October 2015):
-    * [Apache SINGA 0.1.0](http://www.apache.org/dyn/closer.cgi/incubator/singa/apache-singa-incubating-0.1.0.tar.gz)
-      [\[MD5\]](https://archive.apache.org/dist/incubator/singa/apache-singa-incubating-0.1.0.tar.gz.md5)
-      [\[KEYS\]](https://archive.apache.org/dist/incubator/singa/KEYS)
-    * [Amazon EC2 image](https://console.aws.amazon.com/ec2/v2/home?region=ap-southeast-1#LaunchInstanceWizard:ami=ami-b41001e6)
-    * [Release Notes 0.1.0](releases/RELEASE_NOTES_0.1.0.html)
-    * Major features include,
-        * Installation using GNU build utility
-        * Scripts for job management with zookeeper
-        * Programming model based on NeuralNet and Layer abstractions.
-        * System architecture based on Worker, Server and Stub.
-        * Training models from three different model categories, namely, feed-forward models, energy models and RNN models.
-        * Synchronous and asynchronous distributed training frameworks using CPU
-        * Checkpoint and restore
-        * Unit test using gtest
-
-**Disclaimer**
-
-Apache SINGA is an effort undergoing incubation at The Apache Software
-Foundation (ASF), sponsored by the Apache Incubator PMC. Incubation is
-required of all newly accepted projects until a further review indicates that
-the infrastructure, communications, and decision making process have stabilized
-in a manner consistent with other successful ASF projects. While incubation
-status is not necessarily a reflection of the completeness or stability of the
-code, it does indicate that the project has yet to be fully endorsed by the
-ASF.

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/31ae6bd4/doc/en/_templates/layout.html
----------------------------------------------------------------------
diff --git a/doc/en/_templates/layout.html b/doc/en/_templates/layout.html
new file mode 100755
index 0000000..590e578
--- /dev/null
+++ b/doc/en/_templates/layout.html
@@ -0,0 +1,61 @@
+{#
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements.  See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership.  The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+#}
+{% extends "!layout.html" %}
+     
+{% block extrahead %}
+    <link href="{{ pathto("_static/style.css", True) }}" rel="stylesheet" type="text/css">
+{% endblock %}
+
+{% block footer %}
+
+<div class="rst-versions shift-up" data-toggle="rst-versions" role="note" aria-label="versions">
+<a href="http://incubator.apache.org/">
+<img src= "{{pathto('_static/'+ 'apache.jpg' , 1) }}">  
+</a>
+
+  <span class="rst-current-version" data-toggle="rst-current-version">
+    <span class="fa fa-book"> incubator-singa </span>
+    v: {{ version }}
+    <span class="fa fa-caret-down"></span>
+  </span>
+  <div class="rst-other-versions">
+    <dl>
+       <dd><a href="">English</a></dd>
+       <dd><a href="{{pathto('zh/'+ 'index.html' , 1) }}">\u4e2d\u6587</a></dd>	  
+	  <!--dd><a href="/jp/latest/">\u65e5\u672c\u8a9e</a></dd>
+	  <dd><a href="/kr/latest/">\ud55c\uad6d\uc5b4</a></dd>
+	  <dd><a href="/it/latest/">Italiano</a></dd>
+	  <dd><a href="/ar/latest/">\u0627\u0644\u0639\u0631\u0628\u064a\u0629</a></dd-->
+    </dl>
+    <dl>
+      <dt>Versions</dt>
+      <dd><a href="/{{ language }}/latest/">latest</a></dd>
+      <dd><a href="/{{ language }}/0.3.0/">v0.3.0</a></dd>
+    </dl>
+  </div>
+</div>
+
+ <a href="https://github.com/apache/incubator-singa">
+    <img style="position: absolute; top: 0; right: 0; border: 0; z-index: 10000;"
+        src="https://s3.amazonaws.com/github/ribbons/forkme_right_orange_ff7600.png"
+        alt="Fork me on GitHub">
+</a>
+
+{{ super() }}
+{% endblock %}

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/31ae6bd4/doc/en/community/issue-tracking.md
----------------------------------------------------------------------
diff --git a/doc/en/community/issue-tracking.md b/doc/en/community/issue-tracking.md
new file mode 100644
index 0000000..26b23dd
--- /dev/null
+++ b/doc/en/community/issue-tracking.md
@@ -0,0 +1,9 @@
+## Issue Tracking
+
+___
+
+SINGA uses [JIRA](https://www.atlassian.com/software/jira), a J2EE-based issue tracking and project management application.
+
+Issues, bugs, and feature requests should be submitted to the following issue tracking system for this project.
+
+* https://issues.apache.org/jira/browse/singa



[03/22] incubator-singa git commit: SINGA-237 New documentation files for SINGA v1.0

Posted by wa...@apache.org.
SINGA-237 New documentation files for SINGA v1.0

Updated the comments in the Python files so that Sphinx autodoc can generate the Python API documentation.

Fixed a bug in optimizer which ignored the momentum value


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/8cd55300
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/8cd55300
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/8cd55300

Branch: refs/heads/dev
Commit: 8cd55300ab30673414bbeeec7d68f1ddcd6393a2
Parents: 3299b0c
Author: Wei Wang <wa...@comp.nus.edu.sg>
Authored: Fri Aug 12 14:45:41 2016 +0800
Committer: wangwei <wa...@gmail.com>
Committed: Sun Aug 14 13:47:04 2016 +0800

----------------------------------------------------------------------
 cmake/Dependencies.cmake        |   5 +-
 doc/conf.py                     |  14 +-
 doc/docs/device.rst             |  29 +-
 doc/docs/index.rst              |   6 +
 doc/docs/initializer.rst        |  12 +
 doc/docs/layer.rst              |  14 +
 doc/docs/loss.rst               |   7 +
 doc/docs/metric.rst             |   8 +
 doc/docs/optimizer.rst          |  11 +
 doc/docs/tensor.md              |   7 -
 doc/docs/tensor.rst             |  30 ++
 doc/docs/utils.rst              |   6 +
 doc/index.rst                   |  28 +-
 examples/index.rst              |   6 +
 src/python/singa/device.py      |  31 ++
 src/python/singa/initializer.py |  86 ++++-
 src/python/singa/layer.py       | 417 ++++++++++++++----------
 src/python/singa/loss.py        | 105 +++++-
 src/python/singa/metric.py      |  49 ++-
 src/python/singa/optimizer.py   | 284 ++++++++--------
 src/python/singa/tensor.py      | 608 ++++++++++++++++++++++++++++++-----
 21 files changed, 1331 insertions(+), 432 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/8cd55300/cmake/Dependencies.cmake
----------------------------------------------------------------------
diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake
index ceef429..e533ca8 100644
--- a/cmake/Dependencies.cmake
+++ b/cmake/Dependencies.cmake
@@ -54,12 +54,13 @@ IF(USE_OPENCV)
     MESSAGE(STATUS "Found OpenCV_${OpenCV_VERSION} at ${OpenCV_INCLUDE_DIRS}")
     INCLUDE_DIRECTORIES(SYSTEM ${OpenCV_INCLUDE_DIRS})
     LIST(APPEND SINGA_LINKER_LIBS ${OpenCV_LIBRARIES})
-ENDIF()    
+ENDIF()
 
 #LIST(APPEND SINGA_LINKER_LIBS "/home/wangwei/local/lib/libopenblas.so")
 #MESSAGE(STATUS "link lib : " ${SINGA_LINKER_LIBS})
 
 IF(USE_PYTHON)
-    FIND_PACKAGE(PythonLibs REQUIRED)
+    FIND_PACKAGE(PythonLibs 2.7 REQUIRED)
+    FIND_PACKAGE(PythonInterp 2.7 REQUIRED)
     FIND_PACKAGE(SWIG 3.0 REQUIRED)
 ENDIF()

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/8cd55300/doc/conf.py
----------------------------------------------------------------------
diff --git a/doc/conf.py b/doc/conf.py
index 20ba51a..9f52d16 100755
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -19,7 +19,8 @@
 import os
 import sys
 sys.path.insert(0, os.path.abspath('.'))
-sys.path.insert(1, '../src/python/singa/')
+sys.path.insert(1, os.path.abspath('../build/python'))
+#autodoc_mock_imports = ['singa.device', 'singa.tensor', 'singa.layer']
 
 # -- General configuration ------------------------------------------------
 from recommonmark.parser import CommonMarkParser
@@ -35,9 +36,8 @@ source_parsers = {
 # Add any Sphinx extension module names here, as strings. They can be
 # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
 # ones.
-extensions = [
-'sphinx.ext.autodoc'
-]
+extensions = ['sphinx.ext.autodoc', 'sphinx.ext.napoleon']
+napoleon_google_docstring = True
 
 # Add any paths that contain templates here, relative to this directory.
 templates_path = ['_templates']
@@ -50,7 +50,7 @@ source_suffix = ['.rst', '.md']
 
 # The encoding of source files.
 #
-# source_encoding = 'utf-8-sig'
+source_encoding = 'utf-8-sig'
 
 # The master toctree document.
 master_doc = 'index'
@@ -150,7 +150,7 @@ html_theme = 'sphinx_rtd_theme'
 # The name of an image file (relative to this directory) to place at the top
 # of the sidebar.
 #
-html_logo = '/singa.png'
+html_logo = 'image/singa.png'
 
 # The name of an image file (relative to this directory) to use as a favicon of
 # the docs.  This file should be a Windows icon file (.ico) being 16x16 or 32x32
@@ -203,7 +203,7 @@ html_static_path = ['_static']
 
 # If true, links to the reST sources are added to the pages.
 #
-html_show_sourcelink = False
+# html_show_sourcelink = True
 
 # If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
 #

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/8cd55300/doc/docs/device.rst
----------------------------------------------------------------------
diff --git a/doc/docs/device.rst b/doc/docs/device.rst
index aa5defb..53faf48 100644
--- a/doc/docs/device.rst
+++ b/doc/docs/device.rst
@@ -2,7 +2,10 @@ Device
 =======
 
 
-The Device abstract represent a hardware device with memory and compuation units.
+The Device abstraction represents any hardware device with memory and computation units.
+All `Tensor operations <tensor.html>`_ are scheduled by the resident device for execution.
+Tensor memory is also managed by the device's memory manager. Therefore, optimization
+of memory and execution is implemented in the Device class.
 
 Specific devices
 ----------------
@@ -13,24 +16,14 @@ Currently, SINGA has three Device implementations,
 3. OpenclGPU for a GPU card which runs OpenCL code
 
 
-Create devices
----------------
-
 Python API
-~~~~~~~~~~
-
-.. autofunction:: device.create_cuda_gpus
-
-.. autofunction:: device.create_cuda_gpus_on
-
-.. autofunction:: device.create_cuda_gpu_on
-
-.. autofunction:: device.get_default_device
+----------
 
+.. automodule:: singa.device
+   :members: create_cuda_gpus, create_cuda_gpus_on, get_default_device
 
-The following code shows how to create devices,
 
-.. code:: python
+The following code provides examples of creating devices::
 
    from singa import device
    cuda = device.create_cuda_gpu_on(0)  # use GPU card of ID 0
@@ -39,9 +32,5 @@ The following code shows how to create devices,
    ary2 = device.create_cuda_gpus([0,2])  # create 2 devices on ID 0 and 2
 
 
-
 CPP API
-~~~~~~~
-
-
-
+---------

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/8cd55300/doc/docs/index.rst
----------------------------------------------------------------------
diff --git a/doc/docs/index.rst b/doc/docs/index.rst
index 8a74976..2294054 100644
--- a/doc/docs/index.rst
+++ b/doc/docs/index.rst
@@ -6,4 +6,10 @@ English
    installation
    software_stack
    device
+   tensor
+   layer
+   initializer
+   loss
+   metric
+   optimizer
    examples

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/8cd55300/doc/docs/initializer.rst
----------------------------------------------------------------------
diff --git a/doc/docs/initializer.rst b/doc/docs/initializer.rst
new file mode 100644
index 0000000..a190702
--- /dev/null
+++ b/doc/docs/initializer.rst
@@ -0,0 +1,12 @@
+Initializer
+===========
+
+Python API
+----------
+
+.. automodule:: singa.initializer
+   :members:
+   :member-order: bysource
+
+CPP API
+--------

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/8cd55300/doc/docs/layer.rst
----------------------------------------------------------------------
diff --git a/doc/docs/layer.rst b/doc/docs/layer.rst
new file mode 100644
index 0000000..62ef3c3
--- /dev/null
+++ b/doc/docs/layer.rst
@@ -0,0 +1,14 @@
+Layer
+======
+
+Python API
+-----------
+.. automodule:: singa.layer
+   :members:
+   :member-order: bysource
+   :show-inheritance:
+   :undoc-members:
+
+
+CPP API
+--------

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/8cd55300/doc/docs/loss.rst
----------------------------------------------------------------------
diff --git a/doc/docs/loss.rst b/doc/docs/loss.rst
new file mode 100644
index 0000000..27872dd
--- /dev/null
+++ b/doc/docs/loss.rst
@@ -0,0 +1,7 @@
+Loss
+=========
+
+
+.. automodule:: singa.loss
+   :members:
+   :show-inheritance:

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/8cd55300/doc/docs/metric.rst
----------------------------------------------------------------------
diff --git a/doc/docs/metric.rst b/doc/docs/metric.rst
new file mode 100644
index 0000000..35fa24e
--- /dev/null
+++ b/doc/docs/metric.rst
@@ -0,0 +1,8 @@
+Metric
+=========
+
+
+.. automodule:: singa.metric
+   :members:
+   :show-inheritance:
+   :member-order: bysource

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/8cd55300/doc/docs/optimizer.rst
----------------------------------------------------------------------
diff --git a/doc/docs/optimizer.rst b/doc/docs/optimizer.rst
new file mode 100644
index 0000000..486c01e
--- /dev/null
+++ b/doc/docs/optimizer.rst
@@ -0,0 +1,11 @@
+Optimizer
+=========
+
+
+.. automodule:: singa.optimizer
+   :members:
+   :member-order: bysource
+   :show-inheritance:
+   :undoc-members:
+
+

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/8cd55300/doc/docs/tensor.md
----------------------------------------------------------------------
diff --git a/doc/docs/tensor.md b/doc/docs/tensor.md
deleted file mode 100644
index eaf8362..0000000
--- a/doc/docs/tensor.md
+++ /dev/null
@@ -1,7 +0,0 @@
-# Tensor
-
-
-##
-
-
-##

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/8cd55300/doc/docs/tensor.rst
----------------------------------------------------------------------
diff --git a/doc/docs/tensor.rst b/doc/docs/tensor.rst
new file mode 100644
index 0000000..ff6142e
--- /dev/null
+++ b/doc/docs/tensor.rst
@@ -0,0 +1,30 @@
+Tensor
+========
+
+Each Tensor instance is a multi-dimensional array allocated on a specific
+Device instance. Tensor instances store variables and provide
+linear algebra operations over different types of hardware devices without user
+awareness. Note that users need to make sure the tensor operands are
+allocated on the same device, except for copy functions.
+
+
+Tensor implementation
+---------------------
+
+SINGA has three different sets of implementations of Tensor functions, one for each
+type of Device.
+
+* 'tensor_math_cpp.h' implements operations using Cpp (with CBLAS) for CppCPU devices.
+* 'tensor_math_cuda.h' implements operations using Cuda (with cuBLAS) for CudaGPU devices.
+* 'tensor_math_opencl.h' implements operations using OpenCL for OpenclGPU devices.
+
+Python API
+----------
+
+
+.. automodule:: singa.tensor
+   :members:
+
+
+CPP API
+---------

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/8cd55300/doc/docs/utils.rst
----------------------------------------------------------------------
diff --git a/doc/docs/utils.rst b/doc/docs/utils.rst
new file mode 100644
index 0000000..5306719
--- /dev/null
+++ b/doc/docs/utils.rst
@@ -0,0 +1,6 @@
+Misc.
+=========
+
+
+.. automodule:: singa.utils
+   :members:

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/8cd55300/doc/index.rst
----------------------------------------------------------------------
diff --git a/doc/index.rst b/doc/index.rst
index ec727b1..50c65d7 100755
--- a/doc/index.rst
+++ b/doc/index.rst
@@ -7,9 +7,9 @@ Welcome to Apache Singa
 =======================
 
 Recent News
-===========
+-----------
 
-* The **third release** is now available, 20 April, 2016. `Download SINGA v0.3.0 <downloads.html>`_ 
+* The **third release** is now available, 20 April, 2016. `Download SINGA v0.3.0 <downloads.html>`_
 
 * The **second release** is now available, 14 Jan, 2016. `Download SINGA v0.2.0 <downloads.html>`_.
 
@@ -34,7 +34,7 @@ Recent News
 * SINGA has been accepted by `Apache Incubator <http://incubator.apache.org/>`_, 17 March, 2015.
 
 Getting Started
-===============
+---------------
 * The `Introduction <docs/overview.html>`_ page gives an overview of SINGA.
 
 * The `Installation <docs/installation.html>`_ guide describes details on downloading and installing SINGA.
@@ -42,7 +42,7 @@ Getting Started
 * Please follow the `Quick Start <docs/quick-start.html>`_ guide to run simple applications on SINGA.
 
 Documentation
-=============
+-------------
 
 * Documentations are listed `here <docs.html>`_.
 
@@ -51,8 +51,8 @@ Documentation
 * Research publication list is available `here <http://www.comp.nus.edu.sg/~dbsystem/singa/research/publication/>`_.
 
 How to contribute
-=================
-  
+----------------------
+
 * Please subscribe to our development mailing list dev-subscribe@singa.incubator.apache.org.
 
 * If you find any issues using SINGA, please report it to the `Issue Tracker <https://issues.apache.org/jira/browse/singa>`_.
@@ -62,17 +62,17 @@ How to contribute
 More details on contributing to SINGA is described `here <develop/how-contribute.html>`_ .
 
 Citing SINGA
-============
+------------
 
 Please cite the following two papers if you use SINGA in your research:
 
 * B. C. Ooi, K.-L. Tan, S. Wang, W. Wang, Q. Cai, G. Chen, J. Gao, Z. Luo, A. K. H. Tung, Y. Wang, Z. Xie, M. Zhang, and K. Zheng. `SINGA: A distributed deep learning platform <http://www.comp.nus.edu.sg/~ooibc/singaopen-mm15.pdf>`_. ACM Multimedia (Open Source Software Competition) 2015 (`BibTex <http://www.comp.nus.edu.sg/~dbsystem/singa//assets/file/bib-oss.txt>`_).
 
-* W. Wang, G. Chen, T. T. A. Dinh, B. C. Ooi, K.-L.Tan, J. Gao, and S. Wang. `SINGA: putting deep learning in the hands of multimedia users <http://www.comp.nus.edu.sg/~ooibc/singa-mm15.pdf>`_. ACM Multimedia 2015 (`BibTex <http://www.comp.nus.edu.sg/~dbsystem/singa//assets/file/bib-singa.txt>`_, `Slides <files/mm2015.ppt>`_). 
+* W. Wang, G. Chen, T. T. A. Dinh, B. C. Ooi, K.-L.Tan, J. Gao, and S. Wang. `SINGA: putting deep learning in the hands of multimedia users <http://www.comp.nus.edu.sg/~ooibc/singa-mm15.pdf>`_. ACM Multimedia 2015 (`BibTex <http://www.comp.nus.edu.sg/~dbsystem/singa//assets/file/bib-singa.txt>`_, `Slides <files/mm2015.ppt>`_).
 
 .. toctree::
    :hidden:
-   
+
    downloads
    docs
 
@@ -85,25 +85,25 @@ Please cite the following two papers if you use SINGA in your research:
    develop/how-contribute
    develop/contribute-code
    develop/contribute-docs
-   
+
 .. toctree::
    :hidden:
    :maxdepth: 2
    :caption: Community
-   
+
    community/source-repository
    community/mail-lists
    community/issue-tracking
    community/team-list
-   
+
 
 
 License
-=======
+----------
 SINGA is released under `Apache License Version 2.0 <http://www.apache.org/licenses/LICENSE-2.0>`_.
 
 Disclaimers
-===========
+-----------
 
 Apache SINGA is an effort undergoing incubation at The Apache Software Foundation (ASF), sponsored by the Apache Incubator. Incubation is required of all newly accepted projects until a further review indicates that the infrastructure, communications, and decision making process have stabilized in a manner consistent with other successful ASF projects. While incubation status is not necessarily a reflection of the completeness or stability of the code, it does indicate that the project has yet to be fully endorsed by the ASF.
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/8cd55300/examples/index.rst
----------------------------------------------------------------------
diff --git a/examples/index.rst b/examples/index.rst
new file mode 100644
index 0000000..d6faf5d
--- /dev/null
+++ b/examples/index.rst
@@ -0,0 +1,6 @@
+.. toctree::
+
+   char-rnn/README
+   imagenet/README
+
+

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/8cd55300/src/python/singa/device.py
----------------------------------------------------------------------
diff --git a/src/python/singa/device.py b/src/python/singa/device.py
index aff3587..eff6783 100644
--- a/src/python/singa/device.py
+++ b/src/python/singa/device.py
@@ -68,21 +68,52 @@ def device_query(id, verbose=False):
 
 
 def create_cuda_gpus(num):
+    '''Create a list of CudaGPU devices.
+
+    Args:
+        num (int): number of devices to create.
+    Returns:
+        a list of swig converted CudaGPU devices.
+    '''
+
     return singa.Platform.CreateCudaGPUs(num)
 
 
 def create_cuda_gpu():
+    '''Create a single CudaGPU device.
+
+    Returns:
+        a swig converted CudaGPU device.
+    '''
+
     return singa.Platform.CreateCudaGPUs(1)[0]
 
 
 def create_cuda_gpus_on(device_ids):
+    '''Create a list of CudaGPU devices.
+
+    Args:
+        device_ids (list): a list of GPU card IDs.
+
+    Returns:
+        a list of swig converted CudaGPU devices.
+    '''
     return singa.Platform.CreateCudaGPUsOn(device_ids)
 
 
 def create_cuda_gpu_on(device_id):
+    '''Create a CudaGPU device on the given device ID.
+
+    Args:
+        device_id (int): GPU card ID.
+
+    Returns:
+        a swig converted CudaGPU device.
+    '''
     devices = create_cuda_gpus_on([device_id])
     return devices[0]
 
 
 def get_default_device():
+    '''Get the default host device which is a CppCPU device'''
     return singa.Platform.GetDefaultDevice()
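
The helpers above compose naturally; a minimal usage sketch (assuming a CUDA
build with at least two visible GPU cards)::

    from singa import device

    host = device.get_default_device()     # the CppCPU host device
    gpu = device.create_cuda_gpu()         # the first CudaGPU device
    gpus = device.create_cuda_gpus(2)      # a list of two CudaGPU devices
    gpu1 = device.create_cuda_gpu_on(1)    # the CudaGPU on card ID 1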

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/8cd55300/src/python/singa/initializer.py
----------------------------------------------------------------------
diff --git a/src/python/singa/initializer.py b/src/python/singa/initializer.py
index 15caed3..277fd2f 100644
--- a/src/python/singa/initializer.py
+++ b/src/python/singa/initializer.py
@@ -15,29 +15,113 @@
 # specific language governing permissions and limitations
 # under the License.
 # =============================================================================
-"""Popular initialization methods for parameter values (Tensor ojects)"""
+'''Popular initialization methods for parameter values (Tensor objects).
+
+Example usages::
+
+    from singa import tensor
+    from singa import initializer
+
+    x = tensor.Tensor((3, 5))
+    initializer.xavier(x)
+'''
 
 import math
 
 
+'''
+TODO(wangwei) update the uniform and gaussian initializers
+
+def uniform(t, fan_in=0, fan_out=0):
+    typically, for conv layer weight: fan_in = nb_filter * kh * kw,
+    fan_out = nb_channel * kh * kw
+    for dense layer weight, fan_in = input_feature_length,
+    fan_out = output_feature_length
+    # Ref: [Bengio and Glorot 2010]: Understanding the difficulty of
+    training deep feedforward neuralnetworks.
+
+    assert fan_in >0 or fan_out > 0, \
+        'fan_in and fan_out cannot be 0 at the same time'
+    avg = 1
+    if fan_in * fan_out == 0:
+      avg = 2
+    x = math.sqrt(3.0 * avg / (fan_in + fan_out))
+    t.uniform(-x, x)
+
+
+def gaussian(t, fan_in=0, fan_out=0):
+    typically, for conv layer weight: fan_in = nb_filter * kh * kw,
+    fan_out = nb_channel * kh * kw
+    for dense layer weight, fan_in = input_feature_length,
+    fan_out = output_feature_length
+
+    Ref Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun: Delving Deep into
+    Rectifiers: Surpassing Human-Level Performance on ImageNet Classification
+
+    assert fan_in >0 or fan_out > 0, \
+        'fan_in and fan_out cannot be 0 at the same time'
+    avg = 1
+    if fan_in * fan_out == 0:
+      avg = 2
+    std = math.sqrt(2.0 * avg / (fan_in + fan_out))
+    t.gaussian(0, std)
+'''
+
+
 def uniform(t, low=0, high=1):
+    '''Initialize the parameter values following a Uniform distribution.
+
+    Args:
+        t (Tensor): the parameter tensor
+        low (float): lower bound
+        high (float): upper bound
+    '''
     t.uniform(low, high)
 
 
 def gaussian(t, mean=0, std=0.01):
+    '''Initialize the parameter values following a Gaussian distribution.
+
+    Args:
+        t (Tensor): the parameter tensor
+        mean (float): mean of the distribution
+        std (float): standard deviation
+    '''
     t.gaussian(mean, std)
 
 
 def xavier(t):
+    '''Initialize the matrix parameter following a Uniform distribution
+    over [-sqrt(6/(fan_in + fan_out)), sqrt(6/(fan_in + fan_out))].
+
+    Args:
+        t (Tensor): the parameter tensor
+    '''
+
     scale = math.sqrt(6.0 / (t.shape[0] + t.shape[1]))
     t.uniform(-scale, scale)
 
 
 def glorot(t):
+    '''Initialize the matrix parameter following a Gaussian distribution
+    with mean = 0 and std = sqrt(2.0 / (nb_row + nb_col)).
+
+    Args:
+        t (Tensor): the parameter tensor
+    '''
     scale = math.sqrt(2.0 / (t.shape[0] + t.shape[1]))
     t.gaussian(0, 1)
     t *= scale
 
 
 def msra(t):
+    '''Initialize the matrix parameter following a Gaussian distribution
+    with mean = 0, std = math.sqrt(2.0 / nb_row).
+
+    Ref [He, Zhang, Ren and Sun 2015]: Specifically accounts for ReLU
+    nonlinearities.
+
+    Args:
+        t (Tensor): the parameter tensor
+    '''
     t.gaussian(0, math.sqrt(2.0 / t.shape[0]))
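
A short sketch combining the initializers above; the shapes are arbitrary
and the Tensor constructor is used as in the module docstring::

    from singa import tensor
    from singa import initializer

    w = tensor.Tensor((100, 200))
    initializer.xavier(w)      # uniform over [-sqrt(6/300), sqrt(6/300)]

    v = tensor.Tensor((100, 200))
    initializer.msra(v)        # gaussian, std = sqrt(2/100), for ReLU nets

    b = tensor.Tensor((1, 200))
    initializer.uniform(b, low=-0.1, high=0.1)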

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/8cd55300/src/python/singa/layer.py
----------------------------------------------------------------------
diff --git a/src/python/singa/layer.py b/src/python/singa/layer.py
index c8c8c05..0759716 100644
--- a/src/python/singa/layer.py
+++ b/src/python/singa/layer.py
@@ -14,7 +14,30 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # =============================================================================
-""" Python layers which wraps the C++ layers by providing easy to construct APIs
+""" Python layers wrap the C++ layers to provide simpler construction APIs.
+
+Example usages::
+
+    from singa import layer
+    from singa import tensor
+    from singa import device
+    from singa.model_pb2 import kTrain
+
+    layer.engine = 'cudnn'  # to use cudnn layers
+    dev = device.create_cuda_gpu()
+
+    # create a convolution layer
+    conv = layer.Conv2D('conv', 32, 3, 1, pad=1, input_sample_shape=(3, 32, 32))
+    conv.to_device(dev)  # move the layer data onto a CudaGPU device
+    x = tensor.Tensor((3, 32, 32), dev)
+    x.uniform(-1, 1)
+    y = conv.forward(kTrain, x)
+
+    dy = tensor.Tensor()
+    dy.reset_like(y)
+    dy.set_value(0.1)
+    # dp is a list of tensors for parameter gradients
+    dx, dp = conv.backward(kTrain, dy)
 """
 
 from sets import Set
@@ -22,23 +45,37 @@ from . import singa_wrap
 from .proto import model_pb2
 import tensor
 
-# engine could be 'cudnn', 'singa', which is used to create layers.
-# e.g., CudnnConvolution layer is identified by 'cudnn_convolution'
-# Convolution layer is identified by 'singa_convolution'
-# engine is case insensitive
+
 engine = 'cudnn'
+'''engine is the prefix of layer identifier.
+
+The value could be one of [**'cudnn', 'singacpp', 'singacuda', 'singacl'**], for
+layers implemented using the cudnn library, Cpp, Cuda and OpenCL respectively.
+For example, CudnnConvolution layer is identified by 'cudnn_convolution';
+'singacpp_convolution' is for the Convolution layer.
+Some layers' implementations use only Tensor functions and are therefore
+transparent to the underlying devices. These layers have multiple
+identifiers, e.g., singacpp_dropout, singacuda_dropout and singacl_dropout
+are all for the Dropout layer.
+
+engine is case insensitive. Each python layer would create the correct specific
+layer using the engine attribute.
+'''
 
 
 class Layer(object):
-    """Base Python layer class.
+    '''Base Python layer class.
 
-    Usages:
-        1.  construct layer without input_sample_shapes, goto 2;
-            construct layer with input_sample_shapes, goto 3;
+    Typically, the life cycle of a layer instance includes:
+        1. construct layer without input_sample_shapes, goto 2;
+           construct layer with input_sample_shapes, goto 3;
         2. call setup to create the parameters and setup other meta fields
         3. call forward or access layer members
         4. call backward and get parameters for update
-    """
+
+    Args:
+        name (str): layer name
+    '''
 
     def __init__(self, name, **kwargs):
         self.layer = None  # layer converted by swig
@@ -49,20 +86,24 @@ class Layer(object):
         self.has_setup = False
 
     def param_names(self):
+        '''
+        Returns:
+            a list of strings, one for the name of one parameter Tensor
+        '''
         names = []
         for x in self.param_specs:
             names.append(x['name'])
         return names
 
     def setup(self, in_shapes):
-        """Call the C++ setup function to create params and set some meta data.
+        '''Call the C++ setup function to create params and set some meta data.
 
         Args:
             in_shapes: if the layer accepts a single input Tensor, in_shapes is
                 a single tuple specifying the inpute Tensor shape; if the layer
                 accepts multiple input Tensor (e.g., the concatenation layer),
-                in_shapes is a tuple of tuples, each for one input Tensor shape
-        """
+                in_shapes is a tuple of tuples, each for one input Tensor shape
+        '''
         if self.has_setup:
             return
         self.layer.Setup(list(in_shapes),
@@ -70,54 +111,92 @@ class Layer(object):
         self.has_setup = True
 
     def get_output_sample_shape(self):
+        '''Called after setup to get the shape of the output sample(s).
+
+        Returns:
+            a tuple for a single output Tensor or a list of tuples if this layer
+            has multiple outputs
+        '''
         assert self.has_setup, \
             'Must call setup() before get_output_sample_shape()'
         return self.layer.GetOutputSampleShape()
 
     def param_values(self):
-        """Return param value tensors.
+        '''Return param value tensors.
 
-        Do not store these tensors as layer members because cpp Tensor could be
-        moved onto diff devices due to the change of layer device. However, the
-        py tensors would not update its internal cpp tensor automatically.
-        """
+        Parameter tensors are not stored as layer members because cpp Tensor
+        could be moved onto different devices due to the change of layer device,
+        which would result in inconsistency.
+
+        Returns:
+            a list of tensors, one for each parameter
+        '''
         return tensor.from_raw_tensors(self.layer.param_values())
 
-    def forward(self, flag, input):
+    def forward(self, flag, x):
         '''Forward propagate through this layer.
 
         Args:
-            flag, kTrain or kEval
-            input, an input tensor
+            flag (int): kTrain or kEval
+            x (Tensor or list<Tensor>): an input tensor if the layer is
+                connected from a single layer; a list of tensors if the layer
+                is connected from multiple layers.
 
         Return:
-            a tensor for the transformed feature
+            a tensor if the layer is connected to a single layer; a list of
+            tensors if the layer is connected to multiple layers.
         '''
         assert self.has_setup, 'Must call setup() before forward()'
-        assert isinstance(input, tensor.Tensor), 'input must be py Tensor'
-        y = self.layer.Forward(flag, input.singa_tensor)
-        return tensor.from_raw_tensor(y)
+        if type(x) == list:
+            xs = []
+            for t in x:
+                xs.append(t.singa_tensor)
+        else:
+            assert isinstance(x, tensor.Tensor), \
+                'input must be a Tensor or a list of Tensor'
+            xs = x.singa_tensor
+        y = self.layer.Forward(flag, xs)
+        if type(y) == list:
+            return tensor.from_raw_tensors(y)
+        else:
+            return tensor.from_raw_tensor(y)
 
-    def backward(self, flag, grad):
-        '''Backward propagate through this layer.
+    def backward(self, flag, dy):
+        '''Backward propagate gradients through this layer.
 
         Args:
-            flag, for future use.
-            grad, gradient of the returned values of the forward function.
-
+            flag (int): for future use.
+            dy (Tensor or list<Tensor>): the gradient tensor(s) of the
+                objective loss w.r.t. y
         Return:
-            <dx, <dp1, dp2..>>, dx is the gradient of the input of the
-            forward function, dpi is the gradient of the i-th parameter
+            <dx, <dp1, dp2..>>, dx is a (set of) tensor(s) for the gradient
+            of x, and dpi is the gradient of the i-th parameter
         '''
-        assert isinstance(grad, tensor.Tensor), 'grad must be py Tensor'
-        ret = self.layer.Backward(flag, grad.singa_tensor)
-        return tensor.from_raw_tensor(ret[0]), tensor.from_raw_tensors(ret[1])
+        if type(dy) == list:
+            dys = []
+            for t in dy:
+                dys.append(t.singa_tensor)
+        else:
+            assert isinstance(dy, tensor.Tensor), \
+                'the input must be a Tensor or a set of Tensor'
+            dys = dy.singa_tensor
+        ret = self.layer.Backward(flag, dys)
+        if type(ret[0]) == list:
+            dxs = tensor.from_raw_tensors(ret[0])
+        else:
+            dxs = tensor.from_raw_tensor(ret[0])
+        return dxs, tensor.from_raw_tensors(ret[1])
 
     def to_device(self, device):
+        '''Move layer state tensors onto the given device.
+
+        Args:
+            device: swig converted device, created using singa.device
+        '''
         self.layer.ToDevice(device)
 
     def as_type(self, dtype):
-        self.layer.AsType(dtype)
+        pass
 
     def __copy__(self):
         pass
@@ -127,43 +206,42 @@ class Layer(object):
 
 
 class Conv2D(Layer):
+    """Construct a layer for 2D convolution.
 
+    Args:
+        nb_kernels (int): num of the channels (kernels) of the output Tensor
+        kernel: an integer or a pair of integers for kernel height and width
+        stride: an integer or a pair of integers for stride height and width
+        border_mode (string): padding mode, case-insensitive,
+            'valid' -> padding is 0 for height and width
+            'same' -> padding is half of the kernel (floor); the kernel size
+            must be an odd number.
+        cudnn_prefer (string): the preferred algorithm for cudnn convolution
+            which could be 'fatest', 'autotune', 'limited_workspace' and
+            'no_workspace'
+        data_format (string): either 'NCHW' or 'NHWC'
+        use_bias (bool): True or False
+        pad: an integer or a pair of integers for padding height and width
+        W_specs (dict): used to specify the weight matrix specs, fields
+            include,
+            'name' for parameter name
+            'lr_mult' for learning rate multiplier
+            'decay_mult' for weight decay multiplier
+            'init' for init method, which could be 'gaussian', 'uniform',
+            'xavier' and ''
+            'std', 'mean', 'high', 'low' for corresponding init methods
+            TODO(wangwei) 'clamp' for gradient constraint, value is scalar
+            'regularizer' for regularization, currently support 'l2'
+        b_specs (dict): hyper-parameters for bias vector, similar as W_specs
+        name (string): layer name.
+        input_sample_shape: 3d tuple for the shape of the input Tensor
+            without the batchsize, e.g., (channel, height, width) or
+            (height, width, channel)
+    """
     def __init__(self, name, nb_kernels, kernel=3, stride=1, border_mode='same',
                  cudnn_prefer='fatest', data_format='NCHW',
                  use_bias=True, W_specs=None, b_specs=None,
                  pad=None, input_sample_shape=None):
-        """Construct a layer for 2D convolution.
-
-        Args:
-            nb_kernels (int): num of the channels (kernels) of the input Tensor
-            kernel: an integer or a pair of integers for kernel height and width
-            stride: an integer or a pair of integers for stride height and width
-            border_mode (string): padding mode, case in-sensitive,
-                'valid' -> padding is 0 for height and width
-                'same' -> padding is half of the kernel (floor),
-                    the kernel must be odd number.
-            cudnn_prefer (string): the preferred algorithm for cudnn convolution
-                which could be 'fatest', 'autotune', 'limited_workspace' and
-                'no_workspace'
-            data_format (string): either 'NCHW' or 'NHWC'
-            use_bias (bool): True or False
-            pad: an integer or a pair of integers for padding height and width
-            W_specs (dict): used to specify the weight matrix specs, fields
-                include,
-                'name' for parameter name
-                'lr_mult' for learning rate multiplier
-                'decay_mult' for weight decay multiplier
-                'init' for init method, which could be 'gaussian', 'uniform',
-                'xavier' and ''
-                'std', 'mean', 'high', 'low' for corresponding init methods
-                TODO(wangwei) 'clamp' for gradient constraint, value is scalar
-                'regularizer' for regularization, currently support 'l2'
-            b_specs (dict): hyper-parameters for bias vector, similar as W_specs
-            name (string): layer name.
-            input_sample_shape: 3d tuple for the shape of the input Tensor
-                without the batchsize, e.g., (channel, height, width) or
-                (height, width, channel)
-        """
         super(Conv2D, self).__init__(name)
         assert data_format == 'NCHW', 'Not supported data format: %s ' \
             'only "NCHW" is enabled currently' % (data_format)
@@ -195,19 +273,19 @@ class Conv2D(Layer):
 
 
 class Conv1D(Conv2D):
+    """Construct a layer for 1D convolution.
+
+    Most of the args are the same as those for Conv2D except the kernel,
+    stride, and pad, which are scalars instead of tuples.
+    input_sample_shape is a tuple with a single value for the input feature
+    length.
+    """
 
     def __init__(self, name, nb_kernels, kernel=3, stride=1,
                  border_mode='same', cudnn_prefer='fatest',
                  use_bias=True, W_specs={'init': 'Xavier'},
                  b_specs={'init': 'Constant', 'value': 0}, pad=None,
                  input_sample_shape=None):
-        """Construct a layer for 1D convolution.
-
-        Most of the args are the same as those for Conv2D except the kernel,
-        stride, pad, which is a scalar instead of a tuple.
-        input_sample_shape is a tuple with a single value for the input feature
-        length
-        """
         pad = None
         if pad is not None:
             pad = (0, pad)
@@ -227,7 +305,15 @@ class Conv1D(Conv2D):
 
 
 class Pooling2D(Layer):
+    '''2D pooling layer providing max/avg pooling.
+
+    All args are the same as those for Conv2D, except the following one
 
+    Args:
+        mode: pooling type, model_pb2.PoolingConf.MAX or
+            model_pb2.PoolingConf.AVE
+
+    '''
     def __init__(self, name, mode, kernel=3, stride=2, border_mode='same',
                  pad=None, data_format='NCHW', input_sample_shape=None):
         super(Pooling2D, self).__init__(name)
@@ -312,28 +398,26 @@ class AvgPooling1D(AvgPooling2D):
 
 
 class BatchNormalization(Layer):
-    # TODO(wangwei) add mode and epsilon arguments
+    """Batch-normalization.
 
+    Args:
+        momentum (float): for running average mean and variance.
+        beta_specs (dict): dictionary includes the fields for the beta
+            param:
+            'name' for parameter name
+            'lr_mult' for learning rate multiplier
+            'decay_mult' for weight decay multiplier
+            'init' for init method, which could be 'gaussian', 'uniform',
+            'xavier' and ''
+            'std', 'mean', 'high', 'low' for corresponding init methods
+            'clamp' for gradient constraint, value is scalar
+            'regularizer' for regularization, currently support 'l2'
+        gamma_specs (dict): similar to beta_specs, but for the gamma param.
+        name (string): layer name
+        input_sample_shape (tuple): with at least one integer
+    """
     def __init__(self, name, momentum=0.9,
                  beta_specs=None, gamma_specs=None, input_sample_shape=None):
-        """Batch-normalization.
-
-        Args:
-            momentum (float): for running average mean and variance.
-            beta_specs (dict): dictionary includes the fields for the beta
-                param:
-                'name' for parameter name
-                'lr_mult' for learning rate multiplier
-                'decay_mult' for weight decay multiplier
-                'init' for init method, which could be 'gaussian', 'uniform',
-                'xavier' and ''
-                'std', 'mean', 'high', 'low' for corresponding init methods
-                'clamp' for gradient constraint, value is scalar
-                'regularizer' for regularization, currently support 'l2'
-            gamma_specs (dict): similar to beta_specs, but for the gamma param.
-            name (string): layer name
-            input_sample_shape (tuple): with at least one integer
-        """
         super(BatchNormalization, self).__init__(name)
         conf = self.conf.batchnorm_conf
         conf.factor = momentum
@@ -362,16 +446,17 @@ class BatchNormalization(Layer):
 
 
 class LRN(Layer):
+    """Local response normalization.
+
+    Args:
+        size (int): # of channels involved in the cross-channel
+            normalization.
+        mode (string): 'cross_channel'
+        input_sample_shape (tuple): 3d tuple, (channel, height, width)
+    """
+
     def __init__(self, name, size=5, alpha=1, beta=0.75, mode='cross_channel',
                  k=1, input_sample_shape=None):
-        """Local response normalization.
-
-        Args:
-            size (int): # of channels to be crossed
-                normalization.
-            mode (string): 'cross_channel'
-            input_sample_shape (tuple): 3d tuple, (channel, height, width)
-        """
         super(LRN, self).__init__(name)
         conf = self.conf.lrn_conf
         conf.local_size = size
@@ -388,29 +473,28 @@ class LRN(Layer):
 
 
 class Dense(Layer):
+    """Apply linear/affine transformation, also called inner-product or
+    fully connected layer.
 
+    Args:
+        num_output (int): output feature length.
+        use_bias (bool): whether to add a bias vector to the transformed feature
+        W_specs (dict): specs for the weight matrix
+            'name' for parameter name
+            'lr_mult' for learning rate multiplier
+            'decay_mult' for weight decay multiplier
+            'init' for init method, which could be 'gaussian', 'uniform',
+            'xavier' and ''
+            'std', 'mean', 'high', 'low' for corresponding init methods
+            'clamp' for gradient constraint, value is scalar
+            'regularizer' for regularization, currently support 'l2'
+        b_specs (dict): specs for the bias vector, same fields as W_specs.
+        W_transpose (bool): if True, output = x * W.T + b
+        input_sample_shape (tuple): input feature length
+    """
     def __init__(self, name, num_output, use_bias=True,
                  W_specs=None, b_specs=None,
                  W_transpose=True, input_sample_shape=None):
-        """Apply linear/affine transformation, also called inner-product or
-        fully connected layer.
-
-        Args:
-            num_output (int): output feature length.
-            use_bias (bool): add a bias vector or not to the transformed feature
-            W_specs (dict): specs for the weight matrix
-                'name' for parameter name
-                'lr_mult' for learning rate multiplier
-                'decay_mult' for weight decay multiplier
-                'init' for init method, which could be 'gaussian', 'uniform',
-                'xavier' and ''
-                'std', 'mean', 'high', 'low' for corresponding init methods
-                'clamp' for gradient constraint, value is scalar
-                'regularizer' for regularization, currently support 'l2'
-            b_specs (dict): specs for the bias vector, same fields as W_specs.
-            W_transpose (bool): if true, output=x*W.T+b;
-            input_sample_shape (tuple): input feature length
-        """
         super(Dense, self).__init__(name)
         conf = self.conf.dense_conf
         conf.num_output = num_output
@@ -435,14 +519,14 @@ class Dense(Layer):
 
 
 class Dropout(Layer):
+    """Droput layer.
 
-    def __init__(self, name, p=0.5, input_sample_shape=None):
-        """Droput layer.
+    Args:
+        p (float): probability of dropping out an element, i.e., setting it to 0
+        name (string): layer name
+    """
 
-        Args:
-            p (float): probability for dropping out the element, i.e., set to 0
-            name (string): layer name
-        """
+    def __init__(self, name, p=0.5, input_sample_shape=None):
         super(Dropout, self).__init__(name)
         conf = self.conf.dropout_conf
         conf.dropout_ratio = p
@@ -456,15 +540,14 @@ class Dropout(Layer):
 
 
 class Activation(Layer):
+    """Activation layers.
 
+    Args:
+        name (string): layer name
+        mode (string): 'relu', 'sigmoid', or 'tanh'
+        input_sample_shape (tuple): shape of a single sample
+    """
     def __init__(self, name, mode='relu', input_sample_shape=None):
-        """Activation layers.
-
-        Args:
-            name (string): layer name
-            mode (string): 'relu', 'sigmoid', or 'tanh'
-            input_sample_shape (tuple): shape of a single sample
-        """
         super(Activation, self).__init__(name)
         self.conf.type = (engine + '_' + mode).lower()
         _check_engine(engine, ['cudnn', 'singa'])
@@ -474,15 +557,14 @@ class Activation(Layer):
 
 
 class Softmax(Layer):
+    """Apply softmax.
 
+    Args:
+        axis (int): reshape the input as a matrix with the dimension
+            [0,axis) as the row, the [axis, -1) as the column.
+        input_sample_shape (tuple): shape of a single sample
+    """
     def __init__(self, name, axis=1, input_sample_shape=None):
-        """Apply softmax.
-
-        Args:
-            axis (int): reshape the input as a matrix with the dimension
-                [0,axis) as the row, the [axis, -1) as the column.
-            input_sample_shape (tuple): shape of a single sample
-        """
         super(Softmax, self).__init__(name)
         # conf = self.conf.softmax_conf
         # conf.axis = axis
@@ -493,14 +575,14 @@ class Softmax(Layer):
 
 
 class Flatten(Layer):
+    """Reshape the input tensor into a matrix.
 
+    Args:
+        axis (int): reshape the input as a matrix with the dimension
+            [0,axis) as the row, the [axis, -1) as the column.
+        input_sample_shape (tuple): shape for a single sample
+    """
     def __init__(self, name, axis=1, input_sample_shape=None):
-        """Reshape the input tensor into a matrix.
-        Args:
-            axis (int): reshape the input as a matrix with the dimension
-                [0,axis) as the row, the [axis, -1) as the column.
-            input_sample_shape (tuple): shape for a single sample
-        """
         super(Flatten, self).__init__(name)
         conf = self.conf.flatten_conf
         conf.axis = axis
@@ -511,26 +593,27 @@ class Flatten(Layer):
 
 
 class RNN(Layer):
+    '''Recurrent layer with 4 types of units, namely lstm, gru, tanh and relu.
+
+    Args:
+        hidden_size: hidden feature size, the same for all stacks of layers.
+        rnn_mode: decides the rnn unit, which could be one of 'lstm', 'gru',
+            'tanh' and 'relu', refer to cudnn manual for each mode.
+        num_stacks: num of stacks of rnn layers. It is different from the
+            unrolling sequence length.
+        input_mode: 'linear' converts the input feature x by a linear
+            transformation to get a feature vector of size hidden_size;
+            'skip' does nothing but requires the input feature size to equal
+            hidden_size
+        bidirectional: True for a bidirectional RNN
+        param_specs: config for initializing the RNN parameters.
+        input_sample_shape: includes a single integer for the input sample
+            feature size.
+    '''
+
     def __init__(self, name, hidden_size, rnn_mode='lstm', dropout=0.0,
                  num_stacks=1, input_mode='linear', bidirectional=False,
                  param_specs=None, input_sample_shape=None):
-        '''Wrapper for singa::RNN class.
-
-        Args:
-            hidden_size, hidden feature size, the same for all stacks of layers.
-            rnn_mode, decides the rnn unit, which could be one of 'lstm', 'gru',
-                'tanh' and 'relu', refer to cudnn manual for each mode.
-            num_stacks, num of stacks of rnn layers. It is different to the
-                unrolling seqence length.
-            input_mode, 'linear' convert the input feature x by by a linear
-                transformation to get a feature vector of size hidden_size;
-                'skip' does nothing but requires the input feature size equals
-                hidden_size
-            bidirection, True for bidirectional RNN
-            param_specs, config for initializing the RNN parameters.
-            input_sample_shape, includes a single integer for the input sample
-                feature size.
-        '''
         super(RNN, self).__init__(name)
         conf = self.conf.rnn_conf
         assert hidden_size > 0, 'Hidden feature size must > 0'
@@ -605,7 +688,7 @@ class RNN(Layer):
 
         Returns:
             <dx1, dx2, ... dxn, dhx, dcx>, where dxi is the gradient tensor for
-            the i-th input, its shape is (batch_size,
+                the i-th input, its shape is (batch_size,
                 input_feature_length). dhx is the gradient for the initial
                 hidden state. dcx is the gradient for the initial cell state,
                 which is valid only for lstm.
@@ -741,5 +824,7 @@ def _construct_param_specs_from_dict(specs):
 
 
 def get_layer_list():
-    """ Return a list of strings reprensenting the all supported layers"""
+    """ Return a list of strings which include the identifiers (tags) of all
+    supported layers
+    """
     return singa_wrap.GetRegisteredLayers()
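
Beyond the module docstring example, the two-phase life cycle described in
the Layer class can be sketched as follows; the 'singacpp' engine tag
follows the engine docstring above and assumes a CPU build, and the import
paths mirror the module example::

    from singa import layer
    from singa import tensor
    from singa.model_pb2 import kTrain

    layer.engine = 'singacpp'
    dense = layer.Dense('fc', 10)   # constructed without input_sample_shape,
    dense.setup((20,))              # so setup() must be called explicitly
    out_shape = dense.get_output_sample_shape()  # expected to be (10,)

    x = tensor.Tensor((4, 20))
    x.gaussian(0, 0.1)
    y = dense.forward(kTrain, x)

    dy = tensor.Tensor()
    dy.reset_like(y)
    dy.set_value(0.1)
    dx, dparams = dense.backward(kTrain, dy)  # dparams holds dW and db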

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/8cd55300/src/python/singa/loss.py
----------------------------------------------------------------------
diff --git a/src/python/singa/loss.py b/src/python/singa/loss.py
index acfb813..c88290b 100644
--- a/src/python/singa/loss.py
+++ b/src/python/singa/loss.py
@@ -15,32 +15,127 @@
 # specific language governing permissions and limitations
 # under the License.
 # =============================================================================
-""" Python wrappers for optimizers implemented by C++."""
+
+'''
+Loss module includes a set of training loss implementations. Some are converted
+from the C++ implementation, and the rest are implemented directly using python
+Tensor.
+
+Example usage::
+
+    from singa import tensor
+    from singa import loss
+    from singa.proto import model_pb2
+
+    x = tensor.Tensor((3, 5))
+    x.uniform(0, 1)  # randomly generate the prediction activation
+    y = tensor.from_numpy(np.array([0, 1, 3], dtype=np.int))  # set the truth
+
+    f = loss.SoftmaxCrossEntropy()
+    l = f.forward(model_pb2.kTrain, x, y)  # l is tensor with 3 loss values
+    g = f.backward()  # g is a tensor containing all gradients of x w.r.t l
+'''
+
 
 from . import singa_wrap as singa
 import tensor
 
 
 class Loss(object):
+    '''Base loss class.
+
+    Subclasses that wrap the C++ loss classes can use the inherited forward,
+    backward, and evaluate functions of this base class. Other subclasses need
+    to override these functions
+    '''
 
     def __init__(self):
         self.swig_loss = None
 
     def forward(self, flag, x, y):
-        """Return a tensor of floats, one per sample"""
+        '''Compute the loss values.
+
+        Args:
+            flag (int): kTrain or kEval. If it is kTrain, then the backward
+                function must be called before calling forward again.
+            x (Tensor): the prediction Tensor
+            y (Tensor): the ground truth Tensor; x.shape[0] must equal y.shape[0]
+
+        Returns:
+            a tensor of floats for the loss values, one per sample
+        '''
         return tensor.from_raw_tensor(
             self.swig_loss.Forward(flag, x.singa_tensor, y.singa_tensor))
 
     def backward(self):
-        """Return the grad of x w.r.t. the loss obj"""
+        '''
+        Returns:
+            the grad of x w.r.t. the loss
+        '''
         return tensor.from_raw_tensor(self.swig_loss.Backward())
 
-    def evaluate(self, flag, x, y):
-        """Return the averaged loss for all samples in x"""
+    def evaluate(self, flag, x, y):  # TODO(wangwei) remove flag
+        '''
+        Args:
+            flag (int): must be kEval, to be removed
+            x (Tensor): the prediction Tensor
+            y (Tensor): the ground truth Tensor
+
+        Returns:
+            the averaged loss for all samples in x.
+        '''
         return self.swig_loss.Evaluate(flag, x.singa_tensor, y.singa_tensor)
 
 
 class SoftmaxCrossEntropy(Loss):
+    '''This loss function is a combination of SoftMax and Cross-Entropy loss.
+
+    It converts the inputs via SoftMax function and then
+    computes the cross-entropy loss against the ground truth values.
+    '''
 
     def __init__(self):
         self.swig_loss = singa.SoftmaxCrossEntropy()
+
+
+class SquaredError(Loss):
+    '''This loss evaluates the squared error between the prediction and the
+    truth values.
+
+    It is implemented using Python Tensor operations.
+    '''
+    def __init__(self):
+        super(SquaredError, self).__init__()
+        self.err = None
+
+    def forward(self, flag, x, y):
+        '''Compute the error as 0.5 * ||x-y||^2.
+
+        Args:
+            flag (int): kTrain or kEval; if kTrain, then the backward must be
+                called before calling forward again.
+            x (Tensor): the prediction Tensor
+            y (Tensor): the ground truth Tensor, which must have the same
+                shape as x
+
+        Returns:
+            a Tensor with one error value per sample
+        '''
+        self.err = x - y
+        return 0.5 * tensor.squared(self.err)
+
+    def backward(self):
+        '''Compute the gradient of x w.r.t the error.
+
+        Returns:
+            x - y
+        '''
+        return self.err
+
+    def evaluate(self, flag, x, y):
+        '''Compute the averaged error.
+
+        Returns:
+            a float value as the averaged error
+        '''
+        return tensor.sum(0.5 * tensor.squared(x - y)) / x.size()
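
A usage sketch for the new SquaredError loss, mirroring the module
docstring example; note that here y must have the same shape as the
prediction x::

    from singa import tensor
    from singa import loss
    from singa.proto import model_pb2

    x = tensor.Tensor((3, 5))
    x.uniform(0, 1)                        # predictions
    y = tensor.Tensor((3, 5))
    y.set_value(0.5)                       # targets of the same shape

    f = loss.SquaredError()
    l = f.forward(model_pb2.kTrain, x, y)  # per-sample 0.5 * ||x - y||^2
    g = f.backward()                       # the gradient x - y
    avg = f.evaluate(model_pb2.kEval, x, y)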

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/8cd55300/src/python/singa/metric.py
----------------------------------------------------------------------
diff --git a/src/python/singa/metric.py b/src/python/singa/metric.py
index 31b6892..3a5750d 100644
--- a/src/python/singa/metric.py
+++ b/src/python/singa/metric.py
@@ -15,28 +15,71 @@
 # specific language governing permissions and limitations
 # under the License.
 # =============================================================================
-""" Python wrappers for optimizers implemented by C++."""
+'''This module includes a set of metric classes for evaluating the model's
+performance. The specific metric classes could be converted from C++
+implementation or implemented directly using Python.
+
+
+Example usage::
+
+    from singa import tensor
+    from singa import metric
+
+    x = tensor.Tensor((3, 5))
+    x.uniform(0, 1)  # randomly generate the prediction activation
+    x = tensor.SoftMax(x)  # normalize the prediction into probabilities
+    y = tensor.from_numpy(np.array([0, 1, 3], dtype=np.int))  # set the truth
+
+    f = metric.Accuracy()
+    acc = f.evaluate(x, y)  # averaged accuracy over all 3 samples in x
+
+'''
 
 from . import singa_wrap as singa
 import tensor
 
 
 class Metric(object):
+    '''Base metric class.
+
+    Subclasses that wrap the C++ metric classes can use the inherited forward
+    and evaluate functions of this base class. Other subclasses need
+    to override these functions. Users need to feed in the **predictions** and
+    ground truth to get the metric values.
+    '''
 
     def __init__(self):
         self.swig_metric = None
 
     def forward(self, x, y):
-        """Return a tensor of floats, one per sample"""
+        '''Compute the metric for each sample.
+
+        Args:
+            x (Tensor): predictions, one row per sample
+            y (Tensor): ground truth values, one row per sample
+
+        Returns:
+            a tensor of floats, one per sample
+        '''
         return tensor.from_raw_tensor(
             self.swig_metric.Forward(x.singa_tensor, y.singa_tensor))
 
     def evaluate(self, x, y):
-        """Return the averaged metric for all samples in x"""
+        '''Compute the averaged metric over all samples.
+
+        Args:
+            x (Tensor): predictions, one row per sample
+            y (Tensor): ground truth values, one row per sample
+        Returns:
+            a float value for the averaged metric
+        '''
         return self.swig_metric.Evaluate(x.singa_tensor, y.singa_tensor)
 
 
 class Accuracy(Metric):
+    '''Compute the top-1 accuracy for single-label prediction tasks.
 
+    It calls the C++ functions to do the calculation.
+    '''
     def __init__(self):
         self.swig_metric = singa.Accuracy()
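
The module docstring example above omits the numpy import; a fuller sketch
that uses both forward (per-sample values) and evaluate (averaged value)::

    import numpy as np
    from singa import tensor
    from singa import metric

    x = tensor.Tensor((3, 5))
    x.uniform(0, 1)
    x = tensor.SoftMax(x)          # normalize into probabilities
    y = tensor.from_numpy(np.array([0, 1, 3], dtype=np.int))

    f = metric.Accuracy()
    per_sample = f.forward(x, y)   # a tensor with one 0/1 value per sample
    acc = f.evaluate(x, y)         # a float, the averaged accuracy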

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/8cd55300/src/python/singa/optimizer.py
----------------------------------------------------------------------
diff --git a/src/python/singa/optimizer.py b/src/python/singa/optimizer.py
index 503527f..5d38997 100644
--- a/src/python/singa/optimizer.py
+++ b/src/python/singa/optimizer.py
@@ -15,7 +15,22 @@
 # specific language governing permissions and limitations
 # under the License.
 # =============================================================================
-""" Python wrappers for optimizers implemented by C++."""
+'''This module includes a set of optimizers for updating model parameters.
+
+Example usage::
+
+  from singa import optimizer
+  from singa import tensor
+
+  sgd = optimizer.SGD(lr=0.01, momentum=0.9, decay=1e-4)
+  p = tensor.Tensor((3,5))
+  p.uniform(-1, 1)
+  g = tensor.Tensor((3,5))
+  g.gaussian(0, 0.01)
+
+  sgd.apply(1, g, p, 'param')  # use the global lr=0.01 for epoch 1
+  sgd.apply_with_lr(2, 0.03, g, p, 'param')  # use lr=0.03 for epoch 2
+'''
 
 from . import singa_wrap as singa
 import tensor
@@ -23,53 +38,44 @@ from proto import model_pb2
 
 
 class Optimizer(object):
-    """Base python optimizer.
-
-    Usages:
-        1. construct the optimizer
-        2. (optional) register each parameter with its specs.
-        3. use the optimizer to update parameter values given parameter
-            gradients and other optional info
-    """
-
+    '''The base python optimizer class.
+
+    Typically, an optimizer is used as follows:
+
+    1. construct the optimizer
+    2. (optional) register each parameter with its specs.
+    3. use the optimizer to update parameter values given parameter
+        gradients and other optional info
+
+    The subclasses should override the apply_with_lr function to do the real
+    parameter update.
+
+    Args:
+        lr (float): a constant for the learning rate, mutually exclusive with
+            'lr_gen'.
+        momentum (float): a constant for the momentum value
+        decay (float): the coefficient for L2 regularizer, which is mutually
+            exclusive with 'regularizer'.
+        lr_gen (function): a function that returns the learning rate given
+            the current training step/epoch. It is mutually exclusive with lr.
+            If neither is set, the apply_with_lr function should be used for
+            param updating.
+        regularizer: an instance of Regularizer or RegularizerConf; If set,
+            regularization would be applied in apply_with_lr().
+            Users can also do regularization outside.
+        constraint: an instance of Constraint or ConstraintConf; If set,
+            constraint would be applied inside apply_with_lr(). Users can
+            also apply the constraint outside.
+    '''
     def __init__(self, lr=None, momentum=None, decay=None, lr_gen=None,
-                 momentum_gen=None, regularizer=None, constraint=None):
-        """Constructor.
-
-        Args:
-            lr: a constant or a function that generates learning rate given a
-                step, which is mutually exclusive with 'lr_gen'.
-            momentum: a constant or a function that generates the momentum value
-                given a step.
-            decay (float): the coefficent for L2 regularizer, which is mutually
-                exclusive with 'regularizer'.
-            lr_gen (function): a function returns the learning rate given
-                the current training step. It is mutually exclusive with lr. If
-                both are not set, the apply_with_lr function should be used for
-                param updating.
-            momentum_gen (function): a function returns the momentum value given
-                the current training step. It is mutually exclusive with
-                momentum.
-            regularizer: an instance of Regularizer or RegularizerConf; If set,
-                regularization would be applied in apply_with_lr().
-                Users can also do regularization outside.
-            constraint: an instance of Constraint or ConstraintConf; If set,
-                constraint would be applied inside apply_with_lr(). Users can
-                also do regularization outside.
-        """
+                 regularizer=None, constraint=None):
         if lr is not None:
             assert lr_gen is None, 'Cannot set lr and lr_gen at the same time'
 
-            def lr_gen(step):
+            def lr_gen(epoch):
                 return lr
         self.lr_gen = lr_gen
-        if momentum is not None:
-            assert momentum_gen is None, 'Cannot set momentum and momentum_gen'\
-                ' at the same time'
-
-            def momentum_gen(step):
-                return momentum
-        self.momentum_gen = momentum_gen
+        self.momentum = momentum
         if decay is not None:
             assert regularizer is None, \
                 'Cannot set decay and regularizer at the same time'
@@ -94,14 +100,15 @@ class Optimizer(object):
         self.learning_rate_multiplier = {}
 
     def register(self, name, specs):
-        """Register the param specs, including creating regularizer and
+        '''Register the param specs, including creating regularizer and
         constraint per param object. Param specific regularizer and constraint
         have higher priority than the global ones.
 
         Args:
             name (str): parameter name
-            specs (ParamSpec): protobuf obj
-        """
+            specs (ParamSpec): protobuf obj, including regularizer and
+                constraint, multipliers for learning rate and weight decay.
+        '''
         if specs.HasField('regularizer'):
             self.regularizers[name] = CppRegularizer(specs.constraint)
         if specs.HasField('constraint'):
@@ -111,8 +118,8 @@ class Optimizer(object):
         if specs.decay_mult != 1:
             self.decay_multiplier[name] = specs.decay_mult
 
-    def apply_regularizer_constraint(self, value, grad, name=None, step=None):
-        """Apply regularization and constraint if available.
+    def apply_regularizer_constraint(self, value, grad, name=None, epoch=None):
+        '''Apply regularization and constraint if available.
 
         If there are both global regularizer (constraint) and param specific
         regularizer (constraint), it would use the param specific one.
@@ -121,46 +128,48 @@ class Optimizer(object):
             value (Tensor): parameter value Tensor
             grad (Tensor): parameter gradient Tensor
             name (string): to get parameter specific regularizer or constraint
-            step (int): some regularizer or constraint would use step
+            epoch (int): some regularizer or constraint would use epoch
 
-        Return:
+        Returns:
             the updated gradient Tensor
-        """
+        '''
         if name is not None and name in self.constraints:
-            self.constraints[name].apply(value, grad, step)
+            self.constraints[name].apply(epoch, value, grad)
         elif self.constraint is not None:
-            self.constraint.apply(step, value, grad)
+            self.constraint.apply(epoch, value, grad)
 
         if name is not None and name in self.regularizers:
-            self.regularizers[name].apply(value, grad, step)
+            self.regularizers[name].apply(epoch, value, grad)
         elif self.regularizer is not None:
-            self.regularizer.apply(step, value, grad)
+            self.regularizer.apply(epoch, value, grad)
         return grad
 
-    def apply_with_lr(self, step, lr, grad, value, name=None):
-        """Do update with given learning rate.
+    def apply_with_lr(self, epoch, lr, grad, value, name=None):
+        '''Do update with given learning rate.
 
         The subclass optimizer must override this function.
+
         Args:
-            step (int): training step (could be iteration or epoch)
+            epoch (int): the current training epoch (or iteration)
             lr (float): learning rate
             grad (Tensor): parameter gradient
             value (Tesnor): parameter value
             name (string): paramter name to retrieval parameter specific
                 updating rules (including regularizer and constraint)
 
-        Return:
+        Returns:
             updated parameter value
-        """
+        '''
         assert False, 'This is the base function, pls call the subclass func'
         return value
 
-    def apply(self, step, grad, value, name=None):
-        """Do update assume the learning rate generator is set.
+    def apply(self, epoch, grad, value, name=None):
+        '''Do update assuming the learning rate generator is set.
 
         The subclass optimizer does not need to override this function.
+
         Args:
-            step (int): training step (could be iteration or epoch)
+            epoch (int): the current training epoch (or iteration)
             grad (Tensor): parameter gradient
             value (Tesnor): parameter value
             name (string): paramter name to retrieval parameter specific
@@ -168,97 +177,109 @@ class Optimizer(object):
 
         Return:
             updated parameter value
-        """
-
+        '''
         assert self.lr_gen is not None, 'Learning rate generator is not set.'\
             'Either set the lr_gen in constructor or call apply_with_lr'
-        lr = self.lr_gen(step)
-        return self.apply_with_lr(step, lr, grad, value, name)
+        lr = self.lr_gen(epoch)
+        return self.apply_with_lr(epoch, lr, grad, value, name)
 
 
 class SGD(Optimizer):
+    '''The vanilla Stochastic Gradient Descent algorithm with momentum.
 
-    def __init__(self, lr=None, momentum=None, decay=None, **kwargs):
-        """The vallina Stochasitc Gradient Descent algorithm.
+    See the base Optimizer for all arguments.
+    '''
 
-        See the base Optimizer for all arguments.
-        """
-        super(SGD, self).__init__(lr, momentum, decay)
+    def __init__(self, lr=None, momentum=None, decay=None, lr_gen=None,
+                 regularizer=None, constraint=None):
+        super(SGD, self).__init__(lr, momentum, decay, lr_gen, regularizer,
+                                  constraint)
         conf = model_pb2.OptimizerConf()
+        conf.momentum = self.momentum
+        conf.type = 'sgd'
         self.opt = singa.CreateOptimizer('SGD')
         self.opt.Setup(conf.SerializeToString())
 
-    def apply_with_lr(self, step, lr, grad, value, name):
-        self.apply_regularizer_constraint(step, value, grad, name)
-        self.opt.Apply(step, lr, name, grad.singa_tensor, value.singa_tensor)
+    def apply_with_lr(self, epoch, lr, grad, value, name):
+        grad = self.apply_regularizer_constraint(value, grad, name, epoch)
+        self.opt.Apply(epoch, lr, name, grad.singa_tensor, value.singa_tensor)
         return value
 
 
 class Nesterov(Optimizer):
+    '''The SGD with Nesterov momentum.
 
-    def __init__(self, lr=None, momentum=0.9, decay=None, **kwargs):
-        """The SGD with Nesterov momentum
+    See the base Optimizer for all arguments.
+    '''
 
-        See the base Optimizer for all arguments.
-        """
-        super(Nesterov, self).__init__(lr, momentum, decay, kwargs)
+    def __init__(self, lr=None, momentum=0.9, decay=None, lr_gen=None,
+                 regularizer=None, constraint=None):
+        super(Nesterov, self).__init__(lr, momentum, decay, lr_gen, regularizer,
+                                       constraint)
         conf = model_pb2.OptimizerConf()
+        conf.momentum = momentum
+        conf.type = 'nesterov'
         self.opt = singa.CreateOptimizer('Nesterov')
         self.opt.Setup(conf.SerializeToString())
 
-    def apply_with_lr(self, step, lr, grad, value, name):
-        self.apply_regularizer_constraint(step, value, grad, name)
-        self.opt.Apply(step, lr, name, grad.singa_tensor, value.singa_tensor)
+    def apply_with_lr(self, epoch, lr, grad, value, name):
+        grad = self.apply_regularizer_constraint(value, grad, name, epoch)
+        self.opt.Apply(epoch, lr, name, grad.singa_tensor, value.singa_tensor)
         return value
 
 
 class AdaGrad(Optimizer):
+    '''AdaGrad optimizer.
 
-    def __init__(self, epsilon=1e-8, lr=None, decay=None, **kwargs):
-        """AdaGrad optimizer.
+    See the base Optimizer for all constructor args.
 
-        See the base Optimizer for all constructor args.
-        Args:
-            epsilon (float): small number for preventing numeric error.
-        """
-        super(RMSProp, self).__init__(lr, decay, **kwargs)
+    Args:
+        epsilon (float): small number for preventing numeric error.
+    '''
+    def __init__(self, epsilon=1e-8, lr=None, decay=None, lr_gen=None,
+                 regularizer=None, constraint=None):
+        super(AdaGrad, self).__init__(lr, None, decay, lr_gen, regularizer,
+                                      constraint)
         conf = model_pb2.OptimizerConf()
         conf.delta = epsilon
+        conf.type = 'adagrad'
         self.opt = singa.CreateOptimizer('AdaGrad')
         self.opt.Setup(conf.SerializeToString())
 
-    def apply_with_lr(self, step, lr, grad, value, name):
-        grad = self.apply_regularizer_constraint(step, value, grad, name)
-        self.opt.Apply(step, lr,  name, grad.singa_tensor, value.singa_tensor)
+    def apply_with_lr(self, epoch, lr, grad, value, name):
+        grad = self.apply_regularizer_constraint(value, grad, name, epoch)
+        self.opt.Apply(epoch, lr,  name, grad.singa_tensor, value.singa_tensor)
         return value
 
 
 class RMSProp(Optimizer):
+    '''RMSProp optimizer.
 
-    def __init__(self, rho=0.9, epsilon=1e-8, lr=None, decay=None, **kwargs):
-        """RMSProp optimizer.
+    See the base Optimizer for all constructor args.
 
-        See the base Optimizer for all constructor args.
-        Args:
-            rho (float): float within [0, 1]
-            epsilon (float): small value for preventing numeric error
-        """
-        super(RMSProp, self).__init__(lr, decay, kwargs)
+    Args:
+        rho (float): float within [0, 1]
+        epsilon (float): small value for preventing numeric error
+    '''
+
+    def __init__(self, rho=0.9, epsilon=1e-8, lr=None, decay=None, lr_gen=None,
+                 regularizer=None, constraint=None):
+        super(RMSProp, self).__init__(lr, None, decay, lr_gen, regularizer,
+                                      constraint)
         conf = model_pb2.OptimizerConf()
         conf.rho = rho
         conf.delta = epsilon
         self.opt = singa.CreateOptimizer('RMSProp')
         self.opt.Setup(conf.SerializeToString())
 
-    def apply_with_lr(self, step, lr, grad, value, name):
-        grad = self.apply_regularizer_constraint(step, value, grad, name)
-        self.opt.Apply(step, lr,  name, grad.singa_tensor, value.singa_tensor)
+    def apply_with_lr(self, epoch, lr, grad, value, name):
+        grad = self.apply_regularizer_constraint(value, grad, name, epoch)
+        self.opt.Apply(epoch, lr,  name, grad.singa_tensor, value.singa_tensor)
         return value
 
 
 class Regularizer(object):
-    """Base Python regularizer for parameter gradients.
-    """
+    '''Base Python regularizer for parameter gradients.'''
 
     def apply(self, value, grad):
         assert False, 'Not Implemented. Call the subclass function.'
@@ -266,34 +287,32 @@ class Regularizer(object):
 
 
 class CppRegularizer(Regularizer):
-    """Wrapper for regularizer implemented using C++.
-    """
+    '''Wrapper for regularizer implemented using C++.
 
-    def __init__(self, conf):
-        """Constructor.
+    Args:
+        conf (RegularizerConf): protobuf message for the configuration.
+    '''
 
-        Args:
-            conf (RegularizerConf): protobuf message for the configuration.
-        """
+    def __init__(self, conf):
         self.reg = singa.CreateRegularizer(conf.type)
         self.reg.Setup(conf.SerializeToString())
 
-    def apply(self, step, value, grad):
-        self.reg.Apply(step, value.singa_tensor, grad.singa_tensor)
+    def apply(self, epoch, value, grad):
+        self.reg.Apply(epoch, value.singa_tensor, grad.singa_tensor)
         return grad
 
 
 class L2Regularizer(Regularizer):
-    """L2 regularization"""
+    '''L2 regularization
+
+    Args:
+        coefficient (float): regularization coefficient.
+    '''
 
     def __init__(self, coefficient):
-        """
-        Args:
-            coefficient (float): regularization coefficient.
-        """
         self.coefficient = coefficient
 
-    def apply(self, step, value, grad, coefficient=None):
+    def apply(self, epoch, value, grad, coefficient=None):
         if coefficient is None:
             assert self.coefficient is not None, 'Must set the coefficient'
             coefficient = self.coefficient
@@ -302,39 +321,34 @@ class L2Regularizer(Regularizer):
 
 
 class Constraint(object):
-    """Base Python constraint class for paramter gradients.
-    """
+    '''Base Python constraint class for parameter gradients'''
 
-    def apply(self, step, value, grad):
+    def apply(self, epoch, value, grad):
         return grad
 
 
 class CppConstraint(Constraint):
-    """Wrapper for constraints implemented using C++.
-    """
+    '''Wrapper for constraints implemented using C++.
 
+    Args:
+        conf (ConstraintConf): protobuf message for the configuration.
+    '''
     def __init__(self, conf):
-        """Constructor.
-
-        Args:
-            conf (ConstraintConf): protobuf message for the configuration.
-        """
         self.constraint = singa.CreateConstraint(conf.type)
         self.constraint.Setup(conf.SerializeToString())
 
-    def apply(self, step, value, grad):
-        self.constraint.Apply(step, value.singa_tensor, grad.singa_tensor)
+    def apply(self, epoch, value, grad):
+        self.constraint.Apply(epoch, value.singa_tensor, grad.singa_tensor)
         return grad
 
 
 class L2Constraint(Constraint):
-    """Rescale the gradient to make the L2 norm <= a given threshold.
-    """
+    '''Rescale the gradient to make the L2 norm <= a given threshold'''
 
     def __init__(self, threshold=None):
         self.threshold = threshold
 
-    def apply(self, step, value, grad, threshold=None):
+    def apply(self, epoch, value, grad, threshold=None):
         if threshold is None:
             assert self.threshold is not None, 'Must set the threshold'
             threshold = self.threshold
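
The step-to-epoch rename above touches every `apply` and `apply_with_lr`
signature in optimizer.py. Below is a minimal sketch of the new calling
convention; the shapes and values are illustrative only, and assume the singa
Python package is built and importable:

    from singa import optimizer, tensor

    value = tensor.Tensor((3, 2))
    value.set_value(0.1)     # current parameter values (illustrative)
    grad = tensor.Tensor((3, 2))
    grad.set_value(0.01)     # gradient from back-propagation (illustrative)

    # regularizers and constraints now take the epoch id as the first argument
    reg = optimizer.L2Regularizer(coefficient=0.0005)
    grad = reg.apply(1, value, grad)

    con = optimizer.L2Constraint(threshold=1.0)
    grad = con.apply(1, value, grad)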


[11/22] incubator-singa git commit: Merge commits from wenfeng for the installation page.

Posted by wa...@apache.org.
Merge commits from wenfeng for the installation page.


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/2c049d68
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/2c049d68
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/2c049d68

Branch: refs/heads/dev
Commit: 2c049d683f1bd4f590e87d9dd2f3fe0d2a1fb591
Parents: 30731ee 410f238
Author: Wei Wang <wa...@comp.nus.edu.sg>
Authored: Mon Aug 15 19:37:46 2016 +0800
Committer: Wei Wang <wa...@comp.nus.edu.sg>
Committed: Mon Aug 15 19:37:46 2016 +0800

----------------------------------------------------------------------
 doc/docs/installation.md     | 297 +++++++++++++++++++++++++++++---------
 src/python/setup.py.in       |   5 +-
 src/python/singa/__init__.py | 240 ------------------------------
 src/python/singa/command.py  | 240 ++++++++++++++++++++++++++++++
 4 files changed, 468 insertions(+), 314 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/2c049d68/doc/docs/installation.md
----------------------------------------------------------------------
diff --cc doc/docs/installation.md
index 4cf4ea7,6bfdee3..5d3c8a2
--- a/doc/docs/installation.md
+++ b/doc/docs/installation.md
@@@ -1,71 -1,138 +1,226 @@@
--# Building SINGA from source
--
--## Dependencies
--
--### Required
- * Google Protobuf (>=2.5)
 -* Google Protobuf (>=2.5,<3)
--* BLAS (tested with OpenBLAS >=0.2.10)
--* CUDA (tested with 6.5, 7.0 and 7.5)
--* CUDNN (v4 and v5)
--* cmake (>=2.6)
--
--Users must install the above mandatory libraries.
--Currently CUDA and CUDNN are also mandatory, but it would become optional later.
--
--### Optional
--* Glog
--* OpenCV (tested with 2.4.8)
--* LMDB (tested with 0.9)
--
--
--## Instructions
--
--Please clone the newest code from [Github](https://github.com/apache/incubator-singa) and execute the following commands,
--
--
--    $ git clone https://github.com/apache/incubator-singa.git
--    $ cd incubator-singa/
--    # switch to dev branch
--    $ git checkout dev
--
--
--If you use CUDA, then [CNMeM](https://github.com/NVIDIA/cnmem) is necessary,
--which could be downloaded as
--
--    $ git submodule init
--    $ git submodule update
- 
--
- ### Linux & MacOS 
--
- GCC (>=4.8.1) is required to compile SINGA on Linux.
- You can use gcc compiler to do the installation on MacOS following the
- steps in Linux installation. 
- In addition, you can also install singa via clang compiler following the
- commands in this section.
 -### Linux OS
--
 -GCC (>=4.8.1) is required to compile SINGA on Linux OS.
--In SINGA_ROOT, execute the following commands for compiling SINGA,
--
--    $ mkdir build && cd build
 -    # generate Makefile for compilation
--    $ cmake ..
 -    # compile SINGA
--    $ make
--
- Note that if you are using CUDNN and it is not installed under system default
- folder, you need to let cmake know the paths to CUDNN,
 -Note that if you are using CUDNN, you need to let cmake know the paths to CUDNN,
--
--    $ export CMAKE_INCLUDE_PATH=<path to cudnn>/include:$CMAKE_INCLUDE_PATH
--    $ export CMAKE_LIBRARY_PATH=<path to cudnn>/lib64:$CMAKE_LIBRARY_PATH
--
- You can use `ccmake ..` to configure the compilation options including using
- LMDB, GLOG, etc. In addition, you can set the proper search paths for the
- dependent libraries.
 -You can use `cmake ..` to configure the compilation options including using
 -LMDB, GLOG, etc.
--
--After compiling SINGA, you can run the unit tests by
--
--    $ ./bin/test_singa
--
--You can see all the testing cases with testing results. If SINGA passes all
--tests, then you have successfully installed SINGA. Please proceed to try the examples!
 -
 -
 -### MacOS
--
 -Currently only Linux OS is officially support.
--
--### Windows
 -
 -Currently only Linux OS is officially support.
 -
 -
 -# Install SINGA Python Module
 -
 -SINGA provide a python binding for python programers. Users can either install from source or 
 -from pre-built wheel file.
 -
 -## Install from source
 -
 -### Required
 -* python(==2.7)   
 -* pip(>=1.5)
 -* SWIG(>=3.0)   
 -* numpy(>=1.11.0)   
 -* Google protobuf(>=2.5,<3)   
 -
 -
 -### Configuration
 -To build SINGA python package, users should turn on python building switch in cmake config file: "CMakeList.txt"
 -
 -    OPTION(USE_PYTHON "Generate py wrappers" ON)
 -
 -### Instructions
 -Follow the instructions in the above sections to build SINGA from source,
 -
 -After that, execute the following commands:
 -
 -    # under the build directory
 -    $ cd python
 -    $ sudo pip install . 
 -
 -Then singa package should be installed in the corresponding python library. 
 -
 -## Pip Install from wheel 
 -
 -Install pip if it is not already installed:
 -
 -    $ sudo apt-get install python-pip python-dev
 -
 -Then, select the correct binary to install:
 -
 -    # Ubuntu/Linux 64-bit, CPU only, Python 2.7, Protobuf 2.5
 -    $ export SINGA_WHEEL_URL=http://comp.nus.edu.sg/~dbsystem/singa/assets/file/pb2.5/singa-1.0.0-cp27-none-linux_x86_64.whl
 -
 -    # Ubuntu/Linux 64-bit, CPU only, Python 2.7, Protobuf 2.6
 -    $ export SINGA_WHEEL_URL=http://comp.nus.edu.sg/~dbsystem/singa/assets/file/pb2.6/singa-1.0.0-cp27-none-linux_x86_64.whl
 -
 -    # Ubuntu/Linux 64-bit, GPU enabled, Python 2.7, Protobuf 2.5, CUDA toolkit 7.5 and CuDNN v5
 -    $ export SINGA_WHEEL_URL=http://comp.nus.edu.sg/~dbsystem/singa/assets/file/pb2.5-cuda7.5-cudnn5/singa-1.0.0-cp27-none-linux_x86_64.whl
 -   
 -    # Ubuntu/Linux 64-bit, GPU enabled, Python 2.7, Protobuf 2.6, CUDA toolkit 7.5 and CuDNN v5
 -    $ export SINGA_WHEEL_URL=http://comp.nus.edu.sg/~dbsystem/singa/assets/file/pb2.6-cuda7.5-cudnn5/singa-1.0.0-cp27-none-linux_x86_64.whl
 -   
 -Install SINGA:
 -
 -    $ sudo pip install --upgrade $SINGA_WHEEL_URL
 -
 -### build wheel file from source
 -
 -Users can build wheel file from source. After build SINGA, execute the following commands:
 -
 -    # under the build directory
 -    $ cd python
 -    $ python setup.py bdist_wheel
 -
 -Then users may get built wheel file under "dist" directory
++# Installation
++
++## Dependencies
++
++### Required
++* google protobuf (>=2.5,<3)
++* blas (tested with openblas >=0.2.10)
++* cmake (>=2.6)
++
++
++### Optional
++* glog
++* opencv (tested with 2.4.8)
++* lmdb (tested with 0.9)
++* cuda (tested with 6.5, 7.0 and 7.5)
++* cudnn (v4 and v5)
++
++PySINGA has additional dependencies
++
++* python(==2.7)
++* pip(>=1.5)
++* swig(>=3.0)
++* numpy(>=1.11.0)
++* openblas (>=0.2.10)
++
++Users are encouraged to install CUDA and [cudnn](https://developer.nvidia.com/cudnn) for running SINGA on GPUs with
++better performance.
++Most of the dependent libraries can be installed via package managers like
++apt-get or homebrew.
++
++    # for ubuntu users, tested on 14.04
++    sudo apt-get install libprotobuf-dev libopencv-dev protobuf-compiler libgoogle-glog-dev liblmdb-dev python2.7-dev python-pip python-numpy
++
++    # for Mac OS users
++    brew install -vd glog lmdb
++    brew tap homebrew/science
++    brew install opencv
++    brew install openblas
++    brew tap homebrew/python
++    brew install python
++    brew install numpy  --with-openblas
++
++
++## Install PySINGA
++
++### From wheel
++
++After installing the dependencies for SINGA and PySINGA, please download the correct binary:
++
++    # Ubuntu/Linux 64-bit, CPU only, Python 2.7, Protobuf 2.5
++    $ export SINGA_WHEEL_URL=http://comp.nus.edu.sg/~dbsystem/singa/assets/file/pb2.5/singa-1.0.0-cp27-none-linux_x86_64.whl
++
++    # Ubuntu/Linux 64-bit, CPU only, Python 2.7, Protobuf 2.6
++    $ export SINGA_WHEEL_URL=http://comp.nus.edu.sg/~dbsystem/singa/assets/file/pb2.6/singa-1.0.0-cp27-none-linux_x86_64.whl
++
++    # Ubuntu/Linux 64-bit, GPU enabled, Python 2.7, Protobuf 2.5, CUDA toolkit 7.5 and CuDNN v5
++    $ export SINGA_WHEEL_URL=http://comp.nus.edu.sg/~dbsystem/singa/assets/file/pb2.5-cuda7.5-cudnn5/singa-1.0.0-cp27-none-linux_x86_64.whl
++
++    # Ubuntu/Linux 64-bit, GPU enabled, Python 2.7, Protobuf 2.6, CUDA toolkit 7.5 and CuDNN v5
++    $ export SINGA_WHEEL_URL=http://comp.nus.edu.sg/~dbsystem/singa/assets/file/pb2.6-cuda7.5-cudnn5/singa-1.0.0-cp27-none-linux_x86_64.whl
++
++Then, run the following command
++
++    $ sudo pip install --upgrade $SINGA_WHEEL_URL
++
++If you do not have sudo right, you can run `pip install` in a python virtual environment.
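++
++To verify the installation (a minimal check, assuming the wheel installed
++cleanly), try importing the package,
++
++    $ python -c "from singa import tensor"
++
++which should exit without error.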
++
++
++### From source
++
++Please compile SINGA from source (see the next section) with the 'USE_PYTHON' option on,
++and then run the following commands,
++
++    # under the build directory
++    $ cd python
++    $ sudo pip install .
++
++If you are using a virtual environment, you can omit `sudo`.
++
++Developers can build the wheel file via
++
++    # under the build directory
++    $ cd python
++    $ python setup.py bdist_wheel
++
++
++The generated wheel file is under the "dist" directory.
++
++
++## Build SINGA from source
++
++Please clone the newest code from [Github](https://github.com/apache/incubator-singa) and execute the following commands,
++
++    $ git clone https://github.com/apache/incubator-singa.git
++    $ cd incubator-singa/
++
++If you use CUDA, then [CNMeM](https://github.com/NVIDIA/cnmem) is necessary,
++which could be downloaded as
++
++    $ git submodule init
++    $ git submodule update
++
++
++### Linux & MacOS
++
++GCC (>=4.8.1) is required to compile SINGA on Linux.
++For Mac OS users, you can use either GCC or Clang.
++
++In SINGA_ROOT, execute the following commands for compiling SINGA,
++
++    $ mkdir build && cd build
++    $ cmake ..
++    $ make
++    $ make install
++
++Note that if you are using CUDNN and it is not installed under the system default
++folder, you need to let cmake know the paths to CUDNN,
++
++    $ export CMAKE_INCLUDE_PATH=<path to cudnn>/include:$CMAKE_INCLUDE_PATH
++    $ export CMAKE_LIBRARY_PATH=<path to cudnn>/lib64:$CMAKE_LIBRARY_PATH
++
++You can use `ccmake ..` to configure the compilation options, including
++generating the python binding and changing the installation folder.
++If the dependent libraries are not in the system default paths, you need to export
++the following environment variables
++
++    export CMAKE_INCLUDE_PATH=<path to your header file folder>
++    export CMAKE_LIBRARY_PATH=<path to your lib file folder>
++
++After compiling SINGA, you can run the unit tests by
++
++    $ ./bin/test_singa
++
++You will see all the test cases with their results. If SINGA passes all
++tests, then you have successfully installed SINGA. Please proceed to try the examples!
++
++
++### Windows
++To be added.
++
++
++## FAQ
++
++* Q: Running `cmake ..` fails because it cannot find the dependent libraries.
++
++    A: If you haven't installed the libraries, please install them. If you installed
++    the libraries in a folder outside the system default paths, e.g. /usr/local,
++    please export the following variables
++
++        export CMAKE_INCLUDE_PATH=<path to your header file folder>
++        export CMAKE_LIBRARY_PATH=<path to your lib file folder>
++
++
++* Q: Error from `make`, e.g. in the linking phase
++
++    A: If your libraries are in folders other than the system default paths, you need
++    to export the following variables
++
++        $ export LIBRARY_PATH=<path to your lib file folder>
++        $ export LD_LIBRARY_PATH=<path to your lib file folder>
++
++
++* Q: Error from header files, e.g. 'cblas.h: no such file or directory'
++
++    A: You need to add the folder containing cblas.h to CPLUS_INCLUDE_PATH,
++    e.g.,
++
++        $ export CPLUS_INCLUDE_PATH=/opt/OpenBLAS/include:$CPLUS_INCLUDE_PATH
++
++* Q: While compiling SINGA, I get the error `SSE2 instruction set not enabled`
++
++    A: You can try the following command:
++
++        $ make CFLAGS='-msse2' CXXFLAGS='-msse2'
++
++* Q: I get `ImportError: cannot import name enum_type_wrapper` from google.protobuf.internal when I try to import .py files.
++
++    A: You need to install the python binding of protobuf, which can be installed via
++
++        $ sudo apt-get install python-protobuf
++
++    or from source
++
++        $ cd /PROTOBUF/SOURCE/FOLDER
++        $ cd python
++        $ python setup.py build
++        $ python setup.py install
++
++* Q: When I build OpenBLAS from source, I am told that I need a Fortran compiler.
++
++    A: You can compile OpenBLAS by
++
++        $ make ONLY_CBLAS=1
++
++    or install it using
++
++        $ sudo apt-get install libopenblas-dev
++
++* Q: When I build protocol buffer, it reports that GLIBCXX_3.4.20 is not found in /usr/lib64/libstdc++.so.6.
++
++    A: This means the linker found libstdc++.so.6 but that library
++    belongs to an older version of GCC than was used to compile and link the
++    program. The program depends on code defined in
++    the newer libstdc++ that belongs to the newer version of GCC, so the linker
++    must be told how to find the newer libstdc++ shared library.
++    The simplest way to fix this is to find the correct libstdc++ and export it to
++    LD_LIBRARY_PATH. For example, if GLIBCXX_3.4.20 is listed in the output of the
++    following command,
++
++        $ strings /usr/local/lib64/libstdc++.so.6 | grep GLIBCXX
++
++    then you just set your environment variable as
++
++        $ export LD_LIBRARY_PATH=/usr/local/lib64:$LD_LIBRARY_PATH
++
++* Q: When I build glog, it reports that "src/logging_unittest.cc:83:20: error: 'gflags' is not a namespace-name"
++
++    A: It may be that you installed gflags with a different namespace, such as "google", so glog cannot find the 'gflags' namespace.
++    Since gflags is not necessary for building glog, you can change the configure.ac file to ignore gflags.
++
++        1. cd to glog src directory
++        2. change line 125 of configure.ac  to "AC_CHECK_LIB(gflags, main, ac_cv_have_libgflags=0, ac_cv_have_libgflags=0)"
++        3. autoreconf
++
++    After this, you can build glog again.
++


[15/22] incubator-singa git commit: Merge commits for debugging the gradient average error and commits for documentation.

Posted by wa...@apache.org.
Merge commits for debugging the gradient average error and commits for documentation.

Conflicts:
	examples/cifar10/alexnet.py
	src/python/singa/layer.py
	src/python/singa/optimizer.py


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/5d20d353
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/5d20d353
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/5d20d353

Branch: refs/heads/dev
Commit: 5d20d353bd09f2bd758f27f5e1851af7ae8d4123
Parents: 5db7eb6 6d4539e
Author: Wei Wang <wa...@comp.nus.edu.sg>
Authored: Mon Aug 15 20:31:21 2016 +0800
Committer: Wei Wang <wa...@comp.nus.edu.sg>
Committed: Mon Aug 15 20:31:21 2016 +0800

----------------------------------------------------------------------
 examples/cifar10/alexnet.cc   | 11 +++-----
 examples/cifar10/alexnet.py   | 53 +++++++-------------------------------
 examples/cifar10/train.py     | 19 +++++++-------
 src/model/feed_forward_net.cc |  6 ++---
 src/model/optimizer/sgd.cc    |  4 +--
 src/python/singa/layer.py     | 30 +++++++++++++++++----
 src/python/singa/net.py       |  8 +++++-
 src/python/singa/optimizer.py | 29 +++++++++++----------
 src/python/singa/tensor.py    |  8 +++---
 9 files changed, 80 insertions(+), 88 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/5d20d353/examples/cifar10/alexnet.cc
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/5d20d353/examples/cifar10/alexnet.py
----------------------------------------------------------------------
diff --cc examples/cifar10/alexnet.py
index 17b6a89,dae129f..02437b3
--- a/examples/cifar10/alexnet.py
+++ b/examples/cifar10/alexnet.py
@@@ -35,54 -36,20 +35,21 @@@ def create_net(use_cpu=False)
      W0_specs = {'init': 'gaussian', 'mean': 0, 'std': 0.0001}
      W1_specs = {'init': 'gaussian', 'mean': 0, 'std': 0.01}
      W2_specs = {'init': 'gaussian', 'mean': 0, 'std': 0.01, 'decay_mult': 250}
-     b_specs = {'init': 'constant', 'value': 0, 'lt_mult': 2}
-     net.add(
-         layer.Conv2D(
-             'conv1',
-             32,
-             5,
-             1,
-             W_specs=W0_specs.copy(),
-             b_specs=b_specs.copy(),
-             pad=2,
-             input_sample_shape=(
-                 3,
-                 32,
-                 32,
-             )))
++
+     b_specs = {'init': 'constant', 'value': 0, 'lr_mult': 2, 'decay_mult': 0}
+     net.add(layer.Conv2D('conv1', 32, 5, 1, W_specs=W0_specs.copy(), b_specs=b_specs.copy(), pad=2, input_sample_shape=(3,32,32,)))
      net.add(layer.MaxPooling2D('pool1', 3, 2, pad=1))
      net.add(layer.Activation('relu1'))
-     net.add(layer.LRN(name='lrn1'))
-     net.add(
-         layer.Conv2D(
-             'conv2',
-             32,
-             5,
-             1,
-             W_specs=W1_specs.copy(),
-             b_specs=b_specs.copy(),
-          pad=2))
+     net.add(layer.LRN(name='lrn1', size=3, alpha=5e-5))
+     net.add(layer.Conv2D('conv2', 32, 5, 1, W_specs=W1_specs.copy(), b_specs=b_specs.copy(), pad=2))
      net.add(layer.Activation('relu2'))
-     net.add(layer.MaxPooling2D('pool2', 3, 2,  pad=1))
-     net.add(layer.LRN('lrn2'))
-     net.add(
-         layer.Conv2D(
-             'conv3',
-             64,
-             5,
-             1,
-             W_specs=W1_specs.copy(),
-             b_specs=b_specs.copy(),
-          pad=2))
+     net.add(layer.AvgPooling2D('pool2', 3, 2,  pad=1))
+     net.add(layer.LRN('lrn2', size=3, alpha=5e-5))
+     net.add(layer.Conv2D('conv3', 64, 5, 1, W_specs=W1_specs.copy(), b_specs=b_specs.copy(), pad=2))
      net.add(layer.Activation('relu3'))
-     net.add(layer.MaxPooling2D('pool3', 3, 2, pad=1))
+     net.add(layer.AvgPooling2D('pool3', 3, 2, pad=1))
      net.add(layer.Flatten('flat'))
-     net.add(
-         layer.Dense(
-             'dense',
-             10,
-             W_specs=W2_specs.copy(),
-          b_specs=b_specs.copy()))
 -    net.add(layer.Dense('dense', 10, W_specs=W2_specs.copy(), b_specs=b_specs.copy()))
++    net.add(layer.Dense('dense', 10, W_specs=W2_specs.copy(), b_specs=b_specs.copy()))
      for (p, specs) in zip(net.param_values(), net.param_specs()):
          filler = specs.filler
          if filler.type == 'gaussian':

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/5d20d353/src/python/singa/layer.py
----------------------------------------------------------------------
diff --cc src/python/singa/layer.py
index b0fdb5e,1e9caeb..a9f3826
--- a/src/python/singa/layer.py
+++ b/src/python/singa/layer.py
@@@ -473,28 -388,29 +473,47 @@@ class LRN(Layer)
  
  
  class Dense(Layer):
 +    """Apply linear/affine transformation, also called inner-product or
 +    fully connected layer.
  
 +    Args:
 +        num_output (int): output feature length.
 +        use_bias (bool): add a bias vector or not to the transformed feature
 +        W_specs (dict): specs for the weight matrix
 +            'name' for parameter name
 +            'lr_mult' for learning rate multiplier
 +            'decay_mult' for weight decay multiplier
 +            'init' for init method, which could be 'gaussian', 'uniform',
 +            'xavier' and ''
 +            'std', 'mean', 'high', 'low' for corresponding init methods
 +            'clamp' for gradient constraint, value is scalar
 +            'regularizer' for regularization, currently support 'l2'
 +        b_specs (dict): specs for the bias vector, same fields as W_specs.
 +        W_transpose (bool): if true, output=x*W.T+b;
 +        input_sample_shape (tuple): input feature length
 +    """
      def __init__(self, name, num_output, use_bias=True,
                   W_specs=None, b_specs=None,
-                  W_transpose=True, input_sample_shape=None):
+                  W_transpose=False, input_sample_shape=None):
+         """Apply linear/affine transformation, also called inner-product or
+         fully connected layer.
+ 
+         Args:
+             num_output (int): output feature length.
+             use_bias (bool): add a bias vector or not to the transformed feature
+             W_specs (dict): specs for the weight matrix
+                 'name' for parameter name
+                 'lr_mult' for learning rate multiplier
+                 'decay_mult' for weight decay multiplier
+                 'init' for init method, which could be 'gaussian', 'uniform',
+                 'xavier' and ''
+                 'std', 'mean', 'high', 'low' for corresponding init methods
+                 'clamp' for gradient constraint, value is scalar
+                 'regularizer' for regularization, currently support 'l2'
+             b_specs (dict): specs for the bias vector, same fields as W_specs.
+             W_transpose (bool): if true, output=x*W.T+b;
+             input_sample_shape (tuple): input feature length
+         """
          super(Dense, self).__init__(name)
          conf = self.conf.dense_conf
          conf.num_output = num_output
@@@ -508,15 -424,12 +527,15 @@@
              W_specs['name'] = name + '_weight'
          if 'name' not in b_specs:
              b_specs['name'] = name + '_bias'
-         self.conf.param.extend([_construct_param_specs_from_dict(W_specs)])
-         self.param_specs.append(_construct_param_specs_from_dict(W_specs))
-         self.conf.param.extend([_construct_param_specs_from_dict(b_specs)])
-         self.param_specs.append(_construct_param_specs_from_dict(b_specs))
+         wspecs = _construct_param_specs_from_dict(W_specs)
+         bspecs = _construct_param_specs_from_dict(b_specs)
+         self.conf.param.extend([wspecs, bspecs])
+         self.param_specs.extend([wspecs, bspecs])
          # dense layer is transparent to engine.
 -        self.layer = _create_layer('singa', 'Dense')
 +        if engine == 'cudnn':
 +            self.layer = _create_layer('singacuda', 'Dense')
 +        else:
 +            self.layer = _create_layer(engine, 'Dense')
          if input_sample_shape is not None:
              self.setup(input_sample_shape)
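
For context, here is a minimal sketch of constructing the updated Dense layer;
the parameter values are illustrative, and it assumes the singa Python package
is importable with `layer.engine` behaving as in the commit above:

    from singa import layer

    # select the CPU implementation; 'cudnn' falls back to 'singacuda' for Dense
    layer.engine = 'singa'
    dense = layer.Dense('ip1', num_output=10,
                        W_specs={'init': 'xavier'},
                        b_specs={'init': 'constant', 'value': 0},
                        input_sample_shape=(100,))
    # one ParamSpec each for the weight and the bias, named after the layer
    print([spec.name for spec in dense.param_specs])  # ['ip1_weight', 'ip1_bias']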
  

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/5d20d353/src/python/singa/optimizer.py
----------------------------------------------------------------------
diff --cc src/python/singa/optimizer.py
index 14cf3c0,32f03d4..74e6ade
--- a/src/python/singa/optimizer.py
+++ b/src/python/singa/optimizer.py
@@@ -107,23 -100,24 +107,25 @@@ class Optimizer(object)
  
          Args:
              name (str): parameter name
 -            specs (ParamSpec): protobuf obj
 -        """
 -        assert type(specs) == model_pb2.ParamSpec, \
 +            specs (ParamSpec): protobuf obj, including regularizer and
 +                constraint, multipliers for learning rate and weight decay.
- 
 +        '''
 +        assert isinstance(specs, model_pb2.ParamSpec), \
              'specs should be model_pb2.ParamSpec instance'
          if specs.HasField('regularizer'):
              self.regularizers[name] = CppRegularizer(specs.regularizer)
+         elif specs.decay_mult != 1:
+             self.regularizers[name] = L2Regularizer(
+                 specs.decay_mult * self.regularizer.coefficient)
+ 
          if specs.HasField('constraint'):
              self.constraints[name] = CppConstraint(specs.constraint)
+ 
          if specs.lr_mult != 1:
              self.learning_rate_multiplier[name] = specs.lr_mult
-         if specs.decay_mult != 1:
-             self.decay_multiplier[name] = specs.decay_mult
  
 -    def apply_regularizer_constraint(self, value, grad, name=None, step=None):
 -        """Apply regularization and constraint if available.
 +    def apply_regularizer_constraint(self, epoch, value, grad, name=None):
 +        '''Apply regularization and constraint if available.
  
          If there are both global regularizer (constraint) and param specific
          regularizer (constraint), it would use the param specific one.
@@@ -189,32 -184,24 +191,27 @@@
  
  
  class SGD(Optimizer):
 +    '''The vanilla Stochastic Gradient Descent algorithm with momentum.
  
 -    def __init__(self, lr=None, momentum=None, decay=None):
 -        """The vallina Stochasitc Gradient Descent algorithm.
 +    See the base Optimizer for all arguments.
 +    '''
  
 -        See the base Optimizer for all arguments.
 -        """
 -        super(SGD, self).__init__(lr, momentum, decay)
 +    def __init__(self, lr=None, momentum=None, weight_decay=None, lr_gen=None,
 +                 regularizer=None, constraint=None):
-         super(
-             SGD,
-             self).__init__(
-             lr,
-             momentum,
-             weight_decay,
-             lr_gen,
-             regularizer,
-          constraint)
++        super(SGD, self).__init__(lr, momentum, weight_decay, lr_gen,
++                                  regularizer, constraint)
          conf = model_pb2.OptimizerConf()
 -        if momentum is not None:
 -            conf.momentum = momentum
 +        if self.momentum is not None:
 +            conf.momentum = self.momentum
 +        conf.type = 'sgd'
          self.opt = singa.CreateOptimizer('SGD')
          self.opt.Setup(conf.SerializeToString())
  
 -    def apply_with_lr(self, step, lr, grad, value, name):
 -        self.apply_regularizer_constraint(value, grad, name, step)
 +    def apply_with_lr(self, epoch, lr, grad, value, name):
 +        self.apply_regularizer_constraint(epoch, value, grad, name)
+         if name is not None and name in self.learning_rate_multiplier:
+             lr = lr * self.learning_rate_multiplier[name]
 -        self.opt.Apply(step, lr, name, grad.singa_tensor, value.singa_tensor)
 +        self.opt.Apply(epoch, lr, name, grad.singa_tensor, value.singa_tensor)
          return value
  
  
@@@ -260,9 -240,11 +257,11 @@@ class AdaGrad(Optimizer)
          self.opt = singa.CreateOptimizer('AdaGrad')
          self.opt.Setup(conf.SerializeToString())
  
 -    def apply_with_lr(self, step, lr, grad, value, name):
 -        grad = self.apply_regularizer_constraint(step, value, grad, name)
 +    def apply_with_lr(self, epoch, lr, grad, value, name):
 +        grad = self.apply_regularizer_constraint(epoch, value, grad, name)
+         if name is not None and name in self.learning_rate_multiplier:
+             lr = lr * self.learning_rate_multiplier[name]
 -        self.opt.Apply(step, lr,  name, grad.singa_tensor, value.singa_tensor)
 +        self.opt.Apply(epoch, lr,  name, grad.singa_tensor, value.singa_tensor)
          return value
  
  
@@@ -286,9 -265,11 +285,11 @@@ class RMSProp(Optimizer)
          self.opt = singa.CreateOptimizer('RMSProp')
          self.opt.Setup(conf.SerializeToString())
  
 -    def apply_with_lr(self, step, lr, grad, value, name):
 -        grad = self.apply_regularizer_constraint(step, value, grad, name)
 +    def apply_with_lr(self, epoch, lr, grad, value, name):
 +        grad = self.apply_regularizer_constraint(epoch, value, grad, name)
+         if name is not None and name in self.learning_rate_multiplier:
+             lr = lr * self.learning_rate_multiplier[name]
 -        self.opt.Apply(step, lr,  name, grad.singa_tensor, value.singa_tensor)
 +        self.opt.Apply(epoch, lr,  name, grad.singa_tensor, value.singa_tensor)
          return value
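
Taken together, the optimizer changes above rename `step` to `epoch` and apply
per-parameter learning-rate multipliers inside `apply_with_lr`. A minimal
sketch of registering a ParamSpec and updating one parameter follows; the
names and values are hypothetical, and it assumes the singa Python package and
its protobuf bindings are importable:

    from singa import optimizer, tensor
    from singa.proto import model_pb2

    sgd = optimizer.SGD(lr=0.01, momentum=0.9, weight_decay=0.0005)

    # per-parameter spec: the bias learns at twice the base learning rate
    spec = model_pb2.ParamSpec()
    spec.name = 'dense_bias'
    spec.lr_mult = 2.0
    sgd.register('dense_bias', spec)

    param = tensor.Tensor((10,))
    param.set_value(0.0)
    grad = tensor.Tensor((10,))
    grad.set_value(0.1)

    # epoch-based signature; lr is scaled by lr_mult for 'dense_bias'
    param = sgd.apply_with_lr(0, 0.01, grad, param, 'dense_bias')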
  
  

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/5d20d353/src/python/singa/tensor.py
----------------------------------------------------------------------