Posted to commits@singa.apache.org by wa...@apache.org on 2016/08/17 18:02:34 UTC
[13/51] [abbrv] incubator-singa git commit: SINGA-237 New documentation files for SINGA v1.0
SINGA-237 New documentation files for SINGA v1.0
Updated the comments of the Python files so that Sphinx autodoc can generate the Python API documentation.
Fixed a bug in the optimizer module that ignored the momentum value.
Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/8cd55300
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/8cd55300
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/8cd55300
Branch: refs/heads/master
Commit: 8cd55300ab30673414bbeeec7d68f1ddcd6393a2
Parents: 3299b0c
Author: Wei Wang <wa...@comp.nus.edu.sg>
Authored: Fri Aug 12 14:45:41 2016 +0800
Committer: wangwei <wa...@gmail.com>
Committed: Sun Aug 14 13:47:04 2016 +0800
----------------------------------------------------------------------
cmake/Dependencies.cmake | 5 +-
doc/conf.py | 14 +-
doc/docs/device.rst | 29 +-
doc/docs/index.rst | 6 +
doc/docs/initializer.rst | 12 +
doc/docs/layer.rst | 14 +
doc/docs/loss.rst | 7 +
doc/docs/metric.rst | 8 +
doc/docs/optimizer.rst | 11 +
doc/docs/tensor.md | 7 -
doc/docs/tensor.rst | 30 ++
doc/docs/utils.rst | 6 +
doc/index.rst | 28 +-
examples/index.rst | 6 +
src/python/singa/device.py | 31 ++
src/python/singa/initializer.py | 86 ++++-
src/python/singa/layer.py | 417 ++++++++++++++----------
src/python/singa/loss.py | 105 +++++-
src/python/singa/metric.py | 49 ++-
src/python/singa/optimizer.py | 284 ++++++++--------
src/python/singa/tensor.py | 608 ++++++++++++++++++++++++++++++-----
21 files changed, 1331 insertions(+), 432 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/8cd55300/cmake/Dependencies.cmake
----------------------------------------------------------------------
diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake
index ceef429..e533ca8 100644
--- a/cmake/Dependencies.cmake
+++ b/cmake/Dependencies.cmake
@@ -54,12 +54,13 @@ IF(USE_OPENCV)
MESSAGE(STATUS "Found OpenCV_${OpenCV_VERSION} at ${OpenCV_INCLUDE_DIRS}")
INCLUDE_DIRECTORIES(SYSTEM ${OpenCV_INCLUDE_DIRS})
LIST(APPEND SINGA_LINKER_LIBS ${OpenCV_LIBRARIES})
-ENDIF()
+ENDIF()
#LIST(APPEND SINGA_LINKER_LIBS "/home/wangwei/local/lib/libopenblas.so")
#MESSAGE(STATUS "link lib : " ${SINGA_LINKER_LIBS})
IF(USE_PYTHON)
- FIND_PACKAGE(PythonLibs REQUIRED)
+ FIND_PACKAGE(PythonLibs 2.7 REQUIRED)
+ FIND_PACKAGE(PythonInterp 2.7 REQUIRED)
FIND_PACKAGE(SWIG 3.0 REQUIRED)
ENDIF()
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/8cd55300/doc/conf.py
----------------------------------------------------------------------
diff --git a/doc/conf.py b/doc/conf.py
index 20ba51a..9f52d16 100755
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -19,7 +19,8 @@
import os
import sys
sys.path.insert(0, os.path.abspath('.'))
-sys.path.insert(1, '../src/python/singa/')
+sys.path.insert(1, os.path.abspath('../build/python'))
+#autodoc_mock_imports = ['singa.device', 'singa.tensor', 'singa.layer']
# -- General configuration ------------------------------------------------
from recommonmark.parser import CommonMarkParser
@@ -35,9 +36,8 @@ source_parsers = {
# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
-extensions = [
-'sphinx.ext.autodoc'
-]
+extensions = ['sphinx.ext.autodoc', 'sphinx.ext.napoleon']
+napoleon_google_docstring = True
# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']
@@ -50,7 +50,7 @@ source_suffix = ['.rst', '.md']
# The encoding of source files.
#
-# source_encoding = 'utf-8-sig'
+source_encoding = 'utf-8-sig'
# The master toctree document.
master_doc = 'index'
@@ -150,7 +150,7 @@ html_theme = 'sphinx_rtd_theme'
# The name of an image file (relative to this directory) to place at the top
# of the sidebar.
#
-html_logo = '/singa.png'
+html_logo = 'image/singa.png'
# The name of an image file (relative to this directory) to use as a favicon of
# the docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
@@ -203,7 +203,7 @@ html_static_path = ['_static']
# If true, links to the reST sources are added to the pages.
#
-html_show_sourcelink = False
+# html_show_sourcelink = True
# If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
#
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/8cd55300/doc/docs/device.rst
----------------------------------------------------------------------
diff --git a/doc/docs/device.rst b/doc/docs/device.rst
index aa5defb..53faf48 100644
--- a/doc/docs/device.rst
+++ b/doc/docs/device.rst
@@ -2,7 +2,10 @@ Device
=======
-The Device abstract represent a hardware device with memory and compuation units.
+The Device abstraction represents any hardware device with memory and computation units.
+All `Tensor operations <tensor.html>`_ are scheduled by the resident device for execution.
+Tensor memory is also managed by the device's memory manager. Therefore, optimizations
+of memory and execution are implemented in the Device class.
Specific devices
----------------
@@ -13,24 +16,14 @@ Currently, SINGA has three Device implementations,
3. OpenclGPU for a GPU card which runs OpenCL code
-Create devices
----------------
-
Python API
-~~~~~~~~~~
-
-.. autofunction:: device.create_cuda_gpus
-
-.. autofunction:: device.create_cuda_gpus_on
-
-.. autofunction:: device.create_cuda_gpu_on
-
-.. autofunction:: device.get_default_device
+----------
+.. automodule:: singa.device
+ :members: create_cuda_gpus, create_cuda_gpus_on, get_default_device
-The following code shows how to create devices,
-.. code:: python
+The following code provides examples of creating devices::
from singa import device
cuda = device.create_cuda_gpu_on(0) # use GPU card of ID 0
@@ -39,9 +32,5 @@ The following code shows how to create devices,
ary2 = device.create_cuda_gpus_on([0, 2]) # create 2 devices on IDs 0 and 2
-
CPP API
-~~~~~~~
-
-
-
+---------
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/8cd55300/doc/docs/index.rst
----------------------------------------------------------------------
diff --git a/doc/docs/index.rst b/doc/docs/index.rst
index 8a74976..2294054 100644
--- a/doc/docs/index.rst
+++ b/doc/docs/index.rst
@@ -6,4 +6,10 @@ English
installation
software_stack
device
+ tensor
+ layer
+ initializer
+ loss
+ metric
+ optimizer
examples
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/8cd55300/doc/docs/initializer.rst
----------------------------------------------------------------------
diff --git a/doc/docs/initializer.rst b/doc/docs/initializer.rst
new file mode 100644
index 0000000..a190702
--- /dev/null
+++ b/doc/docs/initializer.rst
@@ -0,0 +1,12 @@
+Initializer
+===========
+
+Python API
+----------
+
+.. automodule:: singa.initializer
+ :members:
+ :member-order: bysource
+
+CPP API
+--------
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/8cd55300/doc/docs/layer.rst
----------------------------------------------------------------------
diff --git a/doc/docs/layer.rst b/doc/docs/layer.rst
new file mode 100644
index 0000000..62ef3c3
--- /dev/null
+++ b/doc/docs/layer.rst
@@ -0,0 +1,14 @@
+Layer
+======
+
+Python API
+-----------
+.. automodule:: singa.layer
+ :members:
+ :member-order: bysource
+ :show-inheritance:
+ :undoc-members:
+
+
+CPP API
+--------
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/8cd55300/doc/docs/loss.rst
----------------------------------------------------------------------
diff --git a/doc/docs/loss.rst b/doc/docs/loss.rst
new file mode 100644
index 0000000..27872dd
--- /dev/null
+++ b/doc/docs/loss.rst
@@ -0,0 +1,7 @@
+Loss
+=========
+
+
+.. automodule:: singa.loss
+ :members:
+ :show-inheritance:
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/8cd55300/doc/docs/metric.rst
----------------------------------------------------------------------
diff --git a/doc/docs/metric.rst b/doc/docs/metric.rst
new file mode 100644
index 0000000..35fa24e
--- /dev/null
+++ b/doc/docs/metric.rst
@@ -0,0 +1,8 @@
+Metric
+=========
+
+
+.. automodule:: singa.metric
+ :members:
+ :show-inheritance:
+ :member-order: bysource
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/8cd55300/doc/docs/optimizer.rst
----------------------------------------------------------------------
diff --git a/doc/docs/optimizer.rst b/doc/docs/optimizer.rst
new file mode 100644
index 0000000..486c01e
--- /dev/null
+++ b/doc/docs/optimizer.rst
@@ -0,0 +1,11 @@
+Optimizer
+=========
+
+
+.. automodule:: singa.optimizer
+ :members:
+ :member-order: bysource
+ :show-inheritance:
+ :undoc-members:
+
+
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/8cd55300/doc/docs/tensor.md
----------------------------------------------------------------------
diff --git a/doc/docs/tensor.md b/doc/docs/tensor.md
deleted file mode 100644
index eaf8362..0000000
--- a/doc/docs/tensor.md
+++ /dev/null
@@ -1,7 +0,0 @@
-# Tensor
-
-
-##
-
-
-##
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/8cd55300/doc/docs/tensor.rst
----------------------------------------------------------------------
diff --git a/doc/docs/tensor.rst b/doc/docs/tensor.rst
new file mode 100644
index 0000000..ff6142e
--- /dev/null
+++ b/doc/docs/tensor.rst
@@ -0,0 +1,30 @@
+Tensor
+========
+
+Each Tensor instance is a multi-dimensional array allocated on a specific
+Device instance. Tensor instances store variables and provide
+linear algebra operations over different types of hardware devices, without
+requiring users to be aware of the underlying device-specific implementation.
+Note that, except for copy functions, users need to make sure the tensor
+operands are allocated on the same device.
+
+
+Tensor implementation
+---------------------
+
+SINGA has three different sets of implementations of Tensor functions, one for each
+type of Device.
+
+* 'tensor_math_cpp.h' implements operations using Cpp (with CBLAS) for CppCPU devices.
+* 'tensor_math_cuda.h' implements operations using Cuda (with cuBLAS) for CudaGPU devices.
+* 'tensor_math_opencl.h' implements operations using OpenCL for OpenclGPU devices.
+
+Python API
+----------
+
+
+.. automodule:: singa.tensor
+ :members:
+
+
+CPP API
+---------
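As a concrete illustration of the same-device rule above, a minimal sketch
(assuming the Tensor constructor, uniform, set_value and the arithmetic
operators shown elsewhere in this diff)::

    from singa import device
    from singa import tensor

    dev = device.create_cuda_gpu()    # CudaGPU with card ID 0
    a = tensor.Tensor((2, 3), dev)    # allocated on dev
    b = tensor.Tensor((2, 3), dev)    # must reside on the same device as a
    a.uniform(-1, 1)
    b.set_value(0.5)
    c = a + b                         # scheduled and executed by dev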
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/8cd55300/doc/docs/utils.rst
----------------------------------------------------------------------
diff --git a/doc/docs/utils.rst b/doc/docs/utils.rst
new file mode 100644
index 0000000..5306719
--- /dev/null
+++ b/doc/docs/utils.rst
@@ -0,0 +1,6 @@
+Misc.
+=========
+
+
+.. automodule:: singa.utils
+ :members:
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/8cd55300/doc/index.rst
----------------------------------------------------------------------
diff --git a/doc/index.rst b/doc/index.rst
index ec727b1..50c65d7 100755
--- a/doc/index.rst
+++ b/doc/index.rst
@@ -7,9 +7,9 @@ Welcome to Apache Singa
=======================
Recent News
-===========
+-----------
-* The **third release** is now available, 20 April, 2016. `Download SINGA v0.3.0 <downloads.html>`_
+* The **third release** is now available, 20 April, 2016. `Download SINGA v0.3.0 <downloads.html>`_
* The **second release** is now available, 14 Jan, 2016. `Download SINGA v0.2.0 <downloads.html>`_.
@@ -34,7 +34,7 @@ Recent News
* SINGA has been accepted by `Apache Incubator <http://incubator.apache.org/>`_, 17 March, 2015.
Getting Started
-===============
+---------------
* The `Introduction <docs/overview.html>`_ page gives an overview of SINGA.
* The `Installation <docs/installation.html>`_ guide describes details on downloading and installing SINGA.
@@ -42,7 +42,7 @@ Getting Started
* Please follow the `Quick Start <docs/quick-start.html>`_ guide to run simple applications on SINGA.
Documentation
-=============
+-------------
* Documentations are listed `here <docs.html>`_.
@@ -51,8 +51,8 @@ Documentation
* Research publication list is available `here <http://www.comp.nus.edu.sg/~dbsystem/singa/research/publication/>`_.
How to contribute
-=================
-
+----------------------
+
* Please subscribe to our development mailing list dev-subscribe@singa.incubator.apache.org.
* If you find any issues using SINGA, please report it to the `Issue Tracker <https://issues.apache.org/jira/browse/singa>`_.
@@ -62,17 +62,17 @@ How to contribute
More details on contributing to SINGA is described `here <develop/how-contribute.html>`_ .
Citing SINGA
-============
+------------
Please cite the following two papers if you use SINGA in your research:
* B. C. Ooi, K.-L. Tan, S. Wang, W. Wang, Q. Cai, G. Chen, J. Gao, Z. Luo, A. K. H. Tung, Y. Wang, Z. Xie, M. Zhang, and K. Zheng. `SINGA: A distributed deep learning platform <http://www.comp.nus.edu.sg/~ooibc/singaopen-mm15.pdf>`_. ACM Multimedia (Open Source Software Competition) 2015 (`BibTex <http://www.comp.nus.edu.sg/~dbsystem/singa//assets/file/bib-oss.txt>`_).
-* W. Wang, G. Chen, T. T. A. Dinh, B. C. Ooi, K.-L.Tan, J. Gao, and S. Wang. `SINGA: putting deep learning in the hands of multimedia users <http://www.comp.nus.edu.sg/~ooibc/singa-mm15.pdf>`_. ACM Multimedia 2015 (`BibTex <http://www.comp.nus.edu.sg/~dbsystem/singa//assets/file/bib-singa.txt>`_, `Slides <files/mm2015.ppt>`_).
+* W. Wang, G. Chen, T. T. A. Dinh, B. C. Ooi, K.-L.Tan, J. Gao, and S. Wang. `SINGA: putting deep learning in the hands of multimedia users <http://www.comp.nus.edu.sg/~ooibc/singa-mm15.pdf>`_. ACM Multimedia 2015 (`BibTex <http://www.comp.nus.edu.sg/~dbsystem/singa//assets/file/bib-singa.txt>`_, `Slides <files/mm2015.ppt>`_).
.. toctree::
:hidden:
-
+
downloads
docs
@@ -85,25 +85,25 @@ Please cite the following two papers if you use SINGA in your research:
develop/how-contribute
develop/contribute-code
develop/contribute-docs
-
+
.. toctree::
:hidden:
:maxdepth: 2
:caption: Community
-
+
community/source-repository
community/mail-lists
community/issue-tracking
community/team-list
-
+
License
-=======
+----------
SINGA is released under `Apache License Version 2.0 <http://www.apache.org/licenses/LICENSE-2.0>`_.
Disclaimers
-===========
+-----------
Apache SINGA is an effort undergoing incubation at The Apache Software Foundation (ASF), sponsored by the Apache Incubator. Incubation is required of all newly accepted projects until a further review indicates that the infrastructure, communications, and decision making process have stabilized in a manner consistent with other successful ASF projects. While incubation status is not necessarily a reflection of the completeness or stability of the code, it does indicate that the project has yet to be fully endorsed by the ASF.
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/8cd55300/examples/index.rst
----------------------------------------------------------------------
diff --git a/examples/index.rst b/examples/index.rst
new file mode 100644
index 0000000..d6faf5d
--- /dev/null
+++ b/examples/index.rst
@@ -0,0 +1,6 @@
+.. toctree::
+
+ char-rnn/README
+ imagenet/README
+
+
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/8cd55300/src/python/singa/device.py
----------------------------------------------------------------------
diff --git a/src/python/singa/device.py b/src/python/singa/device.py
index aff3587..eff6783 100644
--- a/src/python/singa/device.py
+++ b/src/python/singa/device.py
@@ -68,21 +68,52 @@ def device_query(id, verbose=False):
def create_cuda_gpus(num):
+ '''Create a list of CudaGPU devices.
+
+ Args:
+        num (int): number of devices to create.
+ Returns:
+ a list of swig converted CudaGPU devices.
+ '''
+
return singa.Platform.CreateCudaGPUs(num)
def create_cuda_gpu():
+ '''Create a single CudaGPU device.
+
+ Returns:
+ a swig converted CudaGPU device.
+ '''
+
return singa.Platform.CreateCudaGPUs(1)[0]
def create_cuda_gpus_on(device_ids):
+ '''Create a list of CudaGPU devices.
+
+ Args:
+ device_ids (list): a list of GPU card IDs.
+
+ Returns:
+ a list of swig converted CudaGPU devices.
+ '''
return singa.Platform.CreateCudaGPUsOn(device_ids)
def create_cuda_gpu_on(device_id):
+ '''Create a CudaGPU device on the given device ID.
+
+ Args:
+ device_id (int): GPU card ID.
+
+ Returns:
+ a swig converted CudaGPU device.
+ '''
devices = create_cuda_gpus_on([device_id])
return devices[0]
def get_default_device():
+ '''Get the default host device which is a CppCPU device'''
return singa.Platform.GetDefaultDevice()
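Putting the functions above together, a short usage sketch (call signatures
taken from the docstrings in this diff)::

    from singa import device

    host = device.get_default_device()          # default CppCPU host device
    gpu0 = device.create_cuda_gpu()             # a single CudaGPU, card ID 0
    gpus = device.create_cuda_gpus_on([0, 2])   # CudaGPUs on cards 0 and 2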
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/8cd55300/src/python/singa/initializer.py
----------------------------------------------------------------------
diff --git a/src/python/singa/initializer.py b/src/python/singa/initializer.py
index 15caed3..277fd2f 100644
--- a/src/python/singa/initializer.py
+++ b/src/python/singa/initializer.py
@@ -15,29 +15,113 @@
# specific language governing permissions and limitations
# under the License.
# =============================================================================
-"""Popular initialization methods for parameter values (Tensor ojects)"""
+'''Popular initialization methods for parameter values (Tensor objects).
+
+Example usages::
+
+ from singa import tensor
+ from singa import initializer
+
+ x = tensor.Tensor((3, 5))
+ initializer.xavier(x)
+'''
import math
+'''
+TODO(wangwei) update the uniform and gaussian initializers
+
+def uniform(t, fan_in=0, fan_out=0):
+ typically, for conv layer weight: fan_in = nb_filter * kh * kw,
+ fan_out = nb_channel * kh * kw
+ for dense layer weight, fan_in = input_feature_length,
+ fan_out = output_feature_length
+ # Ref: [Bengio and Glorot 2010]: Understanding the difficulty of
+    training deep feedforward neural networks.
+
+ assert fan_in >0 or fan_out > 0, \
+ 'fan_in and fan_out cannot be 0 at the same time'
+ avg = 1
+ if fan_in * fan_out == 0:
+ avg = 2
+ x = math.sqrt(3.0f * avg / (fan_in + fan_out))
+ t.uniform(-x, x)
+
+
+def gaussian(t, fan_in=0, fan_out=0):
+ typically, for conv layer weight: fan_in = nb_filter * kh * kw,
+ fan_out = nb_channel * kh * kw
+ for dense layer weight, fan_in = input_feature_length,
+ fan_out = output_feature_length
+
+ Ref Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun: Delving Deep into
+ Rectifiers: Surpassing Human-Level Performance on ImageNet Classification
+
+ assert fan_in >0 or fan_out > 0, \
+ 'fan_in and fan_out cannot be 0 at the same time'
+ avg = 1
+ if fan_in * fan_out == 0:
+ avg = 2
+ std = math.sqrt(2.0f * avg / (fan_in + fan_out))
+ t.gaussian(0, std)
+'''
+
+
def uniform(t, low=0, high=1):
+    '''Initialize the parameter values following a uniform distribution.
+
+    Args:
+        t (Tensor): the parameter tensor
+        low (float): lower bound
+        high (float): upper bound
+ '''
t.uniform(low, high)
def gaussian(t, mean=0, std=0.01):
+    '''Initialize the parameter values following a Gaussian distribution.
+
+    Args:
+        t (Tensor): the parameter tensor
+        mean (float): mean of the distribution
+        std (float): standard deviation
+ '''
t.gaussian(mean, std)
def xavier(t):
+    '''Initialize the matrix parameter following a uniform distribution over
+    [-sqrt(6/(fan_in + fan_out)), sqrt(6/(fan_in + fan_out))].
+
+    Args:
+        t (Tensor): the parameter tensor
+ '''
+
scale = math.sqrt(6.0 / (t.shape[0] + t.shape[1]))
t.uniform(-scale, scale)
def glorot(t):
+    '''Initialize the matrix parameter following a Gaussian distribution with
+    mean = 0 and std = sqrt(2.0 / (nb_row + nb_col)).
+
+    Args:
+        t (Tensor): the parameter tensor
+ '''
scale = math.sqrt(2.0 / (t.shape[0] + t.shape[1]))
t.gaussian(0, 1)
t *= scale
def msra(t):
+    '''Initialize the matrix parameter following a Gaussian distribution with
+    mean = 0, std = math.sqrt(2.0 / nb_row).
+
+    Ref [He, Zhang, Ren and Sun 2015]: Specifically accounts for ReLU
+    nonlinearities.
+
+    Args:
+        t (Tensor): the parameter tensor
+ '''
t.gaussian(0, math.sqrt(2.0 / t.shape[0]))
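For instance, xavier(t) above is equivalent to computing the bound by hand and
calling the tensor's own uniform method (a sketch based on the code in this
diff)::

    import math
    from singa import tensor
    from singa import initializer

    w = tensor.Tensor((3, 5))
    initializer.xavier(w)  # uniform over [-sqrt(6/(3+5)), sqrt(6/(3+5))]

    # the equivalent manual computation
    scale = math.sqrt(6.0 / (w.shape[0] + w.shape[1]))
    w.uniform(-scale, scale)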
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/8cd55300/src/python/singa/layer.py
----------------------------------------------------------------------
diff --git a/src/python/singa/layer.py b/src/python/singa/layer.py
index c8c8c05..0759716 100644
--- a/src/python/singa/layer.py
+++ b/src/python/singa/layer.py
@@ -14,7 +14,30 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# =============================================================================
-""" Python layers which wraps the C++ layers by providing easy to construct APIs
+""" Python layers wrap the C++ layers to provide simpler construction APIs.
+
+Example usages::
+
+ from singa import layer
+ from singa import tensor
+ from singa import device
+    from singa.proto.model_pb2 import kTrain
+
+ layer.engine = 'cudnn' # to use cudnn layers
+ dev = device.create_cuda_gpu()
+
+ # create a convolution layer
+ conv = layer.Conv2D('conv', 32, 3, 1, pad=1, input_sample_shape=(3, 32, 32))
+ conv.to_device(dev) # move the layer data onto a CudaGPU device
+    x = tensor.Tensor((2, 3, 32, 32), dev)  # a batch of 2 samples
+    x.uniform(-1, 1)
+    y = conv.forward(kTrain, x)
+
+ dy = tensor.Tensor()
+ dy.reset_like(y)
+ dy.set_value(0.1)
+ # dp is a list of tensors for parameter gradients
+ dx, dp = conv.backward(kTrain, dy)
"""
from sets import Set
@@ -22,23 +45,37 @@ from . import singa_wrap
from .proto import model_pb2
import tensor
-# engine could be 'cudnn', 'singa', which is used to create layers.
-# e.g., CudnnConvolution layer is identified by 'cudnn_convolution'
-# Convolution layer is identified by 'singa_convolution'
-# engine is case insensitive
+
engine = 'cudnn'
+'''engine is the prefix of the layer identifier.
+
+The value could be one of [**'cudnn', 'singacpp', 'singacuda', 'singacl'**], for
+layers implemented using the cudnn library, Cpp, Cuda and OpenCL respectively.
+For example, the CudnnConvolution layer is identified by 'cudnn_convolution',
+and 'singacpp_convolution' identifies the Cpp Convolution layer.
+Some layers are implemented using only Tensor functions and are therefore
+transparent to the underlying devices. These layers have multiple
+identifiers, e.g., singacpp_dropout, singacuda_dropout and
+singacl_dropout are all for the Dropout layer.
+
+engine is case insensitive. Each Python layer creates the specific layer
+implementation indicated by the engine attribute.
+'''
class Layer(object):
- """Base Python layer class.
+ '''Base Python layer class.
- Usages:
- 1. construct layer without input_sample_shapes, goto 2;
- construct layer with input_sample_shapes, goto 3;
+ Typically, the life cycle of a layer instance includes:
+ 1. construct layer without input_sample_shapes, goto 2;
+ construct layer with input_sample_shapes, goto 3;
2. call setup to create the parameters and setup other meta fields
3. call forward or access layer members
4. call backward and get parameters for update
- """
+
+ Args:
+ name (str): layer name
+ '''
def __init__(self, name, **kwargs):
self.layer = None # layer converted by swig
@@ -49,20 +86,24 @@ class Layer(object):
self.has_setup = False
def param_names(self):
+ '''
+ Returns:
+ a list of strings, one for the name of one parameter Tensor
+ '''
names = []
for x in self.param_specs:
names.append(x['name'])
return names
def setup(self, in_shapes):
- """Call the C++ setup function to create params and set some meta data.
+ '''Call the C++ setup function to create params and set some meta data.
Args:
in_shapes: if the layer accepts a single input Tensor, in_shapes is
a single tuple specifying the input Tensor shape; if the layer
accepts multiple input Tensor (e.g., the concatenation layer),
- in_shapes is a tuple of tuples, each for one input Tensor shape
- """
+ in_shapes is a tuple of tuples, each for one input Tensor
+ '''
if self.has_setup:
return
self.layer.Setup(list(in_shapes),
@@ -70,54 +111,92 @@ class Layer(object):
self.has_setup = True
def get_output_sample_shape(self):
+ '''Called after setup to get the shape of the output sample(s).
+
+ Returns:
+ a tuple for a single output Tensor or a list of tuples if this layer
+ has multiple outputs
+ '''
assert self.has_setup, \
'Must call setup() before get_output_sample_shape()'
return self.layer.GetOutputSampleShape()
def param_values(self):
- """Return param value tensors.
+ '''Return param value tensors.
- Do not store these tensors as layer members because cpp Tensor could be
- moved onto diff devices due to the change of layer device. However, the
- py tensors would not update its internal cpp tensor automatically.
- """
+        Parameter tensors are not stored as layer members because the cpp Tensor
+        could be moved onto different devices when the layer device changes,
+        which would result in inconsistency.
+
+        Returns:
+            a list of tensors, one for each parameter
+ '''
return tensor.from_raw_tensors(self.layer.param_values())
- def forward(self, flag, input):
+ def forward(self, flag, x):
'''Forward propagate through this layer.
Args:
- flag, kTrain or kEval
- input, an input tensor
+ flag (int): kTrain or kEval
+ x (Tensor or list<Tensor>): an input tensor if the layer is
+ connected from a single layer; a list of tensors if the layer
+ is connected from multiple layers.
Return:
- a tensor for the transformed feature
+ a tensor if the layer is connected to a single layer; a list of
+ tensors if the layer is connected to multiple layers;
'''
assert self.has_setup, 'Must call setup() before forward()'
- assert isinstance(input, tensor.Tensor), 'input must be py Tensor'
- y = self.layer.Forward(flag, input.singa_tensor)
- return tensor.from_raw_tensor(y)
+        if type(x) == list:
+            xs = []
+            for t in x:
+                xs.append(t.singa_tensor)
+        else:
+            assert isinstance(x, tensor.Tensor), \
+                'input must be a Tensor or a list of Tensor'
+            xs = x.singa_tensor
+ y = self.layer.Forward(flag, xs)
+ if type(y) == list:
+ return tensor.from_raw_tensors(y)
+ else:
+ return tensor.from_raw_tensor(y)
- def backward(self, flag, grad):
- '''Backward propagate through this layer.
+ def backward(self, flag, dy):
+ '''Backward propagate gradients through this layer.
Args:
- flag, for future use.
- grad, gradient of the returned values of the forward function.
-
+ flag (int): for future use.
+            dy (Tensor or list<Tensor>): the gradient tensor(s) of y w.r.t. the
+                objective loss
Return:
- <dx, <dp1, dp2..>>, dx is the gradient of the input of the
- forward function, dpi is the gradient of the i-th parameter
+            <dx, <dp1, dp2..>>, dx is a (list of) tensor(s) for the gradient
+            of x; dpi is the gradient of the i-th parameter
'''
- assert isinstance(grad, tensor.Tensor), 'grad must be py Tensor'
- ret = self.layer.Backward(flag, grad.singa_tensor)
- return tensor.from_raw_tensor(ret[0]), tensor.from_raw_tensors(ret[1])
+ if type(dy) == list:
+ dys = []
+ for t in dy:
+ dys.append(t.singa_tensor)
+ else:
+ assert isinstance(dy, tensor.Tensor), \
+ 'the input must be a Tensor or a set of Tensor'
+ dys = dy.singa_tensor
+ ret = self.layer.Backward(flag, dys)
+ if type(ret[0]) == list:
+ dxs = tensor.from_raw_tensors(ret[0])
+ else:
+ dxs = tensor.from_raw_tensor(ret[0])
+ return dxs, tensor.from_raw_tensors(ret[1])
def to_device(self, device):
+ '''Move layer state tensors onto the given device.
+
+ Args:
+ device: swig converted device, created using singa.device
+ '''
self.layer.ToDevice(device)
def as_type(self, dtype):
- self.layer.AsType(dtype)
+ pass
def __copy__(self):
pass
@@ -127,43 +206,42 @@ class Layer(object):
class Conv2D(Layer):
+ """Construct a layer for 2D convolution.
+
+    Args:
+        nb_kernels (int): num of kernels (i.e., output channels) of this layer
+        kernel: an integer or a pair of integers for kernel height and width
+        stride: an integer or a pair of integers for stride height and width
+        border_mode (string): padding mode, case insensitive,
+            'valid' -> padding is 0 for height and width
+            'same' -> padding is half of the kernel (floor); the kernel size
+            must be an odd number.
+ cudnn_prefer (string): the preferred algorithm for cudnn convolution
+ which could be 'fatest', 'autotune', 'limited_workspace' and
+ 'no_workspace'
+ data_format (string): either 'NCHW' or 'NHWC'
+ use_bias (bool): True or False
+ pad: an integer or a pair of integers for padding height and width
+ W_specs (dict): used to specify the weight matrix specs, fields
+ include,
+ 'name' for parameter name
+ 'lr_mult' for learning rate multiplier
+ 'decay_mult' for weight decay multiplier
+ 'init' for init method, which could be 'gaussian', 'uniform',
+ 'xavier' and ''
+ 'std', 'mean', 'high', 'low' for corresponding init methods
+ TODO(wangwei) 'clamp' for gradient constraint, value is scalar
+ 'regularizer' for regularization, currently support 'l2'
+ b_specs (dict): hyper-parameters for bias vector, similar as W_specs
+ name (string): layer name.
+ input_sample_shape: 3d tuple for the shape of the input Tensor
+ without the batchsize, e.g., (channel, height, width) or
+ (height, width, channel)
+ """
def __init__(self, name, nb_kernels, kernel=3, stride=1, border_mode='same',
cudnn_prefer='fatest', data_format='NCHW',
use_bias=True, W_specs=None, b_specs=None,
pad=None, input_sample_shape=None):
- """Construct a layer for 2D convolution.
-
- Args:
- nb_kernels (int): num of the channels (kernels) of the input Tensor
- kernel: an integer or a pair of integers for kernel height and width
- stride: an integer or a pair of integers for stride height and width
- border_mode (string): padding mode, case in-sensitive,
- 'valid' -> padding is 0 for height and width
- 'same' -> padding is half of the kernel (floor),
- the kernel must be odd number.
- cudnn_prefer (string): the preferred algorithm for cudnn convolution
- which could be 'fatest', 'autotune', 'limited_workspace' and
- 'no_workspace'
- data_format (string): either 'NCHW' or 'NHWC'
- use_bias (bool): True or False
- pad: an integer or a pair of integers for padding height and width
- W_specs (dict): used to specify the weight matrix specs, fields
- include,
- 'name' for parameter name
- 'lr_mult' for learning rate multiplier
- 'decay_mult' for weight decay multiplier
- 'init' for init method, which could be 'gaussian', 'uniform',
- 'xavier' and ''
- 'std', 'mean', 'high', 'low' for corresponding init methods
- TODO(wangwei) 'clamp' for gradient constraint, value is scalar
- 'regularizer' for regularization, currently support 'l2'
- b_specs (dict): hyper-parameters for bias vector, similar as W_specs
- name (string): layer name.
- input_sample_shape: 3d tuple for the shape of the input Tensor
- without the batchsize, e.g., (channel, height, width) or
- (height, width, channel)
- """
super(Conv2D, self).__init__(name)
assert data_format == 'NCHW', 'Not supported data format: %s ' \
'only "NCHW" is enabled currently' % (data_format)
@@ -195,19 +273,19 @@ class Conv2D(Layer):
class Conv1D(Conv2D):
+ """Construct a layer for 1D convolution.
+
+    Most of the args are the same as those for Conv2D, except that kernel,
+    stride and pad are scalars instead of tuples, and
+    input_sample_shape is a tuple with a single value for the input feature
+    length.
+ """
def __init__(self, name, nb_kernels, kernel=3, stride=1,
border_mode='same', cudnn_prefer='fatest',
use_bias=True, W_specs={'init': 'Xavier'},
b_specs={'init': 'Constant', 'value': 0}, pad=None,
input_sample_shape=None):
- """Construct a layer for 1D convolution.
-
- Most of the args are the same as those for Conv2D except the kernel,
- stride, pad, which is a scalar instead of a tuple.
- input_sample_shape is a tuple with a single value for the input feature
- length
- """
pad = None
if pad is not None:
pad = (0, pad)
@@ -227,7 +305,15 @@ class Conv1D(Conv2D):
class Pooling2D(Layer):
+ '''2D pooling layer providing max/avg pooling.
+
+ All args are the same as those for Conv2D, except the following one
+ Args:
+ mode: pooling type, model_pb2.PoolingConf.MAX or
+ model_pb2.PoolingConf.AVE
+
+ '''
def __init__(self, name, mode, kernel=3, stride=2, border_mode='same',
pad=None, data_format='NCHW', input_sample_shape=None):
super(Pooling2D, self).__init__(name)
@@ -312,28 +398,26 @@ class AvgPooling1D(AvgPooling2D):
class BatchNormalization(Layer):
- # TODO(wangwei) add mode and epsilon arguments
+ """Batch-normalization.
+ Args:
+ momentum (float): for running average mean and variance.
+ beta_specs (dict): dictionary includes the fields for the beta
+ param:
+ 'name' for parameter name
+ 'lr_mult' for learning rate multiplier
+ 'decay_mult' for weight decay multiplier
+ 'init' for init method, which could be 'gaussian', 'uniform',
+ 'xavier' and ''
+ 'std', 'mean', 'high', 'low' for corresponding init methods
+ 'clamp' for gradient constraint, value is scalar
+ 'regularizer' for regularization, currently support 'l2'
+ gamma_specs (dict): similar to beta_specs, but for the gamma param.
+ name (string): layer name
+ input_sample_shape (tuple): with at least one integer
+ """
def __init__(self, name, momentum=0.9,
beta_specs=None, gamma_specs=None, input_sample_shape=None):
- """Batch-normalization.
-
- Args:
- momentum (float): for running average mean and variance.
- beta_specs (dict): dictionary includes the fields for the beta
- param:
- 'name' for parameter name
- 'lr_mult' for learning rate multiplier
- 'decay_mult' for weight decay multiplier
- 'init' for init method, which could be 'gaussian', 'uniform',
- 'xavier' and ''
- 'std', 'mean', 'high', 'low' for corresponding init methods
- 'clamp' for gradient constraint, value is scalar
- 'regularizer' for regularization, currently support 'l2'
- gamma_specs (dict): similar to beta_specs, but for the gamma param.
- name (string): layer name
- input_sample_shape (tuple): with at least one integer
- """
super(BatchNormalization, self).__init__(name)
conf = self.conf.batchnorm_conf
conf.factor = momentum
@@ -362,16 +446,17 @@ class BatchNormalization(Layer):
class LRN(Layer):
+ """Local response normalization.
+
+ Args:
+        size (int): # of channels involved in the
+            normalization.
+ mode (string): 'cross_channel'
+ input_sample_shape (tuple): 3d tuple, (channel, height, width)
+ """
+
def __init__(self, name, size=5, alpha=1, beta=0.75, mode='cross_channel',
k=1, input_sample_shape=None):
- """Local response normalization.
-
- Args:
- size (int): # of channels to be crossed
- normalization.
- mode (string): 'cross_channel'
- input_sample_shape (tuple): 3d tuple, (channel, height, width)
- """
super(LRN, self).__init__(name)
conf = self.conf.lrn_conf
conf.local_size = size
@@ -388,29 +473,28 @@ class LRN(Layer):
class Dense(Layer):
+ """Apply linear/affine transformation, also called inner-product or
+    fully connected layer.
+
+    Args:
+ num_output (int): output feature length.
+ use_bias (bool): add a bias vector or not to the transformed feature
+ W_specs (dict): specs for the weight matrix
+ 'name' for parameter name
+ 'lr_mult' for learning rate multiplier
+ 'decay_mult' for weight decay multiplier
+ 'init' for init method, which could be 'gaussian', 'uniform',
+ 'xavier' and ''
+ 'std', 'mean', 'high', 'low' for corresponding init methods
+ 'clamp' for gradient constraint, value is scalar
+ 'regularizer' for regularization, currently support 'l2'
+ b_specs (dict): specs for the bias vector, same fields as W_specs.
+ W_transpose (bool): if true, output=x*W.T+b;
+ input_sample_shape (tuple): input feature length
+ """
def __init__(self, name, num_output, use_bias=True,
W_specs=None, b_specs=None,
W_transpose=True, input_sample_shape=None):
- """Apply linear/affine transformation, also called inner-product or
- fully connected layer.
-
- Args:
- num_output (int): output feature length.
- use_bias (bool): add a bias vector or not to the transformed feature
- W_specs (dict): specs for the weight matrix
- 'name' for parameter name
- 'lr_mult' for learning rate multiplier
- 'decay_mult' for weight decay multiplier
- 'init' for init method, which could be 'gaussian', 'uniform',
- 'xavier' and ''
- 'std', 'mean', 'high', 'low' for corresponding init methods
- 'clamp' for gradient constraint, value is scalar
- 'regularizer' for regularization, currently support 'l2'
- b_specs (dict): specs for the bias vector, same fields as W_specs.
- W_transpose (bool): if true, output=x*W.T+b;
- input_sample_shape (tuple): input feature length
- """
super(Dense, self).__init__(name)
conf = self.conf.dense_conf
conf.num_output = num_output
@@ -435,14 +519,14 @@ class Dense(Layer):
class Dropout(Layer):
+ """Droput layer.
- def __init__(self, name, p=0.5, input_sample_shape=None):
- """Droput layer.
+ Args:
+ p (float): probability for dropping out the element, i.e., set to 0
+ name (string): layer name
+ """
- Args:
- p (float): probability for dropping out the element, i.e., set to 0
- name (string): layer name
- """
+ def __init__(self, name, p=0.5, input_sample_shape=None):
super(Dropout, self).__init__(name)
conf = self.conf.dropout_conf
conf.dropout_ratio = p
@@ -456,15 +540,14 @@ class Dropout(Layer):
class Activation(Layer):
+ """Activation layers.
+ Args:
+ name (string): layer name
+ mode (string): 'relu', 'sigmoid', or 'tanh'
+ input_sample_shape (tuple): shape of a single sample
+ """
def __init__(self, name, mode='relu', input_sample_shape=None):
- """Activation layers.
-
- Args:
- name (string): layer name
- mode (string): 'relu', 'sigmoid', or 'tanh'
- input_sample_shape (tuple): shape of a single sample
- """
super(Activation, self).__init__(name)
self.conf.type = (engine + '_' + mode).lower()
_check_engine(engine, ['cudnn', 'singa'])
@@ -474,15 +557,14 @@ class Activation(Layer):
class Softmax(Layer):
+ """Apply softmax.
+ Args:
+ axis (int): reshape the input as a matrix with the dimension
+ [0,axis) as the row, the [axis, -1) as the column.
+ input_sample_shape (tuple): shape of a single sample
+ """
def __init__(self, name, axis=1, input_sample_shape=None):
- """Apply softmax.
-
- Args:
- axis (int): reshape the input as a matrix with the dimension
- [0,axis) as the row, the [axis, -1) as the column.
- input_sample_shape (tuple): shape of a single sample
- """
super(Softmax, self).__init__(name)
# conf = self.conf.softmax_conf
# conf.axis = axis
@@ -493,14 +575,14 @@ class Softmax(Layer):
class Flatten(Layer):
+ """Reshape the input tensor into a matrix.
+ Args:
+ axis (int): reshape the input as a matrix with the dimension
+ [0,axis) as the row, the [axis, -1) as the column.
+ input_sample_shape (tuple): shape for a single sample
+ """
def __init__(self, name, axis=1, input_sample_shape=None):
- """Reshape the input tensor into a matrix.
- Args:
- axis (int): reshape the input as a matrix with the dimension
- [0,axis) as the row, the [axis, -1) as the column.
- input_sample_shape (tuple): shape for a single sample
- """
super(Flatten, self).__init__(name)
conf = self.conf.flatten_conf
conf.axis = axis
@@ -511,26 +593,27 @@ class Flatten(Layer):
class RNN(Layer):
+ '''Recurrent layer with 4 types of units, namely lstm, gru, tanh and relu.
+
+ Args:
+ hidden_size: hidden feature size, the same for all stacks of layers.
+ rnn_mode: decides the rnn unit, which could be one of 'lstm', 'gru',
+ 'tanh' and 'relu', refer to cudnn manual for each mode.
+        num_stacks: num of stacks of rnn layers. It is different from the
+            unrolling sequence length.
+        input_mode: 'linear' converts the input feature x by a linear
+            transformation to get a feature vector of size hidden_size;
+            'skip' does nothing but requires the input feature size to equal
+            hidden_size
+        bidirectional: True for bidirectional RNN
+ param_specs: config for initializing the RNN parameters.
+ input_sample_shape: includes a single integer for the input sample
+ feature size.
+ '''
+
def __init__(self, name, hidden_size, rnn_mode='lstm', dropout=0.0,
num_stacks=1, input_mode='linear', bidirectional=False,
param_specs=None, input_sample_shape=None):
- '''Wrapper for singa::RNN class.
-
- Args:
- hidden_size, hidden feature size, the same for all stacks of layers.
- rnn_mode, decides the rnn unit, which could be one of 'lstm', 'gru',
- 'tanh' and 'relu', refer to cudnn manual for each mode.
- num_stacks, num of stacks of rnn layers. It is different to the
- unrolling seqence length.
- input_mode, 'linear' convert the input feature x by by a linear
- transformation to get a feature vector of size hidden_size;
- 'skip' does nothing but requires the input feature size equals
- hidden_size
- bidirection, True for bidirectional RNN
- param_specs, config for initializing the RNN parameters.
- input_sample_shape, includes a single integer for the input sample
- feature size.
- '''
super(RNN, self).__init__(name)
conf = self.conf.rnn_conf
assert hidden_size > 0, 'Hidden feature size must > 0'
@@ -605,7 +688,7 @@ class RNN(Layer):
Returns:
<dx1, dx2, ... dxn, dhx, dcx>, where dxi is the gradient tensor for
- the i-th input, its shape is (batch_size,
+ the i-th input, its shape is (batch_size,
input_feature_length). dhx is the gradient for the initial
hidden state. dcx is the gradient for the initial cell state,
which is valid only for lstm.
@@ -741,5 +824,7 @@ def _construct_param_specs_from_dict(specs):
def get_layer_list():
- """ Return a list of strings reprensenting the all supported layers"""
+ """ Return a list of strings which include the identifiers (tags) of all
+ supported layers
+ """
return singa_wrap.GetRegisteredLayers()
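The construct-then-setup life cycle described in the Layer docstring can be
sketched as follows; the 'singacpp' engine value and the printed parameter
names are illustrative, not guaranteed by the API::

    from singa import layer

    layer.engine = 'singacpp'    # assuming the Cpp layers are built
    dense = layer.Dense('fc', num_output=10)  # no input_sample_shape yet
    dense.setup((20,))           # create params for 20-d input samples
    print(dense.get_output_sample_shape())    # expected (10,)
    print(dense.param_names())   # e.g. the weight and bias names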
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/8cd55300/src/python/singa/loss.py
----------------------------------------------------------------------
diff --git a/src/python/singa/loss.py b/src/python/singa/loss.py
index acfb813..c88290b 100644
--- a/src/python/singa/loss.py
+++ b/src/python/singa/loss.py
@@ -15,32 +15,127 @@
# specific language governing permissions and limitations
# under the License.
# =============================================================================
-""" Python wrappers for optimizers implemented by C++."""
+
+'''
+Loss module includes a set of training loss implementations. Some are converted
+from C++ implementations, and the rest are implemented directly using Python
+Tensor operations.
+
+Example usage::
+
+    import numpy as np
+    from singa import tensor
+    from singa import loss
+    from singa.proto import model_pb2
+
+    x = tensor.Tensor((3, 5))
+    x.uniform(0, 1)  # randomly generate the prediction activation
+    y = tensor.from_numpy(np.array([0, 1, 3], dtype=np.int))  # set the truth
+
+ f = loss.SoftmaxCrossEntropy()
+ l = f.forward(model_pb2.kTrain, x, y) # l is tensor with 3 loss values
+ g = f.backward() # g is a tensor containing all gradients of x w.r.t l
+'''
+
from . import singa_wrap as singa
import tensor
class Loss(object):
+ '''Base loss class.
+
+    Subclasses that wrap the C++ loss classes can use the inherited forward,
+    backward, and evaluate functions of this base class. Other subclasses need
+    to override these functions.
+ '''
def __init__(self):
self.swig_loss = None
def forward(self, flag, x, y):
- """Return a tensor of floats, one per sample"""
+ '''Compute the loss values.
+
+ Args:
+ flag (int): kTrain or kEval. If it is kTrain, then the backward
+ function must be called before calling forward again.
+ x (Tensor): the prediction Tensor
+            y (Tensor): the ground truth Tensor; x.shape[0] must equal y.shape[0]
+
+ Returns:
+ a tensor of floats for the loss values, one per sample
+ '''
return tensor.from_raw_tensor(
self.swig_loss.Forward(flag, x.singa_tensor, y.singa_tensor))
def backward(self):
- """Return the grad of x w.r.t. the loss obj"""
+ '''
+ Returns:
+ the grad of x w.r.t. the loss
+ '''
return tensor.from_raw_tensor(self.swig_loss.Backward())
- def evaluate(self, flag, x, y):
- """Return the averaged loss for all samples in x"""
+ def evaluate(self, flag, x, y): # TODO(wangwei) remove flag
+ '''
+ Args:
+ flag (int): must be kEval, to be removed
+ x (Tensor): the prediction Tensor
+            y (Tensor): the ground truth Tensor
+
+ Returns:
+ the averaged loss for all samples in x.
+ '''
return self.swig_loss.Evaluate(flag, x.singa_tensor, y.singa_tensor)
class SoftmaxCrossEntropy(Loss):
+ '''This loss function is a combination of SoftMax and Cross-Entropy loss.
+
+ It converts the inputs via SoftMax function and then
+ computes the cross-entropy loss against the ground truth values.
+ '''
def __init__(self):
self.swig_loss = singa.SoftmaxCrossEntropy()
+
+
+class SquaredError(Loss):
+ '''This loss evaluates the squared error between the prediction and the
+ truth values.
+
+ It is implemented using Python Tensor operations.
+ '''
+ def __init__(self):
+        super(SquaredError, self).__init__()
+ self.err = None
+
+ def forward(self, flag, x, y):
+ '''Compute the error as 0.5 * ||x-y||^2.
+
+ Args:
+ flag (int): kTrain or kEval; if kTrain, then the backward must be
+ called before calling forward again.
+ x (Tensor): the prediction Tensor
+            y (Tensor): the ground truth Tensor with the same shape as x
+
+ Returns:
+ a Tensor with one error value per sample
+ '''
+ self.err = x - y
+ return 0.5 * tensor.squared(self.err)
+
+ def backward(self):
+ '''Compute the gradient of x w.r.t the error.
+
+ Returns:
+ x - y
+ '''
+ return self.err
+
+ def evaluate(self, flag, x, y):
+        '''Compute the averaged error.
+
+ Returns:
+ a float value as the averaged error
+ '''
+ return tensor.sum(0.5 * tensor.squared(x - y)) / x.size()
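By analogy with the SoftmaxCrossEntropy example in the module docstring, the
new SquaredError class would be used as follows (a sketch; here the ground
truth must have the same shape as the prediction)::

    from singa import tensor
    from singa import loss
    from singa.proto import model_pb2

    x = tensor.Tensor((3, 5))
    x.uniform(0, 1)     # predictions
    y = tensor.Tensor((3, 5))
    y.set_value(0.2)    # ground truth, same shape as x

    f = loss.SquaredError()
    l = f.forward(model_pb2.kTrain, x, y)  # 0.5 * (x - y)^2 loss values
    g = f.backward()                       # gradient, x - y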
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/8cd55300/src/python/singa/metric.py
----------------------------------------------------------------------
diff --git a/src/python/singa/metric.py b/src/python/singa/metric.py
index 31b6892..3a5750d 100644
--- a/src/python/singa/metric.py
+++ b/src/python/singa/metric.py
@@ -15,28 +15,71 @@
# specific language governing permissions and limitations
# under the License.
# =============================================================================
-""" Python wrappers for optimizers implemented by C++."""
+'''This module includes a set of metric classes for evaluating the model's
+performance. The specific metric classes could be converted from C++
+implementations or implemented directly using Python.
+
+
+Example usage::
+
+    import numpy as np
+    from singa import tensor
+    from singa import metric
+
+    x = tensor.Tensor((3, 5))
+    x.uniform(0, 1)  # randomly generate the prediction activation
+    x = tensor.SoftMax(x)  # normalize the prediction into probabilities
+    y = tensor.from_numpy(np.array([0, 1, 3], dtype=np.int))  # set the truth
+
+ f = metric.Accuracy()
+ acc = f.evaluate(x, y) # averaged accuracy over all 3 samples in x
+
+'''
from . import singa_wrap as singa
import tensor
class Metric(object):
+ '''Base metric class.
+
+    Subclasses that wrap the C++ metric classes can use the inherited forward
+    and evaluate functions of this base class. Other subclasses need
+ to override these functions. Users need to feed in the **predictions** and
+ ground truth to get the metric values.
+ '''
def __init__(self):
self.swig_metric = None
def forward(self, x, y):
- """Return a tensor of floats, one per sample"""
+ '''Compute the metric for each sample.
+
+ Args:
+ x (Tensor): predictions, one row per sample
+ y (Tensor): ground truth values, one row per sample
+
+ Returns:
+ a tensor of floats, one per sample
+ '''
return tensor.from_raw_tensor(
self.swig_metric.Forward(x.singa_tensor, y.singa_tensor))
def evaluate(self, x, y):
- """Return the averaged metric for all samples in x"""
+ '''Compute the averaged metric over all samples.
+
+ Args:
+ x (Tensor): predictions, one row per sample
+ y (Tensor): ground truth values, one row per sample
+ Returns:
+ a float value for the averaged metric
+ '''
return self.swig_metric.Evaluate(x.singa_tensor, y.singa_tensor)
class Accuracy(Metric):
+    '''Compute the top-one accuracy for single-label prediction tasks.
+ It calls the C++ functions to do the calculation.
+ '''
def __init__(self):
self.swig_metric = singa.Accuracy()
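The distinction between forward (per-sample) and evaluate (averaged) can be
seen in a short sketch following the module docstring example::

    import numpy as np
    from singa import tensor
    from singa import metric

    x = tensor.Tensor((3, 5))
    x.uniform(0, 1)
    x = tensor.SoftMax(x)   # normalize into probabilities
    y = tensor.from_numpy(np.array([0, 1, 3], dtype=np.int))

    f = metric.Accuracy()
    per_sample = f.forward(x, y)  # one accuracy value per sample
    averaged = f.evaluate(x, y)   # a single float over all 3 samples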
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/8cd55300/src/python/singa/optimizer.py
----------------------------------------------------------------------
diff --git a/src/python/singa/optimizer.py b/src/python/singa/optimizer.py
index 503527f..5d38997 100644
--- a/src/python/singa/optimizer.py
+++ b/src/python/singa/optimizer.py
@@ -15,7 +15,22 @@
# specific language governing permissions and limitations
# under the License.
# =============================================================================
-""" Python wrappers for optimizers implemented by C++."""
+'''This module includes a set of optimizers for updating model parameters.
+
+Example usage::
+
+ from singa import optimizer
+ from singa import tensor
+
+    sgd = optimizer.SGD(lr=0.01, momentum=0.9, decay=1e-4)
+ p = tensor.Tensor((3,5))
+ p.uniform(-1, 1)
+ g = tensor.Tensor((3,5))
+ g.gaussian(0, 0.01)
+
+    sgd.apply(1, g, p, 'param')  # use the global lr=0.01 for epoch 1
+ sgd.apply_with_lr(2, 0.03, g, p, 'param') # use lr=0.03 for epoch 2
+'''
from . import singa_wrap as singa
import tensor
@@ -23,53 +38,44 @@ from proto import model_pb2
class Optimizer(object):
- """Base python optimizer.
-
- Usages:
- 1. construct the optimizer
- 2. (optional) register each parameter with its specs.
- 3. use the optimizer to update parameter values given parameter
- gradients and other optional info
- """
-
+ '''The base python optimizer class.
+
+ Typically, an optimizer is used as follows:
+
+ 1. construct the optimizer
+ 2. (optional) register each parameter with its specs.
+ 3. use the optimizer to update parameter values given parameter
+ gradients and other optional info
+
+ The subclasses should override the apply_with_lr function to do the real
+    parameter update.
+
+ Args:
+ lr (float): a constant for the learning rate, mutually exclusive with
+ 'lr_gen'.
+ momentum (float): a constant for the momentum value
+        decay (float): the coefficient for the L2 regularizer, which is mutually
+ exclusive with 'regularizer'.
+        lr_gen (function): a function that returns the learning rate given
+ the current training step/epoch. It is mutually exclusive with lr.
+ If both are not set, the apply_with_lr function should be used for
+ param updating.
+ regularizer: an instance of Regularizer or RegularizerConf; If set,
+ regularization would be applied in apply_with_lr().
+ Users can also do regularization outside.
+ constraint: an instance of Constraint or ConstraintConf; If set,
+ constraint would be applied inside apply_with_lr(). Users can
+ also do regularization outside.
+ '''
def __init__(self, lr=None, momentum=None, decay=None, lr_gen=None,
- momentum_gen=None, regularizer=None, constraint=None):
- """Constructor.
-
- Args:
- lr: a constant or a function that generates learning rate given a
- step, which is mutually exclusive with 'lr_gen'.
- momentum: a constant or a function that generates the momentum value
- given a step.
- decay (float): the coefficent for L2 regularizer, which is mutually
- exclusive with 'regularizer'.
- lr_gen (function): a function returns the learning rate given
- the current training step. It is mutually exclusive with lr. If
- both are not set, the apply_with_lr function should be used for
- param updating.
- momentum_gen (function): a function returns the momentum value given
- the current training step. It is mutually exclusive with
- momentum.
- regularizer: an instance of Regularizer or RegularizerConf; If set,
- regularization would be applied in apply_with_lr().
- Users can also do regularization outside.
- constraint: an instance of Constraint or ConstraintConf; If set,
- constraint would be applied inside apply_with_lr(). Users can
- also do regularization outside.
- """
+ regularizer=None, constraint=None):
if lr is not None:
assert lr_gen is None, 'Cannot set lr and lr_gen at the same time'
- def lr_gen(step):
+ def lr_gen(epoch):
return lr
self.lr_gen = lr_gen
- if momentum is not None:
- assert momentum_gen is None, 'Cannot set momentum and momentum_gen'\
- ' at the same time'
-
- def momentum_gen(step):
- return momentum
- self.momentum_gen = momentum_gen
+ self.momentum = momentum
if decay is not None:
assert regularizer is None, \
'Cannot set decay and regularizer at the same time'
@@ -94,14 +100,15 @@ class Optimizer(object):
self.learning_rate_multiplier = {}
def register(self, name, specs):
- """Register the param specs, including creating regularizer and
+ '''Register the param specs, including creating regularizer and
constraint per param object. Param specific regularizer and constraint
have higher priority than the global ones.
Args:
name (str): parameter name
- specs (ParamSpec): protobuf obj
- """
+ specs (ParamSpec): protobuf obj, including regularizer and
+ constraint, multipliers for learning rate and weight decay.
+ '''
if specs.HasField('regularizer'):
self.regularizers[name] = CppRegularizer(specs.regularizer)
if specs.HasField('constraint'):
@@ -111,8 +118,8 @@ class Optimizer(object):
if specs.decay_mult != 1:
self.decay_multiplier[name] = specs.decay_mult
- def apply_regularizer_constraint(self, value, grad, name=None, step=None):
- """Apply regularization and constraint if available.
+ def apply_regularizer_constraint(self, value, grad, name=None, epoch=None):
+ '''Apply regularization and constraint if available.
If there are both global regularizer (constraint) and param specific
regularizer (constraint), it would use the param specific one.
@@ -121,46 +128,48 @@ class Optimizer(object):
value (Tensor): parameter value Tensor
grad (Tensor): parameter gradient Tensor
name (string): to get parameter specific regularizer or constraint
- step (int): some regularizer or constraint would use step
+ epoch (int): some regularizer or constraint would use epoch
- Return:
+ Returns:
the updated gradient Tensor
- """
+ '''
if name is not None and name in self.constraints:
- self.constraints[name].apply(value, grad, step)
+ self.constraints[name].apply(value, grad, epoch)
elif self.constraint is not None:
- self.constraint.apply(step, value, grad)
+ self.constraint.apply(epoch, value, grad)
if name is not None and name in self.regularizers:
- self.regularizers[name].apply(value, grad, step)
+ self.regularizers[name].apply(value, grad, epoch)
elif self.regularizer is not None:
- self.regularizer.apply(step, value, grad)
+ self.regularizer.apply(epoch, value, grad)
return grad
- def apply_with_lr(self, step, lr, grad, value, name=None):
- """Do update with given learning rate.
+ def apply_with_lr(self, epoch, lr, grad, value, name=None):
+ '''Do update with given learning rate.
The subclass optimizer must override this function.
+
Args:
- step (int): training step (could be iteration or epoch)
+ epoch (int): training epoch (could be iteration or epoch)
lr (float): learning rate
grad (Tensor): parameter gradient
value (Tensor): parameter value
name (string): parameter name to retrieve parameter-specific
updating rules (including regularizer and constraint)
- Return:
+ Returns:
updated parameter value
- """
+ '''
assert False, 'This is the base function, pls call the subclass func'
return value
- def apply(self, step, grad, value, name=None):
- """Do update assume the learning rate generator is set.
+ def apply(self, epoch, grad, value, name=None):
+ '''Do update assuming the learning rate generator is set.
The subclass optimizer does not need to override this function.
+
Args:
- step (int): training step (could be iteration or epoch)
+ epoch (int): training epoch (could be iteration or epoch)
grad (Tensor): parameter gradient
value (Tensor): parameter value
name (string): parameter name to retrieve parameter-specific
@@ -168,97 +177,109 @@ class Optimizer(object):
Return:
updated parameter value
- """
-
+ '''
assert self.lr_gen is not None, 'Learning rate generator is not set.'\
'Either set the lr_gen in constructor or call apply_with_lr'
- lr = self.lr_gen(step)
- return self.apply_with_lr(step, lr, grad, value, name)
+ lr = self.lr_gen(epoch)
+ return self.apply_with_lr(epoch, lr, grad, value, name)
class SGD(Optimizer):
+    '''The vanilla Stochastic Gradient Descent algorithm with momentum.
- def __init__(self, lr=None, momentum=None, decay=None, **kwargs):
- """The vallina Stochasitc Gradient Descent algorithm.
+ See the base Optimizer for all arguments.
+ '''
- See the base Optimizer for all arguments.
- """
- super(SGD, self).__init__(lr, momentum, decay)
+ def __init__(self, lr=None, momentum=None, decay=None, lr_gen=None,
+ regularizer=None, constraint=None):
+ super(SGD, self).__init__(lr, momentum, decay, lr_gen, regularizer,
+ constraint)
conf = model_pb2.OptimizerConf()
+        if self.momentum is not None:
+            conf.momentum = self.momentum
+ conf.type = 'sgd'
self.opt = singa.CreateOptimizer('SGD')
self.opt.Setup(conf.SerializeToString())
- def apply_with_lr(self, step, lr, grad, value, name):
- self.apply_regularizer_constraint(step, value, grad, name)
- self.opt.Apply(step, lr, name, grad.singa_tensor, value.singa_tensor)
+ def apply_with_lr(self, epoch, lr, grad, value, name):
+        grad = self.apply_regularizer_constraint(value, grad, name, epoch)
+ self.opt.Apply(epoch, lr, name, grad.singa_tensor, value.singa_tensor)
return value
class Nesterov(Optimizer):
+ '''The SGD with Nesterov momentum.
- def __init__(self, lr=None, momentum=0.9, decay=None, **kwargs):
- """The SGD with Nesterov momentum
+ See the base Optimizer for all arguments.
+ '''
- See the base Optimizer for all arguments.
- """
- super(Nesterov, self).__init__(lr, momentum, decay, kwargs)
+ def __init__(self, lr=None, momentum=0.9, decay=None, lr_gen=None,
+ regularizer=None, constraint=None):
+ super(Nesterov, self).__init__(lr, momentum, decay, lr_gen, regularizer,
+ constraint)
conf = model_pb2.OptimizerConf()
+ conf.momentum = momentum
+ conf.type = 'nesterov'
self.opt = singa.CreateOptimizer('Nesterov')
self.opt.Setup(conf.SerializeToString())
- def apply_with_lr(self, step, lr, grad, value, name):
- self.apply_regularizer_constraint(step, value, grad, name)
- self.opt.Apply(step, lr, name, grad.singa_tensor, value.singa_tensor)
+ def apply_with_lr(self, epoch, lr, grad, value, name):
+        grad = self.apply_regularizer_constraint(value, grad, name, epoch)
+ self.opt.Apply(epoch, lr, name, grad.singa_tensor, value.singa_tensor)
return value
class AdaGrad(Optimizer):
+ '''AdaGrad optimizer.
- def __init__(self, epsilon=1e-8, lr=None, decay=None, **kwargs):
- """AdaGrad optimizer.
+ See the base Optimizer for all constructor args.
- See the base Optimizer for all constructor args.
- Args:
- epsilon (float): small number for preventing numeric error.
- """
- super(RMSProp, self).__init__(lr, decay, **kwargs)
+ Args:
+ epsilon (float): small number for preventing numeric error.
+ '''
+ def __init__(self, epsilon=1e-8, lr=None, decay=None, lr_gen=None,
+ regularizer=None, constraint=None):
+        super(AdaGrad, self).__init__(lr, None, decay, lr_gen, regularizer,
+                                      constraint)
conf = model_pb2.OptimizerConf()
conf.delta = epsilon
+ conf.type = 'adagrad'
self.opt = singa.CreateOptimizer('AdaGrad')
self.opt.Setup(conf.SerializeToString())
- def apply_with_lr(self, step, lr, grad, value, name):
- grad = self.apply_regularizer_constraint(step, value, grad, name)
- self.opt.Apply(step, lr, name, grad.singa_tensor, value.singa_tensor)
+ def apply_with_lr(self, epoch, lr, grad, value, name):
+        grad = self.apply_regularizer_constraint(value, grad, name, epoch)
+ self.opt.Apply(epoch, lr, name, grad.singa_tensor, value.singa_tensor)
return value
class RMSProp(Optimizer):
+ '''RMSProp optimizer.
- def __init__(self, rho=0.9, epsilon=1e-8, lr=None, decay=None, **kwargs):
- """RMSProp optimizer.
+ See the base Optimizer for all constructor args.
- See the base Optimizer for all constructor args.
- Args:
- rho (float): float within [0, 1]
- epsilon (float): small value for preventing numeric error
- """
- super(RMSProp, self).__init__(lr, decay, kwargs)
+ Args:
+ rho (float): float within [0, 1]
+ epsilon (float): small value for preventing numeric error
+ '''
+
+ def __init__(self, rho=0.9, epsilon=1e-8, lr=None, decay=None, lr_gen=None,
+ regularizer=None, constraint=None):
+        super(RMSProp, self).__init__(lr, None, decay, lr_gen, regularizer,
+                                      constraint)
conf = model_pb2.OptimizerConf()
conf.rho = rho
conf.delta = epsilon
self.opt = singa.CreateOptimizer('RMSProp')
self.opt.Setup(conf.SerializeToString())
- def apply_with_lr(self, step, lr, grad, value, name):
- grad = self.apply_regularizer_constraint(step, value, grad, name)
- self.opt.Apply(step, lr, name, grad.singa_tensor, value.singa_tensor)
+ def apply_with_lr(self, epoch, lr, grad, value, name):
+        grad = self.apply_regularizer_constraint(value, grad, name, epoch)
+ self.opt.Apply(epoch, lr, name, grad.singa_tensor, value.singa_tensor)
return value
class Regularizer(object):
- """Base Python regularizer for parameter gradients.
- """
+ '''Base Python regularizer for parameter gradients.'''
def apply(self, value, grad):
assert False, 'Not Implemented. Call the subclass function.'
@@ -266,34 +287,32 @@ class Regularizer(object):
class CppRegularizer(Regularizer):
- """Wrapper for regularizer implemented using C++.
- """
+ '''Wrapper for regularizer implemented using C++.
- def __init__(self, conf):
- """Constructor.
+ Args:
+ conf (RegularizerConf): protobuf message for the configuration.
+ '''
- Args:
- conf (RegularizerConf): protobuf message for the configuration.
- """
+ def __init__(self, conf):
self.reg = singa.CreateRegularizer(conf.type)
self.reg.Setup(conf.SerializeToString())
- def apply(self, step, value, grad):
- self.reg.Apply(step, value.singa_tensor, grad.singa_tensor)
+ def apply(self, epoch, value, grad):
+ self.reg.Apply(epoch, value.singa_tensor, grad.singa_tensor)
return grad
class L2Regularizer(Regularizer):
- """L2 regularization"""
+ '''L2 regularization
+
+ Args:
+ coefficient (float): regularization coefficient.
+ '''
def __init__(self, coefficient):
- """
- Args:
- coefficient (float): regularization coefficient.
- """
self.coefficient = coefficient
- def apply(self, step, value, grad, coefficient=None):
+ def apply(self, epoch, value, grad, coefficient=None):
if coefficient is None:
assert self.coefficient is not None, 'Must set the coefficient'
coefficient = self.coefficient
@@ -302,39 +321,34 @@ class L2Regularizer(Regularizer):
class Constraint(object):
- """Base Python constraint class for paramter gradients.
- """
+    '''Base Python constraint class for parameter gradients'''
- def apply(self, step, value, grad):
+ def apply(self, epoch, value, grad):
return grad
class CppConstraint(Constraint):
- """Wrapper for constraints implemented using C++.
- """
+ '''Wrapper for constraints implemented using C++.
+ Args:
+ conf (ConstraintConf): protobuf message for the configuration.
+ '''
def __init__(self, conf):
- """Constructor.
-
- Args:
- conf (ConstraintConf): protobuf message for the configuration.
- """
self.constraint = singa.CreateConstraint(conf.type)
self.constraint.Setup(conf.SerializeToString())
- def apply(self, step, value, grad):
- self.constraint.Apply(step, value.singa_tensor, grad.singa_tensor)
+ def apply(self, epoch, value, grad):
+ self.constraint.Apply(epoch, value.singa_tensor, grad.singa_tensor)
return grad
class L2Constraint(Constraint):
- """Rescale the gradient to make the L2 norm <= a given threshold.
- """
+ '''Rescale the gradient to make the L2 norm <= a given threshold'''
def __init__(self, threshold=None):
self.threshold = threshold
- def apply(self, step, value, grad, threshold=None):
+ def apply(self, epoch, value, grad, threshold=None):
if threshold is None:
assert self.threshold is not None, 'Must set the threshold'
threshold = self.threshold
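The lr/lr_gen distinction documented in the Optimizer constructor can be
exercised with a schedule function; a sketch (the decay schedule itself is
illustrative)::

    from singa import optimizer
    from singa import tensor

    # lr_gen is mutually exclusive with lr; it maps an epoch to a learning rate
    def lr_gen(epoch):
        return 0.01 * (0.1 ** (epoch // 10))

    sgd = optimizer.SGD(lr_gen=lr_gen, momentum=0.9, decay=1e-4)
    p = tensor.Tensor((3, 5))
    p.uniform(-1, 1)
    g = tensor.Tensor((3, 5))
    g.gaussian(0, 0.01)
    for epoch in range(20):
        p = sgd.apply(epoch, g, p, 'param')  # uses lr_gen(epoch) internally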