You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@singa.apache.org by wa...@apache.org on 2017/02/17 06:24:05 UTC
[1/5] incubator-singa git commit: SINGA-300 - Add residual networks
for imagenet classification
Repository: incubator-singa
Updated Branches:
refs/heads/master 9b2d16134 -> f2ad93381
SINGA-300 - Add residual networks for imagenet classification
Add the wide residual network for imagenet serving; refer to https://github.com/szagoruyko/wide-residual-networks
Converted the original ResNet models from Torch to SINGA; tested the wide ResNet.
Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/c0317d18
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/c0317d18
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/c0317d18
Branch: refs/heads/master
Commit: c0317d185004ea594f19b74cbb9bc2c97e66a7b1
Parents: 59ca44a
Author: Wei Wang <wa...@gmail.com>
Authored: Fri Feb 10 14:32:42 2017 +0800
Committer: Wei Wang <wa...@gmail.com>
Committed: Sun Feb 12 22:00:46 2017 +0800
----------------------------------------------------------------------
examples/imagenet/resnet/convert.py | 48 +++++++++++
examples/imagenet/resnet/model.py | 138 ++++++++++++++++++++++++++++++
examples/imagenet/resnet/serve.py | 138 ++++++++++++++++++++++++++++++
python/singa/device.py | 4 +-
python/singa/layer.py | 33 +++----
python/singa/net.py | 34 +++++---
src/model/layer/convolution.cc | 8 +-
src/model/layer/convolution.h | 5 +-
src/model/layer/cudnn_convolution.cc | 8 +-
src/model/layer/dense.cc | 16 ++--
src/model/layer/dense.h | 7 +-
11 files changed, 398 insertions(+), 41 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/c0317d18/examples/imagenet/resnet/convert.py
----------------------------------------------------------------------
diff --git a/examples/imagenet/resnet/convert.py b/examples/imagenet/resnet/convert.py
new file mode 100644
index 0000000..c976bf5
--- /dev/null
+++ b/examples/imagenet/resnet/convert.py
@@ -0,0 +1,48 @@
+import torchfile
+import numpy as np
+import cPickle as pickle
+
def conv(m, name, params):
    """Copy the convolution weight of torch module *m* into *params*.

    The 4-D torch kernel is flattened to a 2-D matrix with one row per
    output channel, which is the layout SINGA's convolution layer expects.
    """
    weight = m['weight']
    num_filters = weight.shape[0]
    params['%s-conv_weight' % name] = np.reshape(weight, (num_filters, -1))
    return params
+
def batchnorm(m, name, params):
    """Copy the affine params and running statistics of a torch BN module."""
    mapping = (
        ('weight', '-bn_gamma'),
        ('bias', '-bn_beta'),
        ('running_mean', '-bn_mean'),
        ('running_var', '-bn_var'),
    )
    for torch_key, singa_suffix in mapping:
        params[name + singa_suffix] = m[torch_key]
    return params
+
def block(m, name, params, has_identity):
    """Extract one bottleneck block (three conv+bn pairs) from torch module *m*.

    When the block has no identity connection, the projection shortcut's
    conv+bn parameters are extracted as well under the '-shortcut' suffix.
    """
    residual = m[0].modules[0].modules
    # the residual branch interleaves conv/bn/relu, so the conv layers sit
    # at positions 0, 3 and 6, each followed directly by its batch-norm
    for idx, pos in enumerate((0, 3, 6), start=1):
        sub_name = '%s-%d' % (name, idx)
        params = conv(residual[pos], sub_name, params)
        params = batchnorm(residual[pos + 1], sub_name, params)
    if not has_identity:
        projection = m[0].modules[1].modules
        params = conv(projection[0], name + '-shortcut', params)
        params = batchnorm(projection[1], name + '-shortcut', params)
    return params
+
def stage(sid, m, num_blk, params):
    """Extract all *num_blk* blocks of stage *sid* from torch module list *m*.

    Only the first block of a stage (index 0) carries a projection shortcut.
    """
    for blk in range(num_blk):
        blk_name = 'stage%d-blk%d' % (sid, blk)
        params = block(m[blk].modules, blk_name, params, blk != 0)
    return params
+
# Convert the torch WRN-50-2 checkpoint into a pickled dict of numpy arrays
# keyed by the SINGA parameter names used in examples/imagenet/resnet/model.py.
params = {}
model = torchfile.load('wrn-50-2.t7').modules
# network stem: first conv followed by batch-norm
params = conv(model[0], 'input', params)
params = batchnorm(model[1], 'input', params)
# four residual stages with 3/4/6/3 bottleneck blocks
# (modules 2/3 are presumably relu/pooling and carry no parameters)
params = stage(0, model[4].modules, 3, params)
params = stage(1, model[5].modules, 4, params)
params = stage(2, model[6].modules, 6, params)
params = stage(3, model[7].modules, 3, params)

# final classifier; torch stores the dense weight as (out, in) while
# SINGA expects (in, out), hence the transpose
params['dense_weight'] = np.transpose(model[10]['weight'])
params['dense_bias'] = model[10]['bias']
with open('wrn-50-2.pickle', 'wb') as fd:
    pickle.dump(params, fd)
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/c0317d18/examples/imagenet/resnet/model.py
----------------------------------------------------------------------
diff --git a/examples/imagenet/resnet/model.py b/examples/imagenet/resnet/model.py
new file mode 100644
index 0000000..7c9a3cf
--- /dev/null
+++ b/examples/imagenet/resnet/model.py
@@ -0,0 +1,138 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# =============================================================================
+''' This model is created following https://github.com/facebook/fb.resnet.torch.git
+'''
+from singa.layer import Conv2D, Activation, MaxPooling2D, AvgPooling2D,\
+ Split, Merge, Flatten, Dense, BatchNormalization, Softmax
+from singa import net as ffnet
+from singa import initializer
+
# print layer information while the nets are being constructed
ffnet.verbose = True

# resnet convolutions are each followed by batch-norm, so no conv bias
conv_bias = False
+
def conv(net, prefix, n, ksize, stride=1, pad=0, bn=True, relu=True, src=None):
    """Append a convolution layer to *net*, optionally followed by BN and ReLU.

    Args:
        net: the FeedForwardNet being built
        prefix: name prefix for the created layers
        n: number of output channels
        ksize: kernel size
        stride, pad: convolution stride and padding
        bn: if True, append a BatchNormalization layer
        relu: if True, append a ReLU Activation layer
        src: optional source layer to connect from

    Returns:
        the last layer appended.
    """
    out = net.add(
        Conv2D(prefix + '-conv', n, ksize, stride, pad=pad,
               use_bias=conv_bias),
        src)
    if bn:
        out = net.add(BatchNormalization(prefix + '-bn'))
    if relu:
        out = net.add(Activation(prefix + '-relu'))
    return out
+
+
def shortcut(net, prefix, inplane, outplane, stride, src):
    """Residual shortcut: identity when the channel counts agree, otherwise
    a 1x1 projection conv (with BN, no ReLU) mapping inplane -> outplane."""
    if inplane != outplane:
        return conv(net, prefix + '-shortcut', outplane, 1, stride, 0,
                    True, False, src)
    return src
+
+
def bottleneck(name, net, inplane, midplane, outplane, stride=1, preact=False):
    """Append one bottleneck residual block (1x1 -> 3x3 -> 1x1) to *net*.

    The input is split into the residual branch and the shortcut branch,
    which are merged by addition and passed through a final ReLU.
    The *preact* flag is accepted for signature compatibility but unused here.
    """
    fork = net.add(Split(name + '-split', 2))
    conv(net, name + '-1', midplane, 1, 1, 0, True, True, src=fork)
    conv(net, name + '-2', midplane, 3, stride, 1, True, True)
    residual = conv(net, name + '-3', outplane, 1, 1, 0, True, False)
    identity = shortcut(net, name, inplane, outplane, stride, fork)
    net.add(Merge(name + '-add'), [residual, identity])
    return net.add(Activation(name + '-relu'))
+
def basicblock(name, net, inplane, midplane, outplane, stride=1, preact=False):
    """Append one basic residual block (two 3x3 convs) to *net*.

    Args:
        name: name prefix for all layers of the block
        net: the FeedForwardNet being built
        inplane: input channel count (used to decide the shortcut type)
        midplane, outplane: must be equal for basic blocks (no expansion)
        stride: stride of the first conv (downsampling)
        preact: if True, apply BN+ReLU before the first conv
    """
    # basic blocks have no channel expansion, so mid must equal out
    # (fixed typo in the message: 'midplan' -> 'midplane')
    assert midplane == outplane, 'midplane and outplane should be the same'
    split = net.add(Split(name + '-split', 2))
    if preact:
        net.add(BatchNormalization(name + '-preact-bn'), split)
        net.add(Activation(name + '-preact-relu'))
    conv(net, name + '-1', outplane, 3, stride, 1, True, True, split)
    br0 = conv(net, name + '-2', outplane, 3, 1, 1, True, False)
    br1 = shortcut(net, name, inplane, outplane, stride, split)
    net.add(Merge(name + '-add'), [br0, br1])
    return net.add(Activation(name + '-add-relu'))
+
+
def stage(sid, net, num_blk, inplane, midplane, outplane, stride, block):
    """Append one resnet stage of *num_blk* blocks built by *block*.

    The first block may downsample (stride) and expand channels
    (inplane -> outplane); the remaining blocks keep shape and stride 1.
    """
    block('stage%d-blk0' % sid, net, inplane, midplane, outplane, stride)
    for blk in range(1, num_blk):
        block('stage%d-blk%d' % (sid, blk), net, outplane, midplane, outplane)
+
+def init_params(net, weight_path):
+ if weight_path == None:
+ for pname, pval in zip(net.param_names(), net.param_values()):
+ print pname, pval.shape
+ if 'conv' in pname and len(pval.shape) > 1:
+ initializer.gaussian(pval, 0, pval.shape[1])
+ elif 'dense' in pname:
+ if len(pval.shape) > 1:
+ initializer.gaussian(pval, 0, pval.shape[0])
+ else:
+ pval.set_value(0)
+ # init params from batch norm layer
+ elif 'mean' in pname or 'beta' in pname:
+ pval.set_value(0)
+ elif 'var' in pname:
+ pval.set_value(1)
+ elif 'gamma' in pname:
+ initializer.uniform(pval, 0, 1)
+ else:
+ net.load(weight_path, use_pickle = 'pickle' in weight_path)
+
def create_resnet(weight_path=None, depth=50):
    """Build a bottleneck resnet (depth 50, 101 or 152) for 224x224 input.

    Args:
        weight_path: optional checkpoint to load; random init when None
        depth: one of 50, 101, 152 (raises KeyError otherwise)

    Returns:
        the constructed FeedForwardNet.
    """
    cfg = {
        50: ([3, 4, 6, 3], bottleneck),
        101: ([3, 4, 23, 3], bottleneck),
        152: ([3, 8, 36, 3], bottleneck),
    }
    net = ffnet.FeedForwardNet()
    # stem: 7x7 conv stride 2 + BN + ReLU + 3x3 max-pool stride 2
    net.add(Conv2D('input-conv', 64, 7, 2, pad=3,
                   input_sample_shape=(3, 224, 224)))
    net.add(BatchNormalization('input-bn'))
    net.add(Activation('input_relu'))
    net.add(MaxPooling2D('input_pool', 3, 2, pad=1))

    blocks_per_stage, block_fn = cfg[depth]
    stage(0, net, blocks_per_stage[0], 64, 64, 256, 1, block_fn)
    stage(1, net, blocks_per_stage[1], 256, 128, 512, 2, block_fn)
    stage(2, net, blocks_per_stage[2], 512, 256, 1024, 2, block_fn)
    stage(3, net, blocks_per_stage[3], 1024, 512, 2048, 2, block_fn)

    net.add(AvgPooling2D('avg', 7, 1))
    net.add(Flatten('flat'))
    net.add(Dense('dense', 1000))

    init_params(net, weight_path)
    return net
+
+
def create_wide_resnet(weight_path=None):
    """Build the wide resnet WRN-50-2 for 224x224 input.

    Args:
        weight_path: optional checkpoint to load; random init when None

    Returns:
        the constructed FeedForwardNet.
    """
    net = ffnet.FeedForwardNet()
    # stem: 7x7 conv stride 2 + BN + ReLU + 3x3 max-pool stride 2
    net.add(Conv2D('input-conv', 64, 7, 2, pad=3, use_bias=False,
                   input_sample_shape=(3, 224, 224)))
    net.add(BatchNormalization('input-bn'))
    net.add(Activation('input_relu'))
    net.add(MaxPooling2D('input_pool', 3, 2, pad=1))

    # widened bottlenecks: midplane = outplane / 2 (vs / 4 in plain resnet)
    stage(0, net, 3, 64, 128, 256, 1, bottleneck)
    stage(1, net, 4, 256, 256, 512, 2, bottleneck)
    stage(2, net, 6, 512, 512, 1024, 2, bottleneck)
    stage(3, net, 3, 1024, 1024, 2048, 2, bottleneck)

    net.add(AvgPooling2D('avg_pool', 7, 1, pad=0))
    # fix: the Flatten layer was named 'flag' (typo for 'flat'); Flatten has
    # no parameters, so the rename does not affect checkpoint loading
    net.add(Flatten('flat'))
    net.add(Dense('dense', 1000))

    init_params(net, weight_path)
    return net
+
+
if __name__ == '__main__':
    # fix: 'create_net' is not defined anywhere in this module; the
    # intended entry point is create_wide_resnet (matches the wrn pickle)
    create_wide_resnet('wrn-50-2.pickle')
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/c0317d18/examples/imagenet/resnet/serve.py
----------------------------------------------------------------------
diff --git a/examples/imagenet/resnet/serve.py b/examples/imagenet/resnet/serve.py
new file mode 100644
index 0000000..d9609ce
--- /dev/null
+++ b/examples/imagenet/resnet/serve.py
@@ -0,0 +1,138 @@
+import os
+import sys
+import time
+import numpy as np
+import threading
+import traceback
+from scipy.misc import imread, imresize
+from argparse import ArgumentParser
+
+from singa import device
+from singa import tensor
+from singa import data
+from singa import image_tool
+from singa import metric
+from rafiki.agent import Agent, MsgType
+import model
+
# shared image loader/augmenter used by image_transform
tool = image_tool.ImageTool()
# number of augmented crops generated per input image (5 crops x 2 flips)
num_augmentation = 10
# network input is (3, crop_size, crop_size)
crop_size = 224
# ImageNet per-channel mean/std (RGB order, for pixels scaled to [0, 1])
mean = np.array([0.485, 0.456, 0.406])
std = np.array([ 0.229, 0.224, 0.225])
def image_transform(img):
    '''Input an image path and return a set of augmented images (type Image).

    The image is resized (shorter side 256), five crop_size x crop_size
    crops are taken and each is flipped, yielding num_augmentation images.
    '''
    # fix: removed the needless `global tool` declaration -- the module-level
    # `tool` is only read here, never rebound, so `global` has no effect
    return tool.load(img).resize_by_list([256]).crop5(
        (crop_size, crop_size), 5).flip(2).get()
+
+
def predict(net, images, num=10):
    '''Predict the probability distribution for one net, averaged over
    augmentations.

    Args:
        net: neural net (vgg or resnet)
        images: a batch of augmented images (type numpy); the batch size
            must be a multiple of *num*
        num: number of augmentations per original image

    Returns:
        numpy array of shape (batch / num, num_classes)
    '''
    prob = net.predict(images)
    prob = tensor.to_numpy(prob)
    # fix: use floor division so the reshape dimension stays an int under
    # python 3 semantics (identical result under python 2)
    prob = prob.reshape((images.shape[0] // num, num, -1))
    prob = np.average(prob, 1)
    return prob
+
+
def allowed_file(filename):
    """Return True iff *filename* has a recognised image extension.

    The check is case-sensitive on purpose, matching the explicit list of
    accepted spellings.
    """
    if '.' not in filename:
        return False
    extension = filename.rsplit('.', 1)[1]
    return extension in ("PNG", "png", "jpg", "JPG", "JPEG", "jpeg")
+
+
def serve(net, label_map, dev, agent, topk=5):
    '''Serve to predict image labels.

    It prints the topk food names for each image.

    Runs an endless pull/push loop against the rafiki agent until a stop
    command or an unsupported message arrives.

    Args:
        net: the neural net used for prediction
        label_map: a list of food names, corresponding to the index in meta_file
        dev: device that holds the pre-allocated input batch
        agent: rafiki Agent used to pull requests and push responses
        topk: number of top-scoring labels included in each response
    '''
    # pre-allocated input batch: one slot per augmented crop of one image
    images = tensor.Tensor((num_augmentation, 3, crop_size, crop_size), dev)
    while True:
        msg, val = agent.pull()
        if msg is None:
            # no pending request; poll again shortly
            time.sleep(0.1)
            continue
        msg = MsgType.parse(msg)
        if msg.is_request():
            try:
                # process images: convert to RGB and HWC -> CHW float arrays
                im = [np.array(x.convert('RGB'),
                               dtype=np.float32).transpose(2, 0, 1)
                      for x in image_transform(val['image'])]
                # scale to ~[0, 1] then normalise with ImageNet mean/std
                im = np.array(im) / 256
                im -= mean[np.newaxis, :, np.newaxis, np.newaxis]
                im /= std[np.newaxis, :, np.newaxis, np.newaxis]
                images.copy_from_numpy(im)
                print "input: ", images.l1()
                # do prediction, averaged over the augmentations; [0] picks
                # the single original image of this request
                prob = predict(net, images, num_augmentation)[0]
                idx = np.argsort(-prob)
                # prepare results: one "label:prob" line per topk class
                response = ""
                for i in range(topk):
                    response += "%s:%f <br/>" % (label_map[idx[i]], prob[idx[i]])
            except:
                # best-effort: report a generic failure instead of crashing
                traceback.print_exc()
                response = "sorry, system error during prediction."
            agent.push(MsgType.kResponse, response)
        elif msg.is_command():
            if MsgType.kCommandStop.equal(msg):
                print 'get stop command'
                agent.push(MsgType.kStatus, "success")
                break
            else:
                print 'get unsupported command %s' % str(msg)
                agent.push(MsgType.kStatus, "Unknown command")
        else:
            # unknown message type: report and shut the loop down
            print 'get unsupported message %s' % str(msg)
            agent.push(MsgType.kStatus, "unsupported msg; going to shutdown")
            break
    print "server stop"
+
+def main():
+ try:
+ # Setup argument parser
+ parser = ArgumentParser(description="Wide residual network")
+
+ parser.add_argument("-p", "--port", default=9999, help="listen port")
+ parser.add_argument("-c", "--use_cpu", action="store_true",
+ help="If set, load models onto CPU devices")
+ parser.add_argument("--parameter_file", default="wrn-50-2.pickle")
+
+ # Process arguments
+ args = parser.parse_args()
+ port = args.port
+
+ # start to train
+ agent = Agent(port)
+
+ net = model.create_wide_resnet(args.parameter_file)
+ dev = device.create_cuda_gpu()
+ net.to_device(dev)
+ print 'Finish loading models'
+
+ labels = np.loadtxt('synset_words.txt', str, delimiter='\t ')
+ serve(net, labels, dev, agent)
+
+ # acc = evaluate(net, '../val_list.txt', 'image/val', dev)
+ # print acc
+
+ # wait the agent finish handling http request
+ agent.stop()
+ except SystemExit:
+ return
+ except:
+ traceback.print_exc()
+ sys.stderr.write(" for help use --help \n\n")
+ return 2
+
+
# script entry point
if __name__ == '__main__':
    main()
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/c0317d18/python/singa/device.py
----------------------------------------------------------------------
diff --git a/python/singa/device.py b/python/singa/device.py
index 1df4c84..fdd2a92 100644
--- a/python/singa/device.py
+++ b/python/singa/device.py
@@ -132,12 +132,12 @@ def create_cuda_gpu_on(device_id):
def create_opencl_device():
'''Create the default OpenCL device.
-
+
Returns:
a swig converted OpenCL device.
'''
assert singa.USE_OPENCL, 'SINGA has not been compiled with OpenCL enabled.'
- return singa.Platform.GetDefaultDevice()
+ return singa.Platform.GetDefaultOpenclDevice()
default_device = singa.Platform.GetDefaultDevice()
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/c0317d18/python/singa/layer.py
----------------------------------------------------------------------
diff --git a/python/singa/layer.py b/python/singa/layer.py
index 0bea2d2..7975042 100644
--- a/python/singa/layer.py
+++ b/python/singa/layer.py
@@ -337,18 +337,19 @@ class Conv2D(Layer):
# conf.data_format = data_format
if W_specs is None:
W_specs = {'init': 'xavier'}
- if b_specs is None:
- b_specs = {'init': 'constant'}
if 'name' not in W_specs:
W_specs['name'] = name + '_weight'
- if 'name' not in b_specs:
- b_specs['name'] = name + '_bias'
wspecs = _construct_param_specs_from_dict(W_specs)
self.conf.param.extend([wspecs])
self.param_specs.append(wspecs)
- bspecs = _construct_param_specs_from_dict(b_specs)
- self.conf.param.extend([bspecs])
- self.param_specs.append(bspecs)
+ if use_bias:
+ if b_specs is None:
+ b_specs = {'init': 'constant'}
+ if 'name' not in b_specs:
+ b_specs['name'] = name + '_bias'
+ bspecs = _construct_param_specs_from_dict(b_specs)
+ self.conf.param.extend([bspecs])
+ self.param_specs.append(bspecs)
_check_engine(engine, ['cudnn', 'singacpp', 'singacl'])
self.layer = _create_layer(engine, 'Convolution')
@@ -610,16 +611,19 @@ class Dense(Layer):
conf.transpose = W_transpose
if W_specs is None:
W_specs = {'init': 'xavier'}
- if b_specs is None:
- b_specs = {'init': 'constant', 'value': 0}
if 'name' not in W_specs:
W_specs['name'] = name + '_weight'
- if 'name' not in b_specs:
- b_specs['name'] = name + '_bias'
wspecs = _construct_param_specs_from_dict(W_specs)
- bspecs = _construct_param_specs_from_dict(b_specs)
- self.conf.param.extend([wspecs, bspecs])
- self.param_specs.extend([wspecs, bspecs])
+ self.conf.param.extend([wspecs])
+ self.param_specs.append(wspecs)
+ if use_bias:
+ if b_specs is None:
+ b_specs = {'init': 'constant', 'value': 0}
+ if 'name' not in b_specs:
+ b_specs['name'] = name + '_bias'
+ bspecs = _construct_param_specs_from_dict(b_specs)
+ self.conf.param.extend([bspecs])
+ self.param_specs.append(bspecs)
# dense layer is transparent to engine.
if engine == 'cudnn':
self.layer = _create_layer('singacuda', 'Dense')
@@ -775,7 +779,6 @@ class Split(Layer):
input_sample_shape: includes a single integer for the input sample
feature size.
'''
-
def __init__(self, name, num_output, input_sample_shape=None):
self.num_output = num_output
self.in_shape = input_sample_shape
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/c0317d18/python/singa/net.py
----------------------------------------------------------------------
diff --git a/python/singa/net.py b/python/singa/net.py
index 027e78c..26fb61d 100644
--- a/python/singa/net.py
+++ b/python/singa/net.py
@@ -386,16 +386,16 @@ class FeedForwardNet(object):
'''
if use_pickle:
params = {}
- for (specs, val) in zip(self.param_specs(), self.param_values()):
+ for (name, val) in zip(self.param_names(), self.param_values()):
val.to_host()
- params[specs.name] = tensor.to_numpy(val)
+ params[name] = tensor.to_numpy(val)
with open(f, 'wb') as fd:
pickle.dump(params, fd)
else:
sp = snapshot.Snapshot(f, True, buffer_size)
- for (specs, val) in zip(self.param_specs(), self.param_values()):
+ for (name, val) in zip(self.param_names(), self.param_values()):
val.to_host()
- sp.write(specs.name, val)
+ sp.write(name, val)
def load(self, f, buffer_size=10, use_pickle=False):
'''Load model parameters using io/snapshot.
@@ -407,18 +407,30 @@ class FeedForwardNet(object):
'then set use_pickle=False for loading it'
with open(f, 'rb') as fd:
params = pickle.load(fd)
- for (specs, val) in zip(self.param_specs(),
- self.param_values()):
+ for name, val in zip(self.param_names(), self.param_values()):
+ if name not in params:
+ print 'Param: %s missing in the checkpoint file' % name
+ continue
try:
- val.copy_from_numpy(params[specs.name])
+ val.copy_from_numpy(params[name])
except AssertionError as err:
- print 'Error from copying values for param: %s' % specs.name
- print 'shape of param vs checkpoint', val.shape, params[specs.name].shape
+ print 'Error from copying values for param: %s' % name
+ print 'shape of param vs checkpoint', \
+ val.shape, params[name].shape
raise err
else:
print 'NOTE: If your model was saved using pickle, '\
'then set use_pickle=True for loading it'
sp = snapshot.Snapshot(f, False, buffer_size)
params = sp.read()
- for (specs, val) in zip(self.param_specs(), self.param_values()):
- val.copy_data(params[specs.name])
+ for (name, val) in zip(self.param_names(), self.param_values()):
+ if name not in params:
+ print 'Param: %s missing in the checkpoint file' % name
+ continue
+ try:
+ val.copy_data(params[name])
+ except AssertionError as err:
+ print 'Error from copying values for param: %s' % name
+ print 'shape of param vs checkpoint', \
+ val.shape, params[name].shape
+ raise err
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/c0317d18/src/model/layer/convolution.cc
----------------------------------------------------------------------
diff --git a/src/model/layer/convolution.cc b/src/model/layer/convolution.cc
index 78ec1af..8940fb2 100644
--- a/src/model/layer/convolution.cc
+++ b/src/model/layer/convolution.cc
@@ -97,7 +97,8 @@ void Convolution::Setup(const Shape &in_sample, const LayerConf &conf) {
// Setup shape of weight_ and bias_
weight_.Reshape(Shape{num_filters_, col_height_});
- bias_.Reshape(Shape{num_filters_});
+ if (bias_term_)
+ bias_.Reshape(Shape{num_filters_});
// Assume the order of param is: weight, bias
for (const auto &spec : conf.param()) param_specs_.push_back(spec);
}
@@ -143,7 +144,6 @@ const std::pair<Tensor, vector<Tensor>> Convolution::Backward(
Tensor dx;
Tensor db, dw;
dx.ResetLike(src_data);
- db.ResetLike(bias_);
dw.ResetLike(weight_);
dw.SetValue(0.0f);
size_t batchsize = grad.shape(0);
@@ -156,6 +156,7 @@ const std::pair<Tensor, vector<Tensor>> Convolution::Backward(
SumColumns(tmp1, &tmp2);
Tensor tmp3 = Reshape(tmp2, Shape{batchsize, num_filters_});
+ db.ResetLike(bias_);
SumRows(tmp3, &db);
}
@@ -178,7 +179,8 @@ const std::pair<Tensor, vector<Tensor>> Convolution::Backward(
dx.CopyDataFromHostPtr(dx_b, imagesize, b * imagesize);
}
param_grad.push_back(dw);
- param_grad.push_back(db);
+ if (bias_term_)
+ param_grad.push_back(db);
delete[] data_col;
delete[] dx_b;
return std::make_pair(dx, param_grad);
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/c0317d18/src/model/layer/convolution.h
----------------------------------------------------------------------
diff --git a/src/model/layer/convolution.h b/src/model/layer/convolution.h
index 7b7fd00..89b5319 100644
--- a/src/model/layer/convolution.h
+++ b/src/model/layer/convolution.h
@@ -57,7 +57,10 @@ class Convolution : public Layer {
const int stride_w, float* data_im);
const std::vector<Tensor> param_values() override {
- return std::vector<Tensor>{weight_, bias_};
+ if (bias_term_)
+ return std::vector<Tensor>{weight_, bias_};
+ else
+ return std::vector<Tensor>{weight_};
}
size_t kernel_w() const { return kernel_w_; }
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/c0317d18/src/model/layer/cudnn_convolution.cc
----------------------------------------------------------------------
diff --git a/src/model/layer/cudnn_convolution.cc b/src/model/layer/cudnn_convolution.cc
index 196d137..03ad8b9 100644
--- a/src/model/layer/cudnn_convolution.cc
+++ b/src/model/layer/cudnn_convolution.cc
@@ -60,7 +60,8 @@ void CudnnConvolution::InitCudnn(const Tensor &input) {
size_t batchsize = input.shape(0);
CUDNN_CHECK(cudnnCreateTensorDescriptor(&x_desc_));
CUDNN_CHECK(cudnnCreateTensorDescriptor(&y_desc_));
- CUDNN_CHECK(cudnnCreateTensorDescriptor(&bias_desc_));
+ if (bias_term_)
+ CUDNN_CHECK(cudnnCreateTensorDescriptor(&bias_desc_));
CUDNN_CHECK(cudnnCreateFilterDescriptor(&filter_desc_));
CUDNN_CHECK(cudnnCreateConvolutionDescriptor(&conv_desc_));
@@ -209,11 +210,11 @@ const std::pair<Tensor, vector<Tensor>> CudnnConvolution::Backward(
Tensor dx;
dx.ResetLike(src_data);
Tensor db, dw;
- db.ResetLike(bias_);
dw.ResetLike(weight_);
// LOG(ERROR) << "backward bias";
if (bias_term_) {
+ db.ResetLike(bias_);
dx.device()->Exec([grad, db, this](Context *ctx) {
Block *dyblock = grad.block(), *dbblock = db.block();
float alpha = 1.f, beta = 0.f;
@@ -248,7 +249,8 @@ const std::pair<Tensor, vector<Tensor>> CudnnConvolution::Backward(
this->x_desc_, dxblock->mutable_data());
}, {grad.block(), weight_.block()}, {dx.block(), workspace_.block()});
param_grad.push_back(dw);
- param_grad.push_back(db);
+ if (bias_term_)
+ param_grad.push_back(db);
return std::make_pair(dx, param_grad);
}
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/c0317d18/src/model/layer/dense.cc
----------------------------------------------------------------------
diff --git a/src/model/layer/dense.cc b/src/model/layer/dense.cc
index 64e3d86..fac9130 100644
--- a/src/model/layer/dense.cc
+++ b/src/model/layer/dense.cc
@@ -38,11 +38,13 @@ void Dense::Setup(const Shape& in_sample, const LayerConf &conf) {
vdim_ = in_sample.at(0);
hdim_ = dense_conf.num_output();
transpose_ = dense_conf.transpose();
+ bias_term_ = dense_conf.bias_term();
if (transpose_) // was {vdim_, hdim} by zhaojing?
weight_.Reshape(Shape{hdim_, vdim_});
else
weight_.Reshape(Shape{vdim_, hdim_});
- bias_.Reshape(Shape{hdim_});
+ if (bias_term_)
+ bias_.Reshape(Shape{hdim_});
for (auto specs: conf.param())
param_specs_.push_back(specs);
}
@@ -56,7 +58,8 @@ const Tensor Dense::Forward(int flag, const Tensor &input) {
output = Mult(input, weight_.T());
else
output = Mult(input, weight_);
- AddRow(bias_, &output);
+ if (bias_term_)
+ AddRow(bias_, &output);
if (flag & kTrain)
buf_.push(input);
return output;
@@ -70,10 +73,12 @@ const std::pair<Tensor, vector<Tensor>> Dense::Backward(int flag,
Tensor src_data = buf_.top();
buf_.pop();
Tensor db, dw, dx;
- db.ResetLike(bias_);
dw.ResetLike(weight_);
dx.ResetLike(src_data);
- SumRows(grad, &db);
+ if (bias_term_) {
+ db.ResetLike(bias_);
+ SumRows(grad, &db);
+ }
if (transpose_) {
dx = Mult(grad, weight_);
dw = Mult(grad.T(), src_data);
@@ -82,7 +87,8 @@ const std::pair<Tensor, vector<Tensor>> Dense::Backward(int flag,
dw = Mult(src_data.T(), grad);
}
param_grad.push_back(dw);
- param_grad.push_back(db);
+ if (bias_term_)
+ param_grad.push_back(db);
return std::make_pair(dx, param_grad);
}
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/c0317d18/src/model/layer/dense.h
----------------------------------------------------------------------
diff --git a/src/model/layer/dense.h b/src/model/layer/dense.h
index 8a149a5..8f53699 100644
--- a/src/model/layer/dense.h
+++ b/src/model/layer/dense.h
@@ -46,7 +46,10 @@ class Dense : public Layer {
void ToDevice(std::shared_ptr<Device> device) override;
const std::vector<Tensor> param_values() override {
- return std::vector<Tensor>{weight_, bias_};
+ if (bias_term_)
+ return std::vector<Tensor>{weight_, bias_};
+ else
+ return std::vector<Tensor>{weight_};
}
size_t num_output() const { return hdim_; }
size_t num_input() const { return vdim_; }
@@ -67,6 +70,8 @@ class Dense : public Layer {
/// Used in auto-encoder, where the decoder would share its weight matrix from
/// the encoder's transposed weight matrix.
bool transpose_ = false;
+ /// use bias or not;
+ bool bias_term_ = true;
size_t vdim_, hdim_;
Tensor weight_, bias_;
// Tensor data_, grad_;
[3/5] incubator-singa git commit: SINGA-300 - Add residual networks
for imagenet classification
Posted by wa...@apache.org.
SINGA-300 - Add residual networks for imagenet classification
Add license header and the README file
Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/026db89d
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/026db89d
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/026db89d
Branch: refs/heads/master
Commit: 026db89d4899b51c8bf216f08edec580305c5e15
Parents: 45ec92d
Author: Wei Wang <wa...@gmail.com>
Authored: Sun Feb 12 22:07:01 2017 +0800
Committer: Wei Wang <wa...@gmail.com>
Committed: Fri Feb 17 12:59:19 2017 +0800
----------------------------------------------------------------------
examples/imagenet/resnet/README.md | 54 ++++++++++++++++++++++++++++++++
examples/imagenet/resnet/convert.py | 22 +++++++++++--
examples/imagenet/resnet/serve.py | 16 ++++++++++
3 files changed, 89 insertions(+), 3 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/026db89d/examples/imagenet/resnet/README.md
----------------------------------------------------------------------
diff --git a/examples/imagenet/resnet/README.md b/examples/imagenet/resnet/README.md
new file mode 100644
index 0000000..4d5f231
--- /dev/null
+++ b/examples/imagenet/resnet/README.md
@@ -0,0 +1,54 @@
+---
+name: Resnets on ImageNet
+SINGA version: 1.1
+SINGA commit: 45ec92d8ffc1fa1385a9307fdf07e21da939ee2f
+parameter_url: https://s3-ap-southeast-1.amazonaws.com/dlfile/resnet/resnet-18.tar.gz
+license: Apache V2, https://github.com/facebook/fb.resnet.torch/blob/master/LICENSE
+---
+
+# Image Classification using Residual Networks
+
+
+In this example, we convert Residual Networks trained on [Torch](https://github.com/facebook/fb.resnet.torch) to SINGA for image classification.
+
+## Instructions
+
+* Download one parameter checkpoint file (see below) and the synset word file of ImageNet into this folder, e.g.,
+
+ $ wget https://s3-ap-southeast-1.amazonaws.com/dlfile/resnet/resnet-18.tar.gz
+ $ wget https://s3-ap-southeast-1.amazonaws.com/dlfile/resnet/synset_words.txt
+ $ tar xvf resnet-18.tar.gz
+
+* Usage
+
+ $ python serve.py -h
+
+* Example
+
+ # use cpu
+ $ python serve.py -C --parameter_file resnet-18.pickle --model resnet --depth 18 &
+ # use gpu
+ $ python serve.py --parameter_file resnet-18.pickle --model resnet --depth 18 &
+
+ The parameter files for the following model and depth configuration pairs are provided:
+ * resnet (original resnet), [18](https://s3-ap-southeast-1.amazonaws.com/dlfile/resnet/resnet-18.tar.gz)|[34](https://s3-ap-southeast-1.amazonaws.com/dlfile/resnet/resnet-34.tar.gz)|[101](https://s3-ap-southeast-1.amazonaws.com/dlfile/resnet/resnet-101.tar.gz)|[152](https://s3-ap-southeast-1.amazonaws.com/dlfile/resnet/resnet-152.tar.gz)
+ * addbn (resnet with a batch normalization layer after the addition), [50](https://s3-ap-southeast-1.amazonaws.com/dlfile/resnet/resnet-50.tar.gz)
+ * wrn (wide resnet), [50](https://s3-ap-southeast-1.amazonaws.com/dlfile/resnet/wrn-50-2.tar.gz)
+ * preact (resnet with pre-activation) [200](https://s3-ap-southeast-1.amazonaws.com/dlfile/resnet/resnet-200.tar.gz)
+
+* Submit images for classification
+
+ $ curl -i -F image=@image1.jpg http://localhost:9999/api
+ $ curl -i -F image=@image2.jpg http://localhost:9999/api
+ $ curl -i -F image=@image3.jpg http://localhost:9999/api
+
+image1.jpg, image2.jpg and image3.jpg should be downloaded before executing the above commands.
+
+## Details
+
+The parameter files were extracted from the original [torch files](https://github.com/facebook/fb.resnet.torch/tree/master/pretrained) via
+the convert.py program.
+
+Usage:
+
+ $ python convert.py -h
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/026db89d/examples/imagenet/resnet/convert.py
----------------------------------------------------------------------
diff --git a/examples/imagenet/resnet/convert.py b/examples/imagenet/resnet/convert.py
index e630281..6bf4101 100644
--- a/examples/imagenet/resnet/convert.py
+++ b/examples/imagenet/resnet/convert.py
@@ -1,12 +1,28 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+'''Extract the net parameters from the torch file and store them as python dict
+using cPickle'''
+
import os
import torchfile
import numpy as np
import cPickle as pickle
from argparse import ArgumentParser
-'''Extract the net parameters from the torch file and store them as python dict
-using cPickle'''
-
import model
verbose=False
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/026db89d/examples/imagenet/resnet/serve.py
----------------------------------------------------------------------
diff --git a/examples/imagenet/resnet/serve.py b/examples/imagenet/resnet/serve.py
index a6163f7..aea7b18 100644
--- a/examples/imagenet/resnet/serve.py
+++ b/examples/imagenet/resnet/serve.py
@@ -1,3 +1,19 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
import os
import sys
import time
[4/5] incubator-singa git commit: SINGA-300 - Add residual networks
for imagenet classification
Posted by wa...@apache.org.
SINGA-300 - Add residual networks for imagenet classification
fix bugs for using CPU
Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/8ddfab5f
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/8ddfab5f
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/8ddfab5f
Branch: refs/heads/master
Commit: 8ddfab5fa394acc0fac8f14c4ae7d2576d7460c9
Parents: 026db89
Author: Wei Wang <wa...@gmail.com>
Authored: Fri Feb 17 13:26:46 2017 +0800
Committer: Wei Wang <wa...@gmail.com>
Committed: Fri Feb 17 13:26:46 2017 +0800
----------------------------------------------------------------------
examples/imagenet/resnet/README.md | 2 +-
examples/imagenet/resnet/model.py | 5 ++++-
examples/imagenet/resnet/serve.py | 15 ++++++++++-----
3 files changed, 15 insertions(+), 7 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/8ddfab5f/examples/imagenet/resnet/README.md
----------------------------------------------------------------------
diff --git a/examples/imagenet/resnet/README.md b/examples/imagenet/resnet/README.md
index 4d5f231..4a0f4da 100644
--- a/examples/imagenet/resnet/README.md
+++ b/examples/imagenet/resnet/README.md
@@ -26,7 +26,7 @@ In this example, we convert Residual Networks trained on [Torch](https://github.
* Example
# use cpu
- $ python serve.py -C --parameter_file resnet-18.pickle --model resnet --depth 18 &
+ $ python serve.py --use_cpu --parameter_file resnet-18.pickle --model resnet --depth 18 &
# use gpu
$ python serve.py --parameter_file resnet-18.pickle --model resnet --depth 18 &
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/8ddfab5f/examples/imagenet/resnet/model.py
----------------------------------------------------------------------
diff --git a/examples/imagenet/resnet/model.py b/examples/imagenet/resnet/model.py
index 34dfd9f..bf90da3 100644
--- a/examples/imagenet/resnet/model.py
+++ b/examples/imagenet/resnet/model.py
@@ -21,6 +21,7 @@ from singa.layer import Conv2D, Activation, MaxPooling2D, AvgPooling2D,\
Split, Merge, Flatten, Dense, BatchNormalization, Softmax
from singa import net as ffnet
from singa import initializer
+from singa import layer
ffnet.verbose=True
@@ -257,7 +258,9 @@ def create_wide_resnet(depth=50):
return net
-def create_net(name, depth):
+def create_net(name, depth, use_cpu):
+ if use_cpu:
+ layer.engine = 'singacpp'
if name == 'resnet':
return create_resnet(depth)
elif name == 'wrn':
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/8ddfab5f/examples/imagenet/resnet/serve.py
----------------------------------------------------------------------
diff --git a/examples/imagenet/resnet/serve.py b/examples/imagenet/resnet/serve.py
index aea7b18..ba5adb1 100644
--- a/examples/imagenet/resnet/serve.py
+++ b/examples/imagenet/resnet/serve.py
@@ -117,8 +117,8 @@ def main():
# Setup argument parser
parser = ArgumentParser(description="Wide residual network")
- parser.add_argument("-p", "--port", default=9999, help="listen port")
- parser.add_argument("-c", "--use_cpu", action="store_true",
+ parser.add_argument("--port", default=9999, help="listen port")
+ parser.add_argument("--use_cpu", action="store_true",
help="If set, load models onto CPU devices")
parser.add_argument("--parameter_file", default="wrn-50-2.pickle")
parser.add_argument("--model", choices = ['resnet', 'wrn', 'preact', 'addbn'], default='wrn')
@@ -131,9 +131,14 @@ def main():
# start to train
agent = Agent(port)
- net = model.create_net(args.model, args.depth)
- dev = device.create_cuda_gpu()
- net.to_device(dev)
+ net = model.create_net(args.model, args.depth, args.use_cpu)
+ if args.use_cpu:
+ print 'Using CPU'
+ dev = device.get_default_device()
+ else:
+ print 'Using GPU'
+ dev = device.create_cuda_gpu()
+ net.to_device(dev)
model.init_params(net, args.parameter_file)
print 'Finish loading models'
[5/5] incubator-singa git commit: SINGA-300 Add residual networks for
imagenet classification
Posted by wa...@apache.org.
SINGA-300 Add residual networks for imagenet classification
Merge pull request 307 to master branch
Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/f2ad9338
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/f2ad9338
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/f2ad9338
Branch: refs/heads/master
Commit: f2ad933815a2ff5e5c78ee667c4221db3092a740
Parents: 8ddfab5 9b2d161
Author: WANG Sheng <wa...@gmail.com>
Authored: Fri Feb 17 14:22:37 2017 +0800
Committer: WANG Sheng <wa...@gmail.com>
Committed: Fri Feb 17 14:22:37 2017 +0800
----------------------------------------------------------------------
.travis.yml | 2 +-
doc/Doxyfile | 4 +--
doc/README.md | 10 ++++--
doc/_templates/layout.html | 30 +++++++++---------
doc/conf.py | 2 +-
doc/en/downloads.md | 29 +++++++++++++++--
doc/en/index.rst | 8 +++--
doc/en/releases/RELEASE_NOTES_1.1.0.md | 49 +++++++++++++++++++++++++++++
8 files changed, 106 insertions(+), 28 deletions(-)
----------------------------------------------------------------------
[2/5] incubator-singa git commit: SINGA-300 - Add residual networks
for imagenet classification
Posted by wa...@apache.org.
SINGA-300 - Add residual networks for imagenet classification
update convert.py for general resnet models and pre-activation resnet
tested all models for serving; upload pickle files to s3
Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/45ec92d8
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/45ec92d8
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/45ec92d8
Branch: refs/heads/master
Commit: 45ec92d8ffc1fa1385a9307fdf07e21da939ee2f
Parents: c0317d1
Author: Wei Wang <wa...@gmail.com>
Authored: Sat Feb 11 18:39:20 2017 +0800
Committer: Wei Wang <wa...@gmail.com>
Committed: Sun Feb 12 22:01:19 2017 +0800
----------------------------------------------------------------------
examples/imagenet/resnet/convert.py | 136 +++++++++++++-------
examples/imagenet/resnet/model.py | 214 +++++++++++++++++++++++++------
examples/imagenet/resnet/serve.py | 5 +-
3 files changed, 271 insertions(+), 84 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/45ec92d8/examples/imagenet/resnet/convert.py
----------------------------------------------------------------------
diff --git a/examples/imagenet/resnet/convert.py b/examples/imagenet/resnet/convert.py
index c976bf5..e630281 100644
--- a/examples/imagenet/resnet/convert.py
+++ b/examples/imagenet/resnet/convert.py
@@ -1,48 +1,98 @@
+import os
import torchfile
import numpy as np
import cPickle as pickle
+from argparse import ArgumentParser
-def conv(m, name, params):
+'''Extract the net parameters from the torch file and store them as python dict
+using cPickle'''
+
+import model
+
+verbose=False
+
+def add_param(idx, name, val, params):
+ if type(params) == dict:
+ assert name not in params, 'duplicated param %s' % name
+ params[name] = val
+ else:
+ assert params[idx].size() == val.size, 'size mismatch for %s: %s - %s' % (name, (params[idx].shape,), (val.shape,))
+ params[idx].copy_from_numpy(val)
+
+ if verbose:
+ print name, val.shape
+
+
+def conv(m, idx, params, param_names):
outplane = m['weight'].shape[0]
- params[name + '-conv_weight'] = np.reshape(m['weight'], (outplane, -1))
- return params
-
-def batchnorm(m, name, params):
- params[name + '-bn_gamma'] = m['weight']
- params[name + '-bn_beta'] = m['bias']
- params[name + '-bn_mean'] = m['running_mean']
- params[name + '-bn_var'] = m['running_var']
- return params
-
-def block(m, name, params, has_identity):
- branch=m[0].modules[0].modules
- params = conv(branch[0], name + '-1', params)
- params = batchnorm(branch[1], name + '-1', params)
- params = conv(branch[3], name + '-2', params)
- params = batchnorm(branch[4], name + '-2', params)
- params = conv(branch[6], name + '-3', params)
- params = batchnorm(branch[7], name + '-3', params)
- if not has_identity:
- shortcut = m[0].modules[1].modules
- params = conv(shortcut[0], name + '-shortcut', params)
- params = batchnorm(shortcut[1], name + '-shortcut', params)
- return params
-
-def stage(sid, m, num_blk, params):
- for i in range(num_blk):
- params = block(m[i].modules, 'stage%d-blk%d' % (sid, i), params, i!=0)
- return params
-
-params = {}
-model = torchfile.load('wrn-50-2.t7').modules
-params = conv(model[0], 'input', params)
-params = batchnorm(model[1], 'input', params)
-params = stage(0, model[4].modules, 3, params)
-params = stage(1, model[5].modules, 4, params)
-params = stage(2, model[6].modules, 6, params)
-params = stage(3, model[7].modules, 3, params)
-
-params['dense_weight'] = np.transpose(model[10]['weight'])
-params['dense_bias'] = model[10]['bias']
-with open('wrn-50-2.pickle', 'wb') as fd:
- pickle.dump(params, fd)
+ name = param_names[idx]
+ val = np.reshape(m['weight'], (outplane, -1))
+ add_param(idx, name, val, params)
+ return idx + 1
+
+
+def batchnorm(m, idx, params, param_names):
+ add_param(idx, param_names[idx], m['weight'], params)
+ add_param(idx + 1, param_names[idx + 1], m['bias'], params)
+ add_param(idx + 2, param_names[idx + 2], m['running_mean'], params)
+ add_param(idx + 3, param_names[idx + 3], m['running_var'], params)
+ return idx + 4
+
+
+def linear(m, idx, params, param_names):
+ add_param(idx, param_names[idx], np.transpose(m['weight']), params)
+ add_param(idx + 1, param_names[idx + 1], m['bias'], params)
+ return idx + 2
+
+
+def traverse(m, idx, params, param_names):
+ ''' Traverse all modules of the torch checkpoint file to extract params.
+
+ Args:
+ m, a TorchObject
+ idx, index for the current cursor of param_names
+ params, an empty dictionary (name->numpy) to dump the params via pickle;
+ or a list of tensor objects which should be in the same order as
+ param_names, called to initialize net created in Singa directly
+ using param values from torch checkpoint file.
+
+ Returns:
+ the updated idx
+ '''
+ module_type = m.__dict__['_typename']
+ if module_type in ['nn.Sequential', 'nn.ConcatTable'] :
+ for x in m.modules:
+ idx = traverse(x, idx, params, param_names)
+ elif 'SpatialConvolution' in module_type:
+ idx = conv(m, idx, params, param_names)
+ elif 'SpatialBatchNormalization' in module_type:
+ idx = batchnorm(m, idx, params, param_names)
+ elif 'Linear' in module_type:
+ idx = linear(m, idx, params, param_names)
+ return idx
+
+
+if __name__ == '__main__':
+ parser = ArgumentParser(description='Convert params from torch to python '
+ 'dict. \n resnet could have depth of 18, 34, 101, 152; \n
+ wrn has depth 50; preact has depth 200; addbn has depth 50')
+ parser.add_argument("infile", help="torch checkpoint file")
+ parser.add_argument("model", choices = ['resnet', 'wrn', 'preact', 'addbn'])
+ parser.add_argument("depth", type=int, choices = [18, 34, 50, 101, 152, 200])
+ args = parser.parse_args()
+
+ net = model.create_net(args.model, args.depth)
+ # model.init_params(net)
+ m = torchfile.load(args.infile)
+ params = {}
+ # params = net.param_values()
+ param_names = net.param_names()
+ traverse(m, 0, params, param_names)
+ miss = [name for name in param_names if name not in params]
+ if len(miss) > 0:
+ print 'The following params are missing from torch file'
+ print miss
+
+ outfile = os.path.splitext(args.infile)[0] + '.pickle'
+ with open(outfile, 'wb') as fd:
+ pickle.dump(params, fd)
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/45ec92d8/examples/imagenet/resnet/model.py
----------------------------------------------------------------------
diff --git a/examples/imagenet/resnet/model.py b/examples/imagenet/resnet/model.py
index 7c9a3cf..34dfd9f 100644
--- a/examples/imagenet/resnet/model.py
+++ b/examples/imagenet/resnet/model.py
@@ -14,7 +14,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# =============================================================================
-''' This model is created following https://github.com/facebook/fb.resnet.torch.git
+''' These models are created following https://github.com/facebook/fb.resnet.torch.git
+and https://github.com/szagoruyko/wide-residual-networks
'''
from singa.layer import Conv2D, Activation, MaxPooling2D, AvgPooling2D,\
Split, Merge, Flatten, Dense, BatchNormalization, Softmax
@@ -26,6 +27,17 @@ ffnet.verbose=True
conv_bias = False
def conv(net, prefix, n, ksize, stride=1, pad=0, bn=True, relu=True, src=None):
+ '''Add a convolution layer and optionally a batchnorm and relu layer.
+
+ Args:
+ prefix, a string for the prefix of the layer name
+ n, num of filters for the conv layer
+ bn, if true add batchnorm
+ relu, if true add relu
+
+ Returns:
+ the last added layer
+ '''
ret = net.add(Conv2D(
prefix + '-conv', n, ksize, stride, pad=pad, use_bias=conv_bias), src)
if bn:
@@ -35,40 +47,95 @@ def conv(net, prefix, n, ksize, stride=1, pad=0, bn=True, relu=True, src=None):
return ret
-def shortcut(net, prefix, inplane, outplane, stride, src):
+def shortcut(net, prefix, inplane, outplane, stride, src, bn=False):
+ '''Add a conv shortcut layer if inplane != outplane; or return the source
+ layer directly.
+
+ Args:
+ prefix, a string for the prefix of the layer name
+ bn, if true add a batchnorm layer after the conv layer
+
+ Returns:
+ return the last added layer or the source layer.
+ '''
if inplane == outplane:
return src
- return conv(net, prefix + '-shortcut', outplane, 1, stride, 0, True, False, src)
+ return conv(net, prefix + '-shortcut', outplane, 1, stride, 0, bn, False, src)
+
+
+def bottleneck(name, net, inplane, midplane, outplane, stride=1, preact=False, add_bn=False):
+ '''Add three conv layers, with a>=b<=c filters.
+ The default structure is
+ input
+ -split - conv1-bn1-relu1-conv2-bn2-relu2-conv3-bn3
+ - conv-bn or dummy
+ -add
+ -relu
-def bottleneck(name, net, inplane, midplane, outplane, stride=1, preact=False):
+ Args:
+ inplane, num of feature maps of the input
+ midplane, num of feature maps of the middle layer
+ outplane, num of feature maps of the output
+ preact, if true, move the bn3 and relu before conv1, i.e., pre-activation ref identity mapping paper
+ add_bn, if true, move the last bn after the addition layer (for resnet-50)
+ '''
+ assert not (preact and add_bn), 'preact and batchnorm after addition cannot be true at the same time'
split = net.add(Split(name + '-split', 2))
- conv(net, name + '-1', midplane, 1, 1, 0, True, True, src=split)
- conv(net, name + '-2', midplane, 3, stride, 1, True, True)
- br0 = conv(net, name + '-3', outplane, 1, 1, 0, True, False)
- br1 = shortcut(net, name, inplane, outplane, stride, split)
- net.add(Merge(name + '-add'), [br0, br1])
- return net.add(Activation(name + '-relu'))
-
-def basicblock(name, net, inplane, midplane, outplane, stride=1, preact=False):
- assert midplane==outplane, 'midplan and outplane should be the same'
+ if preact:
+ net.add(BatchNormalization(name + '-preact-bn'))
+ net.add(Activation(name + '-preact-relu'))
+ conv(net, name + '-0', midplane, 1, 1, 0, True, True)
+ conv(net, name + '-1', midplane, 3, stride, 1, True, True)
+ br0 = conv(net, name + '-2', outplane, 1, 1, 0, not (preact or add_bn), False)
+ br1 = shortcut(net, name, inplane, outplane, stride, split, not add_bn)
+ ret = net.add(Merge(name + '-add'), [br0, br1])
+ if add_bn:
+ ret = net.add(BatchNormalization(name + '-add-bn'))
+ if not preact:
+ ret = net.add(Activation(name + '-add-relu'))
+ return ret
+
+
+def basicblock(name, net, inplane, midplane, outplane, stride=1, preact=False, add_bn=False):
+ '''Add two conv layers, with a<=b filters.
+
+ The default structure is
+ input
+ -split - conv1-bn1-relu1-conv2-bn2
+ - conv or dummy
+ -add
+ -relu
+
+ Args:
+ inplane, num of feature maps of the input
+ midplane, num of feature maps of the middle layer
+ outplane, num of feature maps of the output
+ preact, if true, move the bn2 and relu before conv1, i.e., pre-activation ref identity mapping paper
+ add_bn, if true, move the last bn after the addition layer (for resnet-50)
+ '''
+ assert not (preact and add_bn), 'preact and batchnorm after addition cannot be true at the same time'
split = net.add(Split(name + '-split', 2))
if preact:
- net.add(BatchNormalization(name + '-preact-bn'), split)
+ net.add(BatchNormalization(name + '-preact-bn'))
net.add(Activation(name + '-preact-relu'))
- conv(net, name + '-1', outplane, 3, stride, 1, True, True, split)
- br0 = conv(net, name + '-2', outplane, 3, 1, 1, True, False)
- br1 = shortcut(net, name, inplane, outplane, stride, split)
- net.add(Merge(name + '-add'), [br0, br1])
- return net.add(Activation(name + '-add-relu'))
+ conv(net, name + '-0', midplane, 3, stride, 1, True, True)
+ br0 = conv(net, name + '-1', outplane, 3, 1, 1, not preact, False)
+ br1 = shortcut(net, name, inplane, outplane, stride, split, False)
+ ret = net.add(Merge(name + '-add'), [br0, br1])
+ if add_bn:
+ ret = net.add(BatchNormalization(name + '-add-bn'))
+ if not preact:
+ ret = net.add(Activation(name + '-add-relu'))
+ return ret
-def stage(sid, net, num_blk, inplane, midplane, outplane, stride, block):
- block('stage%d-blk%d' % (sid, 0), net, inplane, midplane, outplane, stride)
+def stage(sid, net, num_blk, inplane, midplane, outplane, stride, block, preact=False, add_bn=False):
+ block('stage%d-blk%d' % (sid, 0), net, inplane, midplane, outplane, stride, preact, add_bn)
for i in range(1, num_blk):
- block('stage%d-blk%d' % (sid, i), net, outplane, midplane, outplane)
+ block('stage%d-blk%d' % (sid, i), net, outplane, midplane, outplane, 1, preact, add_bn)
-def init_params(net, weight_path):
+def init_params(net, weight_path=None):
if weight_path == None:
for pname, pval in zip(net.param_names(), net.param_values()):
print pname, pval.shape
@@ -89,32 +156,90 @@ def init_params(net, weight_path):
else:
net.load(weight_path, use_pickle = 'pickle' in weight_path)
-def create_resnet(weight_path=None, depth=50):
- cfg = {
- 50: ([3, 4, 6, 3], bottleneck),
- 101: ([3, 4, 23, 3], bottleneck),
- 152: ([3, 8, 36, 3], bottleneck),
- }
+
+cfg = { 18: [2, 2, 2, 2], # basicblock
+ 34: [3, 4, 6, 3], # basicblock
+ 50: [3, 4, 6, 3], # bottleneck
+ 101: [3, 4, 23, 3], # bottleneck
+ 152: [3, 8, 36, 3], # bottleneck
+ 200: [3, 24, 36, 3]} # bottleneck
+
+
+def create_addbn_resnet(depth=50):
+ '''Original resnet with the last batchnorm of each block moved to after the addition layer'''
net = ffnet.FeedForwardNet()
- net.add(Conv2D('input-conv', 64, 7, 2, pad=3, input_sample_shape=(3, 224, 224)))
+ net.add(Conv2D('input-conv', 64, 7, 2, pad=3, use_bias=False, input_sample_shape=(3, 224, 224)))
net.add(BatchNormalization('input-bn'))
net.add(Activation('input_relu'))
net.add(MaxPooling2D('input_pool', 3, 2, pad=1))
+ conf = cfg[depth]
+ if depth > 34:
+ stage(0, net, conf[0], 64, 64, 256, 1, bottleneck, add_bn=True)
+ stage(1, net, conf[1], 256, 128, 512, 2, bottleneck, add_bn=True)
+ stage(2, net, conf[2], 512, 256, 1024, 2, bottleneck, add_bn=True)
+ stage(3, net, conf[3], 1024, 512, 2048, 2, bottleneck, add_bn=True)
+ else:
+ stage(0, net, conf[0], 64, 64, 64, 1, basicblock, add_bn=True)
+ stage(1, net, conf[1], 64, 128, 128, 2, basicblock, add_bn=True)
+ stage(2, net, conf[2], 128, 256, 256, 2, basicblock, add_bn=True)
+ stage(3, net, conf[3], 256, 512, 512, 2, basicblock, add_bn=True)
+ net.add(AvgPooling2D('avg', 7, 1, pad=0))
+ net.add(Flatten('flat'))
+ net.add(Dense('dense', 1000))
+ return net
+
+def create_resnet(depth=18):
+ '''Original resnet, where there is a relu after the addition layer'''
+ net = ffnet.FeedForwardNet()
+ net.add(Conv2D('input-conv', 64, 7, 2, pad=3, use_bias=False, input_sample_shape=(3, 224, 224)))
+ net.add(BatchNormalization('input-bn'))
+ net.add(Activation('input_relu'))
+ net.add(MaxPooling2D('input_pool', 3, 2, pad=1))
conf = cfg[depth]
- stage(0, net, conf[0][0], 64, 64, 256, 1, conf[1])
- stage(1, net, conf[0][1], 256, 128, 512, 2, conf[1])
- stage(2, net, conf[0][2], 512, 256, 1024, 2, conf[1])
- stage(3, net, conf[0][3], 1024, 512, 2048, 2, conf[1])
- net.add(AvgPooling2D('avg', 7, 1))
+ if depth > 34:
+ stage(0, net, conf[0], 64, 64, 256, 1, bottleneck)
+ stage(1, net, conf[1], 256, 128, 512, 2, bottleneck)
+ stage(2, net, conf[2], 512, 256, 1024, 2, bottleneck)
+ stage(3, net, conf[3], 1024, 512, 2048, 2, bottleneck)
+ else:
+ stage(0, net, conf[0], 64, 64, 64, 1, basicblock)
+ stage(1, net, conf[1], 64, 128, 128, 2, basicblock)
+ stage(2, net, conf[2], 128, 256, 256, 2, basicblock)
+ stage(3, net, conf[3], 256, 512, 512, 2, basicblock)
+ net.add(AvgPooling2D('avg', 7, 1, pad=0))
net.add(Flatten('flat'))
net.add(Dense('dense', 1000))
+ return net
- init_params(net, weight_path)
+def create_preact_resnet(depth=200):
+ '''Resnet with the batchnorm and relu moved to before the conv layer for each block'''
+ net = ffnet.FeedForwardNet()
+ net.add(Conv2D('input-conv', 64, 7, 2, pad=3, use_bias=False, input_sample_shape=(3, 224, 224)))
+ net.add(BatchNormalization('input-bn'))
+ net.add(Activation('input_relu'))
+ net.add(MaxPooling2D('input_pool', 3, 2, pad=1))
+ conf = cfg[depth]
+ if depth > 34:
+ stage(0, net, conf[0], 64, 64, 256, 1, bottleneck, preact=True)
+ stage(1, net, conf[1], 256, 128, 512, 2, bottleneck, preact=True)
+ stage(2, net, conf[2], 512, 256, 1024, 2, bottleneck, preact=True)
+ stage(3, net, conf[3], 1024, 512, 2048, 2, bottleneck, preact=True)
+ else:
+ stage(0, net, conf[0], 64, 64, 64, 1, basicblock, preact=True)
+ stage(1, net, conf[1], 64, 128, 128, 2, basicblock, preact=True)
+ stage(2, net, conf[2], 128, 256, 256, 2, basicblock, preact=True)
+ stage(3, net, conf[3], 256, 512, 512, 2, basicblock, preact=True)
+ net.add(BatchNormalization('final-bn'))
+ net.add(Activation('final-relu'))
+ net.add(AvgPooling2D('avg', 7, 1, pad=0))
+ net.add(Flatten('flat'))
+ net.add(Dense('dense', 1000))
return net
-def create_wide_resnet(weight_path=None):
+def create_wide_resnet(depth=50):
+ '''Similar original resnet except that a<=b<=c for the bottleneck block'''
net = ffnet.FeedForwardNet()
net.add(Conv2D('input-conv', 64, 7, 2, pad=3, use_bias=False, input_sample_shape=(3, 224, 224)))
net.add(BatchNormalization('input-bn'))
@@ -129,10 +254,19 @@ def create_wide_resnet(weight_path=None):
net.add(AvgPooling2D('avg_pool', 7, 1, pad=0))
net.add(Flatten('flag'))
net.add(Dense('dense', 1000))
-
- init_params(net, weight_path)
return net
+def create_net(name, depth):
+ if name == 'resnet':
+ return create_resnet(depth)
+ elif name == 'wrn':
+ return create_wide_resnet(depth)
+ elif name == 'preact':
+ return create_preact_resnet(depth)
+ elif name == 'addbn':
+ return create_addbn_resnet(depth)
+
+
if __name__ == '__main__':
- create_net('wrn-50-2.pickle')
+ create_net('wrn', 50)
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/45ec92d8/examples/imagenet/resnet/serve.py
----------------------------------------------------------------------
diff --git a/examples/imagenet/resnet/serve.py b/examples/imagenet/resnet/serve.py
index d9609ce..a6163f7 100644
--- a/examples/imagenet/resnet/serve.py
+++ b/examples/imagenet/resnet/serve.py
@@ -105,6 +105,8 @@ def main():
parser.add_argument("-c", "--use_cpu", action="store_true",
help="If set, load models onto CPU devices")
parser.add_argument("--parameter_file", default="wrn-50-2.pickle")
+ parser.add_argument("--model", choices = ['resnet', 'wrn', 'preact', 'addbn'], default='wrn')
+ parser.add_argument("--depth", type=int, choices = [18, 34, 50, 101, 152, 200], default='50')
# Process arguments
args = parser.parse_args()
@@ -113,9 +115,10 @@ def main():
# start to train
agent = Agent(port)
- net = model.create_wide_resnet(args.parameter_file)
+ net = model.create_net(args.model, args.depth)
dev = device.create_cuda_gpu()
net.to_device(dev)
+ model.init_params(net, args.parameter_file)
print 'Finish loading models'
labels = np.loadtxt('synset_words.txt', str, delimiter='\t ')