You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@singa.apache.org by wa...@apache.org on 2017/08/04 08:32:46 UTC
[02/15] incubator-singa git commit: SINGA-290 Upgrade to Python 3

SINGA-290 Upgrade to Python 3


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/c94b3dfd
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/c94b3dfd
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/c94b3dfd

Branch: refs/heads/master
Commit: c94b3dfd273994ecd17d361558b8e019c1cf3ed3
Parents: 14f0d8c
Author: Moaz Reyad <mo...@gmail.com>
Authored: Wed May 31 13:34:26 2017 +0800
Committer: Wei Wang <wa...@comp.nus.edu.sg>
Committed: Thu Aug 3 18:15:59 2017 +0800

----------------------------------------------------------------------
 cmake/Dependencies.cmake             |  3 +-
 doc/en/docs/notebook/utils.py        |  6 +++-
 examples/caffe/predict.py            |  9 ++++--
 examples/char-rnn/sample.py          | 12 ++++++--
 examples/char-rnn/train.py           | 46 ++++++++++++++++++-------------
 examples/cifar10/alexnet.py          |  4 ++-
 examples/cifar10/caffe/caffe_net.py  | 12 +++++---
 examples/cifar10/download_data.py    | 19 +++++++------
 examples/cifar10/predict.py          | 10 +++++--
 examples/cifar10/resnet.py           |  8 ++++--
 examples/cifar10/train.py            | 40 ++++++++++++++++-----------
 examples/cifar10/vgg.py              |  8 ++++--
 examples/imagenet/googlenet/serve.py | 15 ++++++----
 examples/imagenet/resnet/convert.py  |  3 +-
 examples/imagenet/resnet/model.py    |  5 +++-
 examples/imagenet/resnet/serve.py    | 25 ++++++++++-------
 examples/mnist/train.py              | 32 ++++++++++++---------
 python/CMakeLists.txt                |  9 +++++-
 python/rafiki/agent.py               | 10 ++++---
 python/singa/layer.py                |  9 +++---
 python/singa/loss.py                 |  2 +-
 python/singa/optimizer.py            | 19 ++++++-------
 python/singa/snapshot.py             |  4 +--
 python/singa/tensor.py               | 17 +++++++++++-
 src/api/model_layer.i                |  1 +
 src/api/model_optimizer.i            |  1 +
 test/python/test_layer.py            |  1 +
 test/python/test_loss.py             |  4 ++-
 test/python/test_metric.py           |  8 ++++--
 test/python/test_net.py              |  5 +++-
 test/python/test_optimizer.py        |  8 ++++--
 test/python/test_tensor.py           |  4 ++-
 tool/debian/postinst                 |  2 +-
 tool/opencl/clsrc_to_str.py          |  2 +-
 34 files changed, 233 insertions(+), 130 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/c94b3dfd/cmake/Dependencies.cmake
----------------------------------------------------------------------
diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake
index aa3c090..180732b 100644
--- a/cmake/Dependencies.cmake
+++ b/cmake/Dependencies.cmake
@@ -127,11 +127,12 @@ IF(USE_PYTHON)
     IF(PYTHON2)
         FIND_PACKAGE(PythonLibs 2.7 REQUIRED)
         FIND_PACKAGE(PythonInterp 2.7 REQUIRED)
+	FIND_PACKAGE(SWIG 3.0.10 REQUIRED)
     ELSE()
         FIND_PACKAGE(PythonLibs 3.0 REQUIRED)
         FIND_PACKAGE(PythonInterp 3.0 REQUIRED)
+	FIND_PACKAGE(SWIG 3.0.10 REQUIRED)
     ENDIF()
-    FIND_PACKAGE(SWIG 3.0 REQUIRED)
 ENDIF()
 
 IF(USE_JAVA)

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/c94b3dfd/doc/en/docs/notebook/utils.py
----------------------------------------------------------------------
diff --git a/doc/en/docs/notebook/utils.py b/doc/en/docs/notebook/utils.py
index ff772ad..3af9ec5 100755
--- a/doc/en/docs/notebook/utils.py
+++ b/doc/en/docs/notebook/utils.py
@@ -5,6 +5,10 @@ processing the outputs into a more understandable way.
 For example ``tile_raster_images`` helps in generating a easy to grasp
 image from a set of samples or weights.
 """
+from __future__ import division
+from builtins import zip
+from builtins import range
+from past.utils import old_div
 
 import numpy
 
@@ -13,7 +17,7 @@ def scale_to_unit_interval(ndar, eps=1e-8):
     """ Scales all values in the ndarray ndar to be between 0 and 1 """
     ndar = ndar.copy()
     ndar -= ndar.min()
-    ndar *= 1.0 / (ndar.max() + eps)
+    ndar *= old_div(1.0, (ndar.max() + eps))
     return ndar
 
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/c94b3dfd/examples/caffe/predict.py
----------------------------------------------------------------------
diff --git a/examples/caffe/predict.py b/examples/caffe/predict.py
index 663cd87..62e6a86 100644
--- a/examples/caffe/predict.py
+++ b/examples/caffe/predict.py
@@ -1,3 +1,6 @@
+from __future__ import print_function
+from builtins import input
+from builtins import range
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information
@@ -69,11 +72,11 @@ def predict(net, dev, synset_list, topk=5):
         topk, return the topk labels for each image.
     '''
     while True:
-        img_path = raw_input("Enter input image path('quit' to exit): ")
+        img_path = input("Enter input image path('quit' to exit): ")
         if img_path == 'quit':
             return
         if not os.path.exists(img_path):
-            print 'Path is invalid'
+            print('Path is invalid')
             continue
         img = read_image(img_path)
         x = tensor.from_numpy(img.astype(np.float32)[np.newaxis,:])
@@ -82,7 +85,7 @@ def predict(net, dev, synset_list, topk=5):
         y.to_host()
         prob = tensor.to_numpy(y)
         lbl = np.argsort(-prob[0])  # sort prob in descending order
-        print [synset_list[lbl[i]] for i in range(topk)]
+        print([synset_list[lbl[i]] for i in range(topk)])
 
 
 if __name__ == '__main__':

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/c94b3dfd/examples/char-rnn/sample.py
----------------------------------------------------------------------
diff --git a/examples/char-rnn/sample.py b/examples/char-rnn/sample.py
index 9b6e757..5b0b66a 100644
--- a/examples/char-rnn/sample.py
+++ b/examples/char-rnn/sample.py
@@ -15,8 +15,14 @@
 # limitations under the License.
 # =============================================================================
 '''Sample characters from the pre-trained model'''
+from __future__ import division
+from __future__ import print_function
+from future import standard_library
+standard_library.install_aliases()
+from builtins import range
+from past.utils import old_div
 import sys
-import cPickle as pickle
+import pickle as pickle
 import numpy as np
 import argparse
 
@@ -69,7 +75,7 @@ def sample(model_path, nsamples=100, seed_text='', do_sample=True):
         sys.stdout.write(seed_text)
     else:
         y = tensor.Tensor((1, vocab_size), cuda)
-        y.set_value(1.0 / vocab_size)
+        y.set_value(old_div(1.0, vocab_size))
 
     for i in range(nsamples):
         y.to_host()
@@ -89,7 +95,7 @@ def sample(model_path, nsamples=100, seed_text='', do_sample=True):
         y = tensor.softmax(y)
         hx = outputs[1]
         cx = outputs[2]
-    print ''
+    print('')
 
 if __name__ == '__main__':
     parser = argparse.ArgumentParser(description='sample chars from char-rnn')

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/c94b3dfd/examples/char-rnn/train.py
----------------------------------------------------------------------
diff --git a/examples/char-rnn/train.py b/examples/char-rnn/train.py
index d28646e..0eeeb35 100644
--- a/examples/char-rnn/train.py
+++ b/examples/char-rnn/train.py
@@ -19,7 +19,15 @@ The model is created following https://github.com/karpathy/char-rnn
 The train file could be any text file,
 e.g., http://cs.stanford.edu/people/karpathy/char-rnn/
 '''
-import cPickle as pickle
+from __future__ import division
+from __future__ import print_function
+from future import standard_library
+standard_library.install_aliases()
+from builtins import zip
+from builtins import range
+from builtins import object
+from past.utils import old_div
+import pickle as pickle
 import numpy as np
 import argparse
 
@@ -51,18 +59,18 @@ class Data(object):
         self.idx_to_char = {i: ch for i, ch in enumerate(chars)}
         data = [self.char_to_idx[c] for c in self.raw_data]
         # seq_length + 1 for the data + label
-        nsamples = len(data) / (1 + seq_length)
+        nsamples = old_div(len(data), (1 + seq_length))
         data = data[0:nsamples * (1 + seq_length)]
         data = np.asarray(data, dtype=np.int32)
         data = np.reshape(data, (-1, seq_length + 1))
         # shuffle all sequences
         np.random.shuffle(data)
         self.train_dat = data[0:int(data.shape[0]*train_ratio)]
-        self.num_train_batch = self.train_dat.shape[0] / batch_size
+        self.num_train_batch = old_div(self.train_dat.shape[0], batch_size)
         self.val_dat = data[self.train_dat.shape[0]:]
-        self.num_test_batch = self.val_dat.shape[0] / batch_size
-        print 'train dat', self.train_dat.shape
-        print 'val dat', self.val_dat.shape
+        self.num_test_batch = old_div(self.val_dat.shape[0], batch_size)
+        print('train dat', self.train_dat.shape)
+        print('val dat', self.val_dat.shape)
 
 
 def numpy2tensors(npx, npy, dev):
@@ -94,7 +102,7 @@ def convert(batch, batch_size, seq_length, vocab_size, dev):
 
 
 def get_lr(epoch):
-    return 0.001 / float(1 << (epoch / 50))
+    return old_div(0.001, float(1 << (old_div(epoch, 50))))
 
 
 def train(data, max_epoch, hidden_size=100, seq_length=100, batch_size=16,
@@ -111,10 +119,10 @@ def train(data, max_epoch, hidden_size=100, seq_length=100, batch_size=16,
             data.vocab_size,
         ))
     rnn.to_device(cuda)
-    print 'created rnn'
+    print('created rnn')
     rnn_w = rnn.param_values()[0]
     rnn_w.uniform(-0.08, 0.08)  # init all rnn parameters
-    print 'rnn weight l1 = %f' % (rnn_w.l1())
+    print('rnn weight l1 = %f' % (rnn_w.l1()))
     dense = layer.Dense(
         'dense',
         data.vocab_size,
@@ -124,12 +132,12 @@ def train(data, max_epoch, hidden_size=100, seq_length=100, batch_size=16,
     dense.to_device(cuda)
     dense_w = dense.param_values()[0]
     dense_b = dense.param_values()[1]
-    print 'dense w ', dense_w.shape
-    print 'dense b ', dense_b.shape
+    print('dense w ', dense_w.shape)
+    print('dense b ', dense_b.shape)
     initializer.uniform(dense_w, dense_w.shape[0], 0)
-    print 'dense weight l1 = %f' % (dense_w.l1())
+    print('dense weight l1 = %f' % (dense_w.l1()))
     dense_b.set_value(0)
-    print 'dense b l1 = %f' % (dense_b.l1())
+    print('dense b l1 = %f' % (dense_b.l1()))
 
     g_dense_w = tensor.Tensor(dense_w.shape, cuda)
     g_dense_b = tensor.Tensor(dense_b.shape, cuda)
@@ -162,7 +170,7 @@ def train(data, max_epoch, hidden_size=100, seq_length=100, batch_size=16,
                 # print output.l1(), act.l1()
             utils.update_progress(
                 b * 1.0 / data.num_train_batch, 'training loss = %f' %
-                (batch_loss / seq_length))
+                (old_div(batch_loss, seq_length)))
             train_loss += batch_loss
 
             grads.append(tensor.Tensor())
@@ -176,8 +184,8 @@ def train(data, max_epoch, hidden_size=100, seq_length=100, batch_size=16,
             opt.apply_with_lr(
                 epoch, get_lr(epoch),
                 g_dense_b, dense_b, 'dense_b')
-        print '\nEpoch %d, train loss is %f' % \
-            (epoch, train_loss / data.num_train_batch / seq_length)
+        print('\nEpoch %d, train loss is %f' % \
+            (epoch, train_loss / data.num_train_batch / seq_length))
 
         eval_loss = 0
         for b in range(data.num_test_batch):
@@ -191,13 +199,13 @@ def train(data, max_epoch, hidden_size=100, seq_length=100, batch_size=16,
                 output = dense.forward(model_pb2.kEval, output)
                 eval_loss += lossfun.forward(model_pb2.kEval,
                                              output, label).l1()
-        print 'Epoch %d, evaluation loss is %f' % \
-            (epoch, eval_loss / data.num_test_batch / seq_length)
+        print('Epoch %d, evaluation loss is %f' % \
+            (epoch, eval_loss / data.num_test_batch / seq_length))
 
         if (epoch + 1) % 30 == 0:
             # checkpoint the file model
             with open('%s_%d.bin' % (model_path, epoch), 'wb') as fd:
-                print 'saving model to %s' % model_path
+                print('saving model to %s' % model_path)
                 d = {}
                 for name, w in zip(
                         ['rnn_w', 'dense_w', 'dense_b'],

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/c94b3dfd/examples/cifar10/alexnet.py
----------------------------------------------------------------------
diff --git a/examples/cifar10/alexnet.py b/examples/cifar10/alexnet.py
index 02437b3..b056e70 100644
--- a/examples/cifar10/alexnet.py
+++ b/examples/cifar10/alexnet.py
@@ -19,6 +19,8 @@ https://code.google.com/p/cuda-convnet/source/browse/trunk/example-layers/layers
 Following the same setting for hyper-parameters and data pre-processing, the final
 validation accuracy would be about 82%.
 '''
+from __future__ import print_function
+from builtins import zip
 
 # sys.path.append(os.path.join(os.path.dirname(__file__), '../../build/python'))
 from singa import layer
@@ -56,6 +58,6 @@ def create_net(use_cpu=False):
             p.gaussian(filler.mean, filler.std)
         else:
             p.set_value(0)
-        print specs.name, filler.type, p.l1()
+        print(specs.name, filler.type, p.l1())
 
     return net

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/c94b3dfd/examples/cifar10/caffe/caffe_net.py
----------------------------------------------------------------------
diff --git a/examples/cifar10/caffe/caffe_net.py b/examples/cifar10/caffe/caffe_net.py
index 2db131a..dd1eb7d 100644
--- a/examples/cifar10/caffe/caffe_net.py
+++ b/examples/cifar10/caffe/caffe_net.py
@@ -1,3 +1,7 @@
+from __future__ import print_function
+from future import standard_library
+standard_library.install_aliases()
+from builtins import zip
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information
@@ -14,13 +18,13 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # =============================================================================
-import urllib
+import urllib.request, urllib.parse, urllib.error
 from singa import converter
 
 
 def create_net(use_cpu):
-    urllib.urlretrieve("https://raw.githubusercontent.com/BVLC/caffe/master/examples/cifar10/cifar10_full_train_test.prototxt", "train_test.prototxt")
-    urllib.urlretrieve("https://raw.githubusercontent.com/BVLC/caffe/master/examples/cifar10/cifar10_full_solver.prototxt", "solver.prototxt")
+    urllib.request.urlretrieve("https://raw.githubusercontent.com/BVLC/caffe/master/examples/cifar10/cifar10_full_train_test.prototxt", "train_test.prototxt")
+    urllib.request.urlretrieve("https://raw.githubusercontent.com/BVLC/caffe/master/examples/cifar10/cifar10_full_solver.prototxt", "solver.prototxt")
     input_sample_shape = [3, 32, 32, ]
 
     cvt = converter.CaffeConverter("train_test.prototxt", "solver.prototxt",
@@ -32,6 +36,6 @@ def create_net(use_cpu):
             p.gaussian(filler.mean, filler.std)
         else:
             p.set_value(0)
-        print specs.name, filler.type, p.l1()
+        print(specs.name, filler.type, p.l1())
 
     return net

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/c94b3dfd/examples/cifar10/download_data.py
----------------------------------------------------------------------
diff --git a/examples/cifar10/download_data.py b/examples/cifar10/download_data.py
index 7129b03..a0b73c5 100755
--- a/examples/cifar10/download_data.py
+++ b/examples/cifar10/download_data.py
@@ -17,7 +17,10 @@
 # limitations under the License.
 # 
 
-import urllib
+from __future__ import print_function
+from future import standard_library
+standard_library.install_aliases()
+import urllib.request, urllib.parse, urllib.error
 import tarfile
 import os
 import sys
@@ -26,17 +29,17 @@ import argparse
 
 def extract_tarfile(filepath):
     if os.path.exists(filepath):
-        print 'The tar file does exist. Extracting it now..'
+        print('The tar file does exist. Extracting it now..')
         with tarfile.open(filepath, 'r') as f:
             f.extractall('.')
-        print 'Finished!'
+        print('Finished!')
         sys.exit(0)
 
 
 def check_dir_exist(dirpath):
     if os.path.exists(dirpath):
-        print 'Directory %s does exist. To redownload the files, '\
-            'remove the existing directory and %s.tar.gz' % (dirpath, dirpath)
+        print('Directory %s does exist. To redownload the files, '\
+            'remove the existing directory and %s.tar.gz' % (dirpath, dirpath))
         return True
     else:
         return False
@@ -45,10 +48,10 @@ def check_dir_exist(dirpath):
 def do_download(dirpath, gzfile, url):
     if check_dir_exist(dirpath):
         sys.exit(0)
-    print 'Downloading CIFAR10 from %s' % (url)
-    urllib.urlretrieve(url, gzfile)
+    print('Downloading CIFAR10 from %s' % (url))
+    urllib.request.urlretrieve(url, gzfile)
     extract_tarfile(gzfile)
-    print 'Finished!'
+    print('Finished!')
 
 
 if __name__ == '__main__':

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/c94b3dfd/examples/cifar10/predict.py
----------------------------------------------------------------------
diff --git a/examples/cifar10/predict.py b/examples/cifar10/predict.py
index dca44fe..123818a 100644
--- a/examples/cifar10/predict.py
+++ b/examples/cifar10/predict.py
@@ -15,7 +15,11 @@
 # limitations under the License.
 # =============================================================================
 '''Predicting the labels for new images using the pre-trained alexnet model'''
-import cPickle as pickle
+from __future__ import print_function
+from future import standard_library
+standard_library.install_aliases()
+from builtins import range
+import pickle as pickle
 import numpy as np
 
 # sys.path.append(os.path.join(os.path.dirname(__file__), '../../build/python'))
@@ -46,7 +50,7 @@ def predict(net, images, dev, topk=5):
 
 
 def load_dataset(filepath):
-    print 'Loading data file %s' % filepath
+    print('Loading data file %s' % filepath)
     with open(filepath, 'rb') as fd:
         cifar10 = pickle.load(fd)
     image = cifar10['data'].astype(dtype=np.uint8)
@@ -88,4 +92,4 @@ if __name__ == '__main__':
     mean = compute_image_mean('cifar-10-batches-py')
     test_images, _ = load_test_data('cifar-10-batches-py')
     # predict for two images
-    print predict(model, test_images[0:2] - mean, dev)
+    print(predict(model, test_images[0:2] - mean, dev))

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/c94b3dfd/examples/cifar10/resnet.py
----------------------------------------------------------------------
diff --git a/examples/cifar10/resnet.py b/examples/cifar10/resnet.py
index 6b573e9..4b9bad0 100644
--- a/examples/cifar10/resnet.py
+++ b/examples/cifar10/resnet.py
@@ -19,8 +19,12 @@ The best validation accuracy we achieved is about 83% without data augmentation.
 The performance could be improved by tuning some hyper-parameters, including
 learning rate, weight decay, max_epoch, parameter initialization, etc.
 """
+from __future__ import print_function
+from future import standard_library
+standard_library.install_aliases()
+from builtins import zip
 
-import cPickle as pickle
+import pickle as pickle
 
 # sys.path.append(os.path.join(os.path.dirname(__file__), '../../build/python'))
 # use the python modules by installing py singa in build/python
@@ -73,7 +77,7 @@ def create_net(use_cpu=False):
     net.add(layer.AvgPooling2D("pool4", 8, 8, border_mode='valid'))
     net.add(layer.Flatten('flat'))
     net.add(layer.Dense('ip5', 10))
-    print 'Start intialization............'
+    print('Start intialization............')
     for (p, name) in zip(net.param_values(), net.param_names()):
         # print name, p.shape
         if 'mean' in name or 'beta' in name:

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/c94b3dfd/examples/cifar10/train.py
----------------------------------------------------------------------
diff --git a/examples/cifar10/train.py b/examples/cifar10/train.py
index d54d694..8204055 100644
--- a/examples/cifar10/train.py
+++ b/examples/cifar10/train.py
@@ -18,8 +18,16 @@
 It includes 5 binary dataset, each contains 10000 images. 1 row (1 image)
 includes 1 label & 3072 pixels.  3072 pixels are 3 channels of a 32x32 image
 """
-
-import cPickle
+from __future__ import division
+from __future__ import print_function
+from future import standard_library
+standard_library.install_aliases()
+from builtins import zip
+from builtins import str
+from builtins import range
+from past.utils import old_div
+
+import pickle
 import numpy as np
 import os
 import argparse
@@ -38,9 +46,9 @@ import resnet
 
 
 def load_dataset(filepath):
-    print 'Loading data file %s' % filepath
+    print('Loading data file %s' % filepath)
     with open(filepath, 'rb') as fd:
-        cifar10 = cPickle.load(fd)
+        cifar10 = pickle.load(fd)
     image = cifar10['data'].astype(dtype=np.uint8)
     image = image.reshape((-1, 3, 32, 32))
     label = np.asarray(cifar10['labels'], dtype=np.uint8)
@@ -85,7 +93,7 @@ def normalize_for_alexnet(train_x, test_x):
 
 
 def vgg_lr(epoch):
-    return 0.1 / float(1 << ((epoch / 25)))
+    return old_div(0.1, float(1 << ((old_div(epoch, 25)))))
 
 
 def alexnet_lr(epoch):
@@ -115,12 +123,12 @@ def caffe_lr(epoch):
 
 def train(data, net, max_epoch, get_lr, weight_decay, batch_size=100,
           use_cpu=False):
-    print 'Start intialization............'
+    print('Start intialization............')
     if use_cpu:
-        print 'Using CPU'
+        print('Using CPU')
         dev = device.get_default_device()
     else:
-        print 'Using GPU'
+        print('Using GPU')
         dev = device.create_cuda_gpu()
 
     net.to_device(dev)
@@ -131,13 +139,13 @@ def train(data, net, max_epoch, get_lr, weight_decay, batch_size=100,
     tx = tensor.Tensor((batch_size, 3, 32, 32), dev)
     ty = tensor.Tensor((batch_size,), dev, core_pb2.kInt)
     train_x, train_y, test_x, test_y = data
-    num_train_batch = train_x.shape[0] / batch_size
-    num_test_batch = test_x.shape[0] / batch_size
+    num_train_batch = old_div(train_x.shape[0], batch_size)
+    num_test_batch = old_div(test_x.shape[0], batch_size)
     idx = np.arange(train_x.shape[0], dtype=np.int32)
     for epoch in range(max_epoch):
         np.random.shuffle(idx)
         loss, acc = 0.0, 0.0
-        print 'Epoch %d' % epoch
+        print('Epoch %d' % epoch)
         for b in range(num_train_batch):
             x = train_x[idx[b * batch_size: (b + 1) * batch_size]]
             y = train_y[idx[b * batch_size: (b + 1) * batch_size]]
@@ -152,8 +160,8 @@ def train(data, net, max_epoch, get_lr, weight_decay, batch_size=100,
             utils.update_progress(b * 1.0 / num_train_batch,
                                   'training loss = %f, accuracy = %f' % (l, a))
         info = '\ntraining loss = %f, training accuracy = %f, lr = %f' \
-            % (loss / num_train_batch, acc / num_train_batch, get_lr(epoch))
-        print info
+            % (old_div(loss, num_train_batch), old_div(acc, num_train_batch), get_lr(epoch))
+        print(info)
 
         loss, acc = 0.0, 0.0
         for b in range(num_test_batch):
@@ -165,8 +173,8 @@ def train(data, net, max_epoch, get_lr, weight_decay, batch_size=100,
             loss += l
             acc += a
 
-        print 'test loss = %f, test accuracy = %f' \
-            % (loss / num_test_batch, acc / num_test_batch)
+        print('test loss = %f, test accuracy = %f' \
+            % (old_div(loss, num_test_batch), old_div(acc, num_test_batch)))
     net.save('model', 20)  # save model params into checkpoint file
 
 if __name__ == '__main__':
@@ -178,7 +186,7 @@ if __name__ == '__main__':
     args = parser.parse_args()
     assert os.path.exists(args.data), \
         'Pls download the cifar10 dataset via "download_data.py py"'
-    print 'Loading data ..................'
+    print('Loading data ..................')
     train_x, train_y = load_train_data(args.data)
     test_x, test_y = load_test_data(args.data)
     if args.model == 'caffe':

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/c94b3dfd/examples/cifar10/vgg.py
----------------------------------------------------------------------
diff --git a/examples/cifar10/vgg.py b/examples/cifar10/vgg.py
index ce0c210..ec893a9 100644
--- a/examples/cifar10/vgg.py
+++ b/examples/cifar10/vgg.py
@@ -19,6 +19,8 @@ The best validation accuracy we achieved is about 89% without data augmentation.
 The performance could be improved by tuning some hyper-parameters, including
 learning rate, weight decay, max_epoch, parameter initialization, etc.
 """
+from __future__ import print_function
+from builtins import zip
 
 # sys.path.append(os.path.join(os.path.dirname(__file__), '../../build/python'))
 
@@ -74,9 +76,9 @@ def create_net(use_cpu=False):
     net.add(layer.Activation('relu_ip1'))
     net.add(layer.Dropout('drop_ip2', 0.5))
     net.add(layer.Dense('ip2', 10))
-    print 'Start intialization............'
+    print('Start intialization............')
     for (p, name) in zip(net.param_values(), net.param_names()):
-        print name, p.shape
+        print(name, p.shape)
         if 'mean' in name or 'beta' in name:
             p.set_value(0.0)
         elif 'var' in name:
@@ -90,6 +92,6 @@ def create_net(use_cpu=False):
                 p.gaussian(0, 0.02)
         else:
             p.set_value(0)
-        print name, p.l1()
+        print(name, p.l1())
 
     return net

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/c94b3dfd/examples/imagenet/googlenet/serve.py
----------------------------------------------------------------------
diff --git a/examples/imagenet/googlenet/serve.py b/examples/imagenet/googlenet/serve.py
index aee890d..308acd6 100644
--- a/examples/imagenet/googlenet/serve.py
+++ b/examples/imagenet/googlenet/serve.py
@@ -17,6 +17,9 @@
 ''' This model is created following Caffe implementation of GoogleNet
 https://github.com/BVLC/caffe/blob/master/models/bvlc_googlenet/
 '''
+from __future__ import print_function
+from builtins import zip
+from builtins import str
 import os
 import sys
 import time
@@ -157,14 +160,14 @@ def serve(agent, use_cpu, parameter_file, topk=5):
         dev = device.get_default_device()
         layer.engine = 'singacpp'
     else:
-        print "runing with gpu"
+        print("runing with gpu")
         dev = device.create_cuda_gpu()
     agent = agent
 
-    print 'Start intialization............'
+    print('Start intialization............')
     net = create_net((3, 224, 224), parameter_file)
     net.to_device(dev)
-    print 'End intialization............'
+    print('End intialization............')
 
     labels = np.loadtxt('synset_words.txt', str, delimiter='\t ')
     while True:
@@ -199,15 +202,15 @@ def serve(agent, use_cpu, parameter_file, topk=5):
                 response = "Sorry, system error during prediction."
             agent.push(MsgType.kResponse, response)
         elif MsgType.kCommandStop.equal(msg_type):
-                print 'get stop command'
+                print('get stop command')
                 agent.push(MsgType.kStatus, "success")
                 break
         else:
-            print 'get unsupported message %s' % str(msg_type)
+            print('get unsupported message %s' % str(msg_type))
             agent.push(MsgType.kStatus, "Unknown command")
             break
         # while loop
-    print "server stop"
+    print("server stop")
 
 
 def main():

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/c94b3dfd/examples/imagenet/resnet/convert.py
----------------------------------------------------------------------
diff --git a/examples/imagenet/resnet/convert.py b/examples/imagenet/resnet/convert.py
index 042d2ec..7c98139 100644
--- a/examples/imagenet/resnet/convert.py
+++ b/examples/imagenet/resnet/convert.py
@@ -90,8 +90,7 @@ def traverse(m, idx, params, param_names):
 
 if __name__ == '__main__':
     parser = ArgumentParser(description='Convert params from torch to python '
-            'dict. \n resnet could have depth of 18, 34, 101, 152; \n
-            wrn has depth 50; preact has depth 200; addbn has depth 50')
+            'dict. \n resnet could have depth of 18, 34, 101, 152; \n wrn has depth 50; preact has depth 200; addbn has depth 50')
     parser.add_argument("infile", help="torch checkpoint file")
     parser.add_argument("model", choices = ['resnet', 'wrn', 'preact', 'addbn'])
     parser.add_argument("depth", type=int, choices = [18, 34, 50, 101, 152, 200])

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/c94b3dfd/examples/imagenet/resnet/model.py
----------------------------------------------------------------------
diff --git a/examples/imagenet/resnet/model.py b/examples/imagenet/resnet/model.py
index bf90da3..6ab8741 100644
--- a/examples/imagenet/resnet/model.py
+++ b/examples/imagenet/resnet/model.py
@@ -17,6 +17,9 @@
 ''' This models are created following https://github.com/facebook/fb.resnet.torch.git
 and https://github.com/szagoruyko/wide-residual-networks
 '''
+from __future__ import print_function
+from builtins import zip
+from builtins import range
 from singa.layer import Conv2D, Activation, MaxPooling2D, AvgPooling2D,\
         Split, Merge, Flatten, Dense, BatchNormalization, Softmax
 from singa import net as ffnet
@@ -139,7 +142,7 @@ def stage(sid, net, num_blk, inplane, midplane, outplane, stride, block, preact=
 def init_params(net, weight_path=None):
     if weight_path == None:
         for pname, pval in zip(net.param_names(), net.param_values()):
-            print pname, pval.shape
+            print(pname, pval.shape)
             if 'conv' in pname and len(pval.shape) > 1:
                 initializer.gaussian(pval, 0, pval.shape[1])
             elif 'dense' in pname:

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/c94b3dfd/examples/imagenet/resnet/serve.py
----------------------------------------------------------------------
diff --git a/examples/imagenet/resnet/serve.py b/examples/imagenet/resnet/serve.py
index ba5adb1..fde06fe 100644
--- a/examples/imagenet/resnet/serve.py
+++ b/examples/imagenet/resnet/serve.py
@@ -1,3 +1,8 @@
+from __future__ import division
+from __future__ import print_function
+from builtins import str
+from builtins import range
+from past.utils import old_div
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information
@@ -52,7 +57,7 @@ def predict(net, images, num=10):
     '''
     prob = net.predict(images)
     prob = tensor.to_numpy(prob)
-    prob = prob.reshape((images.shape[0] / num, num, -1))
+    prob = prob.reshape((old_div(images.shape[0], num), num, -1))
     prob = np.average(prob, 1)
     return prob
 
@@ -82,11 +87,11 @@ def serve(net, label_map, dev, agent, topk=5):
             try:
                 # process images
                 im = [np.array(x.convert('RGB'), dtype=np.float32).transpose(2, 0, 1) for x in image_transform(val['image'])]
-                im = np.array(im) / 256
+                im = old_div(np.array(im), 256)
                 im -= mean[np.newaxis, :, np.newaxis, np.newaxis]
                 im /= std[np.newaxis, :, np.newaxis, np.newaxis]
                 images.copy_from_numpy(im)
-                print "input: ", images.l1()
+                print("input: ", images.l1())
                 # do prediction
                 prob = predict(net, images, num_augmentation)[0]
                 idx = np.argsort(-prob)
@@ -100,17 +105,17 @@ def serve(net, label_map, dev, agent, topk=5):
             agent.push(MsgType.kResponse, response)
         elif msg.is_command():
             if MsgType.kCommandStop.equal(msg):
-                print 'get stop command'
+                print('get stop command')
                 agent.push(MsgType.kStatus, "success")
                 break
             else:
-                print 'get unsupported command %s' % str(msg)
+                print('get unsupported command %s' % str(msg))
                 agent.push(MsgType.kStatus, "Unknown command")
         else:
-            print 'get unsupported message %s' % str(msg)
+            print('get unsupported message %s' % str(msg))
             agent.push(MsgType.kStatus, "unsupported msg; going to shutdown")
             break
-    print "server stop"
+    print("server stop")
 
 def main():
     try:
@@ -133,14 +138,14 @@ def main():
 
         net = model.create_net(args.model, args.depth, args.use_cpu)
         if args.use_cpu:
-            print 'Using CPU'
+            print('Using CPU')
             dev = device.get_default_device()
         else:
-            print 'Using GPU'
+            print('Using GPU')
             dev = device.create_cuda_gpu()
             net.to_device(dev)
         model.init_params(net, args.parameter_file)
-        print 'Finish loading models'
+        print('Finish loading models')
 
         labels = np.loadtxt('synset_words.txt', str, delimiter='\t ')
         serve(net, labels, dev, agent)

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/c94b3dfd/examples/mnist/train.py
----------------------------------------------------------------------
diff --git a/examples/mnist/train.py b/examples/mnist/train.py
index 0a00358..82d9a5a 100644
--- a/examples/mnist/train.py
+++ b/examples/mnist/train.py
@@ -1,3 +1,9 @@
+from __future__ import division
+from __future__ import print_function
+from future import standard_library
+standard_library.install_aliases()
+from builtins import range
+from past.utils import old_div
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information
@@ -19,7 +25,7 @@ import numpy as np
 import os
 import gzip
 import argparse
-import cPickle
+import pickle
 from singa import initializer
 from singa import utils
 from singa import optimizer
@@ -33,16 +39,16 @@ from singa.proto import core_pb2
 
 def load_train_data(file_path):
     f = gzip.open(file_path, 'rb')
-    train_set, valid_set, test_set = cPickle.load(f)
+    train_set, valid_set, test_set = pickle.load(f)
     traindata = train_set[0].astype(np.float32)
     validdata = valid_set[0].astype(np.float32)
-    print traindata.shape, validdata.shape
+    print(traindata.shape, validdata.shape)
     return traindata, validdata
 
 
 
 def train(data_file, use_gpu, num_epoch=10, batch_size=100):
-    print 'Start intialization............'
+    print('Start intialization............')
     lr = 0.1   # Learning rate
     weight_decay  = 0.0002
     hdim = 1000
@@ -55,7 +61,7 @@ def train(data_file, use_gpu, num_epoch=10, batch_size=100):
     thbias = tensor.from_numpy(np.zeros(hdim, dtype = np.float32))
     opt = optimizer.SGD(momentum=0.5, weight_decay=weight_decay)
 
-    print 'Loading data ..................'
+    print('Loading data ..................')
     train_x, valid_x = load_train_data(data_file)
 
     if use_gpu:
@@ -66,11 +72,11 @@ def train(data_file, use_gpu, num_epoch=10, batch_size=100):
     for t in [tweight, tvbias, thbias]:
         t.to_device(dev)
 
-    num_train_batch = train_x.shape[0] / batch_size
-    print "num_train_batch = %d " % (num_train_batch)
+    num_train_batch = old_div(train_x.shape[0], batch_size)
+    print("num_train_batch = %d " % (num_train_batch))
     for epoch in range(num_epoch):
         trainerrorsum = 0.0
-        print 'Epoch %d' % epoch
+        print('Epoch %d' % epoch)
         for b in range(num_train_batch):
             # positive phase
             tdata = tensor.from_numpy(
@@ -99,11 +105,11 @@ def train(data_file, use_gpu, num_epoch=10, batch_size=100):
             tgvbias = tensor.sum(tnegdata, 0) - tensor.sum(tdata, 0)
             tghbias = tensor.sum(tneghidprob, 0) - tensor.sum(tposhidprob, 0)
 
-            opt.apply_with_lr(epoch, lr / batch_size, tgweight, tweight, 'w')
-            opt.apply_with_lr(epoch, lr / batch_size, tgvbias, tvbias, 'vb')
-            opt.apply_with_lr(epoch, lr / batch_size, tghbias, thbias, 'hb')
+            opt.apply_with_lr(epoch, old_div(lr, batch_size), tgweight, tweight, 'w')
+            opt.apply_with_lr(epoch, old_div(lr, batch_size), tgvbias, tvbias, 'vb')
+            opt.apply_with_lr(epoch, old_div(lr, batch_size), tghbias, thbias, 'hb')
 
-        print 'training errorsum = %f' % (trainerrorsum)
+        print('training errorsum = %f' % (trainerrorsum))
 
         tvaliddata = tensor.from_numpy(valid_x)
         tvaliddata.to_device(dev)
@@ -119,7 +125,7 @@ def train(data_file, use_gpu, num_epoch=10, batch_size=100):
         tvalidnegdata = tensor.sigmoid(tvalidnegdata)
 
         validerrorsum = tensor.sum(tensor.square((tvaliddata - tvalidnegdata)))
-        print 'valid errorsum = %f' % (validerrorsum)
+        print('valid errorsum = %f' % (validerrorsum))
 
 
 if __name__ == '__main__':

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/c94b3dfd/python/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt
index 177326e..01396ff 100644
--- a/python/CMakeLists.txt
+++ b/python/CMakeLists.txt
@@ -93,8 +93,15 @@ PROTOBUF_GENERATE_PYTHON(proto_pys ${proto_files})
 file(MAKE_DIRECTORY ${CMAKE_BINARY_DIR}/python/singa/proto)
 file(MAKE_DIRECTORY ${CMAKE_BINARY_DIR}/python/rafiki)
 file(MAKE_DIRECTORY ${CMAKE_BINARY_DIR}/src/api)
+
+IF(PYTHON2)
+	SET(SWIG_PYTHON3 "")
+ELSE()
+	SET(SWIG_PYTHON3 "-py3")
+ENDIF()
+
 execute_process(
-    COMMAND swig -c++ -python -I${CMAKE_SOURCE_DIR}/include
+    COMMAND swig -c++ -python ${SWIG_PYTHON3} -I${CMAKE_SOURCE_DIR}/include
     -outdir ${CMAKE_BINARY_DIR}/python/singa
     -o ${CMAKE_BINARY_DIR}/src/api/singa_wrap.cxx
     ${CMAKE_SOURCE_DIR}/src/api/singa.i)

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/c94b3dfd/python/rafiki/agent.py
----------------------------------------------------------------------
diff --git a/python/rafiki/agent.py b/python/rafiki/agent.py
index d9e4e7a..98d9b01 100644
--- a/python/rafiki/agent.py
+++ b/python/rafiki/agent.py
@@ -1,3 +1,5 @@
+from builtins import str
+from builtins import object
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information
@@ -25,7 +27,7 @@ from werkzeug.datastructures import CombinedMultiDict, MultiDict
 import pickle
 import uuid
 
-class MsgType:
+class MsgType(object):
    def __init__(self, name):
        self.name = name
    def __str__(self):
@@ -72,7 +74,7 @@ for t in types:
 app = Flask(__name__)
 top_k_=5
 
-class Agent():
+class Agent(object):
 
     def __init__(self,port):
         info_queue = Queue()
@@ -203,7 +205,7 @@ def failure(message):
 
 def transformFile(files):
     result= MultiDict([])
-    for f in files.keys():
+    for f in list(files.keys()):
         file = files[f]
         unique_filename = str(uuid.uuid4())+secure_filename(file.filename)
         filepath=os.path.join(os.getcwd(),unique_filename)
@@ -212,7 +214,7 @@ def transformFile(files):
     return result
 
 def deleteFiles(files):
-    for f in files.keys():
+    for f in list(files.keys()):
         filepath = files[f]    
         os.remove(filepath)
         #print "remove",filepath

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/c94b3dfd/python/singa/layer.py
----------------------------------------------------------------------
diff --git a/python/singa/layer.py b/python/singa/layer.py
index 2cdfe69..153768b 100644
--- a/python/singa/layer.py
+++ b/python/singa/layer.py
@@ -50,12 +50,11 @@ from __future__ import absolute_import
 from builtins import str
 from builtins import range
 from builtins import object
-from sets import Set
+from builtins import set
 from . import singa_wrap
 from .proto import model_pb2
 from . import tensor
 
-
 engine = 'cudnn'
 '''engine is the prefix of layer identifier.
 
@@ -1048,7 +1047,7 @@ class RNN(Layer):
         conf = self.conf.rnn_conf
         assert hidden_size > 0, 'Hidden feature size must > 0'
         conf.hidden_size = hidden_size
-        assert rnn_mode in Set(['lstm', 'gru', 'tanh', 'relu']),  \
+        assert rnn_mode in set(['lstm', 'gru', 'tanh', 'relu']),  \
             'rnn mode %s is not available' % (rnn_mode)
         conf.rnn_mode = rnn_mode
         conf.num_stacks = num_stacks
@@ -1164,7 +1163,7 @@ class GRU(RNN):
 
 
 def _check_engine(engine, allowed_engines):
-    assert engine.lower() in Set(allowed_engines), \
+    assert engine.lower() in set(allowed_engines), \
         '%s is not a supported engine. Pls use one of %s' % \
         (engine, ', '.join(allowed_engines))
 
@@ -1181,7 +1180,7 @@ def _create_layer(eng, layer):
     assert eng != 'cudnn' or cudnn_version > 0, 'CUDNN is not enabled, please '\
         'change the engine, e.g., layer.engine=singacpp'
     layer_type = eng + '_' + layer
-    return singa_wrap.CreateLayer(layer_type.lower())
+    return singa_wrap.CreateLayer(layer_type.lower().encode())
 
 
 def _set_kernel_stride_pad(conf, kernel, stride, border_mode, pad, in_shape):

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/c94b3dfd/python/singa/loss.py
----------------------------------------------------------------------
diff --git a/python/singa/loss.py b/python/singa/loss.py
index d643218..800a113 100644
--- a/python/singa/loss.py
+++ b/python/singa/loss.py
@@ -41,7 +41,7 @@ from __future__ import absolute_import
 from past.utils import old_div
 from builtins import object
 from . import singa_wrap as singa
-from proto import model_pb2
+from .proto import model_pb2
 from . import tensor
 import numpy as np
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/c94b3dfd/python/singa/optimizer.py
----------------------------------------------------------------------
diff --git a/python/singa/optimizer.py b/python/singa/optimizer.py
index 8f1fe8e..54b81e8 100644
--- a/python/singa/optimizer.py
+++ b/python/singa/optimizer.py
@@ -39,8 +39,7 @@ from builtins import object
 import math
 from . import singa_wrap as singa
 from . import tensor
-from proto import model_pb2
-
+from .proto import model_pb2
 
 class Optimizer(object):
     '''The base python optimizer class.
@@ -205,7 +204,7 @@ class SGD(Optimizer):
         if self.momentum is not None:
             conf.momentum = self.momentum
         conf.type = 'sgd'
-        self.opt = singa.CreateOptimizer('SGD')
+        self.opt = singa.CreateOptimizer('SGD'.encode())
         self.opt.Setup(conf.SerializeToString())
 
     def apply_with_lr(self, epoch, lr, grad, value, name, step=-1):
@@ -214,7 +213,7 @@ class SGD(Optimizer):
         grad = self.apply_regularizer_constraint(epoch, value, grad, name, step)
         if name is not None and name in self.learning_rate_multiplier:
             lr = lr * self.learning_rate_multiplier[name]
-        self.opt.Apply(epoch, lr, name, grad.singa_tensor, value.singa_tensor)
+        self.opt.Apply(epoch, lr, name.encode(), grad.singa_tensor, value.singa_tensor)
         return value
 
 
@@ -232,7 +231,7 @@ class Nesterov(Optimizer):
         if self.momentum is not None:
             conf.momentum = momentum
         conf.type = 'nesterov'
-        self.opt = singa.CreateOptimizer('Nesterov')
+        self.opt = singa.CreateOptimizer('Nesterov'.encode())
         self.opt.Setup(conf.SerializeToString())
 
     def apply_with_lr(self, epoch, lr, grad, value, name, step=-1):
@@ -242,7 +241,7 @@ class Nesterov(Optimizer):
         grad = self.apply_regularizer_constraint(epoch, value, grad, name, step)
         if name is not None and name in self.learning_rate_multiplier:
             lr = lr * self.learning_rate_multiplier[name]
-        self.opt.Apply(epoch, lr, name, grad.singa_tensor, value.singa_tensor)
+        self.opt.Apply(epoch, lr, name.encode(), grad.singa_tensor, value.singa_tensor)
         return value
 
 
@@ -263,7 +262,7 @@ class RMSProp(Optimizer):
         conf = model_pb2.OptimizerConf()
         conf.rho = rho
         conf.delta = epsilon
-        self.opt = singa.CreateOptimizer('RMSProp')
+        self.opt = singa.CreateOptimizer('RMSProp'.encode())
         self.opt.Setup(conf.SerializeToString())
 
     def apply_with_lr(self, epoch, lr, grad, value, name, step=-1):
@@ -273,7 +272,7 @@ class RMSProp(Optimizer):
         grad = self.apply_regularizer_constraint(epoch, value, grad, name, step)
         if name is not None and name in self.learning_rate_multiplier:
             lr = lr * self.learning_rate_multiplier[name]
-        self.opt.Apply(step, lr,  name, grad.singa_tensor, value.singa_tensor)
+        self.opt.Apply(step, lr,  name.encode(), grad.singa_tensor, value.singa_tensor)
         return value
 
 
@@ -293,7 +292,7 @@ class AdaGrad(Optimizer):
         conf = model_pb2.OptimizerConf()
         conf.delta = epsilon
         conf.type = 'adagrad'
-        self.opt = singa.CreateOptimizer('AdaGrad')
+        self.opt = singa.CreateOptimizer('AdaGrad'.encode())
         self.opt.Setup(conf.SerializeToString())
 
     def apply_with_lr(self, epoch, lr, grad, value, name, step=-1):
@@ -303,7 +302,7 @@ class AdaGrad(Optimizer):
         grad = self.apply_regularizer_constraint(epoch, value, grad, name, step)
         if name is not None and name in self.learning_rate_multiplier:
             lr = lr * self.learning_rate_multiplier[name]
-        self.opt.Apply(epoch, lr,  name, grad.singa_tensor, value.singa_tensor)
+        self.opt.Apply(epoch, lr,  name.encode(), grad.singa_tensor, value.singa_tensor)
         return value
 
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/c94b3dfd/python/singa/snapshot.py
----------------------------------------------------------------------
diff --git a/python/singa/snapshot.py b/python/singa/snapshot.py
index 4c359fc..392ab3d 100644
--- a/python/singa/snapshot.py
+++ b/python/singa/snapshot.py
@@ -49,7 +49,7 @@ class Snapshot(object):
             mode (boolean): True for write, False for read
             buffer_size (int): Buffer size (in MB), default is 10
         '''
-        self.snapshot = singa.Snapshot(f, mode, buffer_size)
+        self.snapshot = singa.Snapshot(f.encode(), mode, buffer_size)
 
     def write(self, param_name, param_val):
         '''Call Write method to write a parameter
@@ -58,7 +58,7 @@ class Snapshot(object):
             param_name (string): name of the parameter
             param_val (Tensor): value tensor of the parameter
         '''
-        self.snapshot.Write(str(param_name), param_val.singa_tensor)
+        self.snapshot.Write(str(param_name).encode(), param_val.singa_tensor)
 
     def read(self):
         '''Call read method to load all (param_name, param_val)

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/c94b3dfd/python/singa/tensor.py
----------------------------------------------------------------------
diff --git a/python/singa/tensor.py b/python/singa/tensor.py
index fabd84a..144ed61 100644
--- a/python/singa/tensor.py
+++ b/python/singa/tensor.py
@@ -423,6 +423,14 @@ class Tensor(object):
             return _call_singa_func(singa.DivFloat,
                                     self.singa_tensor, rhs)
 
+    def __truediv__(self, rhs):
+        if isinstance(rhs, Tensor):
+            return from_raw_tensor(
+                singa.__div__(self.singa_tensor, rhs.singa_tensor))
+        else:
+            return _call_singa_func(singa.DivFloat,
+                                    self.singa_tensor, rhs)
+
     def __lt__(self, rhs):
         if isinstance(rhs, Tensor):
             return from_raw_tensor(
@@ -479,6 +487,13 @@ class Tensor(object):
         one /= self
         return one
 
+    def __rtruediv__(self, lhs):
+        lhs = float(lhs)
+        one = Tensor(self.shape, self.device, self.dtype)
+        one.set_value(lhs)
+        one /= self
+        return one
+
 ''' python functions for global functions in Tensor.h
 '''
 
@@ -938,7 +953,7 @@ def div(lhs, rhs, ret=None):
     '''
     if ret is None:
         # call Tensor.__div__()
-        return old_div(lhs, rhs)
+        return lhs / rhs
     else:
         if isinstance(rhs, Tensor):
             singa.Div(lhs.singa_tensor, rhs.singa_tensor, ret.singa_tensor)

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/c94b3dfd/src/api/model_layer.i
----------------------------------------------------------------------
diff --git a/src/api/model_layer.i b/src/api/model_layer.i
index 92919fd..4760da3 100644
--- a/src/api/model_layer.i
+++ b/src/api/model_layer.i
@@ -29,6 +29,7 @@
 
 
 %{
+#define SWIG_PYTHON_STRICT_BYTE_CHAR
 #include "singa/model/layer.h"
 #include "../src/model/layer/rnn.h"
 #include "../src/model/layer/cudnn_rnn.h"

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/c94b3dfd/src/api/model_optimizer.i
----------------------------------------------------------------------
diff --git a/src/api/model_optimizer.i b/src/api/model_optimizer.i
index 793df28..9b73d81 100644
--- a/src/api/model_optimizer.i
+++ b/src/api/model_optimizer.i
@@ -28,6 +28,7 @@
 %include "std_shared_ptr.i"
 
 %{
+#define SWIG_PYTHON_STRICT_BYTE_CHAR
 #include "singa/model/optimizer.h"
 #include "singa/proto/model.pb.h"
 using singa::Tensor;

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/c94b3dfd/test/python/test_layer.py
----------------------------------------------------------------------
diff --git a/test/python/test_layer.py b/test/python/test_layer.py
index c0f19f3..ec5becf 100644
--- a/test/python/test_layer.py
+++ b/test/python/test_layer.py
@@ -1,3 +1,4 @@
+from builtins import str
 #
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/c94b3dfd/test/python/test_loss.py
----------------------------------------------------------------------
diff --git a/test/python/test_loss.py b/test/python/test_loss.py
index 78356f2..eb06b81 100644
--- a/test/python/test_loss.py
+++ b/test/python/test_loss.py
@@ -1,3 +1,5 @@
+from __future__ import division
+from past.utils import old_div
 #
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
@@ -45,7 +47,7 @@ class TestLoss(unittest.TestCase):
         sig.backward()
         l2 = sig.evaluate(True, self.x, self.y)
 
-        p = 1.0 / (1 + np.exp(-self.x_np))
+        p = old_div(1.0, (1 + np.exp(-self.x_np)))
         l = - (self.y_np * np.log(p) + (1-self.y_np) * np.log(1-p))
         self.assertAlmostEqual(l1.l1(), l2)
         self.assertAlmostEqual(l1.l1(), np.average(l))

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/c94b3dfd/test/python/test_metric.py
----------------------------------------------------------------------
diff --git a/test/python/test_metric.py b/test/python/test_metric.py
index e7a51c3..8a22372 100644
--- a/test/python/test_metric.py
+++ b/test/python/test_metric.py
@@ -1,3 +1,5 @@
+from __future__ import division
+from past.utils import old_div
 #
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
@@ -50,7 +52,7 @@ class TestPrecision(unittest.TestCase):
 
     def test_evaluate(self):
         e = self.prcs.evaluate(self.x,self.y)
-        self.assertAlmostEqual(e, (0.5 + 1 + 0) / 3)
+        self.assertAlmostEqual(e, old_div((0.5 + 1 + 0), 3))
 
 class TestRecall(unittest.TestCase):
     def setUp(self):
@@ -72,13 +74,13 @@ class TestRecall(unittest.TestCase):
     def test_forward(self):
         r = self.recall.forward(self.x,self.y)
         self.assertAlmostEqual(tensor.to_numpy(r)[0], 0.5)
-        self.assertAlmostEqual(tensor.to_numpy(r)[1], 2.0 / 3)
+        self.assertAlmostEqual(tensor.to_numpy(r)[1], old_div(2.0, 3))
         self.assertAlmostEqual(tensor.to_numpy(r)[2], 0)
 
 
     def test_evaluate(self):
         e = self.recall.evaluate(self.x,self.y)
-        self.assertAlmostEqual(e, (0.5 + 2.0 / 3 + 0) / 3)
+        self.assertAlmostEqual(e, old_div((0.5 + old_div(2.0, 3) + 0), 3))
 
 if __name__ == '__main__':
     unittest.main()

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/c94b3dfd/test/python/test_net.py
----------------------------------------------------------------------
diff --git a/test/python/test_net.py b/test/python/test_net.py
index b19d868..afabc0d 100644
--- a/test/python/test_net.py
+++ b/test/python/test_net.py
@@ -1,3 +1,6 @@
+from __future__ import division
+from builtins import zip
+from past.utils import old_div
 #
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
@@ -41,7 +44,7 @@ class TestFeedForwardNet(unittest.TestCase):
         y.set_value(0)
         out, _ = ffn.evaluate(x, y)
         self.assertAlmostEqual(out * 3,
-                               - math.log(1.0/(1+math.exp(1))) -
+                               - math.log(old_div(1.0,(1+math.exp(1)))) -
                                math.log(0.5) - math.log(0.5),
                                5)
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/c94b3dfd/test/python/test_optimizer.py
----------------------------------------------------------------------
diff --git a/test/python/test_optimizer.py b/test/python/test_optimizer.py
index cfd13c0..11374f5 100644
--- a/test/python/test_optimizer.py
+++ b/test/python/test_optimizer.py
@@ -1,3 +1,7 @@
+from __future__ import division
+from builtins import zip
+from builtins import range
+from past.utils import old_div
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information
@@ -106,7 +110,7 @@ class TestOptimizer(unittest.TestCase):
         cons = opt.L2Constraint(threshold)
         cons.apply(0, self.W, self.g)
         g = tensor.to_numpy(self.g)
-        nrm = np.linalg.norm(self.np_g) / self.np_g.size
+        nrm = old_div(np.linalg.norm(self.np_g), self.np_g.size)
         for i in range(g.size):
             self.assertAlmostEqual(g[i], self.np_g[i] * threshold / nrm)
 
@@ -118,7 +122,7 @@ class TestOptimizer(unittest.TestCase):
         cons.apply(0, self.W, self.g)
         self.g.to_host()
         g = tensor.to_numpy(self.g)
-        nrm = np.linalg.norm(self.np_g) / self.np_g.size
+        nrm = old_div(np.linalg.norm(self.np_g), self.np_g.size)
         for i in range(g.size):
             self.assertAlmostEqual(g[i], self.np_g[i] * threshold / nrm)
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/c94b3dfd/test/python/test_tensor.py
----------------------------------------------------------------------
diff --git a/test/python/test_tensor.py b/test/python/test_tensor.py
index 9cd2411..3f86899 100644
--- a/test/python/test_tensor.py
+++ b/test/python/test_tensor.py
@@ -1,3 +1,5 @@
+from __future__ import division
+from past.utils import old_div
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information
@@ -150,7 +152,7 @@ class TestTensorMethods(unittest.TestCase):
     def test_rdiv(self):
         x = tensor.Tensor((3,))
         x.set_value(1)
-        y = 2 / x
+        y = old_div(2, x)
         self.assertEqual(tensor.average(y), 2.)
 
     def test_numpy_convert(self):

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/c94b3dfd/tool/debian/postinst
----------------------------------------------------------------------
diff --git a/tool/debian/postinst b/tool/debian/postinst
index 2d63734..433ca49 100644
--- a/tool/debian/postinst
+++ b/tool/debian/postinst
@@ -16,6 +16,6 @@
 # limitations under the License.
 #
 
-pip install /usr/local/lib/singa/python
+pip3 install /usr/local/lib/singa/python
 rm -r /usr/local/lib/singa
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/c94b3dfd/tool/opencl/clsrc_to_str.py
----------------------------------------------------------------------
diff --git a/tool/opencl/clsrc_to_str.py b/tool/opencl/clsrc_to_str.py
index 24400f7..8ca94a0 100755
--- a/tool/opencl/clsrc_to_str.py
+++ b/tool/opencl/clsrc_to_str.py
@@ -57,7 +57,7 @@ if __name__ == "__main__":
         fout.write(license)
         fout.write("#include <string>\n\n")
         fout.write("namespace singa {\n namespace opencl {\n")
-        for name, path in files.items():
+        for name, path in list(files.items()):
             with open(path, 'r') as fin:
                 src = fin.read()
                 src = repr(src)