You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@singa.apache.org by wa...@apache.org on 2016/01/01 13:29:17 UTC
[08/10] incubator-singa git commit: SINGA-81 Add Python Helper
SINGA-81 Add Python Helper
Add comments for some functions in model.py.
Remove the rnnlm-related code, which could be added later when it can be run successfully using Python.
Move datasets/ into examples as they are used mainly by the examples.
Update .gitignore to ignore the pb2 folder in tool/python/.
TODO: add comments for the other methods in files under the singa/ folder.
Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/8914750e
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/8914750e
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/8914750e
Branch: refs/heads/master
Commit: 8914750e8c6d6fd0d9d0d8aed53fd775a1367b88
Parents: 3c12730
Author: Wei Wang <wa...@comp.nus.edu.sg>
Authored: Tue Dec 29 11:36:28 2015 +0800
Committer: chonho <le...@comp.nus.edu.sg>
Committed: Fri Jan 1 15:59:15 2016 +0800
----------------------------------------------------------------------
tool/python/examples/cifar10_cnn.py | 4 +-
tool/python/examples/cifar10_cnn_cudnn.py | 4 +-
.../python/examples/cifar10_cnn_cudnn_hybrid.py | 34 -----
tool/python/examples/cifar10_cnn_parameter.py | 4 +-
tool/python/examples/datasets/__init__.py | 0
tool/python/examples/datasets/cifar10.py | 34 +++++
tool/python/examples/datasets/mnist.py | 32 +++++
tool/python/examples/rnnlm_usermodel.py | 22 ----
tool/python/singa.py | 26 ++--
tool/python/singa/datasets/__init__.py | 0
tool/python/singa/datasets/cifar10.py | 34 -----
tool/python/singa/datasets/mnist.py | 32 -----
tool/python/singa/datasets/rnnlm.py | 20 ---
tool/python/singa/model.py | 132 +++++++++++--------
14 files changed, 164 insertions(+), 214 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/8914750e/tool/python/examples/cifar10_cnn.py
----------------------------------------------------------------------
diff --git a/tool/python/examples/cifar10_cnn.py b/tool/python/examples/cifar10_cnn.py
index 9ef552b..859a9a4 100755
--- a/tool/python/examples/cifar10_cnn.py
+++ b/tool/python/examples/cifar10_cnn.py
@@ -1,6 +1,6 @@
#!/usr/bin/env python
import sys, os
-sys.path.append(os.path.join(os.path.dirname(__file__),'..'))
+sys.path.append(os.path.join(os.path.dirname(__file__),'..'))
from singa.model import *
from singa.datasets import cifar10
@@ -24,7 +24,7 @@ m.add(AvgPooling2D(pool_size=(3,3), stride=2))
m.add(Dense(10, w_wd=250, b_lr=2, b_wd=0, activation='softmax'))
-sgd = SGD(decay=0.004, lr_type='fixed', step=(0,60000,65000), step_lr=(0.001,0.0001,0.00001))
+sgd = SGD(decay=0.004, lr_type='manual', step=(0,60000,65000), step_lr=(0.001,0.0001,0.00001))
topo = Cluster(workspace)
m.compile(loss='categorical_crossentropy', optimizer=sgd, cluster=topo)
m.fit(X_train, nb_epoch=1000, with_test=True)
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/8914750e/tool/python/examples/cifar10_cnn_cudnn.py
----------------------------------------------------------------------
diff --git a/tool/python/examples/cifar10_cnn_cudnn.py b/tool/python/examples/cifar10_cnn_cudnn.py
index e3c5c49..d4f4b7c 100755
--- a/tool/python/examples/cifar10_cnn_cudnn.py
+++ b/tool/python/examples/cifar10_cnn_cudnn.py
@@ -1,6 +1,6 @@
#!/usr/bin/env python
import sys, os
-sys.path.append(os.path.join(os.path.dirname(__file__),'..'))
+sys.path.append(os.path.join(os.path.dirname(__file__),'..'))
from singa.model import *
from singa.datasets import cifar10
@@ -24,7 +24,7 @@ m.add(AvgPooling2D(pool_size=(3,3), stride=2))
m.add(Dense(10, w_wd=250, b_lr=2, b_wd=0, activation='softmax'))
-sgd = SGD(decay=0.004, lr_type='fixed', step=(0,60000,65000), step_lr=(0.001,0.0001,0.00001))
+sgd = SGD(decay=0.004, lr_type='manual', step=(0,60000,65000), step_lr=(0.001,0.0001,0.00001))
topo = Cluster(workspace)
m.compile(loss='categorical_crossentropy', optimizer=sgd, cluster=topo)
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/8914750e/tool/python/examples/cifar10_cnn_cudnn_hybrid.py
----------------------------------------------------------------------
diff --git a/tool/python/examples/cifar10_cnn_cudnn_hybrid.py b/tool/python/examples/cifar10_cnn_cudnn_hybrid.py
deleted file mode 100755
index f5e4c27..0000000
--- a/tool/python/examples/cifar10_cnn_cudnn_hybrid.py
+++ /dev/null
@@ -1,34 +0,0 @@
-#!/usr/bin/env python
-import sys, os
-sys.path.append(os.path.join(os.path.dirname(__file__),'..'))
-from singa.model import *
-from singa.datasets import cifar10
-
-X_train, X_test, workspace = cifar10.load_data()
-
-m = Sequential('cifar10-cnn', sys.argv)
-
-m.add(Convolution2D(32, 5, 1, 2, w_std=0.0001, b_lr=2))
-m.add(MaxPooling2D(pool_size=(3,3), stride=2))
-m.add(Activation('relu'))
-m.add(LRN2D(3, alpha=0.00005, beta=0.75))
-
-m.add(Convolution2D(32, 5, 1, 2, b_lr=2))
-m.add(Activation('relu'))
-m.add(AvgPooling2D(pool_size=(3,3), stride=2))
-m.add(LRN2D(3, alpha=0.00005, beta=0.75))
-
-m.add(Convolution2D(64, 5, 1, 2))
-m.add(Activation('relu'))
-m.add(AvgPooling2D(pool_size=(3,3), stride=2))
-
-m.add(Dense(10, w_wd=250, b_lr=2, b_wd=0, activation='softmax'))
-
-sgd = SGD(decay=0.004, lr_type='fixed', step=(0,60000,65000), step_lr=(0.001,0.0001,0.00001))
-topo = Cluster(workspace, nworkers_per_group=2, nworkers_per_procs=2)
-m.compile(loss='categorical_crossentropy', optimizer=sgd, cluster=topo)
-
-gpu_id = [0,1]
-m.fit(X_train, nb_epoch=10000, with_test=True, device=gpu_id)
-result = m.evaluate(X_test, test_steps=0, test_freq=200)
-
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/8914750e/tool/python/examples/cifar10_cnn_parameter.py
----------------------------------------------------------------------
diff --git a/tool/python/examples/cifar10_cnn_parameter.py b/tool/python/examples/cifar10_cnn_parameter.py
index dd03f5c..4144fa5 100755
--- a/tool/python/examples/cifar10_cnn_parameter.py
+++ b/tool/python/examples/cifar10_cnn_parameter.py
@@ -1,6 +1,6 @@
#!/usr/bin/env python
import sys, os
-sys.path.append(os.path.join(os.path.dirname(__file__),'..'))
+sys.path.append(os.path.join(os.path.dirname(__file__),'..'))
from singa.model import *
from singa.datasets import cifar10
@@ -27,7 +27,7 @@ m.add(AvgPooling2D(pool_size=(3,3), stride=2))
m.add(Dense(10, w_param=parw, w_wd=250, b_param=parb, b_lr=2, b_wd=0, activation='softmax'))
-sgd = SGD(decay=0.004, lr_type='fixed', step=(0,60000,65000), step_lr=(0.001,0.0001,0.00001))
+sgd = SGD(decay=0.004, lr_type='manual', step=(0,60000,65000), step_lr=(0.001,0.0001,0.00001))
topo = Cluster(workspace)
m.compile(loss='categorical_crossentropy', optimizer=sgd, cluster=topo)
m.fit(X_train, nb_epoch=100, with_test=True)
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/8914750e/tool/python/examples/datasets/__init__.py
----------------------------------------------------------------------
diff --git a/tool/python/examples/datasets/__init__.py b/tool/python/examples/datasets/__init__.py
new file mode 100644
index 0000000..e69de29
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/8914750e/tool/python/examples/datasets/cifar10.py
----------------------------------------------------------------------
diff --git a/tool/python/examples/datasets/cifar10.py b/tool/python/examples/datasets/cifar10.py
new file mode 100644
index 0000000..65bcd60
--- /dev/null
+++ b/tool/python/examples/datasets/cifar10.py
@@ -0,0 +1,34 @@
+#!/usr/bin/env python
+from singa.model import *
+
+def load_data(
+ workspace = None,
+ backend = 'kvfile',
+ batchsize = 64,
+ random = 5000,
+ shape = (3, 32, 32),
+ std = 127.5,
+ mean = 127.5
+ ):
+
+ # using cifar10 dataset
+ data_dir = 'examples/cifar10'
+ path_train = data_dir + '/train_data.bin'
+ path_test = data_dir + '/test_data.bin'
+ path_mean = data_dir + '/image_mean.bin'
+ if workspace == None: workspace = data_dir
+
+ store = Store(path=path_train, mean_file=path_mean, backend=backend,
+ random_skip=random, batchsize=batchsize,
+ shape=shape)
+
+ data_train = Data(load='recordinput', phase='train', conf=store)
+
+ store = Store(path=path_test, mean_file=path_mean, backend=backend,
+ batchsize=batchsize,
+ shape=shape)
+
+ data_test = Data(load='recordinput', phase='test', conf=store)
+
+ return data_train, data_test, workspace
+
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/8914750e/tool/python/examples/datasets/mnist.py
----------------------------------------------------------------------
diff --git a/tool/python/examples/datasets/mnist.py b/tool/python/examples/datasets/mnist.py
new file mode 100644
index 0000000..c8695ec
--- /dev/null
+++ b/tool/python/examples/datasets/mnist.py
@@ -0,0 +1,32 @@
+#!/usr/bin/env python
+from singa.model import *
+
+def load_data(
+ workspace = None,
+ backend = 'kvfile',
+ nb_rbm = 0, # the number of layers for RBM and Autoencoder
+ checkpoint_steps = 0,
+ **pvalues
+ ):
+
+ # using mnist dataset
+ data_dir = 'examples/mnist'
+ path_train = data_dir + '/train_data.bin'
+ path_test = data_dir + '/test_data.bin'
+ if workspace == None: workspace = data_dir
+
+ # checkpoint path to load
+ checkpoint_list = None
+ if checkpoint_steps > 0:
+ workerid = 0
+ checkpoint_list = []
+ for i in range(nb_rbm-1, 0, -1):
+ checkpoint_list.append('examples/rbm/rbm{0}/checkpoint/step{1}-worker{2}'.format(str(i),checkpoint_steps,workerid))
+
+ store = Store(path=path_train, backend=backend, **pvalues)
+ data_train = Data(load='recordinput', phase='train', conf=store, checkpoint=checkpoint_list)
+
+ store = Store(path=path_test, backend=backend, **pvalues)
+ data_test = Data(load='recordinput', phase='test', conf=store)
+
+ return data_train, data_test, workspace
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/8914750e/tool/python/examples/rnnlm_usermodel.py
----------------------------------------------------------------------
diff --git a/tool/python/examples/rnnlm_usermodel.py b/tool/python/examples/rnnlm_usermodel.py
deleted file mode 100755
index 1b49321..0000000
--- a/tool/python/examples/rnnlm_usermodel.py
+++ /dev/null
@@ -1,22 +0,0 @@
-#!/usr/bin/env python
-import sys, os
-sys.path.append(os.path.join(os.path.dirname(__file__),'..'))
-from singa.model import *
-from singa.datasets import rnnlm
-
-vocab_size = 3720
-
-X_train, X_valid, workspace = rnnlm.load_data()
-
-m = Sequential('rnnlm', sys.argv)
-
-parw = Parameter(init='uniform', range=0.3)
-m.add(Embedding(in_dim=vocab_size, out_dim=15, w_param=parw))
-m.add(RNNLM(1, w_param=parw))
-
-sgd = SGD(lr_type='fixed', step=(0,48810,56945,65080,73215), step_lr=(0.1,0.05,0.025,0.0125,0.00625))
-topo = Cluster(workspace)
-m.compile(loss='user_loss_rnnlm', in_dim=vocab_size, nclass=100, optimizer=sgd, cluster=topo)
-
-m.fit(X_train, validate=X_valid, validate_steps=683, nb_epoch=81350, execpath='examples/rnnlm/rnnlm.bin')
-#result = m.evaluate(X_valid, validate_steps=683, validate_freq=8135, execpath='examples/rnnlm/rnnlm.bin')
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/8914750e/tool/python/singa.py
----------------------------------------------------------------------
diff --git a/tool/python/singa.py b/tool/python/singa.py
index 6d7fbdf..986a6b8 100755
--- a/tool/python/singa.py
+++ b/tool/python/singa.py
@@ -31,14 +31,18 @@ import singa.driver as driver
from google.protobuf.text_format import Merge
if __name__ == '__main__':
- i = sys.argv.index("-conf")
- s = open(sys.argv[i+1], 'r').read()
- s = str(s)
- j = job_pb2.JobProto()
- Merge(s,j)
- b = j.SerializeToString()
- d = driver.Driver()
- d.InitLog(sys.argv[0])
- d.Init(sys.argv)
-# d.Train(False,b)
- d.Test(b)
+ """Invoke the training program using this python script.
+ ./bin/singa-run.sh -exec tool/python/singa.py -conf examples/cifar10/job.conf
+ """"
+
+ i = sys.argv.index("-conf")
+ s = open(sys.argv[i+1], 'r').read()
+ s = str(s)
+ j = job_pb2.JobProto()
+ Merge(s,j)
+ b = j.SerializeToString()
+ d = driver.Driver()
+ d.InitLog(sys.argv[0])
+ d.Init(sys.argv)
+ d.Train(False,b)
+ #d.Test(b)
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/8914750e/tool/python/singa/datasets/__init__.py
----------------------------------------------------------------------
diff --git a/tool/python/singa/datasets/__init__.py b/tool/python/singa/datasets/__init__.py
deleted file mode 100644
index e69de29..0000000
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/8914750e/tool/python/singa/datasets/cifar10.py
----------------------------------------------------------------------
diff --git a/tool/python/singa/datasets/cifar10.py b/tool/python/singa/datasets/cifar10.py
deleted file mode 100644
index 65bcd60..0000000
--- a/tool/python/singa/datasets/cifar10.py
+++ /dev/null
@@ -1,34 +0,0 @@
-#!/usr/bin/env python
-from singa.model import *
-
-def load_data(
- workspace = None,
- backend = 'kvfile',
- batchsize = 64,
- random = 5000,
- shape = (3, 32, 32),
- std = 127.5,
- mean = 127.5
- ):
-
- # using cifar10 dataset
- data_dir = 'examples/cifar10'
- path_train = data_dir + '/train_data.bin'
- path_test = data_dir + '/test_data.bin'
- path_mean = data_dir + '/image_mean.bin'
- if workspace == None: workspace = data_dir
-
- store = Store(path=path_train, mean_file=path_mean, backend=backend,
- random_skip=random, batchsize=batchsize,
- shape=shape)
-
- data_train = Data(load='recordinput', phase='train', conf=store)
-
- store = Store(path=path_test, mean_file=path_mean, backend=backend,
- batchsize=batchsize,
- shape=shape)
-
- data_test = Data(load='recordinput', phase='test', conf=store)
-
- return data_train, data_test, workspace
-
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/8914750e/tool/python/singa/datasets/mnist.py
----------------------------------------------------------------------
diff --git a/tool/python/singa/datasets/mnist.py b/tool/python/singa/datasets/mnist.py
deleted file mode 100644
index c8695ec..0000000
--- a/tool/python/singa/datasets/mnist.py
+++ /dev/null
@@ -1,32 +0,0 @@
-#!/usr/bin/env python
-from singa.model import *
-
-def load_data(
- workspace = None,
- backend = 'kvfile',
- nb_rbm = 0, # the number of layers for RBM and Autoencoder
- checkpoint_steps = 0,
- **pvalues
- ):
-
- # using mnist dataset
- data_dir = 'examples/mnist'
- path_train = data_dir + '/train_data.bin'
- path_test = data_dir + '/test_data.bin'
- if workspace == None: workspace = data_dir
-
- # checkpoint path to load
- checkpoint_list = None
- if checkpoint_steps > 0:
- workerid = 0
- checkpoint_list = []
- for i in range(nb_rbm-1, 0, -1):
- checkpoint_list.append('examples/rbm/rbm{0}/checkpoint/step{1}-worker{2}'.format(str(i),checkpoint_steps,workerid))
-
- store = Store(path=path_train, backend=backend, **pvalues)
- data_train = Data(load='recordinput', phase='train', conf=store, checkpoint=checkpoint_list)
-
- store = Store(path=path_test, backend=backend, **pvalues)
- data_test = Data(load='recordinput', phase='test', conf=store)
-
- return data_train, data_test, workspace
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/8914750e/tool/python/singa/datasets/rnnlm.py
----------------------------------------------------------------------
diff --git a/tool/python/singa/datasets/rnnlm.py b/tool/python/singa/datasets/rnnlm.py
deleted file mode 100644
index ef8142a..0000000
--- a/tool/python/singa/datasets/rnnlm.py
+++ /dev/null
@@ -1,20 +0,0 @@
-#!/usr/bin/env python
-from singa.model import *
-
-def load_data(
- workspace = 'examples/rnnlm',
- backend = 'kvfile',
- max_window = 10
- ):
-
- path_train = workspace + '/train_data.bin'
- path_valid = workspace + '/valid_data.bin'
- path_test = workspace + '/test_data.bin'
-
-
- data_train = Data(load='kData', phase='train', path=path_train, backend=backend, max_window=max_window)
-
- data_valid = Data(load='kData', phase='val', path=path_valid, max_window=max_window)
-
- return data_train, data_valid, workspace
-
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/8914750e/tool/python/singa/model.py
----------------------------------------------------------------------
diff --git a/tool/python/singa/model.py b/tool/python/singa/model.py
index d68d143..29db70e 100644
--- a/tool/python/singa/model.py
+++ b/tool/python/singa/model.py
@@ -1,8 +1,8 @@
#!/usr/bin/env python
import sys, re, subprocess
from layer import *
-from utils.utility import *
-from utils.message import *
+from utils.utility import *
+from utils.message import *
from google.protobuf import text_format
class Model(object):
@@ -14,14 +14,14 @@ class Model(object):
argv // pass sys.argv to source
label = (bool) // exist label layer (depreciated)
'''
- self.jobconf = Message('Job', name=name).proto
+ self.jobconf = Message('Job', name=name).proto
self.layers = []
self.label = label
self.argv = argv
self.result = None
self.last_checkpoint_path = None
self.cudnn = False
-
+
def exist_datalayer(self, phase):
for ly in self.layers:
if enumPhase(phase) in ly.layer.include:
@@ -38,7 +38,7 @@ class Model(object):
topk = (int) // the number of results considered to compute accuracy
'''
assert optimizer != None, 'optimizer (Updater component) should be set'
- assert cluster != None, 'cluster (Cluster component) should be set'
+ assert cluster != None, 'cluster (Cluster component) should be set'
setval(self.jobconf, updater=optimizer.proto)
setval(self.jobconf, cluster=cluster.proto)
@@ -56,7 +56,7 @@ class Model(object):
# revise the last layer
if loss == 'categorical_crossentropy':
setval(ly, type=enumLayerType('softmaxloss'))
- setval(ly.softmaxloss_conf, topk=topk)
+ setval(ly.softmaxloss_conf, topk=topk)
elif loss == 'mean_squared_error':
setval(ly, type=enumLayerType('euclideanloss'))
else:
@@ -72,7 +72,7 @@ class Model(object):
'''
construct neuralnet proto
'''
- net = NetProto()
+ net = NetProto()
slyname = self.layers[0].layer.name
for i in range(len(self.layers)):
ly = net.layer.add()
@@ -95,7 +95,7 @@ class Model(object):
# deal with label layer (depreciated)
if self.label == True:
- label_layer = Layer(name='label', type=kLabel)
+ label_layer = Layer(name='label', type=kLabel)
ly = net.layer.add()
ly.CopyFrom(label_layer.layer)
getattr(ly, 'srclayers').append(self.layers[0].layer.name)
@@ -108,7 +108,7 @@ class Model(object):
# use of cudnn
if self.cudnn == True:
- self.setCudnnLayerType(net)
+ self.setCudnnLayerType(net)
setval(self.jobconf, neuralnet=net)
@@ -127,7 +127,7 @@ class Model(object):
batch_size = (int) // batch size for training data
train_steps = (int) // the number of steps for training, i.e., epoch
disp_freq = (int) // frequency to display training info
- disp_after = (int) // display after this number
+ disp_after = (int) // display after this number
validate_data = (Data) // validation data, specified in load_data()
validate_freq = (int) // frequency of validation
validate_steps = (int) // total number of steps for validation
@@ -143,7 +143,7 @@ class Model(object):
setval(data.layer.store_conf, batchsize=fields['batch_size'])
# insert layer for training
- if self.exist_datalayer('train') == False:
+ if self.exist_datalayer('train') == False:
self.layers.insert(0, data)
setval(self.jobconf, train_steps=nb_epoch)
setval(self.jobconf, disp_freq=nb_epoch/10)
@@ -163,8 +163,8 @@ class Model(object):
# save model parameter (i.e., checkpoint_path)
setval(self.jobconf, checkpoint_freq=nb_epoch)
self.last_checkpoint_path = '{0}/step{1}-worker0'.format(
- self.jobconf.cluster.workspace, nb_epoch)
-
+ self.jobconf.cluster.workspace, nb_epoch)
+
# set Train_one_batch component, using backprogapation at default
setval(self.jobconf, train_one_batch=Algorithm(type=enumAlgType(alg)).proto)
@@ -174,7 +174,7 @@ class Model(object):
self.cudnn = True
# start to run singa for training
- if with_test == False:
+ if with_test == False:
self.build() # construct Nneuralnet Component
#self.display()
return SingaRun(jobproto=self.jobconf, argv=self.argv, execpath=execpath)
@@ -191,13 +191,13 @@ class Model(object):
optional
alg = (string) // algorithm type, (backpropagation at default)
checkpoint_path = (list) // checkpoint path is necessary only for testing
- execpaths = (string) // path to user's own executable
+ execpaths = (string) // path to user's own executable
device = (int/list) // a list of gpu ids
**fields (KEY=VALUE)
batch_size = (int) // batch size for testing data
test_freq = (int) // frequency of testing
- test_steps = (int) // total number of steps for testing
- test_after = (int) // start testing after this number of steps
+ test_steps = (int) // total number of steps for testing
+ test_after = (int) // start testing after this number of steps
'''
assert data != None, 'Testing data should be set'
is_testonly = False
@@ -206,11 +206,11 @@ class Model(object):
setval(data.layer.store_conf, batchsize=fields['batch_size'])
# insert layer for testing
- if self.exist_datalayer('test') == False:
+ if self.exist_datalayer('test') == False:
self.layers.insert(0, data)
# loading checkpoint if singa runs only for testing
- if self.exist_datalayer('train') == False:
+ if self.exist_datalayer('train') == False:
is_testonly = True
if checkpoint_path == None:
print 'checkpoint_path has not been specified'
@@ -220,7 +220,7 @@ class Model(object):
steps = fields['test_steps'] if 'test_steps' in fields else 10
setval(self.jobconf, test_steps=steps)
setval(self.jobconf, **fields)
-
+
# set Train_one_batch component, using backprogapation at default
setval(self.jobconf, train_one_batch=Algorithm(type=enumAlgType(alg)).proto)
@@ -231,16 +231,16 @@ class Model(object):
self.build() # construct Nneuralnet Component
- #--- generate job.conf file for debug purpose
+ #--- generate job.conf file for debug purpose
#filename = 'job.conf'
#with open(filename, 'w') as f:
# f.write(text_format.MessageToString(self.jobconf.cluster))
#self.display()
- #--- run singa ---
+ #--- run singa ---
return SingaRun(jobproto=self.jobconf, argv=self.argv, execpath=execpath, testmode=is_testonly)
#return SingaRun_script(filename=filename, execpath=execpath)
-
+
def display(self):
''' print out job proto
@@ -260,13 +260,13 @@ class Model(object):
elif ly_type == kSoftmaxLoss: cudnn_ly_type = kCudnnSoftmaxLoss
elif ly_type == kSTanh:
cudnn_ly_type = kCudnnActivation
- net.layer[i].activation_conf.type = STANH
+ net.layer[i].activation_conf.type = STANH
elif ly_type == kSigmoid:
cudnn_ly_type = kCudnnActivation
- net.layer[i].activation_conf.type = SIGMOID
+ net.layer[i].activation_conf.type = SIGMOID
elif ly_type == kReLU:
cudnn_ly_type = kCudnnActivation
- net.layer[i].activation_conf.type = RELU
+ net.layer[i].activation_conf.type = RELU
net.layer[i].type = cudnn_ly_type
@@ -277,7 +277,7 @@ class Energy(Model):
def add(self, layer):
if hasattr(layer, 'layer_type'):
if layer.layer_type == kRBMVis:
- dim = 0
+ dim = 0
for i in range(1, len(layer.out_dim)):
parw = Parameter(name='w', init='none', level=i)
parb = Parameter(name='b', init='none', level=i)
@@ -293,7 +293,7 @@ class Sequential(Model):
def add(self, layer):
if hasattr(layer, 'layer_type'):
if layer.layer_type == 'AutoEncoder':
- dim = 0
+ dim = 0
if layer.param_share == True:
# Encoding
for i in range(1, len(layer.hid_dim)+1):
@@ -331,9 +331,9 @@ class Store(object):
'''
**kwargs
path = (string) // path to dataset
- backend = (string) //
+ backend = (string) //
batch_size = (int) // batch size of dataset
- shape = (int) //
+ shape = (int) //
'''
self.proto = Message('Store', **kwargs).proto
@@ -357,23 +357,23 @@ class Updater(object):
lr_type = (string) // type of the learning rate (Fixed at default)
'''
upd = Message('Updater', type=upd_type, **fields).proto
- setval(upd.learning_rate, base_lr=lr)
+ setval(upd.learning_rate, base_lr=lr)
if decay > 0:
- setval(upd, weight_decay=decay)
+ setval(upd, weight_decay=decay)
if momentum > 0:
- setval(upd, momentum=momentum)
+ setval(upd, momentum=momentum)
- if lr_type == None:
- setval(upd.learning_rate, type=kFixed)
+ if lr_type == None or lr_type == "fixed":
+ setval(upd.learning_rate, type=kFixed)
elif lr_type == 'step':
cp = Message('Step', change_freq=60, gamma=0.997)
- setval(upd.learning_rate, type=kStep, step_conf=cp.proto)
- elif lr_type == 'fixedstep':
+ setval(upd.learning_rate, type=kStep, step_conf=cp.proto)
+ elif lr_type == 'manual':
cp = Message('FixedStep', step=step, step_lr=step_lr)
- setval(upd.learning_rate, type=kFixedStep, fixedstep_conf=cp.proto)
+ setval(upd.learning_rate, type=kFixedStep, fixedstep_conf=cp.proto)
elif lr_type == 'linear':
cp = Message('Linear', change_freq=10, final_lr=0.1)
- setval(upd.learning_rate, type=kLinear, linear_conf=cp.proto)
+ setval(upd.learning_rate, type=kLinear, linear_conf=cp.proto)
self.proto = upd
@@ -422,6 +422,15 @@ class AdaGrad(Updater):
class Cluster(object):
+ """ Specify the cluster topology, e.g., number of workers/servers.
+
+ Currently we need to create this object in the .py file and also provide a
+ cluster configuration file to the command line. TODO(wangwei) update SINGA
+ code to eliminate the requirement of the cluster configuration file for
+ training on a single node or the cluster object in the pyfile for training
+ in a cluster.
+ """
+
def __init__(self, workspace=None,
nworker_groups=1, nserver_groups=1,
nworkers_per_group=1, nservers_per_group=1,
@@ -443,65 +452,78 @@ class Cluster(object):
assert workspace != None, 'need to set workspace'
self.proto = Message('Cluster', workspace=workspace).proto
# optional
- self.proto.nworker_groups = nworker_groups
- self.proto.nserver_groups = nserver_groups
- self.proto.nworkers_per_group = nworkers_per_group
- self.proto.nservers_per_group = nservers_per_group
- self.proto.nworkers_per_procs = nworkers_per_procs
- self.proto.nservers_per_procs = nservers_per_procs
+ self.proto.nworker_groups = nworker_groups
+ self.proto.nserver_groups = nserver_groups
+ self.proto.nworkers_per_group = nworkers_per_group
+ self.proto.nservers_per_group = nservers_per_group
+ self.proto.nworkers_per_procs = nworkers_per_procs
+ self.proto.nservers_per_procs = nservers_per_procs
# other fields
setval(self.proto, **fields)
def StoreResults(lines):
+ """ Parsing metrics from each line in the log file.
- resultDic = {}
+ TODO(wangwei) format the log string to make them uniform for easy parsing
+ Another approach is creating a protobuf message for metrics, which can be
+ used for dumping metrics to string and loading perf string back to messages.
+ """
+
+ resultDic = {}
for line in lines:
line = re.findall(r'[\w|*.*]+', line)
if 'Train' in line:
step = line[line.index('step')+1]
if 'accuracy' in line:
- resultDic.setdefault(step,{})['acc'] = line[line.index('accuracy')+1]
+ resultDic.setdefault(step,{})['acc'] = line[line.index('accuracy')+1]
if 'loss' in line:
- resultDic.setdefault(step,{})['loss'] = line[line.index('loss')+1]
+ resultDic.setdefault(step,{})['loss'] = line[line.index('loss')+1]
if 'ppl' in line:
- resultDic.setdefault(step,{})['ppl'] = line[line.index('ppl')+1]
+ resultDic.setdefault(step,{})['ppl'] = line[line.index('ppl')+1]
if 'Squared' in line:
- resultDic.setdefault(step,{})['se'] = line[line.index('Squared')+2]
+ resultDic.setdefault(step,{})['se'] = line[line.index('Squared')+2]
return resultDic
def SingaRun(jobproto='', argv=[], execpath='', testmode=False):
import singa.driver as driver
d = driver.Driver()
- d.InitLog(argv[0])
+ d.InitLog(argv[0])
d.Init(argv)
if testmode == True:
d.Test(jobproto.SerializeToString())
else:
d.Train(False, jobproto.SerializeToString())
+ # Get the performance from the latest log file.
+ # TODO(wangwei) the log file would be overwritten by other running instance of
+ # the same program, e.g., lt-singa
logfile = '/tmp/singa-log/{0}.ERROR'.format(argv[0].split('/')[-1])
fin = open(logfile, 'r')
result = StoreResults(fin.readlines())
-
+
return result
def SingaRun_script(filename='', execpath=''):
+ """
+ Deprecated.
+ Generate the job conf file and run the shell command.
+ """
SINGAROOT = '../../../'
conf = 'examples/' + filename
if execpath=='':
cmd = SINGAROOT+'bin/singa-run.sh ' \
- + '-conf %s ' % conf
+ + '-conf %s ' % conf
else:
cmd = SINGAROOT+'bin/singa-run.sh ' \
+ '-conf %s ' % conf \
- + '-exec %s ' % execpath
+ + '-exec %s ' % execpath
procs = subprocess.Popen(cmd.strip().split(' '), stdout = subprocess.PIPE, stderr = subprocess.STDOUT)
- resultDic = {}
+ resultDic = {}
outputlines = iter(procs.stdout.readline, '')
resultDic = StoreResults(outputlines)