You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mxnet.apache.org by ta...@apache.org on 2019/01/11 14:06:02 UTC
[incubator-mxnet] branch master updated: Code modification for
testcases of various network models in directory example (#12498)
This is an automated email from the ASF dual-hosted git repository.
taolv pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git
The following commit(s) were added to refs/heads/master by this push:
new a6ed619 Code modification for testcases of various network models in directory example (#12498)
a6ed619 is described below
commit a6ed6194e5b006ead69f79ad575998b3754ae6fe
Author: Luobao <lu...@intel.com>
AuthorDate: Fri Jan 11 22:05:41 2019 +0800
Code modification for testcases of various network models in directory example (#12498)
* example testcase modified
* rcnn file add
* license add
* license init
* CI test trigger
* rcnn modify give up
* trigger
* modify for better user experience
* change the default parameter to xpu=None
* Update bdk_demo.py
* Update fcn_xs.py
* Update test.py
* Update train.py
* Update bdk_demo.py
* Update bdk_demo.py
* modify review comments
* refine
* modify Readmes according to the changed code.
* finetune READMEs
* re-trigger ci
* re-trigger ci twice
---
example/bayesian-methods/README.md | 24 ++++++++++++
example/bayesian-methods/bdk_demo.py | 73 ++++++++++++++++++------------------
example/fcn-xs/README.md | 27 +++++++++++--
example/fcn-xs/fcn_xs.py | 4 +-
example/rcnn/README.md | 2 +-
example/rcnn/demo.py | 2 +-
example/rcnn/test.py | 4 +-
example/rcnn/train.py | 4 +-
8 files changed, 93 insertions(+), 47 deletions(-)
diff --git a/example/bayesian-methods/README.md b/example/bayesian-methods/README.md
index ec9e8be..fc35b94 100644
--- a/example/bayesian-methods/README.md
+++ b/example/bayesian-methods/README.md
@@ -11,3 +11,27 @@ and *Bayesian Dark Knowledge (BDK)* [<cite>(Balan, Rathod, Murphy and Welling, 2
**bdk.ipynb** shows how to use MXNet to implement the DistilledSGLD algorithm in Bayesian Dark Knowledge.
**bdk_demo.py** contains scripts (more than the notebook) related to Bayesian Dark Knowledge. Use `python bdk_demo.py -d 1 -l 2 -t 50000` to run classification on MNIST.
+
+View the available parameters with the following command:
+
+```shell
+python bdk_demo.py -h
+
+
+usage: bdk_demo.py [-h] [-d DATASET] [-l ALGORITHM] [-t TRAINING] [--gpu GPU]
+
+Examples in the paper [NIPS2015]Bayesian Dark Knowledge and [ICML2011]Bayesian
+Learning via Stochastic Gradient Langevin Dynamics
+
+optional arguments:
+ -h, --help show this help message and exit
+ -d DATASET, --dataset DATASET
+ Dataset to use. 0 --> TOY, 1 --> MNIST, 2 -->
+ Synthetic Data in the SGLD paper
+ -l ALGORITHM, --algorithm ALGORITHM
+ Type of algorithm to use. 0 --> SGD, 1 --> SGLD,
+ other-->DistilledSGLD
+ -t TRAINING, --training TRAINING
+ Number of training samples
+ --gpu GPU 0 to use GPU, not set to use CPU
+```
diff --git a/example/bayesian-methods/bdk_demo.py b/example/bayesian-methods/bdk_demo.py
index 145dac1..cd39bfd 100644
--- a/example/bayesian-methods/bdk_demo.py
+++ b/example/bayesian-methods/bdk_demo.py
@@ -156,34 +156,34 @@ def get_toy_sym(teacher=True, teacher_noise_precision=None):
return net
-def dev():
- return mx.gpu()
+def dev(gpu_id=None):
+ return mx.gpu(gpu_id) if gpu_id else mx.cpu()
-def run_mnist_SGD(training_num=50000):
+def run_mnist_SGD(training_num=50000, gpu_id=None):
X, Y, X_test, Y_test = load_mnist(training_num)
minibatch_size = 100
net = get_mnist_sym()
data_shape = (minibatch_size,) + X.shape[1::]
- data_inputs = {'data': nd.zeros(data_shape, ctx=dev()),
- 'softmax_label': nd.zeros((minibatch_size,), ctx=dev())}
+ data_inputs = {'data': nd.zeros(data_shape, ctx=dev(gpu_id)),
+ 'softmax_label': nd.zeros((minibatch_size,), ctx=dev(gpu_id))}
initializer = mx.init.Xavier(factor_type="in", magnitude=2.34)
- exe, exe_params, _ = SGD(sym=net, dev=dev(), data_inputs=data_inputs, X=X, Y=Y,
+ exe, exe_params, _ = SGD(sym=net, dev=dev(gpu_id), data_inputs=data_inputs, X=X, Y=Y,
X_test=X_test, Y_test=Y_test,
total_iter_num=1000000,
initializer=initializer,
lr=5E-6, prior_precision=1.0, minibatch_size=100)
-def run_mnist_SGLD(training_num=50000):
+def run_mnist_SGLD(training_num=50000, gpu_id=None):
X, Y, X_test, Y_test = load_mnist(training_num)
minibatch_size = 100
net = get_mnist_sym()
data_shape = (minibatch_size,) + X.shape[1::]
- data_inputs = {'data': nd.zeros(data_shape, ctx=dev()),
- 'softmax_label': nd.zeros((minibatch_size,), ctx=dev())}
+ data_inputs = {'data': nd.zeros(data_shape, ctx=dev(gpu_id)),
+ 'softmax_label': nd.zeros((minibatch_size,), ctx=dev(gpu_id))}
initializer = mx.init.Xavier(factor_type="in", magnitude=2.34)
- exe, sample_pool = SGLD(sym=net, dev=dev(), data_inputs=data_inputs, X=X, Y=Y,
+ exe, sample_pool = SGLD(sym=net, dev=dev(gpu_id), data_inputs=data_inputs, X=X, Y=Y,
X_test=X_test, Y_test=Y_test,
total_iter_num=1000000,
initializer=initializer,
@@ -191,7 +191,7 @@ def run_mnist_SGLD(training_num=50000):
thin_interval=100, burn_in_iter_num=1000)
-def run_mnist_DistilledSGLD(training_num=50000):
+def run_mnist_DistilledSGLD(training_num=50000, gpu_id=None):
X, Y, X_test, Y_test = load_mnist(training_num)
minibatch_size = 100
if training_num >= 10000:
@@ -214,10 +214,10 @@ def run_mnist_DistilledSGLD(training_num=50000):
logsoftmax = LogSoftmax()
student_net = get_mnist_sym(output_op=logsoftmax, num_hidden=num_hidden)
data_shape = (minibatch_size,) + X.shape[1::]
- teacher_data_inputs = {'data': nd.zeros(data_shape, ctx=dev()),
- 'softmax_label': nd.zeros((minibatch_size,), ctx=dev())}
- student_data_inputs = {'data': nd.zeros(data_shape, ctx=dev()),
- 'softmax_label': nd.zeros((minibatch_size, 10), ctx=dev())}
+ teacher_data_inputs = {'data': nd.zeros(data_shape, ctx=dev(gpu_id)),
+ 'softmax_label': nd.zeros((minibatch_size,), ctx=dev(gpu_id))}
+ student_data_inputs = {'data': nd.zeros(data_shape, ctx=dev(gpu_id)),
+ 'softmax_label': nd.zeros((minibatch_size, 10), ctx=dev(gpu_id))}
teacher_initializer = BiasXavier(factor_type="in", magnitude=1)
student_initializer = BiasXavier(factor_type="in", magnitude=1)
student_exe, student_params, _ = \
@@ -231,17 +231,17 @@ def run_mnist_DistilledSGLD(training_num=50000):
teacher_learning_rate=teacher_learning_rate,
student_learning_rate=student_learning_rate,
teacher_prior_precision=teacher_prior, student_prior_precision=student_prior,
- perturb_deviation=perturb_deviation, minibatch_size=100, dev=dev())
+ perturb_deviation=perturb_deviation, minibatch_size=100, dev=dev(gpu_id))
-def run_toy_SGLD():
+def run_toy_SGLD(gpu_id=None):
X, Y, X_test, Y_test = load_toy()
minibatch_size = 1
teacher_noise_precision = 1.0 / 9.0
net = get_toy_sym(True, teacher_noise_precision)
data_shape = (minibatch_size,) + X.shape[1::]
- data_inputs = {'data': nd.zeros(data_shape, ctx=dev()),
- 'teacher_output_label': nd.zeros((minibatch_size, 1), ctx=dev())}
+ data_inputs = {'data': nd.zeros(data_shape, ctx=dev(gpu_id)),
+ 'teacher_output_label': nd.zeros((minibatch_size, 1), ctx=dev(gpu_id))}
initializer = mx.init.Uniform(0.07)
exe, params, _ = \
SGLD(sym=net, data_inputs=data_inputs,
@@ -253,20 +253,20 @@ def run_toy_SGLD():
burn_in_iter_num=1000,
thin_interval=10,
task='regression',
- minibatch_size=minibatch_size, dev=dev())
+ minibatch_size=minibatch_size, dev=dev(gpu_id))
-def run_toy_DistilledSGLD():
+def run_toy_DistilledSGLD(gpu_id=None):
X, Y, X_test, Y_test = load_toy()
minibatch_size = 1
teacher_noise_precision = 1.0
teacher_net = get_toy_sym(True, teacher_noise_precision)
student_net = get_toy_sym(False)
data_shape = (minibatch_size,) + X.shape[1::]
- teacher_data_inputs = {'data': nd.zeros(data_shape, ctx=dev()),
- 'teacher_output_label': nd.zeros((minibatch_size, 1), ctx=dev())}
- student_data_inputs = {'data': nd.zeros(data_shape, ctx=dev())}
- # 'softmax_label': nd.zeros((minibatch_size, 10), ctx=dev())}
+ teacher_data_inputs = {'data': nd.zeros(data_shape, ctx=dev(gpu_id)),
+ 'teacher_output_label': nd.zeros((minibatch_size, 1), ctx=dev(gpu_id))}
+ student_data_inputs = {'data': nd.zeros(data_shape, ctx=dev(gpu_id))}
+
teacher_initializer = mx.init.Uniform(0.07)
student_initializer = mx.init.Uniform(0.07)
student_grad_f = lambda student_outputs, teacher_pred: \
@@ -284,21 +284,21 @@ def run_toy_DistilledSGLD():
student_grad_f=student_grad_f,
teacher_prior_precision=0.1, student_prior_precision=0.001,
perturb_deviation=0.1, minibatch_size=minibatch_size, task='regression',
- dev=dev())
+ dev=dev(gpu_id))
-def run_toy_HMC():
+def run_toy_HMC(gpu_id=None):
X, Y, X_test, Y_test = load_toy()
minibatch_size = Y.shape[0]
noise_precision = 1 / 9.0
net = get_toy_sym(True, noise_precision)
data_shape = (minibatch_size,) + X.shape[1::]
- data_inputs = {'data': nd.zeros(data_shape, ctx=dev()),
- 'teacher_output_label': nd.zeros((minibatch_size, 1), ctx=dev())}
+ data_inputs = {'data': nd.zeros(data_shape, ctx=dev(gpu_id)),
+ 'teacher_output_label': nd.zeros((minibatch_size, 1), ctx=dev(gpu_id))}
initializer = mx.init.Uniform(0.07)
sample_pool = HMC(net, data_inputs=data_inputs, X=X, Y=Y, X_test=X_test, Y_test=Y_test,
sample_num=300000, initializer=initializer, prior_precision=1.0,
- learning_rate=1E-3, L=10, dev=dev())
+ learning_rate=1E-3, L=10, dev=dev(gpu_id))
def run_synthetic_SGLD():
@@ -350,21 +350,22 @@ if __name__ == '__main__':
help="Type of algorithm to use. 0 --> SGD, 1 --> SGLD, other-->DistilledSGLD")
parser.add_argument("-t", "--training", type=int, default=50000,
help="Number of training samples")
+ parser.add_argument("--gpu", type=int, help="0 to use GPU, not set to use CPU")
args = parser.parse_args()
training_num = args.training
if args.dataset == 1:
if 0 == args.algorithm:
- run_mnist_SGD(training_num)
+ run_mnist_SGD(training_num, gpu_id=args.gpu)
elif 1 == args.algorithm:
- run_mnist_SGLD(training_num)
+ run_mnist_SGLD(training_num, gpu_id=args.gpu)
else:
- run_mnist_DistilledSGLD(training_num)
+ run_mnist_DistilledSGLD(training_num, gpu_id=args.gpu)
elif args.dataset == 0:
if 1 == args.algorithm:
- run_toy_SGLD()
+ run_toy_SGLD(gpu_id=args.gpu)
elif 2 == args.algorithm:
- run_toy_DistilledSGLD()
+ run_toy_DistilledSGLD(gpu_id=args.gpu)
elif 3 == args.algorithm:
- run_toy_HMC()
+ run_toy_HMC(gpu_id=args.gpu)
else:
run_synthetic_SGLD()
diff --git a/example/fcn-xs/README.md b/example/fcn-xs/README.md
index 145aa31..49c57fc 100644
--- a/example/fcn-xs/README.md
+++ b/example/fcn-xs/README.md
@@ -40,14 +40,33 @@ this is the fully convolution style of the origin
Once you completed all these steps, your working directory should contain a ```.\VOC2012``` directory, which contains the following: ```JPEGImages folder```, ```SegmentationClass folder```, ```train.lst```, ```val.lst```
#### Step 3: Train the fcn-xs model
-* Based on your hardware, configure GPU or CPU for training in `fcn_xs.py`. It is recommended to use GPU due to the computational complexity and data load.
-```python
-# ctx = mx.cpu(0)
-ctx = mx.gpu(0)
+* Based on your hardware, select the CPU or GPU for training with the ```--gpu``` parameter. Using a GPU is recommended due to the computational complexity and data load.
+View the available parameters with the following command:
+```shell
+python fcn_xs.py -h
+
+
+usage: fcn_xs.py [-h] [--model MODEL] [--prefix PREFIX] [--epoch EPOCH]
+ [--init-type INIT_TYPE] [--retrain] [--gpu GPU]
+
+Convert vgg16 model to vgg16fc model.
+
+optional arguments:
+ -h, --help show this help message and exit
+ --model MODEL The type of fcn-xs model, e.g. fcnxs, fcn16s, fcn8s.
+ --prefix PREFIX The prefix(include path) of vgg16 model with mxnet
+ format.
+ --epoch EPOCH The epoch number of vgg16 model.
+ --init-type INIT_TYPE
+ the init type of fcn-xs model, e.g. vgg16, fcnxs
+ --retrain true means continue training.
+ --gpu GPU 0 to use GPU, not set to use CPU
```
+
* It is recommended to train fcn-32s and fcn-16s before training the fcn-8s model
To train the fcn-32s model, run the following:
+
```shell
python -u fcn_xs.py --model=fcn32s --prefix=VGG_FC_ILSVRC_16_layers --epoch=74 --init-type=vgg16
```
diff --git a/example/fcn-xs/fcn_xs.py b/example/fcn-xs/fcn_xs.py
index 53244a1..5b799f3 100644
--- a/example/fcn-xs/fcn_xs.py
+++ b/example/fcn-xs/fcn_xs.py
@@ -28,9 +28,10 @@ from solver import Solver
logger = logging.getLogger()
logger.setLevel(logging.INFO)
-ctx = mx.gpu(0)
+
def main():
+ ctx = mx.cpu() if not args.gpu else mx.gpu(args.gpu)
fcnxs = symbol_fcnxs.get_fcn32s_symbol(numclass=21, workspace_default=1536)
fcnxs_model_prefix = "model_pascal/FCN32s_VGG16"
if args.model == "fcn16s":
@@ -85,6 +86,7 @@ if __name__ == "__main__":
help='the init type of fcn-xs model, e.g. vgg16, fcnxs')
parser.add_argument('--retrain', action='store_true', default=False,
help='true means continue training.')
+ parser.add_argument("--gpu", type=int, help="0 to use GPU, not set to use CPU")
args = parser.parse_args()
logging.info(args)
main()
diff --git a/example/rcnn/README.md b/example/rcnn/README.md
index b528418..5e6127c 100644
--- a/example/rcnn/README.md
+++ b/example/rcnn/README.md
@@ -9,7 +9,7 @@ For a gluon imperative version, checkout https://github.com/dmlc/gluon-cv.
### Out-of-box inference models
Download any of the following models to the current directory and run `python3 demo.py --dataset $Dataset$ --network $Network$ --params $MODEL_FILE$ --image $YOUR_IMAGE$` to get single image inference.
-For example `python3 demo.py --dataset voc --network vgg16 --params vgg16_voc0712.params --image myimage.jpg`, add `--gpu 0` to use GPU optionally.
+For example, run `python3 demo.py --dataset voc --network vgg16 --params vgg16_voc0712.params --image myimage.jpg`; add `--gpu 0` to use the GPU, or omit `--gpu` to run on the CPU.
Different network has different configuration. Different dataset has different object class names. You must pass them explicitly as command line arguments.
| Network | Dataset | Imageset | Reference | Result | Link |
diff --git a/example/rcnn/demo.py b/example/rcnn/demo.py
index 2315bb8..b0a4ddb 100644
--- a/example/rcnn/demo.py
+++ b/example/rcnn/demo.py
@@ -92,7 +92,7 @@ def parse_args():
parser.add_argument('--params', type=str, default='', help='path to trained model')
parser.add_argument('--dataset', type=str, default='voc', help='training dataset')
parser.add_argument('--image', type=str, default='', help='path to test image')
- parser.add_argument('--gpu', type=str, default='', help='gpu device eg. 0')
+ parser.add_argument('--gpu', type=str, default='', help='GPU devices, eg."0,1,2,3" , not set to use CPU.')
parser.add_argument('--vis', action='store_true', help='display results')
parser.add_argument('--vis-thresh', type=float, default=0.7, help='threshold display boxes')
# faster rcnn params
diff --git a/example/rcnn/test.py b/example/rcnn/test.py
index 3c047d2..e964c90 100644
--- a/example/rcnn/test.py
+++ b/example/rcnn/test.py
@@ -35,7 +35,7 @@ def test_net(sym, imdb, args):
logger.info('called with args\n{}'.format(pprint.pformat(vars(args))))
# setup context
- ctx = mx.gpu(args.gpu)
+ ctx = mx.cpu() if not args.gpu else mx.gpu(args.gpu)
# load testing data
test_data = TestLoader(imdb.roidb, batch_size=1, short=args.img_short_side, max_size=args.img_long_side,
@@ -94,7 +94,7 @@ def parse_args():
parser.add_argument('--params', type=str, default='', help='path to trained model')
parser.add_argument('--dataset', type=str, default='voc', help='training dataset')
parser.add_argument('--imageset', type=str, default='', help='imageset splits')
- parser.add_argument('--gpu', type=int, default=0, help='gpu device eg. 0')
+ parser.add_argument('--gpu', type=int, default=0, help='0 to use GPU, not set to use CPU')
# faster rcnn params
parser.add_argument('--img-short-side', type=int, default=600)
parser.add_argument('--img-long-side', type=int, default=1000)
diff --git a/example/rcnn/train.py b/example/rcnn/train.py
index 0739069..7b1f2f7 100644
--- a/example/rcnn/train.py
+++ b/example/rcnn/train.py
@@ -33,7 +33,7 @@ def train_net(sym, roidb, args):
logger.info('called with args\n{}'.format(pprint.pformat(vars(args))))
# setup multi-gpu
- ctx = [mx.gpu(int(i)) for i in args.gpus.split(',')]
+ ctx = [mx.cpu()] if not args.gpus else [mx.gpu(int(i)) for i in args.gpus.split(',')]
batch_size = args.rcnn_batch_size * len(ctx)
# load training data
@@ -127,7 +127,7 @@ def parse_args():
parser.add_argument('--pretrained', type=str, default='', help='path to pretrained model')
parser.add_argument('--dataset', type=str, default='voc', help='training dataset')
parser.add_argument('--imageset', type=str, default='', help='imageset splits')
- parser.add_argument('--gpus', type=str, default='0', help='gpu devices eg. 0,1')
+ parser.add_argument('--gpus', type=str, help='GPU devices, eg: "0,1,2,3" , not set to use CPU')
parser.add_argument('--epochs', type=int, default=10, help='training epochs')
parser.add_argument('--lr', type=float, default=0.001, help='base learning rate')
parser.add_argument('--lr-decay-epoch', type=str, default='7', help='epoch to decay lr')