You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mxnet.apache.org by sk...@apache.org on 2019/02/07 17:46:03 UTC
[incubator-mxnet] branch master updated: hybridize rnn and add model graph (#13244)
This is an automated email from the ASF dual-hosted git repository.
skm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git
The following commit(s) were added to refs/heads/master by this push:
new 506a25c hybridize rnn and add model graph (#13244)
506a25c is described below
commit 506a25c71cf1d861c957b61e5ab7f142ac094d5a
Author: yifeim <ym...@yma.io>
AuthorDate: Thu Feb 7 09:45:37 2019 -0800
hybridize rnn and add model graph (#13244)
* hybridize rnn and add model graph
* trigger CI
* separate mxboard visualization
* add options and she-bang
* add defaults
* trigger CI
* rename export-model
---
example/gluon/word_language_model/README.md | 26 +++++++++++++++--
example/gluon/word_language_model/model-graph.png | Bin 0 -> 126048 bytes
example/gluon/word_language_model/model.py | 4 +--
example/gluon/word_language_model/train.py | 33 ++++++++++++++++++++--
4 files changed, 56 insertions(+), 7 deletions(-)
diff --git a/example/gluon/word_language_model/README.md b/example/gluon/word_language_model/README.md
index 43d173b..4a77950 100644
--- a/example/gluon/word_language_model/README.md
+++ b/example/gluon/word_language_model/README.md
@@ -28,7 +28,9 @@ python train.py --cuda --tied --nhid 650 --emsize 650 --epochs 40 --dropout 0.5
```
python train.py --cuda --tied --nhid 1500 --emsize 1500 --epochs 60 --dropout 0.65 # Test ppl of 88.42
```
-
+```
+python train.py --export-model # hybridize and export model graph. See below for visualization options.
+```
<br>
@@ -38,7 +40,8 @@ usage: train.py [-h] [--model MODEL] [--emsize EMSIZE] [--nhid NHID]
[--nlayers NLAYERS] [--lr LR] [--clip CLIP] [--epochs EPOCHS]
[--batch_size N] [--bptt BPTT] [--dropout DROPOUT] [--tied]
[--cuda] [--log-interval N] [--save SAVE] [--gctype GCTYPE]
- [--gcthreshold GCTHRESHOLD]
+ [--gcthreshold GCTHRESHOLD] [--hybridize] [--static-alloc]
+ [--static-shape] [--export-model]
MXNet Autograd RNN/LSTM Language Model on Wikitext-2.
@@ -62,4 +65,23 @@ optional arguments:
`none` for now.
--gcthreshold GCTHRESHOLD
threshold for 2bit gradient compression
+ --hybridize whether to hybridize in mxnet>=1.3 (default=False)
+ --static-alloc whether to use static-alloc hybridize in mxnet>=1.3
+ (default=False)
+ --static-shape whether to use static-shape hybridize in mxnet>=1.3
+ (default=False)
+ --export-model export a symbol graph and exit (default=False)
+```
+
+You may visualize the graph with `mxnet.viz.plot_network` without any additional dependencies. Alternatively, if [mxboard](https://github.com/awslabs/mxboard) is installed, use the following approach for interactive visualization.
+```python
+#!python
+import mxnet, mxboard
+with mxboard.SummaryWriter(logdir='./model-graph') as sw:
+ sw.add_graph(mxnet.sym.load('./model-symbol.json'))
+```
+```bash
+#!/bin/bash
+tensorboard --logdir=./model-graph/
```
+![model graph](./model-graph.png?raw=true "rnn model graph")
diff --git a/example/gluon/word_language_model/model-graph.png b/example/gluon/word_language_model/model-graph.png
new file mode 100644
index 0000000..c621518
Binary files /dev/null and b/example/gluon/word_language_model/model-graph.png differ
diff --git a/example/gluon/word_language_model/model.py b/example/gluon/word_language_model/model.py
index a810c41..ec6e700 100644
--- a/example/gluon/word_language_model/model.py
+++ b/example/gluon/word_language_model/model.py
@@ -19,7 +19,7 @@ import mxnet as mx
from mxnet import gluon
from mxnet.gluon import nn, rnn
-class RNNModel(gluon.Block):
+class RNNModel(gluon.HybridBlock):
"""A model with an encoder, recurrent layer, and a decoder."""
def __init__(self, mode, vocab_size, num_embed, num_hidden,
@@ -53,7 +53,7 @@ class RNNModel(gluon.Block):
self.num_hidden = num_hidden
- def forward(self, inputs, hidden):
+ def hybrid_forward(self, F, inputs, hidden):
emb = self.drop(self.encoder(inputs))
output, hidden = self.rnn(emb, hidden)
output = self.drop(output)
diff --git a/example/gluon/word_language_model/train.py b/example/gluon/word_language_model/train.py
index 7f0a916..d08c07e 100644
--- a/example/gluon/word_language_model/train.py
+++ b/example/gluon/word_language_model/train.py
@@ -58,6 +58,14 @@ parser.add_argument('--gctype', type=str, default='none',
takes `2bit` or `none` for now.')
parser.add_argument('--gcthreshold', type=float, default=0.5,
help='threshold for 2bit gradient compression')
+parser.add_argument('--hybridize', action='store_true',
+ help='whether to hybridize in mxnet>=1.3 (default=False)')
+parser.add_argument('--static-alloc', action='store_true',
+ help='whether to use static-alloc hybridize in mxnet>=1.3 (default=False)')
+parser.add_argument('--static-shape', action='store_true',
+ help='whether to use static-shape hybridize in mxnet>=1.3 (default=False)')
+parser.add_argument('--export-model', action='store_true',
+ help='export a symbol graph and exit (default=False)')
args = parser.parse_args()
print(args)
@@ -72,6 +80,15 @@ if args.cuda:
else:
context = mx.cpu(0)
+if args.export_model:
+ args.hybridize = True
+
+# optional parameters only for mxnet >= 1.3
+hybridize_optional = dict(filter(lambda kv:kv[1],
+ {'static_alloc':args.static_alloc, 'static_shape':args.static_shape}.items()))
+if args.hybridize:
+ print('hybridize_optional', hybridize_optional)
+
dirname = './data'
dirname = os.path.expanduser(dirname)
if not os.path.exists(dirname):
@@ -114,6 +131,8 @@ test_data = gluon.data.DataLoader(test_dataset,
ntokens = len(vocab)
model = model.RNNModel(args.model, ntokens, args.emsize, args.nhid,
args.nlayers, args.dropout, args.tied)
+if args.hybridize:
+ model.hybridize(**hybridize_optional)
model.initialize(mx.init.Xavier(), ctx=context)
compression_params = None if args.gctype == 'none' else {'type': args.gctype, 'threshold': args.gcthreshold}
@@ -123,6 +142,8 @@ trainer = gluon.Trainer(model.collect_params(), 'sgd',
'wd': 0},
compression_params=compression_params)
loss = gluon.loss.SoftmaxCrossEntropyLoss()
+if args.hybridize:
+ loss.hybridize(**hybridize_optional)
###############################################################################
# Training code
@@ -177,6 +198,10 @@ def train():
epoch, i, cur_L, math.exp(cur_L)))
total_L = 0.0
+ if args.export_model:
+ model.export('model')
+ return
+
val_L = eval(val_data)
print('[Epoch %d] time cost %.2fs, valid loss %.2f, valid ppl %.2f'%(
@@ -193,6 +218,8 @@ def train():
if __name__ == '__main__':
train()
- model.load_parameters(args.save, context)
- test_L = eval(test_data)
- print('Best test loss %.2f, test ppl %.2f'%(test_L, math.exp(test_L)))
+ if not args.export_model:
+ model.load_parameters(args.save, context)
+ test_L = eval(test_data)
+ print('Best test loss %.2f, test ppl %.2f'%(test_L, math.exp(test_L)))
+