Posted to commits@mxnet.apache.org by sk...@apache.org on 2019/02/07 17:46:03 UTC

[incubator-mxnet] branch master updated: hybridize rnn and add model graph (#13244)

This is an automated email from the ASF dual-hosted git repository.

skm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
     new 506a25c  hybridize rnn and add model graph (#13244)
506a25c is described below

commit 506a25c71cf1d861c957b61e5ab7f142ac094d5a
Author: yifeim <ym...@yma.io>
AuthorDate: Thu Feb 7 09:45:37 2019 -0800

    hybridize rnn and add model graph (#13244)
    
    * hybridize rnn and add model graph
    
    * trigger CI
    
    * separate mxboard visualization
    
    * add options and she-bang
    
    * add defaults
    
    * trigger CI
    
    * rename export-model
---
 example/gluon/word_language_model/README.md       |  26 +++++++++++++++--
 example/gluon/word_language_model/model-graph.png | Bin 0 -> 126048 bytes
 example/gluon/word_language_model/model.py        |   4 +--
 example/gluon/word_language_model/train.py        |  33 ++++++++++++++++++++--
 4 files changed, 56 insertions(+), 7 deletions(-)

diff --git a/example/gluon/word_language_model/README.md b/example/gluon/word_language_model/README.md
index 43d173b..4a77950 100644
--- a/example/gluon/word_language_model/README.md
+++ b/example/gluon/word_language_model/README.md
@@ -28,7 +28,9 @@ python train.py --cuda --tied --nhid 650 --emsize 650 --epochs 40  --dropout 0.5
 ```
 python train.py --cuda --tied --nhid 1500 --emsize 1500 --epochs 60  --dropout 0.65     # Test ppl of 88.42
 ```
-
+```
+python train.py --export-model # hybridize and export model graph. See below for visualization options.
+```
 
 <br>
 
@@ -38,7 +40,8 @@ usage: train.py [-h] [--model MODEL] [--emsize EMSIZE] [--nhid NHID]
                 [--nlayers NLAYERS] [--lr LR] [--clip CLIP] [--epochs EPOCHS]
                 [--batch_size N] [--bptt BPTT] [--dropout DROPOUT] [--tied]
                 [--cuda] [--log-interval N] [--save SAVE] [--gctype GCTYPE]
-                [--gcthreshold GCTHRESHOLD]
+                [--gcthreshold GCTHRESHOLD] [--hybridize] [--static-alloc]
+                [--static-shape] [--export-model]
 
 MXNet Autograd RNN/LSTM Language Model on Wikitext-2.
 
@@ -62,4 +65,23 @@ optional arguments:
                         `none` for now.
   --gcthreshold GCTHRESHOLD
                         threshold for 2bit gradient compression
+  --hybridize           whether to hybridize in mxnet>=1.3 (default=False)
+  --static-alloc        whether to use static-alloc hybridize in mxnet>=1.3
+                        (default=False)
+  --static-shape        whether to use static-shape hybridize in mxnet>=1.3
+                        (default=False)
+  --export-model        export a symbol graph and exit (default=False)
+```
+
+You may visualize the graph with `mxnet.viz.plot_network` without any additional dependencies. Alternatively, if [mxboard](https://github.com/awslabs/mxboard) is installed, use the following approach for interactive visualization.
+```python
+#!python
+import mxnet, mxboard
+with mxboard.SummaryWriter(logdir='./model-graph') as sw:
+    sw.add_graph(mxnet.sym.load('./model-symbol.json'))
+```
+```bash
+#!/bin/bash
+tensorboard --logdir=./model-graph/
 ```
+![model graph](./model-graph.png?raw=true "rnn model graph")
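
A note on the dependency-free route the README mentions: `mxnet.viz.plot_network` is named but not shown. A minimal sketch, assuming the `model-symbol.json` produced by `--export-model` and the `graphviz` Python package; the output file name is illustrative.

```python
#!python
import mxnet as mx

# Load the symbol graph written by `python train.py --export-model`.
sym = mx.sym.load('./model-symbol.json')

# plot_network returns a graphviz Digraph; render() writes the DOT source
# plus a PDF. Requires the graphviz package (and the dot binary) locally.
graph = mx.viz.plot_network(sym, title='rnn-model', save_format='pdf')
graph.render('model-graph')
```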
diff --git a/example/gluon/word_language_model/model-graph.png b/example/gluon/word_language_model/model-graph.png
new file mode 100644
index 0000000..c621518
Binary files /dev/null and b/example/gluon/word_language_model/model-graph.png differ
diff --git a/example/gluon/word_language_model/model.py b/example/gluon/word_language_model/model.py
index a810c41..ec6e700 100644
--- a/example/gluon/word_language_model/model.py
+++ b/example/gluon/word_language_model/model.py
@@ -19,7 +19,7 @@ import mxnet as mx
 from mxnet import gluon
 from mxnet.gluon import nn, rnn
 
-class RNNModel(gluon.Block):
+class RNNModel(gluon.HybridBlock):
     """A model with an encoder, recurrent layer, and a decoder."""
 
     def __init__(self, mode, vocab_size, num_embed, num_hidden,
@@ -53,7 +53,7 @@ class RNNModel(gluon.Block):
 
             self.num_hidden = num_hidden
 
-    def forward(self, inputs, hidden):
+    def hybrid_forward(self, F, inputs, hidden):
         emb = self.drop(self.encoder(inputs))
         output, hidden = self.rnn(emb, hidden)
         output = self.drop(output)
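
The model.py change above is the core of the hybridization: `gluon.Block`/`forward` becomes `gluon.HybridBlock`/`hybrid_forward`, where `F` is `mx.nd` when running imperatively and `mx.sym` once `hybridize()` has been called, which is what makes `export()` possible. A minimal, self-contained sketch of the same pattern (`TinyNet` is illustrative, not the repo's `RNNModel`):

```python
#!python
import mxnet as mx
from mxnet import gluon, nd
from mxnet.gluon import nn

class TinyNet(gluon.HybridBlock):
    """Illustrative HybridBlock following the same pattern as RNNModel."""
    def __init__(self, **kwargs):
        super(TinyNet, self).__init__(**kwargs)
        with self.name_scope():
            self.dense = nn.Dense(4)

    def hybrid_forward(self, F, x):
        # F is mx.nd before hybridize() and mx.sym after, so the same code
        # runs imperatively or as a cached symbolic graph.
        return F.relu(self.dense(x))

net = TinyNet()
net.initialize()
net.hybridize(static_alloc=True)   # mirrors the new --static-alloc flag
out = net(nd.ones((2, 3)))         # first call builds the cached graph
net.export('tiny')                 # writes tiny-symbol.json and tiny-0000.params
```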
diff --git a/example/gluon/word_language_model/train.py b/example/gluon/word_language_model/train.py
index 7f0a916..d08c07e 100644
--- a/example/gluon/word_language_model/train.py
+++ b/example/gluon/word_language_model/train.py
@@ -58,6 +58,14 @@ parser.add_argument('--gctype', type=str, default='none',
                           takes `2bit` or `none` for now.')
 parser.add_argument('--gcthreshold', type=float, default=0.5,
                     help='threshold for 2bit gradient compression')
+parser.add_argument('--hybridize', action='store_true',
+                    help='whether to hybridize in mxnet>=1.3 (default=False)')
+parser.add_argument('--static-alloc', action='store_true',
+                    help='whether to use static-alloc hybridize in mxnet>=1.3 (default=False)')
+parser.add_argument('--static-shape', action='store_true',
+                    help='whether to use static-shape hybridize in mxnet>=1.3 (default=False)')
+parser.add_argument('--export-model', action='store_true',
+                    help='export a symbol graph and exit (default=False)')
 args = parser.parse_args()
 
 print(args)
@@ -72,6 +80,15 @@ if args.cuda:
 else:
     context = mx.cpu(0)
 
+if args.export_model:
+    args.hybridize = True
+
+# optional parameters only for mxnet >= 1.3
+hybridize_optional = dict(filter(lambda kv:kv[1],
+    {'static_alloc':args.static_alloc, 'static_shape':args.static_shape}.items()))
+if args.hybridize:
+    print('hybridize_optional', hybridize_optional)
+
 dirname = './data'
 dirname = os.path.expanduser(dirname)
 if not os.path.exists(dirname):
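
For readers of the new `hybridize_optional` expression: the `dict(filter(...))` one-liner keeps only the flags that were set, so `hybridize()` is never passed keywords the installed MXNet may not support. An equivalent, more explicit form (not part of the patch; `args` comes from the argument parser above):

```python
#!python
# Same effect as dict(filter(lambda kv: kv[1], {...}.items())) above:
# keep only the options that were actually requested on the command line.
hybridize_optional = {}
if args.static_alloc:
    hybridize_optional['static_alloc'] = True
if args.static_shape:
    hybridize_optional['static_shape'] = True
```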
@@ -114,6 +131,8 @@ test_data = gluon.data.DataLoader(test_dataset,
 ntokens = len(vocab)
 model = model.RNNModel(args.model, ntokens, args.emsize, args.nhid,
                        args.nlayers, args.dropout, args.tied)
+if args.hybridize:
+    model.hybridize(**hybridize_optional)
 model.initialize(mx.init.Xavier(), ctx=context)
 
 compression_params = None if args.gctype == 'none' else {'type': args.gctype, 'threshold': args.gcthreshold}
@@ -123,6 +142,8 @@ trainer = gluon.Trainer(model.collect_params(), 'sgd',
                          'wd': 0},
                         compression_params=compression_params)
 loss = gluon.loss.SoftmaxCrossEntropyLoss()
+if args.hybridize:
+    loss.hybridize(**hybridize_optional)
 
 ###############################################################################
 # Training code
@@ -177,6 +198,10 @@ def train():
                     epoch, i, cur_L, math.exp(cur_L)))
                 total_L = 0.0
 
+            if args.export_model:
+                model.export('model')
+                return
+
         val_L = eval(val_data)
 
         print('[Epoch %d] time cost %.2fs, valid loss %.2f, valid ppl %.2f'%(
@@ -193,6 +218,8 @@ def train():
 
 if __name__ == '__main__':
     train()
-    model.load_parameters(args.save, context)
-    test_L = eval(test_data)
-    print('Best test loss %.2f, test ppl %.2f'%(test_L, math.exp(test_L)))
+    if not args.export_model:
+        model.load_parameters(args.save, context)
+        test_L = eval(test_data)
+        print('Best test loss %.2f, test ppl %.2f'%(test_L, math.exp(test_L)))
+
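
Once `--export-model` has written `model-symbol.json` and `model-0000.params`, the graph can be loaded back without the Python model class. A minimal sketch, assuming MXNet >= 1.3; the input names passed to `SymbolBlock.imports` below are placeholders and should be read off the exported symbol, since the RNN hidden state adds extra data inputs.

```python
#!python
import mxnet as mx
from mxnet import gluon

# List all symbol inputs; the free data inputs (those not stored in the
# .params file) are what SymbolBlock.imports expects as input_names.
print(mx.sym.load('./model-symbol.json').list_inputs())

# Rebuild the network from the exported files without model.py.
# 'data0' and 'data1' are illustrative placeholders for the actual names.
net = gluon.nn.SymbolBlock.imports('./model-symbol.json',
                                   ['data0', 'data1'],
                                   './model-0000.params',
                                   ctx=mx.cpu())
```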