Posted to commits@mxnet.apache.org by zh...@apache.org on 2018/03/24 04:25:53 UTC
[incubator-mxnet] branch nlp_toolkit updated: fix stale grad, rename base s2s class (#17)
This is an automated email from the ASF dual-hosted git repository.
zhasheng pushed a commit to branch nlp_toolkit
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git
The following commit(s) were added to refs/heads/nlp_toolkit by this push:
new 6875333 fix stale grad, rename base s2s class (#17)
6875333 is described below
commit 6875333244e16813efdb814ead66e20b2ba2c292
Author: Sheng Zha <sz...@users.noreply.github.com>
AuthorDate: Fri Mar 23 21:24:41 2018 -0700
fix stale grad, rename base s2s class (#17)
* fix stale grad, remove base s2s class
* model presets, serialize vocab, load logic, rename blocks
* fix lint
* update per comments
* fix lint
* update
---
example/gluon/word_language_model.py | 23 +--
python/mxnet/gluon/data/text/sentiment.py | 5 +-
python/mxnet/gluon/data/text/utils.py | 53 +++++
python/mxnet/gluon/model_zoo/text/__init__.py | 39 +++-
python/mxnet/gluon/model_zoo/text/base.py | 68 ++-----
python/mxnet/gluon/model_zoo/text/lm.py | 276 +++++++++++++++++++++-----
python/mxnet/gluon/parameter.py | 7 +-
python/mxnet/gluon/text/vocab.py | 28 +++
tests/python/unittest/test_gluon_data_text.py | 4 +
tests/python/unittest/test_gluon_model_zoo.py | 30 ++-
10 files changed, 407 insertions(+), 126 deletions(-)
diff --git a/example/gluon/word_language_model.py b/example/gluon/word_language_model.py
index 5f8223d..382f0b1 100644
--- a/example/gluon/word_language_model.py
+++ b/example/gluon/word_language_model.py
@@ -22,7 +22,7 @@ import math
import mxnet as mx
from mxnet import gluon, autograd
from mxnet.gluon import data, text
-from mxnet.gluon.model_zoo.text.lm import RNNModel, AWDLSTM
+from mxnet.gluon.model_zoo.text.lm import SimpleRNN, AWDRNN
parser = argparse.ArgumentParser(description='MXNet Autograd RNN/LSTM Language Model on Wikitext-2.')
parser.add_argument('--model', type=str, default='lstm',
@@ -47,11 +47,9 @@ parser.add_argument('--dropout', type=float, default=0.4,
help='dropout applied to layers (0 = no dropout)')
parser.add_argument('--dropout_h', type=float, default=0.3,
help='dropout applied to hidden layer (0 = no dropout)')
-parser.add_argument('--dropout_i', type=float, default=0.4,
+parser.add_argument('--dropout_i', type=float, default=0.65,
help='dropout applied to input layer (0 = no dropout)')
-parser.add_argument('--dropout_e', type=float, default=0.1,
- help='dropout applied to embedding layer (0 = no dropout)')
-parser.add_argument('--weight_dropout', type=float, default=0.65,
+parser.add_argument('--weight_dropout', type=float, default=0.5,
help='weight dropout applied to h2h weight matrix (0 = no weight dropout)')
parser.add_argument('--tied', action='store_true',
help='tie the word embedding and softmax weights')
@@ -123,12 +121,11 @@ test_data = gluon.data.DataLoader(test_dataset.transform(index_tokens),
ntokens = len(vocab)
if args.weight_dropout:
- model = AWDLSTM(args.model, vocab, args.emsize, args.nhid, args.nlayers,
- args.dropout, args.dropout_h, args.dropout_i, args.dropout_e, args.weight_dropout,
- args.tied)
+ model = AWDRNN(args.model, len(vocab), args.emsize, args.nhid, args.nlayers,
+ args.tied, args.dropout, args.weight_dropout, args.dropout_h, args.dropout_i)
else:
- model = RNNModel(args.model, vocab, args.emsize, args.nhid,
- args.nlayers, args.dropout, args.tied)
+ model = SimpleRNN(args.model, len(vocab), args.emsize, args.nhid, args.nlayers,
+ args.tied, args.dropout)
model.initialize(mx.init.Xavier(), ctx=context)
@@ -147,7 +144,7 @@ loss = gluon.loss.SoftmaxCrossEntropyLoss()
def detach(hidden):
if isinstance(hidden, (tuple, list)):
- hidden = [i.detach() for i in hidden]
+ hidden = [detach(i) for i in hidden]
else:
hidden = hidden.detach()
return hidden
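
The recursive call above is the stale-gradient fix: AWDRNN.begin_state now returns a
list of per-layer state lists, so detaching only the outer list would leave the inner
NDArrays attached to the previous batch's graph. A minimal sketch of the recursion on
such a nested structure (shapes are illustrative only):

    import mxnet as mx

    def detach(hidden):
        # Recurse through nested containers so every NDArray is cut from the
        # previously recorded autograd graph, not just the outer list.
        if isinstance(hidden, (tuple, list)):
            hidden = [detach(i) for i in hidden]
        else:
            hidden = hidden.detach()
        return hidden

    # One [h, c] pair per layer, as AWDRNN.begin_state returns for an LSTM.
    states = [[mx.nd.zeros((1, 2, 4)), mx.nd.zeros((1, 2, 4))] for _ in range(3)]
    states = detach(states)
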
@@ -155,7 +152,7 @@ def detach(hidden):
def eval(data_source):
total_L = 0.0
ntotal = 0
- hidden = model.begin_state(func=mx.nd.zeros, batch_size=args.batch_size, ctx=context[0])
+ hidden = model.begin_state(args.batch_size, func=mx.nd.zeros, ctx=context[0])
for i, (data, target) in enumerate(data_source):
data = data.as_in_context(context[0]).T
target = target.as_in_context(context[0]).T
@@ -172,7 +169,7 @@ def train():
for epoch in range(args.epochs):
total_L = 0.0
start_epoch_time = time.time()
- hiddens = [model.begin_state(func=mx.nd.zeros, batch_size=args.batch_size, ctx=ctx) for ctx in context]
+ hiddens = [model.begin_state(args.batch_size, func=mx.nd.zeros, ctx=ctx) for ctx in context]
for i, (data, target) in enumerate(train_data):
start_batch_time = time.time()
data = data.T
diff --git a/python/mxnet/gluon/data/text/sentiment.py b/python/mxnet/gluon/data/text/sentiment.py
index 932b4d8..f88fa49 100644
--- a/python/mxnet/gluon/data/text/sentiment.py
+++ b/python/mxnet/gluon/data/text/sentiment.py
@@ -21,11 +21,8 @@
__all__ = ['IMDB']
-import glob
-import io
import json
import os
-import tarfile
from ..dataset import SimpleDataset
from ...utils import download, check_sha1, _get_repo_file_url
@@ -67,7 +64,7 @@ class IMDB(SimpleDataset):
path = os.path.join(root, data_file_name)
if not os.path.exists(path) or not check_sha1(path, data_hash):
download(_get_repo_file_url('gluon/dataset/imdb', data_file_name),
- path=root, sha1_hash=data_hash)
+ path=root, sha1_hash=data_hash)
def _read_data(self):
diff --git a/python/mxnet/gluon/data/text/utils.py b/python/mxnet/gluon/data/text/utils.py
index b923f74..721cdf9 100644
--- a/python/mxnet/gluon/data/text/utils.py
+++ b/python/mxnet/gluon/data/text/utils.py
@@ -20,6 +20,10 @@
"""Utility functions."""
+import os
+
+from ...text import Vocabulary
+
def flatten_samples(samples):
"""Flatten list of list of tokens into a single flattened list of tokens.
@@ -75,3 +79,52 @@ def collate_pad_length(num_items, seq_len, overlap=0):
step = seq_len-overlap
span = num_items-seq_len
return (span // step + 1) * step - span
+
+_vocab_sha1 = {}
+
+def _load_pretrained_vocab(name, root=os.path.join('~', '.mxnet', 'models')):
+ """Load the accompanying vocabulary object for pretrained model.
+
+ Parameters
+ ----------
+ name : str
+ Name of the model.
+ root : str, default '~/.mxnet/models'
+ Location for keeping the model parameters.
+
+ Returns
+ -------
+ Vocabulary
+ The pretrained vocabulary object deserialized from the downloaded file.
+ """
+ file_name = '{name}-{short_hash}'.format(name=name,
+ short_hash=short_hash(name))
+ root = os.path.expanduser(root)
+ file_path = os.path.join(root, file_name+'.vocab')
+ sha1_hash = _vocab_sha1[name]
+ if os.path.exists(file_path):
+ if check_sha1(file_path, sha1_hash):
+ return Vocabulary.json_deserialize(open(file_path, "rb").read())
+ else:
+ print('Detected mismatch in the content of model vocab file. Downloading again.')
+ else:
+ print('Vocab file is not found. Downloading.')
+
+ if not os.path.exists(root):
+ os.makedirs(root)
+
+ zip_file_path = os.path.join(root, file_name+'.zip')
+ repo_url = os.environ.get('MXNET_GLUON_REPO', apache_repo_url)
+ if repo_url[-1] != '/':
+ repo_url = repo_url + '/'
+ download(_url_format.format(repo_url=repo_url, file_name=file_name),
+ path=zip_file_path,
+ overwrite=True)
+ with zipfile.ZipFile(zip_file_path) as zf:
+ zf.extractall(root)
+ os.remove(zip_file_path)
+
+ if check_sha1(file_path, sha1_hash):
+ return Vocabulary.json_deserialize(open(file_path, "rb").read())
+ else:
+ raise ValueError('Downloaded file has different hash. Please try again.')
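
For context, collate_pad_length (unchanged above) computes how many padding tokens
make a flattened corpus split evenly into fixed-length, possibly overlapping windows.
A quick worked check under that definition:

    def collate_pad_length(num_items, seq_len, overlap=0):
        step = seq_len - overlap
        span = num_items - seq_len
        return (span // step + 1) * step - span

    # 10 tokens in length-4 windows with no overlap: windows start at 0 and 4,
    # leaving 2 trailing tokens, so 2 pad tokens complete a third window.
    assert collate_pad_length(10, 4) == 2
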
diff --git a/python/mxnet/gluon/model_zoo/text/__init__.py b/python/mxnet/gluon/model_zoo/text/__init__.py
index 9aabd8a..dd1ec19 100644
--- a/python/mxnet/gluon/model_zoo/text/__init__.py
+++ b/python/mxnet/gluon/model_zoo/text/__init__.py
@@ -22,13 +22,21 @@ r"""Module for pre-defined NLP models.
This module contains definitions for the following model architectures:
- `AWD`_
-You can construct a model with random weights by calling its constructor:
+You can construct a model with random weights by calling its constructor. Because NLP models
+are tied to vocabularies, you can either specify a dataset name to load and use the vocabulary
+of that dataset:
.. code::
from mxnet.gluon.model_zoo import text
- # TODO
- awd = text.awd_variant()
+ awd, vocab = text.awd_lstm_lm_1150(dataset_name='wikitext-2')
+
+or directly specify a vocabulary object:
+
+.. code::
+
+ from mxnet.gluon.model_zoo import text
+ awd, vocab = text.awd_lstm_lm_1150(None, vocab=custom_vocab)
We provide pre-trained weights for all of the listed models.
These models can be constructed by passing ``pretrained=True``:
@@ -36,8 +44,8 @@ These models can constructed by passing ``pretrained=True``:
.. code::
from mxnet.gluon.model_zoo import text
- # TODO
- awd = text.awd_variant(pretrained=True)
+ awd, vocab = text.awd_lstm_lm_1150(dataset_name='wikitext-2',
+ pretrained=True)
.. _AWD: https://arxiv.org/abs/1708.02182
"""
@@ -46,17 +54,26 @@ from .base import *
from . import lm
+from .lm import standard_lstm_lm_650, standard_lstm_lm_1500, awd_lstm_lm_1150
+
def get_model(name, **kwargs):
- """Returns a pre-defined model by name
+ """Returns a pre-defined model by name.
Parameters
----------
name : str
Name of the model.
- pretrained : bool
+ dataset_name : str or None, default None
+ The name of the dataset on which the pretrained model was trained.
+ Options are 'wikitext-2'. If specified, the returned vocabulary is extracted
+ from the training set of that dataset.
+ If None, vocab is required; it determines the embedding size and is
+ returned unchanged.
+ vocab : gluon.text.Vocabulary or None, default None
+ Vocabulary object to be used with the language model.
+ Required when dataset_name is not specified.
+ pretrained : bool, default False
Whether to load the pretrained weights for model.
- classes : int
- Number of classes for the output layer.
ctx : Context, default CPU
The context in which to load the pretrained weights.
root : str, default '~/.mxnet/models'
@@ -67,7 +84,9 @@ def get_model(name, **kwargs):
gluon.Block, gluon.text.Vocabulary
The model and its vocabulary.
"""
- #models = {'awd_variant': awd_variant}
+ models = {'standard_lstm_lm_650': standard_lstm_lm_650,
+ 'standard_lstm_lm_1500': standard_lstm_lm_1500,
+ 'awd_lstm_lm_1150': awd_lstm_lm_1150}
name = name.lower()
if name not in models:
raise ValueError(
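
A short usage sketch of the updated entry point; it assumes the wikitext-2
vocabulary archive is published under the file names this commit expects:

    from mxnet.gluon.model_zoo import text

    # Look up a preset by name; every text model returns (model, vocab).
    model, vocab = text.get_model('awd_lstm_lm_1150', dataset_name='wikitext-2')
    model.collect_params().initialize()
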
diff --git a/python/mxnet/gluon/model_zoo/text/base.py b/python/mxnet/gluon/model_zoo/text/base.py
index 7967eb9..6f5f555 100644
--- a/python/mxnet/gluon/model_zoo/text/base.py
+++ b/python/mxnet/gluon/model_zoo/text/base.py
@@ -15,28 +15,18 @@
# specific language governing permissions and limitations
# under the License.
"""Building blocks and utility for models."""
+__all__ = ['StatefulBlock', 'get_rnn_layer', 'get_rnn_cell',
+ 'RNNCellLayer', 'apply_weight_drop', 'WeightDropParameter']
-from ... import Block, HybridBlock, Parameter, contrib, nn, rnn
+from ... import Block, HybridBlock, Parameter, contrib, rnn
from .... import nd
-
-class _TextSeq2SeqModel(Block):
- def __init__(self, src_vocab, tgt_vocab, **kwargs):
- super(_TextSeq2SeqModel, self).__init__(**kwargs)
- self._src_vocab = src_vocab
- self._tgt_vocab = tgt_vocab
+class StatefulBlock(Block):
+ def __init__(self, **kwargs):
+ super(StatefulBlock, self).__init__(**kwargs)
def begin_state(self, *args, **kwargs):
- return self.encoder.begin_state(*args, **kwargs)
-
- def forward(self, inputs, begin_state=None): # pylint: disable=arguments-differ
- embedded_inputs = self.embedding(inputs)
- if not begin_state:
- begin_state = self.begin_state()
- encoded, state = self.encoder(embedded_inputs, begin_state)
- out = self.decoder(encoded)
- return out, state
-
+ raise NotImplementedError()
def apply_weight_drop(block, local_param_name, rate, axes=(),
weight_dropout_mode='training'):
@@ -94,7 +84,7 @@ def _find_param(block, full_param_name, local_param_name):
return param_dict_results, reg_dict_results
-def get_rnn_cell(mode, num_layers, num_embed, num_hidden,
+def get_rnn_cell(mode, num_layers, input_size, hidden_size,
dropout, weight_dropout,
var_drop_in, var_drop_state, var_drop_out):
"""create rnn cell given specs"""
@@ -102,13 +92,13 @@ def get_rnn_cell(mode, num_layers, num_embed, num_hidden,
with rnn_cell.name_scope():
for i in range(num_layers):
if mode == 'rnn_relu':
- cell = rnn.RNNCell(num_hidden, 'relu', input_size=num_embed)
+ cell = rnn.RNNCell(hidden_size, 'relu', input_size=input_size)
elif mode == 'rnn_tanh':
- cell = rnn.RNNCell(num_hidden, 'tanh', input_size=num_embed)
+ cell = rnn.RNNCell(hidden_size, 'tanh', input_size=input_size)
elif mode == 'lstm':
- cell = rnn.LSTMCell(num_hidden, input_size=num_embed)
+ cell = rnn.LSTMCell(hidden_size, input_size=input_size)
elif mode == 'gru':
- cell = rnn.GRUCell(num_hidden, input_size=num_embed)
+ cell = rnn.GRUCell(hidden_size, input_size=input_size)
if var_drop_in + var_drop_state + var_drop_out != 0:
cell = contrib.rnn.VariationalDropoutCell(cell,
var_drop_in,
@@ -125,20 +115,20 @@ def get_rnn_cell(mode, num_layers, num_embed, num_hidden,
return rnn_cell
-def get_rnn_layer(mode, num_layers, num_embed, num_hidden, dropout, weight_dropout):
+def get_rnn_layer(mode, num_layers, input_size, hidden_size, dropout, weight_dropout):
"""create rnn layer given specs"""
if mode == 'rnn_relu':
- block = rnn.RNN(num_hidden, 'relu', num_layers, dropout=dropout,
- input_size=num_embed)
+ block = rnn.RNN(hidden_size, 'relu', num_layers, dropout=dropout,
+ input_size=input_size)
elif mode == 'rnn_tanh':
- block = rnn.RNN(num_hidden, num_layers, dropout=dropout,
- input_size=num_embed)
+ block = rnn.RNN(hidden_size, num_layers, dropout=dropout,
+ input_size=input_size)
elif mode == 'lstm':
- block = rnn.LSTM(num_hidden, num_layers, dropout=dropout,
- input_size=num_embed)
+ block = rnn.LSTM(hidden_size, num_layers, dropout=dropout,
+ input_size=input_size)
elif mode == 'gru':
- block = rnn.GRU(num_hidden, num_layers, dropout=dropout,
- input_size=num_embed)
+ block = rnn.GRU(hidden_size, num_layers, dropout=dropout,
+ input_size=input_size)
if weight_dropout:
apply_weight_drop(block, 'h2h_weight', rate=weight_dropout)
@@ -148,7 +138,7 @@ def get_rnn_layer(mode, num_layers, num_embed, num_hidden, dropout, weight_dropo
class RNNCellLayer(Block):
"""A block that takes an rnn cell and makes it act like rnn layer."""
def __init__(self, rnn_cell, layout='TNC', **kwargs):
- super(RNNCellBlock, self).__init__(**kwargs)
+ super(RNNCellLayer, self).__init__(**kwargs)
self.cell = rnn_cell
assert layout == 'TNC' or layout == 'NTC', \
"Invalid layout %s; must be one of ['TNC' or 'NTC']"%layout
@@ -177,20 +167,6 @@ class RNNCellLayer(Block):
return outputs
return outputs, states
-class ExtendedSequential(nn.Sequential):
- def forward(self, *x): # pylint: disable=arguments-differ
- for block in self._children:
- x = block(*x)
- return x
-
-class TransformerBlock(Block):
- def __init__(self, *blocks, **kwargs):
- super(TransformerBlock, self).__init__(**kwargs)
- self._blocks = blocks
-
- def forward(self, *inputs):
- return [block(data) if block else data for block, data in zip(self._blocks, inputs)]
-
class WeightDropParameter(Parameter):
"""A Container holding parameters (weights) of Blocks and performs dropout.
diff --git a/python/mxnet/gluon/model_zoo/text/lm.py b/python/mxnet/gluon/model_zoo/text/lm.py
index 3ada7d0..7607cf5 100644
--- a/python/mxnet/gluon/model_zoo/text/lm.py
+++ b/python/mxnet/gluon/model_zoo/text/lm.py
@@ -15,110 +15,286 @@
# specific language governing permissions and limitations
# under the License.
"""Language models."""
+__all__ = ['AWDRNN', 'StandardRNN', 'awd_lstm_lm_1150',
+ 'standard_lstm_lm_650', 'standard_lstm_lm_1500']
-from .base import _TextSeq2SeqModel, ExtendedSequential, TransformerBlock
-from .base import get_rnn_layer, apply_weight_drop
+import os
+import warnings
+
+from .base import StatefulBlock, get_rnn_layer
+from ..model_store import get_model_file
+from ...data.text.utils import _load_pretrained_vocab
from ... import nn
-from .... import init
+from .... import init, nd, cpu
-class AWDLSTM(_TextSeq2SeqModel):
+class AWDRNN(StatefulBlock):
"""AWD language model."""
- def __init__(self, mode, vocab, embed_dim, hidden_dim, num_layers,
- dropout=0.5, drop_h=0.5, drop_i=0.5, drop_e=0.1, weight_drop=0,
- tie_weights=False, **kwargs):
- super(AWDLSTM, self).__init__(vocab, vocab, **kwargs)
+ def __init__(self, mode, vocab_size, embed_size, hidden_size, num_layers,
+ tie_weights=False, dropout=0.5, weight_drop=0, drop_h=0.5, drop_i=0.5,
+ **kwargs):
+ super(AWDRNN, self).__init__(**kwargs)
self._mode = mode
- self._embed_dim = embed_dim
- self._hidden_dim = hidden_dim
+ self._vocab_size = vocab_size
+ self._embed_size = embed_size
+ self._hidden_size = hidden_size
self._num_layers = num_layers
self._dropout = dropout
self._drop_h = drop_h
self._drop_i = drop_i
- self._drop_e = drop_e
self._weight_drop = weight_drop
self._tie_weights = tie_weights
- self.embedding = self._get_embedding()
- self.encoder = self._get_encoder()
- self.decoder = self._get_decoder()
+
+ with self.name_scope():
+ self.embedding = self._get_embedding()
+ self.encoder = self._get_encoder()
+ self.decoder = self._get_decoder()
def _get_embedding(self):
embedding = nn.HybridSequential()
with embedding.name_scope():
- embedding_block = nn.Embedding(len(self._src_vocab), self._embed_dim,
+ embedding_block = nn.Embedding(self._vocab_size, self._embed_size,
weight_initializer=init.Uniform(0.1))
- if self._drop_e:
- apply_weight_drop(embedding_block, 'weight', self._drop_e, axes=(1,))
embedding.add(embedding_block)
if self._drop_i:
embedding.add(nn.Dropout(self._drop_i, axes=(0,)))
return embedding
def _get_encoder(self):
- encoder = ExtendedSequential()
+ encoder = nn.Sequential()
with encoder.name_scope():
for l in range(self._num_layers):
- encoder.add(get_rnn_layer(self._mode, 1, self._embed_dim if l == 0 else
- self._hidden_dim, self._hidden_dim if
+ encoder.add(get_rnn_layer(self._mode, 1, self._embed_size if l == 0 else
+ self._hidden_size, self._hidden_size if
l != self._num_layers - 1 or not self._tie_weights
- else self._embed_dim, 0, self._weight_drop))
- if self._drop_h:
- encoder.add(TransformerBlock(nn.Dropout(self._drop_h, axes=(0,)), None))
+ else self._embed_size, 0, self._weight_drop))
return encoder
def _get_decoder(self):
- vocab_size = len(self._tgt_vocab)
if self._tie_weights:
- output = nn.Dense(vocab_size, flatten=False, params=self.embedding.params)
+ output = nn.Dense(self._vocab_size, flatten=False, params=self.embedding[0].params)
else:
- output = nn.Dense(vocab_size, flatten=False)
+ output = nn.Dense(self._vocab_size, flatten=False)
return output
def begin_state(self, *args, **kwargs):
- return self.encoder[0].begin_state(*args, **kwargs)
+ return [c.begin_state(*args, **kwargs) for c in self.encoder]
+
+ def forward(self, inputs, begin_state=None): # pylint: disable=arguments-differ
+ encoded = self.embedding(inputs)
+ if not begin_state:
+ begin_state = self.begin_state(batch_size=inputs.shape[1])
+ out_states = []
+ for e, s in zip(self.encoder, begin_state):
+ encoded, state = e(encoded, s)
+ out_states.append(state)
+ if self._drop_h:
+ encoded = nd.Dropout(encoded, p=self._drop_h, axes=(0,))
+ out = self.decoder(encoded)
+ return out, out_states
+
-class RNNModel(_TextSeq2SeqModel):
- """Simple RNN language model."""
- def __init__(self, mode, vocab, embed_dim, hidden_dim,
+class StandardRNN(StatefulBlock):
+ """Standard RNN language model."""
+ def __init__(self, mode, vocab_size, embed_size, hidden_size,
num_layers, dropout=0.5, tie_weights=False, **kwargs):
- super(RNNModel, self).__init__(vocab, vocab, **kwargs)
+ if tie_weights:
+ assert embed_size == hidden_size, "Embedding dimension must be equal to " \
+ "hidden dimension in order to tie weights. " \
+ "Got: emb: {}, hid: {}.".format(embed_size,
+ hidden_size)
+ super(StandardRNN, self).__init__(**kwargs)
self._mode = mode
- self._embed_dim = embed_dim
- self._hidden_dim = hidden_dim
+ self._embed_size = embed_size
+ self._hidden_size = hidden_size
self._num_layers = num_layers
self._dropout = dropout
self._tie_weights = tie_weights
- self.embedding = self._get_embedding()
- self.encoder = self._get_encoder()
- self.decoder = self._get_decoder()
+ self._vocab_size = vocab_size
+
+ with self.name_scope():
+ self.embedding = self._get_embedding()
+ self.encoder = self._get_encoder()
+ self.decoder = self._get_decoder()
def _get_embedding(self):
embedding = nn.HybridSequential()
with embedding.name_scope():
- embedding.add(nn.Embedding(len(self._src_vocab), self._embed_dim,
+ embedding.add(nn.Embedding(self._vocab_size, self._embed_size,
weight_initializer=init.Uniform(0.1)))
if self._dropout:
embedding.add(nn.Dropout(self._dropout))
return embedding
def _get_encoder(self):
- encoder = ExtendedSequential()
- with encoder.name_scope():
- for l in range(self._num_layers):
- encoder.add(get_rnn_layer(self._mode, 1, self._embed_dim if l == 0 else
- self._hidden_dim, self._hidden_dim if
- l != self._num_layers - 1 or not self._tie_weights
- else self._embed_dim, 0, 0))
-
- return encoder
+ return get_rnn_layer(self._mode, self._num_layers, self._embed_size,
+ self._hidden_size, self._dropout, 0)
def _get_decoder(self):
- vocab_size = len(self._tgt_vocab)
if self._tie_weights:
- output = nn.Dense(vocab_size, flatten=False, params=self.embedding[0].params)
+ output = nn.Dense(self._vocab_size, flatten=False, params=self.embedding[0].params)
else:
- output = nn.Dense(vocab_size, flatten=False)
+ output = nn.Dense(self._vocab_size, flatten=False)
return output
def begin_state(self, *args, **kwargs):
- return self.encoder[0].begin_state(*args, **kwargs)
+ return self.encoder.begin_state(*args, **kwargs)
+
+ def forward(self, inputs, begin_state=None): # pylint: disable=arguments-differ
+ embedded_inputs = self.embedding(inputs)
+ if not begin_state:
+ begin_state = self.begin_state(batch_size=inputs.shape[1])
+ encoded, state = self.encoder(embedded_inputs, begin_state)
+ out = self.decoder(encoded)
+ return out, state
+
+
+def _load_vocab(dataset_name, vocab, root):
+ if dataset_name:
+ if vocab is not None:
+ warnings.warn('Both dataset_name and vocab are specified. Loading vocab for dataset. '
+ 'vocab will be ignored.')
+ vocab = _load_pretrained_vocab(dataset_name, root)
+ else:
+ assert vocab is not None, "Must specify vocab if not loading from predefined datasets."
+ return vocab
+
+
+def _load_pretrained_params(net, model_name, root, ctx):
+ model_file = get_model_file(model_name, root=root)
+ net.load_params(model_file, ctx=ctx)
+
+
+def _get_rnn_model(model_cls, model_name, dataset_name, vocab, pretrained, ctx, root, **kwargs):
+ vocab = _load_vocab(dataset_name, vocab, root)
+ kwargs['vocab_size'] = len(vocab)
+ net = model_cls(**kwargs)
+ if pretrained:
+ _load_pretrained_params(net, model_name, root, ctx)
+ return net, vocab
+
+
+def awd_lstm_lm_1150(dataset_name=None, vocab=None, pretrained=False, ctx=cpu(),
+ root=os.path.join('~', '.mxnet', 'models'), **kwargs):
+ r"""3-layer LSTM language model with weight-drop, variational dropout, and tied weights.
+
+ Embedding size is 400, and hidden layer size is 1150.
+
+ Parameters
+ ----------
+ dataset_name : str or None, default None
+ The name of the dataset on which the pretrained model was trained.
+ Options are 'wikitext-2'. If specified, the returned vocabulary is extracted
+ from the training set of that dataset.
+ If None, vocab is required; it determines the embedding size and is
+ returned unchanged.
+ vocab : gluon.text.Vocabulary or None, default None
+ Vocabulary object to be used with the language model.
+ Required when dataset_name is not specified.
+ pretrained : bool, default False
+ Whether to load the pretrained weights for model.
+ ctx : Context, default CPU
+ The context in which to load the pretrained weights.
+ root : str, default '~/.mxnet/models'
+ Location for keeping the model parameters.
+
+ Returns
+ -------
+ gluon.Block, gluon.text.Vocabulary
+ """
+ predefined_args = {'embed_size': 400,
+ 'hidden_size': 1150,
+ 'mode': 'lstm',
+ 'num_layers': 3,
+ 'tie_weights': True,
+ 'dropout': 0.4,
+ 'weight_drop': 0.5,
+ 'drop_h': 0.3,
+ 'drop_i': 0.65}
+ assert all(k not in kwargs for k in predefined_args), \
+ "Cannot override predefined model settings."
+ kwargs.update(predefined_args)
+ return _get_rnn_model(AWDRNN, 'awd_lstm_lm_1150', dataset_name, vocab, pretrained,
+ ctx, root, **kwargs)
+
+
+def standard_lstm_lm_650(dataset_name=None, vocab=None, pretrained=False, ctx=cpu(),
+ root=os.path.join('~', '.mxnet', 'models'), **kwargs):
+ r"""Standard 2-layer LSTM language model with tied embedding and output weights.
+
+ Both embedding and hidden dimensions are 650.
+
+ Parameters
+ ----------
+ dataset_name : str or None, default None
+ The name of the dataset on which the pretrained model was trained.
+ Options are 'wikitext-2'. If specified, the returned vocabulary is extracted
+ from the training set of that dataset.
+ If None, vocab is required; it determines the embedding size and is
+ returned unchanged.
+ vocab : gluon.text.Vocabulary or None, default None
+ Vocabulary object to be used with the language model.
+ Required when dataset_name is not specified.
+ pretrained : bool, default False
+ Whether to load the pretrained weights for model.
+ ctx : Context, default CPU
+ The context in which to load the pretrained weights.
+ root : str, default '~/.mxnet/models'
+ Location for keeping the model parameters.
+
+ Returns
+ -------
+ gluon.Block, gluon.text.Vocabulary
+ """
+ predefined_args = {'embed_size': 650,
+ 'hidden_size': 650,
+ 'mode': 'lstm',
+ 'num_layers': 2,
+ 'tie_weights': True,
+ 'dropout': 0.5}
+ assert all(k not in kwargs for k in predefined_args), \
+ "Cannot override predefined model settings."
+ kwargs.update(predefined_args)
+ return _get_rnn_model(StandardRNN, 'standard_lstm_lm_650', dataset_name, vocab, pretrained,
+ ctx, root, **kwargs)
+
+
+def standard_lstm_lm_1500(dataset_name=None, vocab=None, pretrained=False, ctx=cpu(),
+ root=os.path.join('~', '.mxnet', 'models'), **kwargs):
+ r"""Standard 2-layer LSTM language model with tied embedding and output weights.
+
+ Both embedding and hidden dimensions are 1500.
+
+ Parameters
+ ----------
+ dataset_name : str or None, default None
+ The name of the dataset on which the pretrained model was trained.
+ Options are 'wikitext-2'. If specified, the returned vocabulary is extracted
+ from the training set of that dataset.
+ If None, vocab is required; it determines the embedding size and is
+ returned unchanged.
+ vocab : gluon.text.Vocabulary or None, default None
+ Vocabulary object to be used with the language model.
+ Required when dataset_name is not specified.
+ pretrained : bool, default False
+ Whether to load the pretrained weights for model.
+ ctx : Context, default CPU
+ The context in which to load the pretrained weights.
+ root : str, default '~/.mxnet/models'
+ Location for keeping the model parameters.
+
+ Returns
+ -------
+ gluon.Block, gluon.text.Vocabulary
+ """
+ predefined_args = {'embed_size': 1500,
+ 'hidden_size': 1500,
+ 'mode': 'lstm',
+ 'num_layers': 2,
+ 'tie_weights': True,
+ 'dropout': 0.65}
+ assert all(k not in kwargs for k in predefined_args), \
+ "Cannot override predefined model settings."
+ kwargs.update(predefined_args)
+ return _get_rnn_model(StandardRNN, 'standard_lstm_lm_1500', dataset_name, vocab, pretrained,
+ ctx, root, **kwargs)
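
End to end, the presets can be exercised along the lines of the updated unit test.
A sketch with a toy vocabulary (real corpora would use dataset counters instead):

    import collections
    import mxnet as mx
    from mxnet.gluon.model_zoo.text.lm import awd_lstm_lm_1150
    from mxnet.gluon.text.vocab import Vocabulary

    # dataset_name=None plus an explicit vocab avoids any download.
    vocab = Vocabulary(collections.Counter(['hello', 'world'] * 100))
    model, _ = awd_lstm_lm_1150(dataset_name=None, vocab=vocab)
    model.collect_params().initialize()
    hidden = model.begin_state(batch_size=10, func=mx.nd.zeros)
    inputs = mx.nd.arange(330).reshape((33, 10)) % len(vocab)  # valid indices
    output, hidden = model(inputs, hidden)
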
diff --git a/python/mxnet/gluon/parameter.py b/python/mxnet/gluon/parameter.py
index 7dc7243..1148358 100644
--- a/python/mxnet/gluon/parameter.py
+++ b/python/mxnet/gluon/parameter.py
@@ -580,7 +580,12 @@ class ParameterDict(object):
else:
for k, v in kwargs.items():
if hasattr(param, k) and getattr(param, k) is not None:
- assert v is None or v == getattr(param, k), \
+ existing = getattr(param, k)
+ if k == 'shape' and len(v) == len(existing):
+ if all(d1 == 0 or d1 == d2 for d1, d2 in zip(v, existing)):
+ continue
+
+ assert v is None or v == existing, \
"Cannot retrieve Parameter %s because desired attribute " \
"does not match with stored for attribute %s: " \
"desired %s vs stored %s."%(
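
The relaxed check above lets a requested shape that contains zeros (dimensions left
for deferred shape inference) match an already-registered parameter; the tied decoder
relies on this when it fetches the shared embedding weight before its input dimension
is known. The comparison rule in isolation:

    def shapes_compatible(desired, stored):
        # A zero entry in the desired shape acts as a wildcard.
        return (len(desired) == len(stored) and
                all(d1 == 0 or d1 == d2 for d1, d2 in zip(desired, stored)))

    assert shapes_compatible((0, 400), (5000, 400))
    assert not shapes_compatible((0, 256), (5000, 400))
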
diff --git a/python/mxnet/gluon/text/vocab.py b/python/mxnet/gluon/text/vocab.py
index aa962af..c8d5529 100644
--- a/python/mxnet/gluon/text/vocab.py
+++ b/python/mxnet/gluon/text/vocab.py
@@ -23,6 +23,9 @@ from __future__ import absolute_import
from __future__ import print_function
import collections
+import json
+import warnings
+
from ... import nd
from . import _constants as C
@@ -321,3 +324,28 @@ class Vocabulary(object):
tokens.append(self._idx_to_token[idx])
return tokens[0] if to_reduce else tokens
+
+ def __repr__(self):
+ return 'Vocab(size={}, unk="{}", reserved="{}")'.format(len(self), self._unknown_token,
+ self._reserved_tokens)
+
+ def json_serialize(self):
+ if self._embedding is not None:
+ warnings.warn('Serialization of attached embedding is not supported. '
+ 'Please save it separately.')
+ vocab_dict = {}
+ vocab_dict['idx_to_token'] = self._idx_to_token
+ vocab_dict['token_to_idx'] = self._token_to_idx
+ vocab_dict['reserved_tokens'] = self._reserved_tokens
+ vocab_dict['unknown_token'] = self._unknown_token
+ return json.dumps(vocab_dict)
+
+ @staticmethod
+ def json_deserialize(json_str):
+ vocab = Vocabulary()
+ vocab_dict = json.loads(json_str)
+ vocab._idx_to_token = vocab_dict.get('idx_to_token')
+ vocab._token_to_idx = vocab_dict.get('token_to_idx')
+ vocab._reserved_tokens = vocab_dict.get('reserved_tokens')
+ vocab._unknown_token = vocab_dict.get('unknown_token')
+ return vocab
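
A round-trip sketch for the new serialization hooks, assuming a Vocabulary built
from a token counter as in the wikitext-2 test below:

    import collections
    from mxnet.gluon.text.vocab import Vocabulary

    vocab = Vocabulary(collections.Counter(['hello', 'world', 'hello']))
    payload = vocab.json_serialize()             # a plain JSON string
    restored = Vocabulary.json_deserialize(payload)
    assert restored._idx_to_token == vocab._idx_to_token
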
diff --git a/tests/python/unittest/test_gluon_data_text.py b/tests/python/unittest/test_gluon_data_text.py
index 0cd84ed..8a9688f 100644
--- a/tests/python/unittest/test_gluon_data_text.py
+++ b/tests/python/unittest/test_gluon_data_text.py
@@ -17,6 +17,7 @@
from __future__ import print_function
import collections
+import json
import mxnet as mx
from mxnet.gluon import text, contrib, nn
from mxnet.gluon import data as d
@@ -48,6 +49,9 @@ def test_wikitext2():
vocab = text.vocab.Vocabulary(get_frequencies(train))
+ serialized_vocab = vocab.json_serialize()
+ assert len(serialized_vocab) == 962072, len(serialized_vocab)
+ assert json.loads(serialized_vocab)['idx_to_token'] == vocab._idx_to_token
def index_tokens(data, label):
return vocab[data], vocab[label]
nbatch_train = len(train) // 80
diff --git a/tests/python/unittest/test_gluon_model_zoo.py b/tests/python/unittest/test_gluon_model_zoo.py
index e97b3b5..9dc5663 100644
--- a/tests/python/unittest/test_gluon_model_zoo.py
+++ b/tests/python/unittest/test_gluon_model_zoo.py
@@ -16,9 +16,13 @@
# under the License.
from __future__ import print_function
+
+import collections
+import sys
+
import mxnet as mx
from mxnet.gluon.model_zoo.vision import get_model as get_vision_model
-import sys
+from mxnet.gluon.model_zoo.text import get_model as get_text_model
from common import setup_module, with_seed
@@ -27,7 +31,7 @@ def eprint(*args, **kwargs):
@with_seed()
-def test_models():
+def test_vision_models():
vision_models = ['resnet18_v1', 'resnet34_v1', 'resnet50_v1', 'resnet101_v1', 'resnet152_v1',
'resnet18_v2', 'resnet34_v2', 'resnet50_v2', 'resnet101_v2', 'resnet152_v2',
'vgg11', 'vgg13', 'vgg16', 'vgg19',
@@ -49,6 +53,28 @@ def test_models():
model.collect_params().initialize()
model(mx.nd.random.uniform(shape=data_shape)).wait_to_read()
+def get_frequencies(dataset):
+ return collections.Counter(x for tup in dataset for x in tup[0]+tup[1][-1:])
+
+@with_seed()
+def test_text_models():
+ val = mx.gluon.data.text.WikiText2(root='data/wikitext-2', segment='val')
+ val_freq = get_frequencies(val)
+ vocab = mx.gluon.text.vocab.Vocabulary(val_freq)
+ text_models = ['standard_lstm_lm_650', 'standard_lstm_lm_1500', 'awd_lstm_lm_1150']
+ pretrained_to_test = {}
+
+ for model_name in text_models:
+ eprint('testing forward for %s' % model_name)
+ pretrained_dataset = pretrained_to_test.get(model_name)
+ model, _ = get_text_model(model_name, vocab=vocab, dataset_name=pretrained_dataset,
+ pretrained=pretrained_dataset is not None, root='model/')
+ print(model)
+ if not pretrained_dataset:
+ model.collect_params().initialize()
+ output, state = model(mx.nd.arange(330).reshape(33, 10))
+ output.wait_to_read()
+
if __name__ == '__main__':
import nose
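
One note on the helper added to the model zoo test: each sample is a (data, label)
pair with the label shifted one token ahead, so counting all of data plus only the
last label token covers every token exactly once. A small check under that assumption:

    import collections

    def get_frequencies(dataset):
        return collections.Counter(x for tup in dataset for x in tup[0] + tup[1][-1:])

    sample = [(['a', 'b', 'c'], ['b', 'c', 'd'])]
    assert get_frequencies(sample) == collections.Counter('abcd')
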
--
To stop receiving notification emails like this one, please contact
zhasheng@apache.org.