Posted to issues@mxnet.apache.org by GitBox <gi...@apache.org> on 2021/04/21 17:33:23 UTC
[GitHub] [incubator-mxnet] barry-jin opened a new issue #20197: MXNet 1.8.0.post0 sparse feature segmentation fault
URL: https://github.com/apache/incubator-mxnet/issues/20197
## Description
GluonNLP v0.x branch CI is blocked after switching from MXNet 1.7.0.post1 to MXNet 1.8.0.post0 (tracked in https://github.com/dmlc/gluon-nlp/issues/1559). It looks like the sparse feature in MXNet 1.8.0 causes a segmentation fault.
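For context, the "sparse feature" here means row-sparse Gluon parameters whose rows are pulled with `Parameter.row_sparse_data()` before a `sparse_grad` embedding lookup. A minimal sketch of that pattern, distilled from the full repro below (the class and variable names here are illustrative only, not part of the original report):
```python3
from mxnet import nd, gluon

class SparseLookup(gluon.Block):
    """Illustrative block: a row_sparse weight read via row_sparse_data()."""
    def __init__(self, num_classes, in_unit):
        super(SparseLookup, self).__init__()
        with self.name_scope():
            # Both the parameter and its gradient use row_sparse storage.
            self.weight = self.params.get('weight', shape=(num_classes, in_unit),
                                          stype='row_sparse',
                                          grad_stype='row_sparse')

    def forward(self, ids):
        # Pull only the rows indexed by `ids` from the row_sparse parameter;
        # this is the code path the repro below exercises.
        weight = self.weight.row_sparse_data(ids)
        return nd.Embedding(data=ids, weight=weight,
                            input_dim=self.weight.shape[0],
                            output_dim=self.weight.shape[1],
                            sparse_grad=True)
```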
### Error Message
```
Segmentation fault: 11
terminate called without an active exception
Aborted (core dumped)
```
## To Reproduce
```python3
import mxnet as mx
from mxnet import nd, gluon
from mxnet.gluon import Block, HybridBlock


class _Helper(HybridBlock):
    def __init__(self, num_classes, num_sampled, in_unit):
        super(_Helper, self).__init__()
        self._num_classes = num_classes
        self._num_sampled = num_sampled
        self._in_unit = in_unit

    def hybrid_forward(self, F, x, sampled_values, label, w_all, b_all):
        """Forward computation."""
        sampled_candidates, expected_count_sampled, expected_count_true = sampled_values
        # (num_sampled, in_unit)
        w_sampled = w_all.slice(begin=(0, 0), end=(self._num_sampled, None))
        w_true = w_all.slice(begin=(self._num_sampled, 0), end=(None, None))
        b_sampled = b_all.slice(begin=(0,), end=(self._num_sampled,))
        b_true = b_all.slice(begin=(self._num_sampled,), end=(None,))
        # true pred
        # (batch_size, 1)
        x = x.reshape((-1, self._in_unit))
        pred_true = (w_true * x).sum(axis=1) + b_true
        # samples pred
        # (batch_size, num_sampled)
        b_sampled = F.reshape(b_sampled, (-1,))
        pred_sampled = F.FullyConnected(x, weight=w_sampled, bias=b_sampled,
                                        num_hidden=self._num_sampled)
        # remove accidental hits
        label_vec = F.reshape(label, (-1, 1)).astype('int32')
        sample_vec = F.reshape(sampled_candidates, (1, -1)).astype('int32')
        mask = F.broadcast_equal(label_vec, sample_vec).astype('float32') * -1e37
        pred_sampled = pred_sampled + mask
        # subtract log(q)
        expected_count_sampled = expected_count_sampled.astype('float32')
        expected_count_sampled = expected_count_sampled.reshape(shape=(1, self._num_sampled))
        expected_count_true = expected_count_true.astype('float32').reshape((-1,))
        pred_true = pred_true - F.log(expected_count_true)
        pred_true = pred_true.reshape((-1, 1))
        pred_sampled = F.broadcast_sub(pred_sampled, F.log(expected_count_sampled))
        # pred and new_labels
        # (batch_size, 1+num_sampled)
        pred = F.concat(pred_true, pred_sampled, dim=1)
        new_label = F.zeros_like(label)
        return pred, new_label


class SimpleSparse(Block):
    def __init__(self, num_classes, num_sampled, in_unit):
        super(SimpleSparse, self).__init__()
        with self.name_scope():
            self.weight = self.params.get('weight', shape=(num_classes, in_unit),
                                          init=None, dtype='float32',
                                          grad_stype='row_sparse', stype='row_sparse')
            self.bias = self.params.get('bias', shape=(num_classes,), init='zeros',
                                        dtype='float32')
        self._num_classes = num_classes
        self._num_sampled = num_sampled
        self._in_unit = in_unit
        self._kwargs = {'input_dim': self._num_classes, 'output_dim': self._in_unit,
                        'sparse_grad': True}
        self._dense = _Helper(num_classes, num_sampled, in_unit)

    def forward(self, x, sampled_values, label):  # pylint: disable=arguments-differ
        """Forward computation."""
        sampled_candidates, _, _ = sampled_values
        # (batch_size,)
        label = label.reshape(shape=(-1,))
        # (num_sampled+batch_size,)
        ids = nd.concat(sampled_candidates.astype('int32'), label.astype('int32'), dim=0)
        # lookup weights and biases
        weight = self.weight.row_sparse_data(ids)
        bias = self.bias.data(ids.context)
        # (num_sampled+batch_size, dim)
        w_all = nd.Embedding(data=ids, weight=weight, **self._kwargs)
        # (num_sampled+batch_size,)
        b_all = nd.take(bias, indices=ids)
        out, new_targets = self._dense(x, sampled_values, label, w_all, b_all)
        return out, new_targets


def test():
    ctx = mx.cpu()
    batch_size = 2
    num_sampled = 3
    vocab_size = 10
    num_hidden = 5
    model = SimpleSparse(vocab_size, num_sampled, num_hidden)
    loss = gluon.loss.SoftmaxCrossEntropyLoss()
    model.hybridize()
    model.initialize(mx.init.Xavier(), ctx=ctx)
    trainer = mx.gluon.Trainer(model.collect_params(), 'sgd')
    x = mx.nd.ones((batch_size, num_hidden))
    y = mx.nd.ones((batch_size,))
    sampled_cls = mx.nd.ones((num_sampled,), dtype='float32')
    sampled_cls_cnt = mx.nd.ones((num_sampled,), dtype='float32')
    true_cls_cnt = mx.nd.ones((batch_size,), dtype='float32')
    samples = (sampled_cls, sampled_cls_cnt, true_cls_cnt)
    with mx.autograd.record():
        pred, new_y = model(x, samples, y)
        l = loss(pred, new_y)
    l.backward()
    mx.nd.waitall()


if __name__ == '__main__':
    test()
```
### Steps to reproduce
Run the script above, or:
```
$ git clone https://github.com/dmlc/gluon-nlp
$ cd gluon-nlp
$ git checkout v0.x
$ python3 -m pip install -e .[extra,dev]
$ python3 -m pytest tests/unittest/test_sampled_logits.py::test_is_softmax_loss
```
## What have you tried to solve it?
## Environment
***We recommend using our script for collecting the diagnostic information with the following command***
`curl --retry 10 -s https://raw.githubusercontent.com/apache/incubator-mxnet/master/tools/diagnose.py | python3`
<details>
<summary>Environment Information</summary>
```
# Paste the diagnose.py command output here
```
</details>