You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mxnet.apache.org by GitBox <gi...@apache.org> on 2017/11/23 09:43:28 UTC
[GitHub] SumNeuron opened a new issue #8794: GPU throws out of index error?
SumNeuron opened a new issue #8794: GPU throws out of index error?
URL: https://github.com/apache/incubator-mxnet/issues/8794
## Description
(Brief description of the problem in no more than 2 sentences.)
I have two files:
- VAE.py which defines a variational auto encoder
- vae_test.py which loads these classes and runs mnist on it
now when I set the context to cpu things run.
when I set the context to gpu things break (error and files pasted below).
I have previously used mxnet with gpus on this machine. So I am a bit confused...
# VAE.py
```
import mxnet as mx
from mxnet import nd, gluon, autograd
from mxnet.gluon import nn, utils
def sample_gaussian(mu, lv, batch_size, latent_z):
epsilon = nd.random_normal(0, 1, shape=(batch_size, latent_z))
sigma = nd.sqrt(nd.exp(lv))
z = mu + nd.multiply(sigma, epsilon)
return z
class VAEEncoder(gluon.Block):
def __init__(self, latent_z=100, **kwargs):
super(VAEEncoder, self).__init__(**kwargs)
with self.name_scope():
self.enc = nn.Sequential()
with self.enc.name_scope():
self.enc.add(nn.Dense(28*28, activation='relu'))
self.enc.add(nn.Dense(128, activation='relu'))
self.enc.add(nn.Activation(activation='tanh'))
self.mu = nn.Dense(latent_z) # mu = mean
self.lv = nn.Dense(latent_z) # lv = log variance
def forward(self, x):
x = self.enc(x)
mu = self.mu(x)
lv = self.lv(x)
return mu, lv
def net_init(self, ctx):
self.enc.initialize(ctx=ctx)
class VAEDecoder(gluon.Block):
def __init__(self, latent_z=100, **kwargs):
super(VAEDecoder, self).__init__(**kwargs)
with self.name_scope():
self.dec = nn.Sequential()
self.dec.add(nn.Dense(128, in_units=latent_z, activation='relu'))
self.dec.add(nn.Dense(28*28))
self.dec.add(nn.Activation(activation='tanh'))
def forward(self, x):
x = self.dec(x)
return x
def net_init(self, ctx):
self.dec.initialize(ctx=ctx)
class VAE(gluon.Block):
def __init__(self, latent_z=100, batch_size=1, **kwargs):
super(VAE, self).__init__(**kwargs)
with self.name_scope():
self.enc = VAEEncoder(latent_z=latent_z)
self.dec = VAEDecoder(latent_z=latent_z)
self.latent_z = latent_z
self.batch_size = batch_size
def forward(self, x):
mu, lv = self.enc(x)
z = sample_gaussian(mu, lv, self.batch_size, self.latent_z)
y = self.dec(z)
return y, mu, lv
def vae_loss(x, y, mu, lv):
l2 = gluon.loss.L2Loss()
bce = l2(y, x) # MSE loss
bce = nd.sum(bce)
# loss = 0.5 sum(1-log(sigma^2)+mu^2+sigma^2)
kld_el = (nd.power(mu, 2) + nd.exp(lv)) * -1 + 1 + lv
kld = nd.sum(kld_el) * (-0.5)
return bce + kld
```
# vae_test.py
```
import mxnet as mx
from mxnet import nd, gluon, autograd
from mxnet.gluon import nn, utils
from VAE import VAE, vae_loss
use_gpu = False
latent_z = 100
batch_size = 128
learning_rate = 0.001
epochs = 20
ctx = mx.gpu() if use_gpu else mx.cpu()
mnist = mx.test_utils.get_mnist()
flattened_training_data = mnist["train_data"].reshape(60000, 28*28)
data_iter = mx.io.NDArrayIter(flattened_training_data, mnist['train_label'], batch_size=batch_size, shuffle=True)
vae = VAE(latent_z=latent_z)
vae.collect_params().initialize(ctx=ctx)
vae_trainer = gluon.Trainer(vae.collect_params(), 'adam', {'learning_rate': learning_rate})
for epoch in range(epochs):
train_loss = 0
data_iter.reset()
for batch_index, batch in enumerate(data_iter):
data = batch.data[0]
with mx.autograd.record():
y, mu, lv = vae(data)
loss = vae_loss(data, y, mu, lv)
loss.backward()
train_loss += loss.asscalar()
vae_trainer.step(batch_size)
if batch_index % 100 == 0:
print('Epoch {}\tBatch {}\tLoss {}'.format(epoch, batch_index, train_loss))
```
# Error when ctx=mx.gpu()
```
---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
<ipython-input-7-4d9d0712ffcd> in <module>()
5 data = batch.data[0]
6 with mx.autograd.record():
----> 7 y, mu, lv = vae(data)
8 loss = vae_loss(data, y, mu, lv)
9 loss.backward()
/usr/local/lib/python3.5/dist-packages/mxnet/gluon/block.py in __call__(self, *args)
288 def __call__(self, *args):
289 """Calls forward. Only accepts positional arguments."""
--> 290 return self.forward(*args)
291
292 def forward(self, *args):
~/Programming/edge_repos/detexon/vaegan/VAE.py in forward(self, x)
64
65 def forward(self, x):
---> 66 mu, lv = self.enc(x)
67 z = sample_gaussian(mu, lv, self.batch_size, self.latent_z)
68 y = self.dec(z)
/usr/local/lib/python3.5/dist-packages/mxnet/gluon/block.py in __call__(self, *args)
288 def __call__(self, *args):
289 """Calls forward. Only accepts positional arguments."""
--> 290 return self.forward(*args)
291
292 def forward(self, *args):
~/Programming/edge_repos/detexon/vaegan/VAE.py in forward(self, x)
26
27 def forward(self, x):
---> 28 x = self.enc(x)
29 mu = self.mu(x)
30 lv = self.lv(x)
/usr/local/lib/python3.5/dist-packages/mxnet/gluon/block.py in __call__(self, *args)
288 def __call__(self, *args):
289 """Calls forward. Only accepts positional arguments."""
--> 290 return self.forward(*args)
291
292 def forward(self, *args):
/usr/local/lib/python3.5/dist-packages/mxnet/gluon/nn/basic_layers.py in forward(self, x)
48 def forward(self, x):
49 for block in self._children:
---> 50 x = block(x)
51 return x
52
/usr/local/lib/python3.5/dist-packages/mxnet/gluon/block.py in __call__(self, *args)
288 def __call__(self, *args):
289 """Calls forward. Only accepts positional arguments."""
--> 290 return self.forward(*args)
291
292 def forward(self, *args):
/usr/local/lib/python3.5/dist-packages/mxnet/gluon/block.py in forward(self, x, *args)
466 return self._call_cached_op(x, *args)
467 try:
--> 468 params = {i: j.data(ctx) for i, j in self._reg_params.items()}
469 except DeferredInitializationError:
470 self.infer_shape(x, *args)
/usr/local/lib/python3.5/dist-packages/mxnet/gluon/block.py in <dictcomp>(.0)
466 return self._call_cached_op(x, *args)
467 try:
--> 468 params = {i: j.data(ctx) for i, j in self._reg_params.items()}
469 except DeferredInitializationError:
470 self.infer_shape(x, *args)
/usr/local/lib/python3.5/dist-packages/mxnet/gluon/parameter.py in data(self, ctx)
359 NDArray on ctx
360 """
--> 361 return self._check_and_get(self._data, ctx)
362
363 def list_data(self):
/usr/local/lib/python3.5/dist-packages/mxnet/gluon/parameter.py in _check_and_get(self, arr_list, ctx)
148 else:
149 ctx = context.current_context()
--> 150 idx = self._ctx_map[ctx.device_typeid][ctx.device_id]
151 if idx is not None:
152 return arr_list[idx]
IndexError: list index out of range
```
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
With regards,
Apache Git Services