You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mxnet.apache.org by GitBox <gi...@apache.org> on 2017/11/23 09:43:28 UTC
[GitHub] SumNeuron opened a new issue #8794: GPU throws out of index error?

SumNeuron opened a new issue #8794: GPU throws out of index error?
URL: https://github.com/apache/incubator-mxnet/issues/8794
 
 
   ## Description
   (Brief description of the problem in no more than 2 sentences.)
   I have two files:
    - VAE.py which defines a variational auto encoder
    - vae_test.py which loads these classes and runs mnist on it
   
   now when I set the context to cpu things run.
   when I set the context to gpu things break (error and files pasted below).
   I have previously used mxnet with gpus on this machine. So I am a bit confused...
   
   
   # VAE.py
   ```
   import mxnet as mx
   from mxnet import nd, gluon, autograd
   from mxnet.gluon import nn, utils
   
   
   def sample_gaussian(mu, lv, batch_size, latent_z):
       epsilon = nd.random_normal(0, 1, shape=(batch_size, latent_z))
       sigma = nd.sqrt(nd.exp(lv))
       z = mu + nd.multiply(sigma, epsilon)
       return z
   
   class VAEEncoder(gluon.Block):
       def __init__(self, latent_z=100, **kwargs):
           super(VAEEncoder, self).__init__(**kwargs)
           with self.name_scope():
               self.enc = nn.Sequential()
               with self.enc.name_scope():
                   self.enc.add(nn.Dense(28*28, activation='relu'))
                   self.enc.add(nn.Dense(128, activation='relu'))
                   self.enc.add(nn.Activation(activation='tanh'))
   
               self.mu = nn.Dense(latent_z) # mu = mean
               self.lv = nn.Dense(latent_z) # lv = log variance
   
       def forward(self, x):
           x = self.enc(x)
           mu = self.mu(x)
           lv = self.lv(x)
           return mu, lv
   
       def net_init(self, ctx):
           self.enc.initialize(ctx=ctx)
   
   
   class VAEDecoder(gluon.Block):
       def __init__(self, latent_z=100, **kwargs):
           super(VAEDecoder, self).__init__(**kwargs)
           with self.name_scope():
               self.dec = nn.Sequential()
               self.dec.add(nn.Dense(128, in_units=latent_z, activation='relu'))
               self.dec.add(nn.Dense(28*28))
               self.dec.add(nn.Activation(activation='tanh'))
   
   
       def forward(self, x):
           x = self.dec(x)
           return x
   
       def net_init(self, ctx):
           self.dec.initialize(ctx=ctx)
   
   class VAE(gluon.Block):
       def __init__(self, latent_z=100, batch_size=1,  **kwargs):
           super(VAE, self).__init__(**kwargs)
           with self.name_scope():
               self.enc = VAEEncoder(latent_z=latent_z)
               self.dec = VAEDecoder(latent_z=latent_z)
               self.latent_z = latent_z
               self.batch_size = batch_size
   
       def forward(self, x):
           mu, lv = self.enc(x)
           z = sample_gaussian(mu, lv, self.batch_size, self.latent_z)
           y = self.dec(z)
           return y, mu, lv
   
   
   def vae_loss(x, y, mu, lv):
       l2 = gluon.loss.L2Loss()
       bce = l2(y, x) # MSE loss
       bce = nd.sum(bce)
       # loss = 0.5 sum(1-log(sigma^2)+mu^2+sigma^2)
       kld_el = (nd.power(mu, 2) + nd.exp(lv)) * -1 + 1 + lv
       kld = nd.sum(kld_el) * (-0.5)
       return bce + kld
   
   
   ```
   
   # vae_test.py
   ```
   import mxnet as mx
   from mxnet import nd, gluon, autograd
   from mxnet.gluon import nn, utils
   
   from VAE import VAE, vae_loss
   
   
   use_gpu = False
   
   latent_z = 100
   batch_size = 128
   learning_rate = 0.001
   epochs = 20
   
   ctx = mx.gpu() if use_gpu else mx.cpu()
   
   
   mnist = mx.test_utils.get_mnist()
   flattened_training_data = mnist["train_data"].reshape(60000, 28*28)
   data_iter = mx.io.NDArrayIter(flattened_training_data, mnist['train_label'], batch_size=batch_size, shuffle=True)
   
   vae = VAE(latent_z=latent_z)
   vae.collect_params().initialize(ctx=ctx)
   
   vae_trainer = gluon.Trainer(vae.collect_params(), 'adam', {'learning_rate': learning_rate})
   
   
   for epoch in range(epochs):
       train_loss = 0
       data_iter.reset()
       for batch_index, batch in enumerate(data_iter):
           data = batch.data[0]
           with mx.autograd.record():
               y, mu, lv = vae(data)
               loss = vae_loss(data, y, mu, lv)
           loss.backward()
           train_loss += loss.asscalar()
           vae_trainer.step(batch_size)
           
           if batch_index % 100 == 0:
               print('Epoch {}\tBatch {}\tLoss {}'.format(epoch, batch_index, train_loss))
           
           
   ```
   # Error when ctx=mx.gpu()
   ```
   ---------------------------------------------------------------------------
   IndexError                                Traceback (most recent call last)
   <ipython-input-7-4d9d0712ffcd> in <module>()
         5         data = batch.data[0]
         6         with mx.autograd.record():
   ----> 7             y, mu, lv = vae(data)
         8             loss = vae_loss(data, y, mu, lv)
         9         loss.backward()
   
   /usr/local/lib/python3.5/dist-packages/mxnet/gluon/block.py in __call__(self, *args)
       288     def __call__(self, *args):
       289         """Calls forward. Only accepts positional arguments."""
   --> 290         return self.forward(*args)
       291 
       292     def forward(self, *args):
   
   ~/Programming/edge_repos/detexon/vaegan/VAE.py in forward(self, x)
        64 
        65     def forward(self, x):
   ---> 66         mu, lv = self.enc(x)
        67         z = sample_gaussian(mu, lv, self.batch_size, self.latent_z)
        68         y = self.dec(z)
   
   /usr/local/lib/python3.5/dist-packages/mxnet/gluon/block.py in __call__(self, *args)
       288     def __call__(self, *args):
       289         """Calls forward. Only accepts positional arguments."""
   --> 290         return self.forward(*args)
       291 
       292     def forward(self, *args):
   
   ~/Programming/edge_repos/detexon/vaegan/VAE.py in forward(self, x)
        26 
        27     def forward(self, x):
   ---> 28         x = self.enc(x)
        29         mu = self.mu(x)
        30         lv = self.lv(x)
   
   /usr/local/lib/python3.5/dist-packages/mxnet/gluon/block.py in __call__(self, *args)
       288     def __call__(self, *args):
       289         """Calls forward. Only accepts positional arguments."""
   --> 290         return self.forward(*args)
       291 
       292     def forward(self, *args):
   
   /usr/local/lib/python3.5/dist-packages/mxnet/gluon/nn/basic_layers.py in forward(self, x)
        48     def forward(self, x):
        49         for block in self._children:
   ---> 50             x = block(x)
        51         return x
        52 
   
   /usr/local/lib/python3.5/dist-packages/mxnet/gluon/block.py in __call__(self, *args)
       288     def __call__(self, *args):
       289         """Calls forward. Only accepts positional arguments."""
   --> 290         return self.forward(*args)
       291 
       292     def forward(self, *args):
   
   /usr/local/lib/python3.5/dist-packages/mxnet/gluon/block.py in forward(self, x, *args)
       466                     return self._call_cached_op(x, *args)
       467                 try:
   --> 468                     params = {i: j.data(ctx) for i, j in self._reg_params.items()}
       469                 except DeferredInitializationError:
       470                     self.infer_shape(x, *args)
   
   /usr/local/lib/python3.5/dist-packages/mxnet/gluon/block.py in <dictcomp>(.0)
       466                     return self._call_cached_op(x, *args)
       467                 try:
   --> 468                     params = {i: j.data(ctx) for i, j in self._reg_params.items()}
       469                 except DeferredInitializationError:
       470                     self.infer_shape(x, *args)
   
   /usr/local/lib/python3.5/dist-packages/mxnet/gluon/parameter.py in data(self, ctx)
       359         NDArray on ctx
       360         """
   --> 361         return self._check_and_get(self._data, ctx)
       362 
       363     def list_data(self):
   
   /usr/local/lib/python3.5/dist-packages/mxnet/gluon/parameter.py in _check_and_get(self, arr_list, ctx)
       148                 else:
       149                     ctx = context.current_context()
   --> 150             idx = self._ctx_map[ctx.device_typeid][ctx.device_id]
       151             if idx is not None:
       152                 return arr_list[idx]
   
   IndexError: list index out of range
   ```
   

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services