Posted to commits@mxnet.apache.org by gi...@git.apache.org on 2017/08/20 09:07:12 UTC

[GitHub] edmBernard commented on issue #7530: Network with shared layer

URL: https://github.com/apache/incubator-mxnet/issues/7530#issuecomment-323573051
 
 
   I tested the Gluon API to achieve this goal.
   Here is my code: https://github.com/edmBernard/mxnet_example_shared_weight/blob/master/demo_with_gluon.py
   ```python
   import mxnet as mx
   from mxnet import nd, autograd
   from mxnet import gluon
   import numpy as np
   
   ctx = mx.cpu()
   
   batch_size = 128
   num_inputs = 784
   num_outputs = 10
   
   # Get MNIST Data
   def transform(data, label):
       return data.astype(np.float32)/255, label.astype(np.float32)
   train_data1 = mx.gluon.data.DataLoader(mx.gluon.data.vision.MNIST(train=True, transform=transform), batch_size, shuffle=True)
   test_data1 = mx.gluon.data.DataLoader(mx.gluon.data.vision.MNIST(train=False, transform=transform), batch_size, shuffle=False)
   train_data2 = mx.gluon.data.DataLoader(mx.gluon.data.vision.MNIST(train=True, transform=transform), batch_size, shuffle=True)
   test_data2 = mx.gluon.data.DataLoader(mx.gluon.data.vision.MNIST(train=False, transform=transform), batch_size, shuffle=False)
   
   # shared trunk reused by both heads
   net_shared = gluon.nn.Sequential()
   with net_shared.name_scope():
       net_shared.add(gluon.nn.Dense(128, activation='relu'))
       net_shared.add(gluon.nn.Dense(64, activation='relu'))
   
   # task-specific head trained on dataset 1
   net_mod1 = gluon.nn.Sequential()
   with net_mod1.name_scope():
       net_mod1.add(gluon.nn.Dense(num_outputs, activation='relu'))
   
   # task-specific head trained on dataset 2
   net_mod2 = gluon.nn.Sequential()
   with net_mod2.name_scope():
       net_mod2.add(gluon.nn.Dense(num_outputs, activation='relu'))
   
   net_shared.collect_params().initialize(mx.init.Uniform(scale=0.1), ctx=ctx)
   net_mod1.collect_params().initialize(mx.init.Uniform(scale=0.1), ctx=ctx)
   net_mod2.collect_params().initialize(mx.init.Uniform(scale=0.1), ctx=ctx)
   
   softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()
   
   # one trainer per parameter set: the shared trunk and each head are stepped separately
   trainer_shared = gluon.Trainer(net_shared.collect_params(), 'sgd', {'learning_rate': 0.05})
   trainer_mod1 = gluon.Trainer(net_mod1.collect_params(), 'sgd', {'learning_rate': 0.05})
   trainer_mod2 = gluon.Trainer(net_mod2.collect_params(), 'sgd', {'learning_rate': 0.05})
   
   def evaluate_accuracy(data_iterator, net):
       acc = mx.metric.Accuracy()
       for i, (data, label) in enumerate(data_iterator):
           data = data.as_in_context(ctx).reshape((-1,784))
           label = label.as_in_context(ctx)
           output = net(data)
           predictions = nd.argmax(output, axis=1)
           acc.update(preds=predictions, labels=label)
       return acc.get()[1]
   
   epochs = 4
   moving_loss = 0.
   smoothing_constant = .01
   
   print("#### Before Training ####")
   test_accuracy = evaluate_accuracy(test_data1, lambda x: net_mod1(net_shared(x)))
   train_accuracy = evaluate_accuracy(train_data1, lambda x: net_mod1(net_shared(x)))
   print("Mod1: Train_acc %s, Test_acc %s" % (train_accuracy, test_accuracy))
   test_accuracy = evaluate_accuracy(test_data2, lambda x: net_mod2(net_shared(x)))
   train_accuracy = evaluate_accuracy(train_data2, lambda x: net_mod2(net_shared(x)))
   print("Mod2: Train_acc %s, Test_acc %s" % (train_accuracy, test_accuracy))
   
   print("\n#### Shared+Module1 Training ####")
   for e in range(epochs):
       # Train the shared trunk together with head mod1 on dataset 1
       for i, (data, label) in enumerate(train_data1):
           data = data.as_in_context(ctx).reshape((-1, 784))
           label = label.as_in_context(ctx)
           with autograd.record():
               output = net_mod1(net_shared(data))
               loss = softmax_cross_entropy(output, label)
           loss.backward()
           trainer_shared.step(batch_size)
           trainer_mod1.step(batch_size)
   
           curr_loss = nd.mean(loss).asscalar()
           moving_loss = (curr_loss if ((i == 0) and (e == 0))
                          else (1 - smoothing_constant) * moving_loss + (smoothing_constant) * curr_loss)
   
       test_accuracy = evaluate_accuracy(test_data1, lambda x: net_mod1(net_shared(x)))
       train_accuracy = evaluate_accuracy(train_data1, lambda x: net_mod1(net_shared(x)))
       print("Mod1: Epoch %s. Loss: %s, Train_acc %s, Test_acc %s" % (e, moving_loss, train_accuracy, test_accuracy))
   
   # We expect the shared trunk to start from where the Mod1 training finished
   # There should only be a small accuracy decrease, since just one layer (the Mod2 head) was not trained
   test_accuracy = evaluate_accuracy(test_data2, lambda x: net_mod2(net_shared(x)))
   train_accuracy = evaluate_accuracy(train_data2, lambda x: net_mod2(net_shared(x)))
   print("\n#### Shared+Module2 Result after Mod1 Training ####")
   print("Mod2: Train_acc %s, Test_acc %s" % (train_accuracy, test_accuracy))
   print("\n#### Shared+Module2 Training ####")
   for e in range(epochs):
       # Train the shared trunk together with head mod2 on dataset 2
       for i, (data, label) in enumerate(train_data2):
           data = data.as_in_context(ctx).reshape((-1,784))
           label = label.as_in_context(ctx)
           with autograd.record():
               output = net_mod2(net_shared(data))
               loss = softmax_cross_entropy(output, label)
           loss.backward()
           trainer_shared.step(batch_size)
           trainer_mod2.step(batch_size)
   
           curr_loss = nd.mean(loss).asscalar()
           moving_loss = (curr_loss if ((i == 0) and (e == 0))
                          else (1 - smoothing_constant) * moving_loss + (smoothing_constant) * curr_loss)
   
       test_accuracy = evaluate_accuracy(test_data2, lambda x: net_mod2(net_shared(x)))
       train_accuracy = evaluate_accuracy(train_data2, lambda x: net_mod2(net_shared(x)))
       print("Mod2: Epoch %s. Loss: %s, Train_acc %s, Test_acc %s" % (e, moving_loss, train_accuracy, test_accuracy))
   
   print("\n#### After Training ####")
   test_accuracy = evaluate_accuracy(test_data1, lambda x: net_mod1(net_shared(x)))
   train_accuracy = evaluate_accuracy(train_data1, lambda x: net_mod1(net_shared(x)))
   print("Mod1: Train_acc %s, Test_acc %s" % (train_accuracy, test_accuracy))
   test_accuracy = evaluate_accuracy(test_data2, lambda x: net_mod2(net_shared(x)))
   train_accuracy = evaluate_accuracy(train_data2, lambda x: net_mod2(net_shared(x)))
   print("Mod2: Train_acc %s, Test_acc %s" % (train_accuracy, test_accuracy))
   ```
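   
   Just a side note on how the sharing is wired up: since `net_shared` is a single block instance, the evaluation lambdas above could also be written by composing the trunk and each head into one `Sequential`. A minimal sketch (the `net1`/`net2` names are not in the linked script):
   
   ```python
   # Sketch (not in the script above): compose the trunk and each head into one
   # Sequential. Adding the same net_shared instance to both containers does not
   # copy its parameters, so net1 and net2 keep sharing the trunk weights.
   net1 = gluon.nn.Sequential()
   net1.add(net_shared)
   net1.add(net_mod1)
   
   net2 = gluon.nn.Sequential()
   net2.add(net_shared)
   net2.add(net_mod2)
   
   # e.g. evaluate_accuracy(test_data1, net1) instead of the lambda version
   ```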
   
   I got a strange result: the second network was not able to train correctly:
   ```
   #### Before Training ####
   Mod1: Train_acc 0.0809166666667, Test_acc 0.0823
   Mod2: Train_acc 0.109316666667, Test_acc 0.1107
   
   #### Shared+Module1 Training ####
   Mod1: Epoch 0. Loss: 0.545734875332, Train_acc 0.8884, Test_acc 0.8916
   Mod1: Epoch 1. Loss: 0.320320736991, Train_acc 0.912733333333, Test_acc 0.9177
   Mod1: Epoch 2. Loss: 0.27030488556, Train_acc 0.9227, Test_acc 0.9259
   Mod1: Epoch 3. Loss: 0.244677347026, Train_acc 0.936083333333, Test_acc 0.9359
   
   #### Shared+Module2 Result after Mod1 Training ####
   Mod2: Train_acc 0.101766666667, Test_acc 0.1
   
   #### Shared+Module2 Training ####
   Mod2: Epoch 0. Loss: 1.50810727371, Train_acc 0.38875, Test_acc 0.3831
   Mod2: Epoch 1. Loss: 1.48087985857, Train_acc 0.389916666667, Test_acc 0.3838
   Mod2: Epoch 2. Loss: 1.47309765809, Train_acc 0.390383333333, Test_acc 0.3848
   Mod2: Epoch 3. Loss: 1.46358907821, Train_acc 0.391316666667, Test_acc 0.386
   
   #### After Training ####
   Mod1: Train_acc 0.900266666667, Test_acc 0.9
   Mod2: Train_acc 0.391316666667, Test_acc 0.386
   ```
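   
   To double-check that the shared trunk really carries the Mod1 training over to the Mod2 branch, I would snapshot the trunk parameters before the first training loop and diff them afterwards. A rough debugging sketch (the `snapshot` helper is not part of the script above):
   
   ```python
   # Rough debugging sketch (not in the script above): snapshot the shared
   # trunk's parameters before the Mod1 training loop ...
   snapshot = {name: p.data().copy()
               for name, p in net_shared.collect_params().items()}
   
   # ... and compare after the loop: a delta of 0.0 for every parameter would
   # mean the trunk was never actually updated by trainer_shared.step().
   for name, p in net_shared.collect_params().items():
       delta = nd.abs(p.data() - snapshot[name]).sum().asscalar()
       print(name, delta)
   ```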
   
   
 