Posted to commits@mxnet.apache.org by GitBox <gi...@apache.org> on 2018/01/13 01:55:29 UTC
[GitHub] fedorzh opened a new issue #9410: Training with the same parameters and seed gets significantly different results
URL: https://github.com/apache/incubator-mxnet/issues/9410
Training twice with the same parameters and seeds yields significantly different results. Repro:
```
# coding: utf-8
# In[1]:
import mxnet as mx
from mxnet import nd, gluon, autograd, ndarray
import numpy as np
import random
def transform(data, label):
    return [dat.astype(np.float32) for dat in data], [lab.astype(np.float32) for lab in label]

train_cifar_gluon = gluon.data.vision.CIFAR10(train=True, transform=transform)
test_cifar_gluon = gluon.data.vision.CIFAR10(train=False, transform=transform)

def convert_gluon_dataset_to_numpy(data):
    ds = data[:][0][0].shape
    X = np.empty((len(data[:][0]), ds[2], ds[0], ds[1]), dtype=np.float32)
    for i, example in enumerate(data[:][0]):
        X[i, :] = np.rollaxis(example.asnumpy(), 2)  # HWC -> CHW
    y = np.array(data[:][1])
    return X, y
X, y = convert_gluon_dataset_to_numpy(train_cifar_gluon)
X_test, y_test = convert_gluon_dataset_to_numpy(test_cifar_gluon)
# In[2]:
def predict_scores(net, X_, batch_size, context):
    scores = None
    test_loaded = gluon.data.DataLoader(mx.nd.array(X_), batch_size, shuffle=False)
    for data in test_loaded:
        data = data.as_in_context(context)
        output = net(data).asnumpy()
        if scores is None:
            scores = output
        else:
            scores = np.append(scores, output, axis=0)
    return scores
# In[3]:
gpu_count = 1
_ctx_list = [mx.gpu(i) for i in range(gpu_count)]
_batch_size = 64
epochs = 1
_seed = 42
_optimizer = 'sgd'
_learning_rate = 0.1
_xavier_magnitude = 2.
_momentum = 0.9
_wd = 0.0001
_nclasses = 10
# ### Try 1
# In[4]:
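# Re-seed the Python, MXNet, and NumPy RNGs so that weight initialization and
# any other random draws are repeated identically in both tries.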
random.seed(_seed)
mx.random.seed(_seed)
np.random.seed(_seed)
# In[5]:
net = gluon.model_zoo.vision.get_model('resnet34_v2', pretrained=False, classes=_nclasses, ctx=_ctx_list)
loss = gluon.loss.SoftmaxCrossEntropyLoss()
# In[6]:
net.collect_params().initialize(mx.init.Xavier(magnitude=_xavier_magnitude), ctx=_ctx_list, force_reinit=True)
trainer = gluon.Trainer(net.collect_params(), _optimizer,
                        optimizer_params=dict(learning_rate=_learning_rate, momentum=_momentum,
                                              wd=_wd),
                        kvstore='device' if len(_ctx_list) > 0 else 'local')
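# NB: len(_ctx_list) > 0 is always true, so the 'local' kvstore branch above
# is unreachable; 'device' aggregates gradients on the GPU(s).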
train_data = mx.io.NDArrayIter(X, label=y, batch_size=_batch_size)
for e in range(epochs):
    train_data.reset()
    for batch in train_data:
        cur_contexts = _ctx_list
        if batch.data[0].shape[0] < len(_ctx_list):
            cur_contexts = cur_contexts[:batch.data[0].shape[0]]
        data = gluon.utils.split_and_load(batch.data[0], ctx_list=cur_contexts, batch_axis=0, even_split=False)
        label = gluon.utils.split_and_load(batch.label[0], ctx_list=cur_contexts, batch_axis=0, even_split=False)
        Ls = []
        with autograd.record():  # start recording the derivatives
            for x_cur, y_cur in zip(data, label):
                L = loss(net(x_cur), y_cur)
                # Store the loss and do backward after we have done forward
                # on all GPUs for better speed on multiple GPUs.
                Ls.append(L)
        for L in Ls:
            L.backward()
        trainer.step(batch.data[0].shape[0])
    scores_test = predict_scores(net, X_test, _batch_size, _ctx_list[0])
    predictions_test = np.argmax(scores_test, axis=1)
    accuracy = np.mean(predictions_test == y_test)
    print('[Epoch %d] accuracy=%f' % (e, accuracy))
# ### Try 2
# In[7]:
random.seed(_seed)
mx.random.seed(_seed)
np.random.seed(_seed)
# In[8]:
net = gluon.model_zoo.vision.get_model('resnet34_v2', pretrained=False, classes=_nclasses, ctx=_ctx_list)
loss = gluon.loss.SoftmaxCrossEntropyLoss()
# In[9]:
net.collect_params().initialize(mx.init.Xavier(magnitude=_xavier_magnitude), ctx=_ctx_list, force_reinit=True)
trainer = gluon.Trainer(net.collect_params(), _optimizer,
                        optimizer_params=dict(learning_rate=_learning_rate, momentum=_momentum,
                                              wd=_wd),
                        kvstore='device' if len(_ctx_list) > 0 else 'local')
train_data = mx.io.NDArrayIter(X, label=y, batch_size=_batch_size)
for e in range(epochs):
    train_data.reset()
    for batch in train_data:
        cur_contexts = _ctx_list
        if batch.data[0].shape[0] < len(_ctx_list):
            cur_contexts = cur_contexts[:batch.data[0].shape[0]]
        data = gluon.utils.split_and_load(batch.data[0], ctx_list=cur_contexts, batch_axis=0, even_split=False)
        label = gluon.utils.split_and_load(batch.label[0], ctx_list=cur_contexts, batch_axis=0, even_split=False)
        Ls = []
        with autograd.record():  # start recording the derivatives
            for x_cur, y_cur in zip(data, label):
                L = loss(net(x_cur), y_cur)
                # Store the loss and do backward after we have done forward
                # on all GPUs for better speed on multiple GPUs.
                Ls.append(L)
        for L in Ls:
            L.backward()
        trainer.step(batch.data[0].shape[0])
    scores_test = predict_scores(net, X_test, _batch_size, _ctx_list[0])
    predictions_test = np.argmax(scores_test, axis=1)
    accuracy = np.mean(predictions_test == y_test)
    print('[Epoch %d] accuracy=%f' % (e, accuracy))
```
Output:
```
[Epoch 0] accuracy=0.346900
[Epoch 0] accuracy=0.281900
```
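For reference, one plausible source of run-to-run variation on GPU, independent of the Python/NumPy/MXNet seeds set above, is cuDNN: autotune picks convolution algorithms by timing (so the choice can change between runs), and some backward kernels are non-deterministic. Below is a minimal diagnostic sketch, not part of the repro, assuming a single GPU and the same model settings as above; `MXNET_CUDNN_AUTOTUNE_DEFAULT` is MXNet's documented autotune switch, everything else mirrors the code above:
```
# Hedged diagnostic sketch: separate seeding problems from kernel nondeterminism.
import os
os.environ['MXNET_CUDNN_AUTOTUNE_DEFAULT'] = '0'  # disable timing-based conv algo selection

import random
import numpy as np
import mxnet as mx
from mxnet import gluon

def fresh_net(seed, ctx):
    # Re-seed all three generators and rebuild/re-init the net, as in the repro.
    random.seed(seed)
    mx.random.seed(seed)
    np.random.seed(seed)
    net = gluon.model_zoo.vision.get_model('resnet34_v2', pretrained=False,
                                           classes=10, ctx=ctx)
    net.collect_params().initialize(mx.init.Xavier(magnitude=2.), ctx=ctx,
                                    force_reinit=True)
    return net

ctx = mx.gpu(0)
x = mx.nd.random.uniform(shape=(1, 3, 32, 32), ctx=ctx)  # fixed probe input

# If these outputs differ, seeding itself is not reproducible; if they match
# but full training still diverges, suspect non-deterministic cuDNN kernels.
out1 = fresh_net(42, ctx)(x).asnumpy()
out2 = fresh_net(42, ctx)(x).asnumpy()
print('max abs diff after identical seeding: %g' % np.abs(out1 - out2).max())
```
If disabling autotune alone makes the two training runs agree, the divergence was algorithm selection rather than seeding.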