You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mxnet.apache.org by GitBox <gi...@apache.org> on 2020/08/18 03:19:30 UTC
[GitHub] [incubator-mxnet] kohillyang edited a comment on issue #18902: Got "kFlag == type_flag_: TBlob.get_with_shape: data type do not match specified type.Expected: 0 v.s. given 2" when training with amp.
kohillyang edited a comment on issue #18902:
URL: https://github.com/apache/incubator-mxnet/issues/18902#issuecomment-671957553
@szha The following code reproduces the above error.
```python
from __future__ import print_function
import mxnet as mx
import mxnet.autograd as ag
import numpy as np
import gluoncv
class resnet(mx.gluon.nn.HybridBlock):
    """Wrap GluonCV's ResNet-50 v1b and expose the output of its last stage.

    The wrapped network is created with ``pretrained=False``. The forward
    pass moves the last input axis to position 1 and divides by 255 before
    running the backbone stages.
    """

    def __init__(self):
        super(resnet, self).__init__()
        self.feat = gluoncv.model_zoo.resnet50_v1b(pretrained=False)

    def hybrid_forward(self, F, x):
        """Return the ``layer4`` output of the backbone for input ``x``.

        ``x`` is transposed with axes ``(0, 3, 1, 2)`` (the caller in this
        script passes a ``(1, 368, 368, 3)`` array), scaled by ``1 / 255.0``
        and then pushed through the stem and the four residual stages.
        """
        channels_first = F.transpose(x, (0, 3, 1, 2))
        out = channels_first / 255.0

        backbone = self.feat
        # Stem (conv1 -> bn1 -> relu -> maxpool) followed by layer1..layer4,
        # applied strictly in this order.
        stages = (
            backbone.conv1,
            backbone.bn1,
            backbone.relu,
            backbone.maxpool,
            backbone.layer1,
            backbone.layer2,
            backbone.layer3,
            backbone.layer4,
        )
        for stage in stages:
            out = stage(out)
        return out
def train_net():
    """Run a single AMP training step on ``resnet`` to reproduce the reported error.

    The sequence is: seed the RNGs, build and initialise the network on
    ``mx.gpu(0)``, call ``amp.init()`` and cast the network to float16,
    create an SGD trainer with ``multi_precision`` enabled, register it with
    AMP, then do one forward/backward pass on an all-zero batch and one
    ``trainer.step``. Takes no arguments and returns nothing.
    """
    seed = 3
    mx.random.seed(seed)
    np.random.seed(seed)

    ctx_list = [mx.gpu(0)]
    net = resnet()
    net.initialize()
    net.collect_params().reset_ctx(list(set(ctx_list)))

    # AMP is switched on unconditionally in this reproduction.
    from mxnet.contrib import amp
    amp.init()
    net.cast("float16")
    # Left disabled in the original report:
    # net.collect_params('.*batchnorm.*').setattr('dtype', 'float32')

    params = net.collect_params()  # fix batchnorm, fix first stage, etc...
    optimizer_params = {
        'wd': 1e-4,
        'momentum': .9,
        'clip_gradient': None,
        'lr_scheduler': None,
        'multi_precision': True,
    }
    trainer = mx.gluon.Trainer(
        params,
        'sgd',
        optimizer_params,
        update_on_kvstore=False,
        kvstore=mx.kvstore.create('local'),
    )
    amp.init_trainer(trainer)

    with ag.record():
        data = mx.nd.zeros(shape=(1, 368, 368, 3), ctx=ctx_list[0])
        fpn_predictions = net(data)
        # Collapse everything after the first two axes of each slice, then
        # join the slices along axis 2.
        flattened = [x.reshape((0, 0, -1)) for x in fpn_predictions]
        preds = mx.nd.concat(*flattened, dim=2)
        with amp.scale_loss(preds.sum(), trainer) as scaled_losses:
            scaled_losses.backward()
    trainer.step(1, ignore_stale_grad=False)
# Script entry point: run the reproduction only when executed directly.
if __name__ == "__main__":
    train_net()
```
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
users@infra.apache.org