You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@mxnet.apache.org by GitBox <gi...@apache.org> on 2020/11/10 01:23:23 UTC
[GitHub] [incubator-mxnet] wy3406 commented on issue #19498: SyncBN causes the memory to gradually increase with iteration
wy3406 commented on issue #19498:
URL: https://github.com/apache/incubator-mxnet/issues/19498#issuecomment-724387973
@leezu In the following example, nvidia-smi shows that GPU memory usage slowly grows as the iterations progress:
```
from tqdm import tqdm
from mxnet import gluon, autograd
from mxnet.gluon import nn
from mxnet.gluon.data import dataset
from gluoncv.utils.parallel import DataParallelCriterion,DataParallelModel
import mxnet.ndarray as nd
import mxnet as mx
import numpy as np
class Activation(nn.HybridBlock):
    """Activation function used in MobileNetV3.

    Wraps exactly one activation layer, selected by name, and applies it in
    the forward pass.

    Parameters
    ----------
    act_func : str
        Name of the activation: "relu", "relu6", "hard_sigmoid", "swish"
        or "leaky".
    **kwargs
        Forwarded unchanged to the parent block constructor.

    Raises
    ------
    NotImplementedError
        If ``act_func`` matches none of the names above.
    """

    def __init__(self, act_func, **kwargs):
        super(Activation, self).__init__(**kwargs)
        # Ordered (name, factory) pairs. Factories are lazy so that only the
        # requested layer is ever instantiated.
        # NOTE(review): ReLU6 and HardSigmoid are neither defined nor imported
        # in the visible snippet; selecting "relu6" or "hard_sigmoid" needs
        # them to be provided elsewhere -- confirm.
        layer_factories = (
            ("relu", lambda: nn.Activation('relu')),
            ("relu6", lambda: ReLU6()),
            ("hard_sigmoid", lambda: HardSigmoid()),
            ("swish", lambda: nn.Swish()),
            ("leaky", lambda: nn.LeakyReLU(alpha=0.375)),
        )
        for layer_name, make_layer in layer_factories:
            if act_func == layer_name:
                self.act = make_layer()
                break
        else:
            # No known name matched.
            raise NotImplementedError

    def hybrid_forward(self, F, x):
        """Apply the selected activation layer to ``x`` and return the result."""
        activated = self.act(x)
        return activated
def ConvBlock(in_channels, out_channels,
              kernel_size=1, strides=1, padding=0, num_groups=1,
              use_act=True, act_type='relu',
              name_prefix='ConvBlock_Act_',
              use_bias=False,
              conv2d=nn.Conv2D,
              norm_layer=nn.BatchNorm, norm_kwargs=None):
    """Build a convolution -> normalization (-> activation) sequential block.

    Parameters
    ----------
    in_channels, out_channels : int
        Channel counts passed to the convolution; ``out_channels`` is also
        passed to the normalization layer as its ``in_channels``.
    kernel_size, strides, padding : int
        Passed straight through to the convolution layer.
    num_groups : int
        Passed to the convolution layer as ``groups``.
    use_act : bool
        When true, an ``Activation(act_type)`` layer is appended.
    act_type : str
        Activation name handed to ``Activation``.
    name_prefix : str
        Accepted for interface compatibility; not used by this function.
    use_bias : bool
        Passed to the convolution layer.
    conv2d : callable
        Factory for the convolution layer.
    norm_layer : callable
        Factory for the normalization layer.
    norm_kwargs : dict or None
        Extra keyword arguments for ``norm_layer``; ``None`` means none.

    Returns
    -------
    nn.HybridSequential
        The assembled block.
    """
    extra_norm_args = norm_kwargs if norm_kwargs is not None else {}

    block = nn.HybridSequential()
    with block.name_scope():
        conv = conv2d(
            in_channels=in_channels,
            channels=out_channels,
            kernel_size=kernel_size,
            strides=strides,
            padding=padding,
            use_bias=use_bias,
            groups=num_groups,
        )
        norm = norm_layer(in_channels=out_channels, **extra_norm_args)
        block.add(conv, norm)
        if use_act:
            block.add(Activation(act_type))
    return block
class Net(nn.HybridBlock):
    """Six 3x3 ConvBlock stages, global average pooling, and a 1-unit dense head.

    Parameters
    ----------
    norm_layer : callable
        Normalization layer factory forwarded to every ``ConvBlock``.
    norm_kwargs : dict or None
        Extra keyword arguments forwarded to every ``ConvBlock`` for its
        normalization layer.
    """

    def __init__(self, norm_layer, norm_kwargs):
        super(Net, self).__init__(prefix='')

        # (in_channels, out_channels, strides) of each stage, in network order.
        # Every stage shares kernel_size=3, padding=1, a single group, ReLU
        # activation and no convolution bias.
        stage_specs = (
            (3, 256, 1),
            (256, 512, 2),
            (512, 512, 2),
            (512, 512, 2),
            (512, 1024, 2),
            (1024, 1024, 2),
        )

        self.features = nn.HybridSequential()
        # Build all stages first, then register them in one call.
        stages = [
            ConvBlock(
                stage_in, stage_out,
                kernel_size=3, strides=stage_stride, padding=1, num_groups=1,
                use_act=True, act_type='relu',
                name_prefix='ConvBlock_Act_',
                use_bias=False,
                conv2d=nn.Conv2D,
                norm_layer=norm_layer, norm_kwargs=norm_kwargs,
            )
            for stage_in, stage_out, stage_stride in stage_specs
        ]
        self.features.add(*stages)
        self.features.add(nn.GlobalAvgPool2D())
        self.features.add(nn.Flatten())

        # Final projection from the 1024 pooled features to a single output.
        self.fc = nn.Dense(1, in_units=1024, use_bias=False)

    def hybrid_forward(self, F, x):
        """Run ``x`` through the feature stack and then the dense head."""
        return self.fc(self.features(x))
class TestData(dataset.Dataset):
    """Synthetic dataset that produces a fresh random sample on every access.

    Each item is an ``(input, target)`` pair of float32 NDArrays: the input
    has shape (3, 512, 512) and the target has shape (1,), both drawn from a
    standard normal distribution. The requested index is ignored.
    """

    def __init__(self,):
        # Must be an int: ``len()`` raises TypeError when ``__len__`` returns
        # a float, which the previous value ``1e5`` did.
        self.Number = 100000

    def __len__(self):
        """Return the nominal number of samples in the dataset."""
        return self.Number

    def __getitem__(self, idx):
        """Return a newly generated ``(input, target)`` pair; ``idx`` is unused."""
        inp, tag = self.gen_data()
        inp = nd.array(inp, dtype=np.float32)
        tag = nd.array(tag, dtype=np.float32)
        return inp, tag

    def gen_data(self):
        """Draw one random (3, 512, 512) input and one (1,) target as NumPy arrays."""
        X = np.random.randn(3, 512, 512)
        Y = np.random.randn(1)
        return X, Y
# ---- Device and batch configuration ----
ngpus = 4
_ctx = [mx.gpu(device_id) for device_id in range(ngpus)]
_batch_size = 20
norm_kwargs = {'num_devices': ngpus}
usesyncbn = True

# ---- Model and loss, each wrapped for multi-device execution ----
model = Net(norm_layer=mx.gluon.contrib.nn.SyncBatchNorm,
            norm_kwargs=norm_kwargs)
model.initialize(mx.init.MSRAPrelu(), ctx=_ctx)
net = DataParallelModel(model, _ctx, usesyncbn)
criterion = DataParallelCriterion(mx.gluon.loss.L1Loss(), _ctx, usesyncbn)

# ---- Optimizer over all parameters of the wrapped module ----
update_params = net.module.collect_params()
optimizer = mx.gluon.Trainer(
    update_params,
    'adam',
    {'learning_rate': 0.001},
    mx.kvstore.create(),
)

# ---- Synthetic data pipeline ----
train_dataset = TestData()
train_data = gluon.data.DataLoader(
    train_dataset,
    _batch_size,
    shuffle=True,
    last_batch='rollover',
    num_workers=4,
    pin_memory=False,
)

# ---- Training loop: 1000 passes over the loader ----
for j in range(1000):
    tbar = tqdm(train_data)
    for i, idatas in enumerate(tbar):
        with autograd.record(True):
            ipt, targ = idatas
            oupt = net(ipt)
            losses = criterion(oupt, targ)
            # Wait for all pending operations before running backward.
            mx.nd.waitall()
            autograd.backward(losses)
        optimizer.step(_batch_size)
        tbar.set_description()
        mx.nd.waitall()
```
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@mxnet.apache.org
For additional commands, e-mail: issues-help@mxnet.apache.org