Posted to commits@mxnet.apache.org by GitBox <gi...@apache.org> on 2018/12/14 17:08:35 UTC

[GitHub] kaleidoscopical edited a comment on issue #13650: speed drop when set `multi_precision=True`

URL: https://github.com/apache/incubator-mxnet/issues/13650#issuecomment-447388839
 
 
   A simplified version of the script I tried is posted below. I think the only modification required before running it is to change `path_imgrec` and `path_imgidx`. Throughput drops from ~800 img/sec to ~650 img/sec when training on four 1080 Ti GPUs with `multi_precision=True` (a minimal A/B harness is sketched after the script).
   
   ```python
   import mxnet as mx
   import numpy as np
   import argparse
   import logging
   
   class ResNetV1D(object):
   	def __init__(self, num_outputs=1000, workspace=1024, 
   				 bn_mom=0.9, bn_eps=1e-5):
   		super(ResNetV1D, self).__init__()
   		self.num_outputs = num_outputs
   		self.workspace = workspace
   		self.bn_mom = bn_mom
   		self.bn_eps = bn_eps
   
   	def conv3x3(self, data, name, num_filter, stride=None):
   		if stride is None:
   			output = mx.sym.Convolution(data=data, num_filter=num_filter, 
   										kernel=(3, 3), stride=(1,1), pad=(1, 1),
   										no_bias=True, name=name, workspace=self.workspace)
   		else:
   			assert isinstance(stride, int), "stride should be int"
   			output = mx.sym.Convolution(data=data, num_filter=num_filter, 
   										kernel=(3, 3), stride=(stride, stride), pad=(1, 1),
   										no_bias=True, name=name, workspace=self.workspace)
   		return output
   
   	def conv1x1(self, data, name, num_filter):
   		output = mx.sym.Convolution(data=data, num_filter=num_filter, 
   									kernel=(1, 1), stride=(1, 1), pad=(0, 0),
   									no_bias=True, name=name, workspace=self.workspace)
   		return output
   
   	def bn(self, data, name, fix_gamma=False, last_gamma=False):
   		# config gamma and beta
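   		# last_gamma=True zero-initializes gamma, so the last BN of each residual block starts the block as (near) identity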
   		if not last_gamma:
   			gamma = mx.sym.Variable(name=name+'_gamma', wd_mult=0)
   		else:
   			gamma = mx.sym.Variable(name=name+'_gamma', wd_mult=0, init=mx.init.Zero())
   		beta = mx.sym.Variable(name=name+'_beta', wd_mult=0)
   		# run bn
   		output = mx.sym.BatchNorm(data=data, gamma=gamma, beta=beta,
   								  fix_gamma=fix_gamma, eps=self.bn_eps, 
   								  momentum=self.bn_mom, name=name)
   		return output
   
   	def relu(self, data, name):
   		output = mx.sym.Activation(data=data, act_type='relu', name=name)
   		return output
   
   	def pooling(self, data, name, pool_type, kernel=2, stride=2, pad=0, global_pool=False):
   		if not global_pool:
   			assert isinstance(kernel, int), "kernel should be int"
   			assert isinstance(stride, int), "stride should be int"
   			assert isinstance(pad, int), "pad should be int"
   			output = mx.symbol.Pooling(data=data, pool_type=pool_type,
   									   kernel=(kernel, kernel), stride=(stride, stride),
   									   pad=(pad, pad), name=name)
   		else:
   			output = mx.symbol.Pooling(data=data, global_pool=True, 
   									   kernel=(kernel, kernel),
   									   pool_type=pool_type, name=name)
   		return output
   
   	def fc(self, data, name, num_hidden):
   		bias = mx.sym.Variable(name=name+'_bias', wd_mult=0)
   		output = mx.symbol.FullyConnected(data=data, bias=bias,
   										  num_hidden=num_hidden, name=name)
   		return output
   
   	def build_stem(self, data):
   		# conv1
   		output = self.conv3x3(data=data, name="stage0_conv1", num_filter=32, stride=2)
   		output = self.bn(output, name="stage0_bn1")
   		output = self.relu(output, name="stage0_relu1")
   		# conv2
   		output = self.conv3x3(data=output, name="stage0_conv2", num_filter=32)
   		output = self.bn(output, name="stage0_bn2")
   		output = self.relu(output, name="stage0_relu2")
   		# conv3
   		output = self.conv3x3(data=output, name="stage0_conv3", num_filter=64)
   		output = self.bn(output, name="stage0_bn3")
   		output = self.relu(output, name="stage0_relu3")
   		# max_pooling
   		output = self.pooling(data=output, name="stage0_max_pooling",
   							  pool_type='max', kernel=3, stride=2, pad=1)
   		return output
   
   	def build_block(self, data, name, channel_num, stride=1, down_sample=False):
   		# downsample
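   		# ResNet-D shortcut: 2x2 average pooling followed by a 1x1 conv, instead of a strided 1x1 conv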
   		if down_sample:
   			if stride != 1:
   				downsample = self.pooling(data, name=name+"_avg_pool",
   										  pool_type='avg', kernel=2, stride=2, pad=0)
   			else:
   				downsample = data
   			downsample = self.conv1x1(downsample, name=name+"_downsample_conv", 
   									  num_filter=channel_num*4)
   			downsample = self.bn(downsample, name=name+"_downsample_bn")
   		else:
   			downsample = data
   		# conv1
   		output = self.conv1x1(data, name=name+"_conv1", num_filter=channel_num)
   		output = self.bn(output, name=name+"_bn1")
   		output = self.relu(output, name=name+"_relu1")
   		# conv2
   		output = self.conv3x3(output, name=name+"_conv2", 
   							  num_filter=channel_num, stride=stride)
   		output = self.bn(output, name=name+"_bn2")
   		output = self.relu(output, name=name+"_relu2")
   		# conv3
   		output = self.conv1x1(output, name=name+"_conv3", num_filter=channel_num*4)
   		output = self.bn(output, name=name+"_bn3", last_gamma=True)
   		output = self.relu(output + downsample, name=name+"_relu3")
   
   		return output
   
   	def build_stage(self, data, name, channel_num, block_num, stride=2):
   		# config
   		assert isinstance(name, str), "name should be str"
   		# first block
   		output = self.build_block(data, name=name+"_block1", channel_num=channel_num, 
   								  stride=stride, down_sample=True)
   		# rest block
   		for i in range(1, block_num):
   			output = self.build_block(output, name=name+"_block"+str(i+1),
   									  channel_num=channel_num)
   		return output
   
   	def build(self):
   		data = mx.sym.Variable(name="data")
   		label = mx.sym.Variable(name="softmax_label")
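   		# cast the input to fp16 so the network body runs in half precision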
   		data = mx.sym.Cast(data=data, dtype=np.float16)
   		stage0 = self.build_stem(data)
   		stage1 = self.build_stage(stage0, name="stage1", channel_num=64, block_num=3, stride=1)
   		stage2 = self.build_stage(stage1, name="stage2", channel_num=128, block_num=4)
   		stage3 = self.build_stage(stage2, name="stage3", channel_num=256, block_num=6)
   		stage4 = self.build_stage(stage3, name="stage4", channel_num=512, block_num=3)
   		output = self.pooling(stage4, name="global_pooling", pool_type="avg", global_pool=True)
   		output = mx.symbol.Flatten(data=output)
   		output = self.fc(output, name="classifier", num_hidden=self.num_outputs)
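   		# cast logits back to fp32 so softmax and loss run in full precision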
   		output = mx.sym.Cast(data=output, dtype=np.float32)
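   		# smooth_alpha=0.1 enables label smoothing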
   		output = mx.sym.SoftmaxOutput(data=output, label=label, 
   									  name="softmax_output", smooth_alpha=0.1)
   		return output
   
   # config
   parser = argparse.ArgumentParser(description='Train a model for image classification.')
   parser.add_argument('--logging-file', type=str, default='train_imagenet.log',
                       help='name of training log file')
   parser.add_argument('--save-dir', type=str, default='params',
                       help='directory of saved models')
   opt = parser.parse_args()
   
   filehandler = logging.FileHandler(opt.logging_file)
   streamhandler = logging.StreamHandler()
   
   logger = logging.getLogger('')
   logger.setLevel(logging.INFO)
   logger.addHandler(filehandler)
   logger.addHandler(streamhandler)
   
   logger.info(opt)
   
   batch_size = 128 * 4
   classes = 1000
   num_training_samples = 1281167
   begin_epoch = 0
   epoch = 120
   lr = 0.2
   wd = 0.0001
   warmup_epoch = 5
   ctx = [mx.gpu(i) for i in range(4)]
   frequent = 50
   
   
   # lr_scheduler
   num_batches = num_training_samples // batch_size
   max_update = num_batches * epoch
   warmup_steps = num_batches * warmup_epoch
   lr_scheduler = mx.lr_scheduler.CosineScheduler(
   		max_update		=	max_update,
   		base_lr			=	lr,
   		final_lr		=	0.,
   		warmup_steps	= 	warmup_steps,
   		warmup_begin_lr	= 	0.,
   		warmup_mode		=	"linear"
   		)
   
   # symbol
   model = ResNetV1D()
   symbol = model.build()
   
   # iterator
   train_iter = mx.io.ImageRecordIter(
   		path_imgrec			=	"data/imagenet/train.rec",
   		path_imgidx			=	"data/imagenet/train.idx",
   		data_shape			=	(3, 224, 224),
   		preprocess_threads	=	60,
   		shuffle				=	True,
   		batch_size			=	batch_size,
   		random_resized_crop =	True,
   		max_aspect_ratio 	=	4 / 3.,
   		min_aspect_ratio 	= 	3 / 4.,
   		max_random_area		= 	1.,
   		min_random_area		= 	0.08,
   		brightness			= 	0.4, 
   		saturation			= 	0.4, 
   		contrast			=	0.4,
   		pca_noise			=	0.1,
   		random_mirror		=	True,
   		mean_r				=	123.68,
   		mean_g				= 	116.779,
   		mean_b				=	103.939,
   		std_r				=	58.393,
   		std_g				=	57.12,
   		std_b				=	57.375,	
   		)
   
   val_iter = mx.io.ImageRecordIter(
   		path_imgrec			=	"data/imagenet/val.rec",
   		path_imgidx			=	"data/imagenet/val.idx",
   		data_shape			=	(3, 224, 224),
   		preprocess_threads	= 	60,
   		shuffle				=	False,
   		batch_size			= 	batch_size,
   		resize 				= 	256, 
   		mean_r				=	123.68,
   		mean_g				= 	116.779,
   		mean_b				=	103.939,
   		std_r				=	58.393,
   		std_g				=	57.12,
   		std_b				=	57.375,		
   		)
   
   # module
   data_names = [k[0] for k in train_iter.provide_data]
   label_names = [k[0] for k in train_iter.provide_label]
   mod = mx.mod.Module(symbol, data_names=data_names, 
   					label_names=label_names, logger=logger, context=ctx)
   
   # metric
   eval_metrics = mx.metric.CompositeEvalMetric()
   for child_metric in [mx.metric.Accuracy()]:
   	eval_metrics.add(child_metric)
   
   # callback
   batch_end_callback = mx.callback.Speedometer(train_iter.batch_size, frequent=frequent, auto_reset=False)
   epoch_end_callback = mx.callback.do_checkpoint(opt.save_dir)
   
   # optimizer
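   # multi_precision=True keeps an fp32 master copy of the fp16 weights and
   # applies updates in fp32; this is the flag under investigation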
   optimizer_params = {'momentum': 0.9,
   					'wd': wd,
   					'lr_scheduler': lr_scheduler,
   					'multi_precision': True}
   
   
   # fit
   mod.fit(train_iter, val_iter, eval_metric=eval_metrics, epoch_end_callback=epoch_end_callback,
   		batch_end_callback=batch_end_callback, optimizer='nag', optimizer_params=optimizer_params,
   		initializer=mx.initializer.MSRAPrelu(), begin_epoch=begin_epoch, num_epoch=epoch)
   
   
   ```
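
   To isolate the effect of the flag, the run can be repeated with `multi_precision` toggled. The sketch below is illustrative (it assumes one epoch is enough to read a stable img/sec from the Speedometer) and reuses the objects defined in the script above:

   ```python
   # Illustrative A/B harness; not part of the original script.
   for multi_precision in (False, True):
   	logger.info('multi_precision=%s', multi_precision)
   	train_iter.reset()
   	# fresh module per run so each fit binds its own executors
   	mod = mx.mod.Module(symbol, data_names=data_names,
   						label_names=label_names, logger=logger, context=ctx)
   	mod.fit(train_iter, eval_metric=eval_metrics,
   			batch_end_callback=batch_end_callback, optimizer='nag',
   			optimizer_params={'momentum': 0.9, 'wd': wd,
   							  'lr_scheduler': lr_scheduler,
   							  'multi_precision': multi_precision},
   			initializer=mx.initializer.MSRAPrelu(), num_epoch=1)
   ```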
