You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mxnet.apache.org by GitBox <gi...@apache.org> on 2018/12/14 17:08:35 UTC
[GitHub] kaleidoscopical edited a comment on issue #13650: speed drop when
set `multi_precision=True`
kaleidoscopical edited a comment on issue #13650: speed drop when set `multi_precision=True`
URL: https://github.com/apache/incubator-mxnet/issues/13650#issuecomment-447388839
The simplified script I tried is posted below. I think the only modification required before running it is to change `path_imgrec` and `path_imgidx`. Throughput drops from ~800 img/sec to ~650 img/sec when training with four 1080 Ti GPUs.
```python
import mxnet as mx
import numpy as np
import argparse
import os
import logging
class ResNetV1D(object):
    """Symbolic ResNet-50 v1d builder for mixed-precision ImageNet training.

    The network body runs in float16 (the input is cast on entry in
    ``build`` and the classifier output is cast back to float32 before the
    softmax), which is why the surrounding training script pairs it with
    ``multi_precision=True`` in the optimizer.

    Parameters
    ----------
    num_outputs : int
        Number of classification outputs (ImageNet: 1000).
    workspace : int
        Convolution workspace limit in MB, passed to every Convolution op.
    bn_mom : float
        BatchNorm momentum.
    bn_eps : float
        BatchNorm epsilon.
    """

    def __init__(self, num_outputs=1000, workspace=1024,
                 bn_mom=0.9, bn_eps=1e-5):
        super(ResNetV1D, self).__init__()
        self.num_outputs = num_outputs
        self.workspace = workspace
        self.bn_mom = bn_mom
        self.bn_eps = bn_eps

    def conv3x3(self, data, name, num_filter, stride=None):
        """3x3 'same' convolution without bias; ``stride=None`` means 1."""
        if stride is None:
            stride = 1
        else:
            assert isinstance(stride, int), "stride should be int"
        return mx.sym.Convolution(data=data, num_filter=num_filter,
                                  kernel=(3, 3), stride=(stride, stride),
                                  pad=(1, 1), no_bias=True, name=name,
                                  workspace=self.workspace)

    def conv1x1(self, data, name, num_filter):
        """1x1 convolution (stride 1, no padding, no bias)."""
        return mx.sym.Convolution(data=data, num_filter=num_filter,
                                  kernel=(1, 1), stride=(1, 1), pad=(0, 0),
                                  no_bias=True, name=name,
                                  workspace=self.workspace)

    def bn(self, data, name, fix_gamma=False, last_gamma=False):
        """BatchNorm with weight decay disabled on gamma and beta.

        ``last_gamma=True`` zero-initializes gamma; it is used on the final
        BN of each residual block so the residual branch starts near zero.
        """
        if last_gamma:
            gamma = mx.sym.Variable(name=name + '_gamma', wd_mult=0,
                                    init=mx.init.Zero())
        else:
            gamma = mx.sym.Variable(name=name + '_gamma', wd_mult=0)
        beta = mx.sym.Variable(name=name + '_beta', wd_mult=0)
        return mx.sym.BatchNorm(data=data, gamma=gamma, beta=beta,
                                fix_gamma=fix_gamma, eps=self.bn_eps,
                                momentum=self.bn_mom, name=name)

    def relu(self, data, name):
        """ReLU activation."""
        return mx.sym.Activation(data=data, act_type='relu', name=name)

    def pooling(self, data, name, pool_type, kernel=2, stride=2, pad=0,
                global_pool=False):
        """Square pooling ('max' or 'avg'); ``global_pool`` pools over the
        whole spatial extent (a kernel is still passed — the Pooling op
        requires one even then)."""
        if global_pool:
            return mx.sym.Pooling(data=data, global_pool=True,
                                  kernel=(kernel, kernel),
                                  pool_type=pool_type, name=name)
        assert isinstance(kernel, int), "kernel should be int"
        assert isinstance(stride, int), "stride should be int"
        assert isinstance(pad, int), "pad should be int"
        return mx.sym.Pooling(data=data, pool_type=pool_type,
                              kernel=(kernel, kernel),
                              stride=(stride, stride),
                              pad=(pad, pad), name=name)

    def fc(self, data, name, num_hidden):
        """Fully-connected layer; weight decay disabled on the bias."""
        bias = mx.sym.Variable(name=name + '_bias', wd_mult=0)
        return mx.sym.FullyConnected(data=data, bias=bias,
                                     num_hidden=num_hidden, name=name)

    def build_stem(self, data):
        """Deep stem of ResNet-v1d: three 3x3 convs (32, 32, 64) instead of
        one 7x7, followed by 3x3/stride-2 max pooling."""
        output = self.conv3x3(data=data, name="stage0_conv1",
                              num_filter=32, stride=2)
        output = self.bn(output, name="stage0_bn1")
        output = self.relu(output, name="stage0_relu1")
        output = self.conv3x3(data=output, name="stage0_conv2", num_filter=32)
        output = self.bn(output, name="stage0_bn2")
        output = self.relu(output, name="stage0_relu2")
        output = self.conv3x3(data=output, name="stage0_conv3", num_filter=64)
        output = self.bn(output, name="stage0_bn3")
        output = self.relu(output, name="stage0_relu3")
        output = self.pooling(data=output, name="stage0_max_pooling",
                              pool_type='max', kernel=3, stride=2, pad=1)
        return output

    def build_block(self, data, name, channel_num, stride=1,
                    down_sample=False):
        """Bottleneck residual block (1x1 -> 3x3 -> 1x1, expansion 4).

        With ``down_sample`` the shortcut is projected through a 1x1 conv;
        for strided blocks the v1d variant applies 2x2 average pooling
        before the projection instead of striding the 1x1 conv.
        """
        if down_sample:
            if stride != 1:
                downsample = self.pooling(data, name=name + "_avg_pool",
                                          pool_type='avg', kernel=2,
                                          stride=2, pad=0)
            else:
                downsample = data
            downsample = self.conv1x1(downsample,
                                      name=name + "_downsample_conv",
                                      num_filter=channel_num * 4)
            downsample = self.bn(downsample, name=name + "_downsample_bn")
        else:
            downsample = data
        # bottleneck: reduce -> 3x3 (carries the stride) -> expand
        output = self.conv1x1(data, name=name + "_conv1",
                              num_filter=channel_num)
        output = self.bn(output, name=name + "_bn1")
        output = self.relu(output, name=name + "_relu1")
        output = self.conv3x3(output, name=name + "_conv2",
                              num_filter=channel_num, stride=stride)
        output = self.bn(output, name=name + "_bn2")
        output = self.relu(output, name=name + "_relu2")
        output = self.conv1x1(output, name=name + "_conv3",
                              num_filter=channel_num * 4)
        # zero-init gamma here so the residual branch starts near identity
        output = self.bn(output, name=name + "_bn3", last_gamma=True)
        output = self.relu(output + downsample, name=name + "_relu3")
        return output

    def build_stage(self, data, name, channel_num, block_num, stride=2):
        """Stack ``block_num`` bottleneck blocks; only the first block
        downsamples and projects the shortcut."""
        assert isinstance(name, str), "name should be str"
        output = self.build_block(data, name=name + "_block1",
                                  channel_num=channel_num,
                                  stride=stride, down_sample=True)
        for i in range(1, block_num):
            output = self.build_block(output,
                                      name=name + "_block" + str(i + 1),
                                      channel_num=channel_num)
        return output

    def build(self):
        """Return the full training symbol (softmax with label smoothing 0.1).

        The input is cast to float16 and the classifier logits are cast back
        to float32 so the loss is computed in full precision.
        """
        data = mx.sym.Variable(name="data")
        label = mx.sym.Variable(name="softmax_label")
        data = mx.sym.Cast(data=data, dtype=np.float16)
        stage0 = self.build_stem(data)
        stage1 = self.build_stage(stage0, name="stage1", channel_num=64,
                                  block_num=3, stride=1)
        stage2 = self.build_stage(stage1, name="stage2", channel_num=128,
                                  block_num=4)
        stage3 = self.build_stage(stage2, name="stage3", channel_num=256,
                                  block_num=6)
        stage4 = self.build_stage(stage3, name="stage4", channel_num=512,
                                  block_num=3)
        output = self.pooling(stage4, name="global_pooling",
                              pool_type="avg", global_pool=True)
        output = mx.sym.Flatten(data=output)
        output = self.fc(output, name="classifier",
                         num_hidden=self.num_outputs)
        output = mx.sym.Cast(data=output, dtype=np.float32)
        output = mx.sym.SoftmaxOutput(data=output, label=label,
                                      name="softmax_output",
                                      smooth_alpha=0.1)
        return output
# ---- command-line options and logging -------------------------------------
parser = argparse.ArgumentParser(
    description='Train a model for image classification.')
parser.add_argument('--logging-file', type=str, default='train_imagenet.log',
                    help='name of training log file')
parser.add_argument('--save-dir', type=str, default='params',
                    help='directory of saved models')
opt = parser.parse_args()

# log to both the file named above and the console, at INFO level
logger = logging.getLogger('')
logger.setLevel(logging.INFO)
logger.addHandler(logging.FileHandler(opt.logging_file))
logger.addHandler(logging.StreamHandler())
logger.info(opt)
# ---- training hyper-parameters (4 GPUs x 128 images each) -----------------
classes = 1000
num_training_samples = 1281167  # ImageNet-1k training-set size
batch_size = 128 * 4            # global batch across all devices
begin_epoch = 0
epoch = 120
warmup_epoch = 5
lr = 0.2
wd = 0.0001
ctx = [mx.gpu(i) for i in range(4)]  # data-parallel over 4 GPUs
frequent = 50                        # speedometer logging interval (batches)

# ---- cosine learning-rate schedule with linear warmup ---------------------
# schedule lengths are measured in optimizer updates, not epochs
num_batches = num_training_samples // batch_size
max_update = num_batches * epoch
warmup_steps = num_batches * warmup_epoch
lr_scheduler = mx.lr_scheduler.CosineScheduler(
    max_update=max_update,
    base_lr=lr,
    final_lr=0.,
    warmup_steps=warmup_steps,
    warmup_begin_lr=0.,
    warmup_mode="linear",
)
# ---- network symbol -------------------------------------------------------
model = ResNetV1D()
symbol = model.build()
# ---- data iterators -------------------------------------------------------
# training: record file with random-resized-crop, color jitter, PCA noise,
# horizontal flips, and per-channel ImageNet normalization
train_iter = mx.io.ImageRecordIter(
    path_imgrec="data/imagenet/train.rec",
    path_imgidx="data/imagenet/train.idx",
    data_shape=(3, 224, 224),
    preprocess_threads=60,
    shuffle=True,
    batch_size=batch_size,
    # random-resized-crop augmentation
    random_resized_crop=True,
    max_aspect_ratio=4 / 3.,
    min_aspect_ratio=3 / 4.,
    max_random_area=1.,
    min_random_area=0.08,
    # color augmentation
    brightness=0.4,
    saturation=0.4,
    contrast=0.4,
    pca_noise=0.1,
    random_mirror=True,
    # per-channel normalization (ImageNet statistics)
    mean_r=123.68, mean_g=116.779, mean_b=103.939,
    std_r=58.393, std_g=57.12, std_b=57.375,
)
# validation: resize shorter side to 256, center crop, no augmentation
val_iter = mx.io.ImageRecordIter(
    path_imgrec="data/imagenet/val.rec",
    path_imgidx="data/imagenet/val.idx",
    data_shape=(3, 224, 224),
    preprocess_threads=60,
    shuffle=False,
    batch_size=batch_size,
    resize=256,
    mean_r=123.68, mean_g=116.779, mean_b=103.939,
    std_r=58.393, std_g=57.12, std_b=57.375,
)
# ---- module, metric, callbacks, optimizer, fit ----------------------------
data_names = [desc[0] for desc in train_iter.provide_data]
label_names = [desc[0] for desc in train_iter.provide_label]
mod = mx.mod.Module(symbol, data_names=data_names,
                    label_names=label_names, logger=logger, context=ctx)

# top-1 accuracy is the only evaluation metric
eval_metrics = mx.metric.CompositeEvalMetric()
eval_metrics.add(mx.metric.Accuracy())

# periodic throughput logging and per-epoch checkpoints
batch_end_callback = mx.callback.Speedometer(train_iter.batch_size,
                                             frequent=frequent,
                                             auto_reset=False)
epoch_end_callback = mx.callback.do_checkpoint(opt.save_dir)

# NAG with cosine schedule; multi_precision enables float32 master weights
# for the float16 parameters
optimizer_params = {'momentum': 0.9,
                    'wd': wd,
                    'lr_scheduler': lr_scheduler,
                    'multi_precision': True}

mod.fit(train_iter, val_iter,
        eval_metric=eval_metrics,
        epoch_end_callback=epoch_end_callback,
        batch_end_callback=batch_end_callback,
        optimizer='nag',
        optimizer_params=optimizer_params,
        initializer=mx.initializer.MSRAPrelu(),
        begin_epoch=begin_epoch,
        num_epoch=epoch)
```
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
With regards,
Apache Git Services