You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mxnet.apache.org by GitBox <gi...@apache.org> on 2017/12/11 14:16:35 UTC
[GitHub] dbsxdbsx opened a new issue #9026: why is it so slow (MXNET0.12)even with NVIDIA V100 GPU?
dbsxdbsx opened a new issue #9026: why is it so slow (MXNET0.12)even with NVIDIA V100 GPU?
URL: https://github.com/apache/incubator-mxnet/issues/9026
I tested my script on an AWS EC2 p3.2xlarge instance (GPU: V100). The .py file is as follows:
```
'''Train a network that predicts captchas containing 4 to 10 digits.

Because the number of digits varies, the DataIter has to produce captcha
examples of different lengths, even within a single batch.
'''
# Font file handed to ImageCaptcha when the captcha images are rendered.
font_name = 'segoeuib.ttf'
import sys
sys.path.insert(0, "../../python")
sys.path.append('../')
sys.path.append('../../')
import mxnet as mx
import numpy as np
import cv2, random
from captcha.image import ImageCaptcha
class OCRBatch(object):
    """Plain container for one batch of named data and label arrays.

    The four constructor arguments are stored unchanged as attributes.
    ``provide_data`` and ``provide_label`` describe the stored arrays as
    lists of ``(name, shape)`` tuples.
    """

    def __init__(self, data_names, data, label_names, label):
        self.data_names = data_names
        self.label_names = label_names
        self.data = data
        self.label = label

    @property
    def provide_data(self):
        """Return ``(name, shape)`` for every data array, paired by position."""
        described = []
        for name, array in zip(self.data_names, self.data):
            described.append((name, array.shape))
        return described

    @property
    def provide_label(self):
        """Return ``(name, shape)`` for every label array, paired by position."""
        described = []
        for name, array in zip(self.label_names, self.label):
            described.append((name, array.shape))
        return described
def gen_rand(capt_num):
    """Return a string made of ``capt_num`` random decimal digits.

    Every digit comes from one ``random.randint(0, 9)`` call, drawn in
    left-to-right order.
    """
    digits = (str(random.randint(0, 9)) for _ in range(capt_num))
    return "".join(digits)
def get_label(capt_str, capt_max_num):
    """Turn a digit string into a numeric label array of fixed length.

    Each character of ``capt_str`` is converted with ``int``. When that yields
    fewer than ``capt_max_num`` values, the filler value 11 is appended until
    the length is ``capt_max_num``. Longer inputs are neither padded nor
    truncated. The result is returned as a NumPy array.

    NOTE(review): the original comment describes the filler as a stand-in
    for -1; confirm how the loss is expected to treat it.
    """
    values = list(map(int, capt_str))
    missing = capt_max_num - len(values)
    if missing > 0:
        values.extend([11] * missing)
    return np.array(values)
def gen_sample(captcha, width, height, capt_num):
    """Render one random captcha and return it with its digit string.

    Args:
        captcha: object whose ``generate(text)`` result exposes ``getvalue()``
            returning the encoded image bytes (an ``ImageCaptcha`` in this
            script).
        width: target image width passed to ``cv2.resize``.
        height: target image height passed to ``cv2.resize``.
        capt_num: number of digits to generate, forwarded to ``gen_rand``.

    Returns:
        Tuple ``(num, img)``: ``num`` is the generated digit string and
        ``img`` is the decoded image scaled by 1/255 with its last axis
        moved to the front.
    """
    num = gen_rand(capt_num)
    encoded = captcha.generate(num)
    # np.frombuffer is the documented replacement for the deprecated binary
    # mode of np.fromstring; it views the bytes without copying them.
    raw = np.frombuffer(encoded.getvalue(), dtype='uint8')
    img = cv2.imdecode(raw, cv2.IMREAD_COLOR)
    img = cv2.resize(img, (width, height))
    img = np.multiply(img, 1 / 255.0)
    # Presumably height x width x channel after decoding; the transpose puts
    # the channel axis first to match the (batch, 3, height, width) data shape.
    img = img.transpose(2, 0, 1)
    return (num, img)
class OCRIter(mx.io.DataIter):
    """Data iterator that synthesizes captcha batches while iterating.

    Every batch is built from freshly rendered captcha images, so nothing is
    cached between batches or epochs.

    NOTE(review): every sample is rendered and decoded synchronously in
    Python inside ``__iter__``; this is a likely input bottleneck on fast
    GPUs - confirm by profiling.

    Args:
        count: nominal number of examples per epoch; the iterator yields
            ``count // batch_size`` batches.
        batch_size: number of examples per batch.
        height: image height of each example.
        width: image width of each example.
    """

    def __init__(self, count, batch_size, height, width):
        super(OCRIter, self).__init__()
        self.captcha = ImageCaptcha(fonts=[font_name])
        self.batch_size = batch_size
        self.count = count
        self.height = height
        self.width = width
        self.provide_data = [('data', (batch_size, 3, height, width))]
        # Every label row is padded by get_label to this many entries.
        self.capt_max_num = 10
        self.provide_label = [('softmax_label', (self.batch_size, self.capt_max_num))]

    def __iter__(self):
        """Yield ``count // batch_size`` freshly generated ``OCRBatch`` objects."""
        # Floor division keeps the batch count an int on Python 3, where "/"
        # returns a float that range() rejects.
        for _ in range(self.count // self.batch_size):
            data = []
            label = []
            for _ in range(self.batch_size):
                # Draw a plain int: a size-1 array is not a valid range() count.
                # NOTE(review): randint's upper bound is exclusive, so lengths
                # are 4..9 although the module docstring says "4 to 10".
                digit_count = int(np.random.randint(4, 10))
                digits, img = gen_sample(self.captcha, self.width, self.height, digit_count)
                data.append(img)
                label.append(get_label(digits, self.capt_max_num))
            data_all = [mx.nd.array(data)]
            label_all = [mx.nd.array(label)]
            yield OCRBatch(['data'], data_all, ['softmax_label'], label_all)

    def reset(self):
        """Do nothing: each ``__iter__`` call starts a fresh generator."""
        pass
def get_ocrnet():
    """Build and return the captcha recognition symbol.

    The graph is four Convolution -> Pooling -> ReLU stages (32 filters each),
    a Flatten, a 256-unit FullyConnected layer, and ten parallel 10-unit
    FullyConnected heads concatenated along dim 0. The ``softmax_label``
    variable is transposed and reshaped before both are fed to a
    ``SoftmaxOutput`` named "softmax".
    """
    data = mx.symbol.Variable('data')
    label = mx.symbol.Variable('softmax_label')

    # (convolution kernel, pooling type) for each stage, in graph order.
    stages = (
        ((5, 5), "max"),
        ((5, 5), "avg"),
        ((3, 3), "avg"),
        ((3, 3), "avg"),
    )
    net = data
    for kernel, pool_type in stages:
        net = mx.symbol.Convolution(data=net, kernel=kernel, num_filter=32)
        net = mx.symbol.Pooling(data=net, pool_type=pool_type, kernel=(2, 2), stride=(1, 1))
        net = mx.symbol.Activation(data=net, act_type="relu")

    flatten = mx.symbol.Flatten(data=net)
    fc1 = mx.symbol.FullyConnected(data=flatten, num_hidden=256)

    # One 10-way head per captcha position, created in position order.
    heads = [mx.symbol.FullyConnected(data=fc1, num_hidden=10) for _ in range(10)]
    fc2 = mx.symbol.Concat(*heads, dim=0)

    # Presumably this lines the labels up with the concatenated head rows
    # (all of position 0, then all of position 1, ...) - verify.
    label = mx.symbol.transpose(data=label)
    label = mx.symbol.Reshape(data=label, target_shape=(0,))
    return mx.symbol.SoftmaxOutput(data=fc2, label=label, name="softmax")
def Accuracy(label, pred):
    """Return the fraction of row groups in which every prediction is right.

    Older metric kept for comparison; the original author suspected that its
    position indexing is wrong. ``label`` is transposed and flattened, then
    rows of ``pred`` are checked in consecutive groups of ``capt_num`` (a
    module-level global): a group counts as a hit only when the argmax of
    each row equals the flattened label at the same index.

    Args:
        label: 2-D array of labels (assumed to be a NumPy array - verify
            against how the metric is invoked).
        pred: 2-D array with one row of class scores per prediction.

    Returns:
        ``hits / groups`` as a float.

    Raises:
        ZeroDivisionError: if ``pred`` has fewer than ``capt_num`` rows.
    """
    label = label.T.reshape((-1,))
    # Floor division: "/" yields a float on Python 3, which range() rejects.
    group_count = pred.shape[0] // capt_num
    hit = 0
    for i in range(group_count):
        start = i * capt_num
        if all(np.argmax(pred[k]) == int(label[k])
               for k in range(start, start + capt_num)):
            hit += 1
    return 1.0 * hit / group_count
def Accuracy2(label, pred):
    """Return the fraction of samples whose every position is predicted right.

    Newer metric written by the original author. With
    ``batch_size = pred.shape[0] // capt_num`` (``capt_num`` is a module-level
    global), the prediction for position ``j`` of sample ``i`` is read from
    row ``j * batch_size + i`` of ``pred`` and compared with ``label[i, j]``.
    A sample is a hit only when all ``capt_num`` positions match.

    Args:
        label: 2-D array indexed as ``label[sample, position]`` (assumed to be
            a NumPy array - verify against how the metric is invoked).
        pred: 2-D array with one row of class scores per prediction.

    Returns:
        ``hits / batch_size`` as a float.

    Raises:
        ZeroDivisionError: if ``pred`` has fewer than ``capt_num`` rows.
    """
    # Floor division: a float batch_size would break both range() and the
    # row index computed below on Python 3.
    batch_size = pred.shape[0] // capt_num
    hit = 0
    for i in range(batch_size):
        if all(np.argmax(pred[j * batch_size + i]) == int(label[i, j])
               for j in range(capt_num)):
            hit += 1
    return 1.0 * hit / batch_size
import argparse
def parse_args(description):
    """Parse the training options from the command line.

    Args:
        description: text shown as the description of the argument parser.

    Returns:
        The ``argparse`` namespace with ``batch_size``, ``train_exp_num``,
        ``epoch_num``, ``gpu_num`` (all int) and ``lr`` (float).
    """
    parser = argparse.ArgumentParser(description=description)
    # (option name, value type, default), registered in this order.
    options = (
        ('batch_size', int, 8),
        ('train_exp_num', int, 2000),
        ('epoch_num', int, 50),
        ('gpu_num', int, 1),
        ('lr', float, 0.00075),
    )
    for name, value_type, default in options:
        parser.add_argument('--' + name, dest=name, type=value_type, default=default)
    return parser.parse_args()
if __name__ == '__main__':
    import logging

    logging.basicConfig(level=logging.DEBUG, format='%(asctime)-15s %(message)s')

    args = parse_args('train 4 to 10 capt in 1 net')
    network = get_ocrnet()

    batch_size = args.batch_size
    train_exp_num = args.train_exp_num
    test_exp_num = 1000
    epoch_num = args.epoch_num
    gpu_num = args.gpu_num
    lr = args.lr

    # One GPU context per requested device.
    devs = [mx.gpu(dev_id) for dev_id in range(gpu_num)]

    # Module-level name read by Accuracy and Accuracy2; it must be set before
    # training starts.
    capt_num = 10

    # Both iterators produce 30-pixel-high, 80-pixel-wide images.
    data_train = OCRIter(train_exp_num, batch_size, 30, 80)
    data_test = OCRIter(test_exp_num, batch_size, 30, 80)

    lenet_model = mx.mod.Module(symbol=network, context=devs)

    sgd_params = {'learning_rate': lr, 'momentum': 0.9, 'wd': 0.00001}
    lenet_model.fit(
        data_train,
        eval_data=data_test,
        optimizer='sgd',
        optimizer_params=sgd_params,
        eval_metric=[Accuracy, Accuracy2],
        initializer=mx.init.Xavier(factor_type="in", magnitude=2.34),
        # Log throughput every 50 batches.
        batch_end_callback=mx.callback.Speedometer(batch_size, 50),
        # Save a checkpoint with prefix "param" every 2 epochs.
        epoch_end_callback=mx.callback.do_checkpoint(prefix='param', period=2),
        num_epoch=epoch_num,
    )
```
On my own machine (Windows 10, MXNet 0.12, GPU: 940M) I get about 110 samples/second with the default parameters, but surprisingly, on the p3.2xlarge I get only 170 samples/second. Watching `watch -n 1 nvidia-smi`, I found that the Volatile GPU-Util figure is always near 0%, peaking at about 4%. Why? Is it just because I use a custom DataIter?
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
With regards,
Apache Git Services