You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mxnet.apache.org by GitBox <gi...@apache.org> on 2017/12/22 04:34:23 UTC
[GitHub] helloworldlxb commented on issue #9032: Why does a tanh activation layer generates values greater than 1?
helloworldlxb commented on issue #9032: Why does a tanh activation layer generates values greater than 1?
URL: https://github.com/apache/incubator-mxnet/issues/9032#issuecomment-353520009
@reminisce
```
class DetectionLoss(mx.operator.NumpyOp):
def __init__(self):
super(DetectionLoss, self).__init__(False)
def list_arguments(self):
return ['data', 'label']
def list_outputs(self):
return ['output']
def infer_shape(self, in_shape):
data_shape = in_shape[0]
label_shape = in_shape[1]
# output_shape = (1,)
output_shape = data_shape
return [data_shape, label_shape], [output_shape]
def forward(self, in_data, out_data):
data = in_data[0]
label = in_data[1]
loss = 0
batch_size = data.shape[0]
self.map = np.zeros((batch_size, 12, 39, 5), dtype=np.int)
self.max_iou = np.zeros((batch_size, 12, 39, 5))
self.typ = np.zeros((batch_size, 12, 39, 5))
for n in range(batch_size):
ious = np.zeros((12,39,5,25))
for i in range(25):
if label[n,i,0] == -1:
continue
left = label[n,i,1] / 32.0
right = min(label[n,i,3] / 32, 31.0)
top = label[n,i,2] / 32.0
bottom = min(label[n,i,4] / 32, 18.0)
for j in range(int(top), int(bottom)):
for k in range(int(left), int(right)):
for l in range(5):
tleft = (left - k) / 39
tright = (right - k) / 39
ttop = (top - j) / 12
tbottom = (bottom - j) / 12
bleft = data[n,l*8,j,k]
btop = data[n,l*8+1,j,k]
bright = data[n,l*8+2,j,k]
bbottom = data[n,l*8+3,j,k]
if bbottom <= btop or bright <= bleft or bleft >= tright or bright <= tleft or btop >= tbottom or bbottom <= ttop:
ious[j,k,l,i] = 0
continue
max_left = max(bleft, tleft)
min_right = min(bright, tright)
max_top = max(btop, ttop)
min_bottom = min(bbottom, tbottom)
intersection = max(0, (min_right - max_left) * (min_bottom - max_top))
union = max(0, (bright - bleft) * (bbottom - btop)) + (tright - tleft) * (tbottom - ttop) - intersection
ious[j,k,l,i] = intersection / union
max_ious = np.argmax(ious, axis = 3)
miou = np.zeros((12, 39, 5),dtype=np.float)
types = np.zeros((12, 39, 5), dtype=np.float)
for i in range(12):
for j in range(39):
for k in range(5):
m_iou = ious[i,j,k,max_ious[i,j,k]]
miou[i,j,k] = m_iou
if m_iou < 0.1:
max_ious[i,j,k] = -1
# loss += data[n,k*8+4,i,j] ** 2
# loss += np.sum(np.square(data[n,k*8+5:8*5+8,i,j]))
# loss += 0.01
else:
# loss += (m_iou - data[n, k*8+4, i, j]) ** 2
typ = int(label[n,max_ious[i,j,k],0])
# prob = data[n,k*8+5:k*8+8,i,j]
# prob[typ] = 1 - prob[typ]
types[i, j, k] = typ
# loss += np.sum(np.square(prob))
# gt = label[n,max_ious[i,j,k],1:5]
# bbox = data[n,k*8:k*8+4,i,j]
# loss += np.sum(np.square(gt - bbox))
self.map[n,:,:,:] = max_ious
self.max_iou[n,:,:,:] = miou
self.typ[n,:,:,:] = types
# out_data[0] = loss
out_data[:] = data[:]
def backward(self, out_grad, in_data, out_data, in_grad):
data = in_data[0]
label = in_data[1]
dx = in_grad[0]
batch_size = data.shape[0]
dx[:] = 0
for n in range(batch_size):
max_ious = self.map[n]
for i in range(12):
for j in range(39):
for k in range(5):
if self.map[n,i,j,k] == -1:
dx[n,k*8:k*8+4,i,j] = data[n,k*8:k*8+4,i,j]
dx[n,8*k+4,i,j] = 2 * data[n,k*8+4,i,j]
dx[n,8*k+5:8*k+8,i,j] = 2 * data[n,k*8+5:k*8+8,i,j]
else:
dx[n,k*8:k*8+4,i,j] = (data[n,k*8:k*8+4,i,j] - (label[n,max_ious[i,j,k],1:5] / 32 - np.array([j,i,j,i])) / np.array([39.0, 12.0, 39.0, 12.0]))
dx[n,k*8+4,i,j] = 2 * (data[n,k*8+4,i,j] - self.max_iou[n,i,j,k])
dx[n,k*8+5:k*8+8,i,j] = 2 * data[n,k*8+5:k*8+8,i,j]
t = int(self.typ[n,i,j,k])
dx[n,k * 8 + 5 + t,i,j] = 2 * (data[n,k * 8 + 5 + t,i,j] - 1)
class bboxAccMetric(mx.metric.EvalMetric):
def __init__(self):
super(bboxAccMetric, self).__init__('bboxAcc')
def update(self, labels, preds):
self.sum_metric += 1.0
self.num_inst += 1
img = mx.sym.Variable('img')
dif = mx.sym.Variable('dif')
lab = mx.sym.Variable('label')
img_conv1 = mx.sym.Convolution(img, name='img_conv1', kernel=(3,3,), pad=(1,1,), num_filter=64, )
img_conv1 = mx.sym.Activation(img_conv1, act_type='relu')
dif_conv1 = mx.sym.Convolution(dif, name='dif_conv1', kernel=(3,3,), pad=(1,1,), num_filter=64, )
dif_conv1 = mx.sym.Activation(dif_conv1, act_type='relu')
img_conv2 = mx.sym.Convolution(img_conv1, name='img_conv2', kernel=(3,3,), pad=(1,1,), num_filter=64, )
img_conv2 = mx.sym.Activation(img_conv2, act_type='relu')
dif_conv2 = mx.sym.Convolution(dif_conv1, name='dif_conv2', kernel=(3,3,), pad=(1,1,), num_filter=64, )
dif_conv2 = mx.sym.Activation(dif_conv2, act_type='relu')
img_pool1 = mx.sym.Pooling(img_conv2, name='img_pool1',kernel=(2,2), stride=(2,2), pool_type='max', pooling_convention='full')
dif_pool1 = mx.sym.Pooling(dif_conv2, name='dif_pool1',kernel=(2,2), stride=(2,2), pool_type='max', pooling_convention='full')
img_conv3 = mx.sym.Convolution(img_pool1, name='img_conv3', kernel=(3,3,), pad=(1,1,), num_filter=128, )
img_conv3 = mx.sym.Activation(img_conv3, act_type='relu')
dif_conv3 = mx.sym.Convolution(dif_pool1, name='dif_conv3', kernel=(3,3,), pad=(1,1,), num_filter=128, )
dif_conv3 = mx.sym.Activation(dif_conv3, act_type='relu')
img_conv4 = mx.sym.Convolution(img_conv3, name='img_conv4', kernel=(3,3,), pad=(1,1,), num_filter=128, )
img_conv4 = mx.sym.Activation(img_conv4, act_type='relu')
dif_conv4 = mx.sym.Convolution(dif_conv3, name='dif_conv4', kernel=(3,3,), pad=(1,1,), num_filter=128, )
dif_conv4 = mx.sym.Activation(dif_conv4, act_type='relu')
img_pool2 = mx.sym.Pooling(img_conv4, name='img_pool2',kernel=(2,2), stride=(2,2), pool_type='max', pooling_convention='full')
dif_pool2 = mx.sym.Pooling(dif_conv4, name='dif_pool2',kernel=(2,2), stride=(2,2), pool_type='max', pooling_convention='full')
img_conv5 = mx.sym.Convolution(img_pool2, name='img_conv5', kernel=(3,3,), pad=(1,1,), num_filter=256, )
img_conv5 = mx.sym.Activation(img_conv5, act_type='relu')
dif_conv5 = mx.sym.Convolution(dif_pool2, name='dif_conv5', kernel=(3,3,), pad=(1,1,), num_filter=256, )
dif_conv5 = mx.sym.Activation(dif_conv5, act_type='relu')
img_conv6 = mx.sym.Convolution(img_conv5, name='img_conv6', kernel=(3,3,), pad=(1,1,), num_filter=256, )
img_conv6 = mx.sym.Activation(img_conv6, act_type='relu')
dif_conv6 = mx.sym.Convolution(dif_conv5, name='dif_conv6', kernel=(3,3,), pad=(1,1,), num_filter=256, )
dif_conv6 = mx.sym.Activation(dif_conv6, act_type='relu')
img_conv7 = mx.sym.Convolution(img_conv6, name='img_conv7', kernel=(3,3,), pad=(1,1,), num_filter=256, )
img_conv7 = mx.sym.Activation(img_conv7, act_type='relu')
dif_conv7 = mx.sym.Convolution(dif_conv6, name='dif_conv7', kernel=(3,3,), pad=(1,1,), num_filter=256, )
dif_conv7 = mx.sym.Activation(dif_conv7, act_type='relu')
img_pool3 = mx.sym.Pooling(img_conv7, name='img_pool3', kernel=(2,2,), stride=(2,2), pool_type='max', pooling_convention='full')
dif_pool3 = mx.sym.Pooling(dif_conv7, name='dif_pool3', kernel=(2,2,), stride=(2,2), pool_type='max', pooling_convention='full')
img_conv8 = mx.sym.Convolution(img_pool3, name='img_conv8', kernel=(3,3,), pad=(1,1,), num_filter=512, )
img_conv8 = mx.sym.Activation(img_conv8, act_type='relu')
dif_conv8 = mx.sym.Convolution(dif_pool3, name='dif_conv8', kernel=(3,3,), pad=(1,1,), num_filter=512, )
dif_conv8 = mx.sym.Activation(dif_conv8, act_type='relu')
img_conv9 = mx.sym.Convolution(img_conv8, name='img_conv9', kernel=(3,3,), pad=(1,1,), num_filter=512, )
img_conv9 = mx.sym.Activation(img_conv9, act_type='relu')
dif_conv9 = mx.sym.Convolution(dif_conv8, name='dif_conv9', kernel=(3,3,), pad=(1,1,), num_filter=512, )
dif_conv9 = mx.sym.Activation(dif_conv9, act_type='relu')
img_conv10 = mx.sym.Convolution(img_conv9, name='img_conv10', kernel=(3,3,), pad=(1,1,), num_filter=512, )
img_conv10 = mx.sym.Activation(img_conv10, act_type='relu')
dif_conv10 = mx.sym.Convolution(dif_conv9, name='dif_conv10', kernel=(3,3,), pad=(1,1,), num_filter=512, )
dif_conv10 = mx.sym.Activation(dif_conv10, act_type='relu')
img_pool4 = mx.sym.Pooling(img_conv10, name='img_pool4', kernel=(2,2,), stride=(2,2), pool_type='max', pooling_convention='full')
dif_pool4 = mx.sym.Pooling(dif_conv10, name='dif_pool4', kernel=(2,2,), stride=(2,2), pool_type='max', pooling_convention='full')
img_conv11 = mx.sym.Convolution(img_pool4, name='img_conv11', kernel=(3,3,), pad=(1,1,), num_filter=512, )
img_conv11 = mx.sym.Activation(img_conv11, act_type='relu')
dif_conv11 = mx.sym.Convolution(dif_pool4, name='dif_conv11', kernel=(3,3,), pad=(1,1,), num_filter=512, )
dif_conv11 = mx.sym.Activation(dif_conv11, act_type='relu')
img_conv12 = mx.sym.Convolution(img_conv11, name='img_conv12', kernel=(3,3,), pad=(1,1,), num_filter=512, )
img_conv12 = mx.sym.Activation(img_conv12, act_type='relu')
dif_conv12 = mx.sym.Convolution(dif_conv11, name='dif_conv12', kernel=(3,3,), pad=(1,1,), num_filter=512, )
dif_conv12 = mx.sym.Activation(dif_conv12, act_type='relu')
img_conv13 = mx.sym.Convolution(img_conv12, name='img_conv13', kernel=(3,3,), pad=(1,1,), num_filter=512, )
img_conv13 = mx.sym.Activation(img_conv13, act_type='relu')
dif_conv13 = mx.sym.Convolution(dif_conv12, name='dif_conv13', kernel=(3,3,), pad=(1,1,), num_filter=512, )
dif_conv13 = mx.sym.Activation(dif_conv13, act_type='relu')
img_conv14 = mx.sym.Convolution(img_conv13, name='img_conv14', kernel=(1,1,), num_filter=512, )
img_conv14 = mx.sym.Activation(img_conv14, act_type='relu')
dif_conv14 = mx.sym.Convolution(dif_conv13, name='dif_conv14', kernel=(1,1,), num_filter=512, )
dif_conv14 = mx.sym.Activation(dif_conv14, act_type='relu')
img_conv15 = mx.sym.Convolution(img_conv14, name='img_conv15', kernel=(1,1,), num_filter=512, )
img_conv15 = mx.sym.Activation(img_conv15, act_type='relu')
dif_conv15 = mx.sym.Convolution(dif_conv14, name='dif_conv15', kernel=(1,1,), num_filter=512, )
dif_conv15 = mx.sym.Activation(dif_conv15, act_type='relu')
img_pool5 = mx.sym.Pooling(img_conv13, name='img_pool5', kernel=(2,2,), stride=(2,2), pool_type='max', pooling_convention='full')
dif_pool5 = mx.sym.Pooling(dif_conv13, name='dif_pool5', kernel=(2,2,), stride=(2,2), pool_type='max', pooling_convention='full')
# con = mx.sym.sum([img_pool5, dif_pool5], axis=0)
con = img_pool5 + dif_pool5
detect = mx.sym.Convolution(con, name='detect', kernel=(1,1,), num_filter=40)
acti = mx.sym.Activation(data=detect, act_type='tanh')
loss_layer = DetectionLoss()
loss = loss_layer(data = acti, label=lab, name='loss')
mod = mx.mod.Module(loss, data_names=('img','dif') , label_names=('label',), context=mx.gpu(1))
```
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
With regards,
Apache Git Services