You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mxnet.apache.org by GitBox <gi...@apache.org> on 2017/12/22 04:34:23 UTC

[GitHub] helloworldlxb commented on issue #9032: Why does a tanh activation layer generate values greater than 1?

helloworldlxb commented on issue #9032: Why does a tanh activation layer generate values greater than 1?
URL: https://github.com/apache/incubator-mxnet/issues/9032#issuecomment-353520009
 
 
   @reminisce 
   ```
   class DetectionLoss(mx.operator.NumpyOp):
       def __init__(self):
           super(DetectionLoss, self).__init__(False)
       
       def list_arguments(self):
           return ['data', 'label']
   
       def list_outputs(self):
           return ['output']
   
       def infer_shape(self, in_shape):
           data_shape = in_shape[0]
           label_shape = in_shape[1]
           # output_shape = (1,)
           output_shape = data_shape
           return [data_shape, label_shape], [output_shape]
   
       def forward(self, in_data, out_data):
           data = in_data[0]
           label = in_data[1]
           loss = 0
           batch_size = data.shape[0]
           self.map = np.zeros((batch_size, 12, 39, 5), dtype=np.int)
           self.max_iou = np.zeros((batch_size, 12, 39, 5))
           self.typ = np.zeros((batch_size, 12, 39, 5))
           for n in range(batch_size):
               ious = np.zeros((12,39,5,25))
               for i in range(25):
                   if label[n,i,0] == -1:
                       continue
                   left = label[n,i,1] / 32.0
                   right = min(label[n,i,3] / 32, 31.0)
                   top = label[n,i,2] / 32.0
                   bottom = min(label[n,i,4] / 32, 18.0)
                   for j in range(int(top), int(bottom)):
                       for k in range(int(left), int(right)):
                           for l in range(5):
                               tleft = (left - k) / 39
                               tright = (right - k) / 39
                               ttop = (top - j) / 12
                               tbottom = (bottom - j) / 12
                               bleft = data[n,l*8,j,k]
                               btop = data[n,l*8+1,j,k]
                               bright = data[n,l*8+2,j,k]
                               bbottom = data[n,l*8+3,j,k]
                               if bbottom <= btop or bright <= bleft or bleft >= tright or bright <= tleft or btop >= tbottom or bbottom <= ttop:
                                   ious[j,k,l,i] = 0
                                   continue
                               max_left = max(bleft, tleft)
                               min_right = min(bright, tright)
                               max_top = max(btop, ttop)
                               min_bottom = min(bbottom, tbottom)
                               intersection = max(0, (min_right - max_left) * (min_bottom - max_top))
                               union = max(0, (bright - bleft) * (bbottom - btop)) + (tright - tleft) * (tbottom - ttop) - intersection
                               ious[j,k,l,i] = intersection / union
               max_ious = np.argmax(ious, axis = 3)
               miou = np.zeros((12, 39, 5),dtype=np.float)
               types = np.zeros((12, 39, 5), dtype=np.float)
               for i in range(12):
                   for j in range(39):
                       for k in range(5):
                           m_iou =  ious[i,j,k,max_ious[i,j,k]]
                           miou[i,j,k] = m_iou
                           if m_iou < 0.1:
                               max_ious[i,j,k] = -1
                               # loss += data[n,k*8+4,i,j] ** 2
                               # loss += np.sum(np.square(data[n,k*8+5:8*5+8,i,j]))
                               # loss += 0.01
                           else:
                               # loss += (m_iou - data[n, k*8+4, i, j]) ** 2
                               typ = int(label[n,max_ious[i,j,k],0])
                               # prob = data[n,k*8+5:k*8+8,i,j]
                               # prob[typ] = 1 - prob[typ]
                               types[i, j, k] = typ
                               # loss += np.sum(np.square(prob))
                               # gt = label[n,max_ious[i,j,k],1:5]
                               # bbox = data[n,k*8:k*8+4,i,j]
                               # loss += np.sum(np.square(gt - bbox))
               self.map[n,:,:,:] = max_ious
               self.max_iou[n,:,:,:] = miou
               self.typ[n,:,:,:] = types
           # out_data[0] = loss
           out_data[:] = data[:]
   
       def backward(self, out_grad, in_data, out_data, in_grad):
           data = in_data[0]
           label = in_data[1]
           dx = in_grad[0]
           batch_size = data.shape[0]
           dx[:] = 0
           for n in range(batch_size):
               max_ious = self.map[n]
               for i in range(12):
                   for j in range(39):
                       for k in range(5):
                           if self.map[n,i,j,k] == -1:
                               dx[n,k*8:k*8+4,i,j] = data[n,k*8:k*8+4,i,j] 
                               dx[n,8*k+4,i,j] = 2 * data[n,k*8+4,i,j]
                               dx[n,8*k+5:8*k+8,i,j] = 2 * data[n,k*8+5:k*8+8,i,j]
                           else:
                               dx[n,k*8:k*8+4,i,j] = (data[n,k*8:k*8+4,i,j] - (label[n,max_ious[i,j,k],1:5] / 32 - np.array([j,i,j,i])) / np.array([39.0, 12.0, 39.0, 12.0]))
                               dx[n,k*8+4,i,j] = 2 * (data[n,k*8+4,i,j] - self.max_iou[n,i,j,k])
                               dx[n,k*8+5:k*8+8,i,j] = 2 * data[n,k*8+5:k*8+8,i,j]
                               t = int(self.typ[n,i,j,k])
                               dx[n,k * 8 + 5 + t,i,j] = 2 * (data[n,k * 8 + 5 + t,i,j] - 1)
   
   class bboxAccMetric(mx.metric.EvalMetric):
       def __init__(self):
           super(bboxAccMetric, self).__init__('bboxAcc')
       
       def update(self, labels, preds):
           self.sum_metric += 1.0
           self.num_inst += 1
   
   
    # Two-stream VGG-16-style detection network: an 'img' stream and a 'dif'
    # stream with identical layer layouts, summed after pool5 and reduced to
    # 40 channels (5 anchors x 8 values) for the custom DetectionLoss.
    img = mx.sym.Variable('img')
    dif = mx.sym.Variable('dif')
    lab = mx.sym.Variable('label')
    # Block 1: 2 x conv(3x3, 64) + max-pool /2, per stream.
    img_conv1 = mx.sym.Convolution(img, name='img_conv1', kernel=(3,3,), pad=(1,1,), num_filter=64, )
    img_conv1 = mx.sym.Activation(img_conv1, act_type='relu')
    dif_conv1 = mx.sym.Convolution(dif, name='dif_conv1', kernel=(3,3,), pad=(1,1,), num_filter=64, )
    dif_conv1 = mx.sym.Activation(dif_conv1, act_type='relu')
    img_conv2 = mx.sym.Convolution(img_conv1, name='img_conv2', kernel=(3,3,), pad=(1,1,), num_filter=64, )
    img_conv2 = mx.sym.Activation(img_conv2, act_type='relu')
    dif_conv2 = mx.sym.Convolution(dif_conv1, name='dif_conv2', kernel=(3,3,), pad=(1,1,), num_filter=64, )
    dif_conv2 = mx.sym.Activation(dif_conv2, act_type='relu')
    img_pool1 = mx.sym.Pooling(img_conv2, name='img_pool1',kernel=(2,2), stride=(2,2), pool_type='max', pooling_convention='full')
    dif_pool1 = mx.sym.Pooling(dif_conv2, name='dif_pool1',kernel=(2,2), stride=(2,2), pool_type='max', pooling_convention='full')
    # Block 2: 2 x conv(3x3, 128) + max-pool /2.
    img_conv3 = mx.sym.Convolution(img_pool1, name='img_conv3', kernel=(3,3,), pad=(1,1,), num_filter=128, )
    img_conv3 = mx.sym.Activation(img_conv3, act_type='relu')
    dif_conv3 = mx.sym.Convolution(dif_pool1, name='dif_conv3', kernel=(3,3,), pad=(1,1,), num_filter=128, )
    dif_conv3 = mx.sym.Activation(dif_conv3, act_type='relu')
    img_conv4 = mx.sym.Convolution(img_conv3, name='img_conv4', kernel=(3,3,), pad=(1,1,), num_filter=128, )
    img_conv4 = mx.sym.Activation(img_conv4, act_type='relu')
    dif_conv4 = mx.sym.Convolution(dif_conv3, name='dif_conv4', kernel=(3,3,), pad=(1,1,), num_filter=128, )
    dif_conv4 = mx.sym.Activation(dif_conv4, act_type='relu')
    img_pool2 = mx.sym.Pooling(img_conv4, name='img_pool2',kernel=(2,2), stride=(2,2), pool_type='max', pooling_convention='full')
    dif_pool2 = mx.sym.Pooling(dif_conv4, name='dif_pool2',kernel=(2,2), stride=(2,2), pool_type='max', pooling_convention='full')
    # Block 3: 3 x conv(3x3, 256) + max-pool /2.
    img_conv5 = mx.sym.Convolution(img_pool2, name='img_conv5', kernel=(3,3,), pad=(1,1,), num_filter=256, )
    img_conv5 = mx.sym.Activation(img_conv5, act_type='relu')
    dif_conv5 = mx.sym.Convolution(dif_pool2, name='dif_conv5', kernel=(3,3,), pad=(1,1,), num_filter=256, )
    dif_conv5 = mx.sym.Activation(dif_conv5, act_type='relu')
    img_conv6 = mx.sym.Convolution(img_conv5, name='img_conv6', kernel=(3,3,), pad=(1,1,), num_filter=256, )
    img_conv6 = mx.sym.Activation(img_conv6, act_type='relu')
    dif_conv6 = mx.sym.Convolution(dif_conv5, name='dif_conv6', kernel=(3,3,), pad=(1,1,), num_filter=256, )
    dif_conv6 = mx.sym.Activation(dif_conv6, act_type='relu')
    img_conv7 = mx.sym.Convolution(img_conv6, name='img_conv7', kernel=(3,3,), pad=(1,1,), num_filter=256, )
    img_conv7 = mx.sym.Activation(img_conv7, act_type='relu')
    dif_conv7 = mx.sym.Convolution(dif_conv6, name='dif_conv7', kernel=(3,3,), pad=(1,1,), num_filter=256, )
    dif_conv7 = mx.sym.Activation(dif_conv7, act_type='relu')
    img_pool3 = mx.sym.Pooling(img_conv7, name='img_pool3', kernel=(2,2,), stride=(2,2), pool_type='max', pooling_convention='full')
    dif_pool3 = mx.sym.Pooling(dif_conv7, name='dif_pool3', kernel=(2,2,), stride=(2,2), pool_type='max', pooling_convention='full')
    # Block 4: 3 x conv(3x3, 512) + max-pool /2.
    img_conv8 = mx.sym.Convolution(img_pool3, name='img_conv8', kernel=(3,3,), pad=(1,1,), num_filter=512, )
    img_conv8 = mx.sym.Activation(img_conv8, act_type='relu')
    dif_conv8 = mx.sym.Convolution(dif_pool3, name='dif_conv8', kernel=(3,3,), pad=(1,1,), num_filter=512, )
    dif_conv8 = mx.sym.Activation(dif_conv8, act_type='relu')
    img_conv9 = mx.sym.Convolution(img_conv8, name='img_conv9', kernel=(3,3,), pad=(1,1,), num_filter=512, )
    img_conv9 = mx.sym.Activation(img_conv9, act_type='relu')
    dif_conv9 = mx.sym.Convolution(dif_conv8, name='dif_conv9', kernel=(3,3,), pad=(1,1,), num_filter=512, )
    dif_conv9 = mx.sym.Activation(dif_conv9, act_type='relu')
    img_conv10 = mx.sym.Convolution(img_conv9, name='img_conv10', kernel=(3,3,), pad=(1,1,), num_filter=512, )
    img_conv10 = mx.sym.Activation(img_conv10, act_type='relu')
    dif_conv10 = mx.sym.Convolution(dif_conv9, name='dif_conv10', kernel=(3,3,), pad=(1,1,), num_filter=512, )
    dif_conv10 = mx.sym.Activation(dif_conv10, act_type='relu')
    img_pool4 = mx.sym.Pooling(img_conv10, name='img_pool4', kernel=(2,2,), stride=(2,2), pool_type='max', pooling_convention='full')
    dif_pool4 = mx.sym.Pooling(dif_conv10, name='dif_pool4', kernel=(2,2,), stride=(2,2), pool_type='max', pooling_convention='full')
    # Block 5: 3 x conv(3x3, 512), then two 1x1 conv(512) layers.
    img_conv11 = mx.sym.Convolution(img_pool4, name='img_conv11', kernel=(3,3,), pad=(1,1,), num_filter=512, )
    img_conv11 = mx.sym.Activation(img_conv11, act_type='relu')
    dif_conv11 = mx.sym.Convolution(dif_pool4, name='dif_conv11', kernel=(3,3,), pad=(1,1,), num_filter=512, )
    dif_conv11 = mx.sym.Activation(dif_conv11, act_type='relu')
    img_conv12 = mx.sym.Convolution(img_conv11, name='img_conv12', kernel=(3,3,), pad=(1,1,), num_filter=512, )
    img_conv12 = mx.sym.Activation(img_conv12, act_type='relu')
    dif_conv12 = mx.sym.Convolution(dif_conv11, name='dif_conv12', kernel=(3,3,), pad=(1,1,), num_filter=512, )
    dif_conv12 = mx.sym.Activation(dif_conv12, act_type='relu')
    img_conv13 = mx.sym.Convolution(img_conv12, name='img_conv13', kernel=(3,3,), pad=(1,1,), num_filter=512, )
    img_conv13 = mx.sym.Activation(img_conv13, act_type='relu')
    dif_conv13 = mx.sym.Convolution(dif_conv12, name='dif_conv13', kernel=(3,3,), pad=(1,1,), num_filter=512, )
    dif_conv13 = mx.sym.Activation(dif_conv13, act_type='relu')
    img_conv14 = mx.sym.Convolution(img_conv13, name='img_conv14', kernel=(1,1,), num_filter=512, )
    img_conv14 = mx.sym.Activation(img_conv14, act_type='relu')
    dif_conv14 = mx.sym.Convolution(dif_conv13, name='dif_conv14', kernel=(1,1,), num_filter=512, )
    dif_conv14 = mx.sym.Activation(dif_conv14, act_type='relu')
    img_conv15 = mx.sym.Convolution(img_conv14, name='img_conv15', kernel=(1,1,), num_filter=512, )
    img_conv15 = mx.sym.Activation(img_conv15, act_type='relu')
    dif_conv15 = mx.sym.Convolution(dif_conv14, name='dif_conv15', kernel=(1,1,), num_filter=512, )
    dif_conv15 = mx.sym.Activation(dif_conv15, act_type='relu')
    # NOTE(review): pool5 consumes conv13, so conv14/conv15 in both streams
    # are built but never used by the loss path -- confirm whether pool5 was
    # meant to take conv15 instead.
    img_pool5 = mx.sym.Pooling(img_conv13, name='img_pool5', kernel=(2,2,), stride=(2,2), pool_type='max', pooling_convention='full')
    dif_pool5 = mx.sym.Pooling(dif_conv13, name='dif_pool5', kernel=(2,2,), stride=(2,2), pool_type='max', pooling_convention='full')
    # Fuse the two streams by element-wise sum.
    # con = mx.sym.sum([img_pool5, dif_pool5], axis=0)
    con = img_pool5 + dif_pool5
    # Detection head: 1x1 conv to 40 channels (5 anchors x 8 values), then
    # tanh, which bounds every output to the open interval (-1, 1).
    detect = mx.sym.Convolution(con, name='detect', kernel=(1,1,), num_filter=40)
    acti = mx.sym.Activation(data=detect, act_type='tanh')
    # Attach the custom numpy loss and build the training module.
    loss_layer = DetectionLoss()
    loss = loss_layer(data = acti, label=lab, name='loss')
    mod = mx.mod.Module(loss, data_names=('img','dif') , label_names=('label',), context=mx.gpu(1))
   ```

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services