You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mxnet.apache.org by lx...@apache.org on 2017/07/07 15:58:15 UTC
[02/50] [abbrv] incubator-mxnet-test git commit: Better ssd (#6827)
http://git-wip-us.apache.org/repos/asf/incubator-mxnet-test/blob/cc62aded/example/ssd/symbol/legacy_vgg16_ssd_512.py
----------------------------------------------------------------------
diff --git a/example/ssd/symbol/legacy_vgg16_ssd_512.py b/example/ssd/symbol/legacy_vgg16_ssd_512.py
new file mode 100644
index 0000000..15424f9
--- /dev/null
+++ b/example/ssd/symbol/legacy_vgg16_ssd_512.py
@@ -0,0 +1,194 @@
+import mxnet as mx
+from common import legacy_conv_act_layer
+from common import multibox_layer
+
+def get_symbol_train(num_classes=20, nms_thresh=0.5, force_suppress=False, nms_topk=400):
+ """
+ Single-shot multi-box detection with VGG 16 layers ConvNet
+ This is a modified version, with fc6/fc7 layers replaced by conv layers
+ And the network is slightly smaller than original VGG 16 network
+ This is a training network with losses
+
+ Parameters:
+ ----------
+ num_classes: int
+ number of object classes not including background
+ nms_thresh : float
+ non-maximum suppression threshold
+ force_suppress : boolean
+ whether suppress different class objects
+ nms_topk : int
+ apply NMS to top K detections
+
+ Returns:
+ ----------
+ mx.Symbol
+ """
+ data = mx.symbol.Variable(name="data")
+ label = mx.symbol.Variable(name="label")
+
+ # group 1
+ conv1_1 = mx.symbol.Convolution(
+ data=data, kernel=(3, 3), pad=(1, 1), num_filter=64, name="conv1_1")
+ relu1_1 = mx.symbol.Activation(data=conv1_1, act_type="relu", name="relu1_1")
+ conv1_2 = mx.symbol.Convolution(
+ data=relu1_1, kernel=(3, 3), pad=(1, 1), num_filter=64, name="conv1_2")
+ relu1_2 = mx.symbol.Activation(data=conv1_2, act_type="relu", name="relu1_2")
+ pool1 = mx.symbol.Pooling(
+ data=relu1_2, pool_type="max", kernel=(2, 2), stride=(2, 2), name="pool1")
+ # group 2
+ conv2_1 = mx.symbol.Convolution(
+ data=pool1, kernel=(3, 3), pad=(1, 1), num_filter=128, name="conv2_1")
+ relu2_1 = mx.symbol.Activation(data=conv2_1, act_type="relu", name="relu2_1")
+ conv2_2 = mx.symbol.Convolution(
+ data=relu2_1, kernel=(3, 3), pad=(1, 1), num_filter=128, name="conv2_2")
+ relu2_2 = mx.symbol.Activation(data=conv2_2, act_type="relu", name="relu2_2")
+ pool2 = mx.symbol.Pooling(
+ data=relu2_2, pool_type="max", kernel=(2, 2), stride=(2, 2), name="pool2")
+ # group 3
+ conv3_1 = mx.symbol.Convolution(
+ data=pool2, kernel=(3, 3), pad=(1, 1), num_filter=256, name="conv3_1")
+ relu3_1 = mx.symbol.Activation(data=conv3_1, act_type="relu", name="relu3_1")
+ conv3_2 = mx.symbol.Convolution(
+ data=relu3_1, kernel=(3, 3), pad=(1, 1), num_filter=256, name="conv3_2")
+ relu3_2 = mx.symbol.Activation(data=conv3_2, act_type="relu", name="relu3_2")
+ conv3_3 = mx.symbol.Convolution(
+ data=relu3_2, kernel=(3, 3), pad=(1, 1), num_filter=256, name="conv3_3")
+ relu3_3 = mx.symbol.Activation(data=conv3_3, act_type="relu", name="relu3_3")
+ pool3 = mx.symbol.Pooling(
+ data=relu3_3, pool_type="max", kernel=(2, 2), stride=(2, 2), \
+ pooling_convention="full", name="pool3")
+ # group 4
+ conv4_1 = mx.symbol.Convolution(
+ data=pool3, kernel=(3, 3), pad=(1, 1), num_filter=512, name="conv4_1")
+ relu4_1 = mx.symbol.Activation(data=conv4_1, act_type="relu", name="relu4_1")
+ conv4_2 = mx.symbol.Convolution(
+ data=relu4_1, kernel=(3, 3), pad=(1, 1), num_filter=512, name="conv4_2")
+ relu4_2 = mx.symbol.Activation(data=conv4_2, act_type="relu", name="relu4_2")
+ conv4_3 = mx.symbol.Convolution(
+ data=relu4_2, kernel=(3, 3), pad=(1, 1), num_filter=512, name="conv4_3")
+ relu4_3 = mx.symbol.Activation(data=conv4_3, act_type="relu", name="relu4_3")
+ pool4 = mx.symbol.Pooling(
+ data=relu4_3, pool_type="max", kernel=(2, 2), stride=(2, 2), name="pool4")
+ # group 5
+ conv5_1 = mx.symbol.Convolution(
+ data=pool4, kernel=(3, 3), pad=(1, 1), num_filter=512, name="conv5_1")
+ relu5_1 = mx.symbol.Activation(data=conv5_1, act_type="relu", name="relu5_1")
+ conv5_2 = mx.symbol.Convolution(
+ data=relu5_1, kernel=(3, 3), pad=(1, 1), num_filter=512, name="conv5_2")
+ relu5_2 = mx.symbol.Activation(data=conv5_2, act_type="relu", name="relu5_2")
+ conv5_3 = mx.symbol.Convolution(
+ data=relu5_2, kernel=(3, 3), pad=(1, 1), num_filter=512, name="conv5_3")
+ relu5_3 = mx.symbol.Activation(data=conv5_3, act_type="relu", name="relu5_3")
+ pool5 = mx.symbol.Pooling(
+ data=relu5_3, pool_type="max", kernel=(3, 3), stride=(1, 1),
+ pad=(1,1), name="pool5")
+ # group 6
+ conv6 = mx.symbol.Convolution(
+ data=pool5, kernel=(3, 3), pad=(6, 6), dilate=(6, 6),
+ num_filter=1024, name="conv6")
+ relu6 = mx.symbol.Activation(data=conv6, act_type="relu", name="relu6")
+ # drop6 = mx.symbol.Dropout(data=relu6, p=0.5, name="drop6")
+ # group 7
+ conv7 = mx.symbol.Convolution(
+ data=relu6, kernel=(1, 1), pad=(0, 0), num_filter=1024, name="conv7")
+ relu7 = mx.symbol.Activation(data=conv7, act_type="relu", name="relu7")
+ # drop7 = mx.symbol.Dropout(data=relu7, p=0.5, name="drop7")
+
+ ### ssd extra layers ###
+ conv8_1, relu8_1 = legacy_conv_act_layer(relu7, "8_1", 256, kernel=(1,1), pad=(0,0), \
+ stride=(1,1), act_type="relu", use_batchnorm=False)
+ conv8_2, relu8_2 = legacy_conv_act_layer(relu8_1, "8_2", 512, kernel=(3,3), pad=(1,1), \
+ stride=(2,2), act_type="relu", use_batchnorm=False)
+ conv9_1, relu9_1 = legacy_conv_act_layer(relu8_2, "9_1", 128, kernel=(1,1), pad=(0,0), \
+ stride=(1,1), act_type="relu", use_batchnorm=False)
+ conv9_2, relu9_2 = legacy_conv_act_layer(relu9_1, "9_2", 256, kernel=(3,3), pad=(1,1), \
+ stride=(2,2), act_type="relu", use_batchnorm=False)
+ conv10_1, relu10_1 = legacy_conv_act_layer(relu9_2, "10_1", 128, kernel=(1,1), pad=(0,0), \
+ stride=(1,1), act_type="relu", use_batchnorm=False)
+ conv10_2, relu10_2 = legacy_conv_act_layer(relu10_1, "10_2", 256, kernel=(3,3), pad=(1,1), \
+ stride=(2,2), act_type="relu", use_batchnorm=False)
+ conv11_1, relu11_1 = legacy_conv_act_layer(relu10_2, "11_1", 128, kernel=(1,1), pad=(0,0), \
+ stride=(1,1), act_type="relu", use_batchnorm=False)
+ conv11_2, relu11_2 = legacy_conv_act_layer(relu11_1, "11_2", 256, kernel=(3,3), pad=(1,1), \
+ stride=(2,2), act_type="relu", use_batchnorm=False)
+ conv12_1, relu12_1 = legacy_conv_act_layer(relu11_2, "12_1", 128, kernel=(1,1), pad=(0,0), \
+ stride=(1,1), act_type="relu", use_batchnorm=False)
+ conv12_2, relu12_2 = legacy_conv_act_layer(relu12_1, "12_2", 256, kernel=(4,4), pad=(1,1), \
+ stride=(1,1), act_type="relu", use_batchnorm=False)
+
+ # specific parameters for VGG16 network
+ from_layers = [relu4_3, relu7, relu8_2, relu9_2, relu10_2, relu11_2, relu12_2]
+ sizes = [[.07, .1025], [.15,.2121], [.3, .3674], [.45, .5196], [.6, .6708], \
+ [.75, .8216], [.9, .9721]]
+ ratios = [[1,2,.5], [1,2,.5,3,1./3], [1,2,.5,3,1./3], [1,2,.5,3,1./3], \
+ [1,2,.5,3,1./3], [1,2,.5], [1,2,.5]]
+ normalizations = [20, -1, -1, -1, -1, -1, -1]
+ steps = [ x / 512.0 for x in [8, 16, 32, 64, 128, 256, 512]]
+ num_channels = [512]
+
+ loc_preds, cls_preds, anchor_boxes = multibox_layer(from_layers, \
+ num_classes, sizes=sizes, ratios=ratios, normalization=normalizations, \
+ num_channels=num_channels, clip=False, interm_layer=0, steps=steps)
+
+ tmp = mx.contrib.symbol.MultiBoxTarget(
+ *[anchor_boxes, label, cls_preds], overlap_threshold=.5, \
+ ignore_label=-1, negative_mining_ratio=3, minimum_negative_samples=0, \
+ negative_mining_thresh=.5, variances=(0.1, 0.1, 0.2, 0.2),
+ name="multibox_target")
+ loc_target = tmp[0]
+ loc_target_mask = tmp[1]
+ cls_target = tmp[2]
+
+ cls_prob = mx.symbol.SoftmaxOutput(data=cls_preds, label=cls_target, \
+ ignore_label=-1, use_ignore=True, grad_scale=1., multi_output=True, \
+ normalization='valid', name="cls_prob")
+ loc_loss_ = mx.symbol.smooth_l1(name="loc_loss_", \
+ data=loc_target_mask * (loc_preds - loc_target), scalar=1.0)
+ loc_loss = mx.symbol.MakeLoss(loc_loss_, grad_scale=1., \
+ normalization='valid', name="loc_loss")
+
+ # monitoring training status
+ cls_label = mx.symbol.MakeLoss(data=cls_target, grad_scale=0, name="cls_label")
+ det = mx.contrib.symbol.MultiBoxDetection(*[cls_prob, loc_preds, anchor_boxes], \
+ name="detection", nms_threshold=nms_thresh, force_suppress=force_suppress,
+ variances=(0.1, 0.1, 0.2, 0.2), nms_topk=nms_topk)
+ det = mx.symbol.MakeLoss(data=det, grad_scale=0, name="det_out")
+
+ # group output
+ out = mx.symbol.Group([cls_prob, loc_loss, cls_label, det])
+ return out
+
+def get_symbol(num_classes=20, nms_thresh=0.5, force_suppress=False, nms_topk=400):
+ """
+ Single-shot multi-box detection with VGG 16 layers ConvNet
+ This is a modified version, with fc6/fc7 layers replaced by conv layers
+ And the network is slightly smaller than original VGG 16 network
+ This is the detection network
+
+ Parameters:
+ ----------
+ num_classes: int
+ number of object classes not including background
+ nms_thresh : float
+ threshold of overlap for non-maximum suppression
+ force_suppress : boolean
+ whether suppress different class objects
+ nms_topk : int
+ apply NMS to top K detections
+
+ Returns:
+ ----------
+ mx.Symbol
+ """
+ net = get_symbol_train(num_classes)
+ cls_preds = net.get_internals()["multibox_cls_pred_output"]
+ loc_preds = net.get_internals()["multibox_loc_pred_output"]
+ anchor_boxes = net.get_internals()["multibox_anchors_output"]
+
+ cls_prob = mx.symbol.SoftmaxActivation(data=cls_preds, mode='channel', \
+ name='cls_prob')
+ out = mx.contrib.symbol.MultiBoxDetection(*[cls_prob, loc_preds, anchor_boxes], \
+ name="detection", nms_threshold=nms_thresh, force_suppress=force_suppress,
+ variances=(0.1, 0.1, 0.2, 0.2), nms_topk=nms_topk)
+ return out
http://git-wip-us.apache.org/repos/asf/incubator-mxnet-test/blob/cc62aded/example/ssd/symbol/resnet.py
----------------------------------------------------------------------
diff --git a/example/ssd/symbol/resnet.py b/example/ssd/symbol/resnet.py
new file mode 100644
index 0000000..9c121e8
--- /dev/null
+++ b/example/ssd/symbol/resnet.py
@@ -0,0 +1,169 @@
+'''
+Adapted from https://github.com/tornadomeet/ResNet/blob/master/symbol_resnet.py
+Original author Wei Wu
+
+Implemented the following paper:
+
+Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun. "Identity Mappings in Deep Residual Networks"
+'''
+import mxnet as mx
+
+def residual_unit(data, num_filter, stride, dim_match, name, bottle_neck=True, bn_mom=0.9, workspace=256, memonger=False):
+ """Return ResNet Unit symbol for building ResNet
+ Parameters
+ ----------
+ data : str
+ Input data
+ num_filter : int
+ Number of output channels
+ bnf : int
+ Bottle neck channels factor with regard to num_filter
+ stride : tupe
+ Stride used in convolution
+ dim_match : Boolen
+ True means channel number between input and output is the same, otherwise means differ
+ name : str
+ Base name of the operators
+ workspace : int
+ Workspace used in convolution operator
+ """
+ if bottle_neck:
+ # the same as https://github.com/facebook/fb.resnet.torch#notes, a bit difference with origin paper
+ bn1 = mx.sym.BatchNorm(data=data, fix_gamma=False, eps=2e-5, momentum=bn_mom, name=name + '_bn1')
+ act1 = mx.sym.Activation(data=bn1, act_type='relu', name=name + '_relu1')
+ conv1 = mx.sym.Convolution(data=act1, num_filter=int(num_filter*0.25), kernel=(1,1), stride=(1,1), pad=(0,0),
+ no_bias=True, workspace=workspace, name=name + '_conv1')
+ bn2 = mx.sym.BatchNorm(data=conv1, fix_gamma=False, eps=2e-5, momentum=bn_mom, name=name + '_bn2')
+ act2 = mx.sym.Activation(data=bn2, act_type='relu', name=name + '_relu2')
+ conv2 = mx.sym.Convolution(data=act2, num_filter=int(num_filter*0.25), kernel=(3,3), stride=stride, pad=(1,1),
+ no_bias=True, workspace=workspace, name=name + '_conv2')
+ bn3 = mx.sym.BatchNorm(data=conv2, fix_gamma=False, eps=2e-5, momentum=bn_mom, name=name + '_bn3')
+ act3 = mx.sym.Activation(data=bn3, act_type='relu', name=name + '_relu3')
+ conv3 = mx.sym.Convolution(data=act3, num_filter=num_filter, kernel=(1,1), stride=(1,1), pad=(0,0), no_bias=True,
+ workspace=workspace, name=name + '_conv3')
+ if dim_match:
+ shortcut = data
+ else:
+ shortcut = mx.sym.Convolution(data=act1, num_filter=num_filter, kernel=(1,1), stride=stride, no_bias=True,
+ workspace=workspace, name=name+'_sc')
+ if memonger:
+ shortcut._set_attr(mirror_stage='True')
+ return conv3 + shortcut
+ else:
+ bn1 = mx.sym.BatchNorm(data=data, fix_gamma=False, momentum=bn_mom, eps=2e-5, name=name + '_bn1')
+ act1 = mx.sym.Activation(data=bn1, act_type='relu', name=name + '_relu1')
+ conv1 = mx.sym.Convolution(data=act1, num_filter=num_filter, kernel=(3,3), stride=stride, pad=(1,1),
+ no_bias=True, workspace=workspace, name=name + '_conv1')
+ bn2 = mx.sym.BatchNorm(data=conv1, fix_gamma=False, momentum=bn_mom, eps=2e-5, name=name + '_bn2')
+ act2 = mx.sym.Activation(data=bn2, act_type='relu', name=name + '_relu2')
+ conv2 = mx.sym.Convolution(data=act2, num_filter=num_filter, kernel=(3,3), stride=(1,1), pad=(1,1),
+ no_bias=True, workspace=workspace, name=name + '_conv2')
+ if dim_match:
+ shortcut = data
+ else:
+ shortcut = mx.sym.Convolution(data=act1, num_filter=num_filter, kernel=(1,1), stride=stride, no_bias=True,
+ workspace=workspace, name=name+'_sc')
+ if memonger:
+ shortcut._set_attr(mirror_stage='True')
+ return conv2 + shortcut
+
+def resnet(units, num_stages, filter_list, num_classes, image_shape, bottle_neck=True, bn_mom=0.9, workspace=256, memonger=False):
+ """Return ResNet symbol of
+ Parameters
+ ----------
+ units : list
+ Number of units in each stage
+ num_stages : int
+ Number of stage
+ filter_list : list
+ Channel size of each stage
+ num_classes : int
+ Ouput size of symbol
+ dataset : str
+ Dataset type, only cifar10 and imagenet supports
+ workspace : int
+ Workspace used in convolution operator
+ """
+ num_unit = len(units)
+ assert(num_unit == num_stages)
+ data = mx.sym.Variable(name='data')
+ data = mx.sym.identity(data=data, name='id')
+ data = mx.sym.BatchNorm(data=data, fix_gamma=True, eps=2e-5, momentum=bn_mom, name='bn_data')
+ (nchannel, height, width) = image_shape
+ if height <= 32: # such as cifar10
+ body = mx.sym.Convolution(data=data, num_filter=filter_list[0], kernel=(3, 3), stride=(1,1), pad=(1, 1),
+ no_bias=True, name="conv0", workspace=workspace)
+ else: # often expected to be 224 such as imagenet
+ body = mx.sym.Convolution(data=data, num_filter=filter_list[0], kernel=(7, 7), stride=(2,2), pad=(3, 3),
+ no_bias=True, name="conv0", workspace=workspace)
+ body = mx.sym.BatchNorm(data=body, fix_gamma=False, eps=2e-5, momentum=bn_mom, name='bn0')
+ body = mx.sym.Activation(data=body, act_type='relu', name='relu0')
+ body = mx.symbol.Pooling(data=body, kernel=(3, 3), stride=(2,2), pad=(1,1), pool_type='max')
+
+ for i in range(num_stages):
+ body = residual_unit(body, filter_list[i+1], (1 if i==0 else 2, 1 if i==0 else 2), False,
+ name='stage%d_unit%d' % (i + 1, 1), bottle_neck=bottle_neck, workspace=workspace,
+ memonger=memonger)
+ for j in range(units[i]-1):
+ body = residual_unit(body, filter_list[i+1], (1,1), True, name='stage%d_unit%d' % (i + 1, j + 2),
+ bottle_neck=bottle_neck, workspace=workspace, memonger=memonger)
+ bn1 = mx.sym.BatchNorm(data=body, fix_gamma=False, eps=2e-5, momentum=bn_mom, name='bn1')
+ relu1 = mx.sym.Activation(data=bn1, act_type='relu', name='relu1')
+ # Although kernel is not used here when global_pool=True, we should put one
+ pool1 = mx.symbol.Pooling(data=relu1, global_pool=True, kernel=(7, 7), pool_type='avg', name='pool1')
+ flat = mx.symbol.Flatten(data=pool1)
+ fc1 = mx.symbol.FullyConnected(data=flat, num_hidden=num_classes, name='fc1')
+ return mx.symbol.SoftmaxOutput(data=fc1, name='softmax')
+
+def get_symbol(num_classes, num_layers, image_shape, conv_workspace=256, **kwargs):
+ """
+ Adapted from https://github.com/tornadomeet/ResNet/blob/master/train_resnet.py
+ Original author Wei Wu
+ """
+ image_shape = [int(l) for l in image_shape.split(',')]
+ (nchannel, height, width) = image_shape
+ if height <= 28:
+ num_stages = 3
+ if (num_layers-2) % 9 == 0 and num_layers >= 164:
+ per_unit = [(num_layers-2)//9]
+ filter_list = [16, 64, 128, 256]
+ bottle_neck = True
+ elif (num_layers-2) % 6 == 0 and num_layers < 164:
+ per_unit = [(num_layers-2)//6]
+ filter_list = [16, 16, 32, 64]
+ bottle_neck = False
+ else:
+ raise ValueError("no experiments done on num_layers {}, you can do it yourself".format(num_layers))
+ units = per_unit * num_stages
+ else:
+ if num_layers >= 50:
+ filter_list = [64, 256, 512, 1024, 2048]
+ bottle_neck = True
+ else:
+ filter_list = [64, 64, 128, 256, 512]
+ bottle_neck = False
+ num_stages = 4
+ if num_layers == 18:
+ units = [2, 2, 2, 2]
+ elif num_layers == 34:
+ units = [3, 4, 6, 3]
+ elif num_layers == 50:
+ units = [3, 4, 6, 3]
+ elif num_layers == 101:
+ units = [3, 4, 23, 3]
+ elif num_layers == 152:
+ units = [3, 8, 36, 3]
+ elif num_layers == 200:
+ units = [3, 24, 36, 3]
+ elif num_layers == 269:
+ units = [3, 30, 48, 8]
+ else:
+ raise ValueError("no experiments done on num_layers {}, you can do it yourself".format(num_layers))
+
+ return resnet(units = units,
+ num_stages = num_stages,
+ filter_list = filter_list,
+ num_classes = num_classes,
+ image_shape = image_shape,
+ bottle_neck = bottle_neck,
+ workspace = conv_workspace)
http://git-wip-us.apache.org/repos/asf/incubator-mxnet-test/blob/cc62aded/example/ssd/symbol/symbol_builder.py
----------------------------------------------------------------------
diff --git a/example/ssd/symbol/symbol_builder.py b/example/ssd/symbol/symbol_builder.py
new file mode 100644
index 0000000..48c9d1d
--- /dev/null
+++ b/example/ssd/symbol/symbol_builder.py
@@ -0,0 +1,166 @@
+import mxnet as mx
+from common import multi_layer_feature, multibox_layer
+
+
+def import_module(module_name):
+ """Helper function to import module"""
+ import sys, os
+ import importlib
+ sys.path.append(os.path.dirname(__file__))
+ return importlib.import_module(module_name)
+
+def get_symbol_train(network, num_classes, from_layers, num_filters, strides, pads,
+ sizes, ratios, normalizations=-1, steps=[], min_filter=128,
+ nms_thresh=0.5, force_suppress=False, nms_topk=400, **kwargs):
+ """Build network symbol for training SSD
+
+ Parameters
+ ----------
+ network : str
+ base network symbol name
+ num_classes : int
+ number of object classes not including background
+ from_layers : list of str
+ feature extraction layers, use '' for add extra layers
+ For example:
+ from_layers = ['relu4_3', 'fc7', '', '', '', '']
+ which means extract feature from relu4_3 and fc7, adding 4 extra layers
+ on top of fc7
+ num_filters : list of int
+ number of filters for extra layers, you can use -1 for extracted features,
+ however, if normalization and scale is applied, the number of filter for
+ that layer must be provided.
+ For example:
+ num_filters = [512, -1, 512, 256, 256, 256]
+ strides : list of int
+ strides for the 3x3 convolution appended, -1 can be used for extracted
+ feature layers
+ pads : list of int
+ paddings for the 3x3 convolution, -1 can be used for extracted layers
+ sizes : list or list of list
+ [min_size, max_size] for all layers or [[], [], []...] for specific layers
+ ratios : list or list of list
+ [ratio1, ratio2...] for all layers or [[], [], ...] for specific layers
+ normalizations : int or list of int
+ use normalizations value for all layers or [...] for specific layers,
+ -1 indicate no normalizations and scales
+ steps : list
+ specify steps for each MultiBoxPrior layer, leave empty, it will calculate
+ according to layer dimensions
+ min_filter : int
+ minimum number of filters used in 1x1 convolution
+ nms_thresh : float
+ non-maximum suppression threshold
+ force_suppress : boolean
+ whether suppress different class objects
+ nms_topk : int
+ apply NMS to top K detections
+
+ Returns
+ -------
+ mx.Symbol
+
+ """
+ label = mx.sym.Variable('label')
+ body = import_module(network).get_symbol(num_classes, **kwargs)
+ layers = multi_layer_feature(body, from_layers, num_filters, strides, pads,
+ min_filter=min_filter)
+
+ loc_preds, cls_preds, anchor_boxes = multibox_layer(layers, \
+ num_classes, sizes=sizes, ratios=ratios, normalization=normalizations, \
+ num_channels=num_filters, clip=False, interm_layer=0, steps=steps)
+
+ tmp = mx.contrib.symbol.MultiBoxTarget(
+ *[anchor_boxes, label, cls_preds], overlap_threshold=.5, \
+ ignore_label=-1, negative_mining_ratio=3, minimum_negative_samples=0, \
+ negative_mining_thresh=.5, variances=(0.1, 0.1, 0.2, 0.2),
+ name="multibox_target")
+ loc_target = tmp[0]
+ loc_target_mask = tmp[1]
+ cls_target = tmp[2]
+
+ cls_prob = mx.symbol.SoftmaxOutput(data=cls_preds, label=cls_target, \
+ ignore_label=-1, use_ignore=True, grad_scale=1., multi_output=True, \
+ normalization='valid', name="cls_prob")
+ loc_loss_ = mx.symbol.smooth_l1(name="loc_loss_", \
+ data=loc_target_mask * (loc_preds - loc_target), scalar=1.0)
+ loc_loss = mx.symbol.MakeLoss(loc_loss_, grad_scale=1., \
+ normalization='valid', name="loc_loss")
+
+ # monitoring training status
+ cls_label = mx.symbol.MakeLoss(data=cls_target, grad_scale=0, name="cls_label")
+ det = mx.contrib.symbol.MultiBoxDetection(*[cls_prob, loc_preds, anchor_boxes], \
+ name="detection", nms_threshold=nms_thresh, force_suppress=force_suppress,
+ variances=(0.1, 0.1, 0.2, 0.2), nms_topk=nms_topk)
+ det = mx.symbol.MakeLoss(data=det, grad_scale=0, name="det_out")
+
+ # group output
+ out = mx.symbol.Group([cls_prob, loc_loss, cls_label, det])
+ return out
+
+def get_symbol(network, num_classes, from_layers, num_filters, sizes, ratios,
+ strides, pads, normalizations=-1, steps=[], min_filter=128,
+ nms_thresh=0.5, force_suppress=False, nms_topk=400, **kwargs):
+ """Build network for testing SSD
+
+ Parameters
+ ----------
+ network : str
+ base network symbol name
+ num_classes : int
+ number of object classes not including background
+ from_layers : list of str
+ feature extraction layers, use '' for add extra layers
+ For example:
+ from_layers = ['relu4_3', 'fc7', '', '', '', '']
+ which means extract feature from relu4_3 and fc7, adding 4 extra layers
+ on top of fc7
+ num_filters : list of int
+ number of filters for extra layers, you can use -1 for extracted features,
+ however, if normalization and scale is applied, the number of filter for
+ that layer must be provided.
+ For example:
+ num_filters = [512, -1, 512, 256, 256, 256]
+ strides : list of int
+ strides for the 3x3 convolution appended, -1 can be used for extracted
+ feature layers
+ pads : list of int
+ paddings for the 3x3 convolution, -1 can be used for extracted layers
+ sizes : list or list of list
+ [min_size, max_size] for all layers or [[], [], []...] for specific layers
+ ratios : list or list of list
+ [ratio1, ratio2...] for all layers or [[], [], ...] for specific layers
+ normalizations : int or list of int
+ use normalizations value for all layers or [...] for specific layers,
+ -1 indicate no normalizations and scales
+ steps : list
+ specify steps for each MultiBoxPrior layer, leave empty, it will calculate
+ according to layer dimensions
+ min_filter : int
+ minimum number of filters used in 1x1 convolution
+ nms_thresh : float
+ non-maximum suppression threshold
+ force_suppress : boolean
+ whether suppress different class objects
+ nms_topk : int
+ apply NMS to top K detections
+
+ Returns
+ -------
+ mx.Symbol
+
+ """
+ body = import_module(network).get_symbol(num_classes, **kwargs)
+ layers = multi_layer_feature(body, from_layers, num_filters, strides, pads,
+ min_filter=min_filter)
+
+ loc_preds, cls_preds, anchor_boxes = multibox_layer(layers, \
+ num_classes, sizes=sizes, ratios=ratios, normalization=normalizations, \
+ num_channels=num_filters, clip=False, interm_layer=0, steps=steps)
+
+ cls_prob = mx.symbol.SoftmaxActivation(data=cls_preds, mode='channel', \
+ name='cls_prob')
+ out = mx.contrib.symbol.MultiBoxDetection(*[cls_prob, loc_preds, anchor_boxes], \
+ name="detection", nms_threshold=nms_thresh, force_suppress=force_suppress,
+ variances=(0.1, 0.1, 0.2, 0.2), nms_topk=nms_topk)
+ return out
http://git-wip-us.apache.org/repos/asf/incubator-mxnet-test/blob/cc62aded/example/ssd/symbol/symbol_factory.py
----------------------------------------------------------------------
diff --git a/example/ssd/symbol/symbol_factory.py b/example/ssd/symbol/symbol_factory.py
new file mode 100644
index 0000000..ef156f1
--- /dev/null
+++ b/example/ssd/symbol/symbol_factory.py
@@ -0,0 +1,122 @@
+"""Presets for various network configurations"""
+import logging
+import symbol_builder
+
+def get_config(network, data_shape, **kwargs):
+ """Configuration factory for various networks
+
+ Parameters
+ ----------
+ network : str
+ base network name, such as vgg_reduced, inceptionv3, resnet...
+ data_shape : int
+ input data dimension
+ kwargs : dict
+ extra arguments
+ """
+ if network == 'vgg16_reduced':
+ if data_shape >= 448:
+ from_layers = ['relu4_3', 'relu7', '', '', '', '', '']
+ num_filters = [512, -1, 512, 256, 256, 256, 256]
+ strides = [-1, -1, 2, 2, 2, 2, 1]
+ pads = [-1, -1, 1, 1, 1, 1, 1]
+ sizes = [[.07, .1025], [.15,.2121], [.3, .3674], [.45, .5196], [.6, .6708], \
+ [.75, .8216], [.9, .9721]]
+ ratios = [[1,2,.5], [1,2,.5,3,1./3], [1,2,.5,3,1./3], [1,2,.5,3,1./3], \
+ [1,2,.5,3,1./3], [1,2,.5], [1,2,.5]]
+ normalizations = [20, -1, -1, -1, -1, -1, -1]
+ steps = [] if data_shape != 512 else [x / 512.0 for x in
+ [8, 16, 32, 64, 128, 256, 512]]
+ else:
+ from_layers = ['relu4_3', 'relu7', '', '', '', '']
+ num_filters = [512, -1, 512, 256, 256, 256]
+ strides = [-1, -1, 2, 2, 1, 1]
+ pads = [-1, -1, 1, 1, 0, 0]
+ sizes = [[.1, .141], [.2,.272], [.37, .447], [.54, .619], [.71, .79], [.88, .961]]
+ ratios = [[1,2,.5], [1,2,.5,3,1./3], [1,2,.5,3,1./3], [1,2,.5,3,1./3], \
+ [1,2,.5], [1,2,.5]]
+ normalizations = [20, -1, -1, -1, -1, -1]
+ steps = [] if data_shape != 300 else [x / 300.0 for x in [8, 16, 32, 64, 100, 300]]
+ if not (data_shape == 300 or data_shape == 512):
+ logging.warn('data_shape %d was not tested, use with caucious.' % data_shape)
+ return locals()
+ elif network == 'inceptionv3':
+ from_layers = ['ch_concat_mixed_7_chconcat', 'ch_concat_mixed_10_chconcat', '', '', '', '']
+ num_filters = [-1, -1, 512, 256, 256, 128]
+ strides = [-1, -1, 2, 2, 2, 2]
+ pads = [-1, -1, 1, 1, 1, 1]
+ sizes = [[.1, .141], [.2,.272], [.37, .447], [.54, .619], [.71, .79], [.88, .961]]
+ ratios = [[1,2,.5], [1,2,.5,3,1./3], [1,2,.5,3,1./3], [1,2,.5,3,1./3], \
+ [1,2,.5], [1,2,.5]]
+ normalizations = -1
+ steps = []
+ return locals()
+ elif network == 'resnet50':
+ num_layers = 50
+ image_shape = '3,224,224' # resnet require it as shape check
+ network = 'resnet'
+ from_layers = ['_plus12', '_plus15', '', '', '', '']
+ num_filters = [-1, -1, 512, 256, 256, 128]
+ strides = [-1, -1, 2, 2, 2, 2]
+ pads = [-1, -1, 1, 1, 1, 1]
+ sizes = [[.1, .141], [.2,.272], [.37, .447], [.54, .619], [.71, .79], [.88, .961]]
+ ratios = [[1,2,.5], [1,2,.5,3,1./3], [1,2,.5,3,1./3], [1,2,.5,3,1./3], \
+ [1,2,.5], [1,2,.5]]
+ normalizations = -1
+ steps = []
+ return locals()
+ elif network == 'resnet101':
+ num_layers = 101
+ image_shape = '3,224,224'
+ network = 'resnet'
+ from_layers = ['_plus12', '_plus15', '', '', '', '']
+ num_filters = [-1, -1, 512, 256, 256, 128]
+ strides = [-1, -1, 2, 2, 2, 2]
+ pads = [-1, -1, 1, 1, 1, 1]
+ sizes = [[.1, .141], [.2,.272], [.37, .447], [.54, .619], [.71, .79], [.88, .961]]
+ ratios = [[1,2,.5], [1,2,.5,3,1./3], [1,2,.5,3,1./3], [1,2,.5,3,1./3], \
+ [1,2,.5], [1,2,.5]]
+ normalizations = -1
+ steps = []
+ return locals()
+ else:
+ msg = 'No configuration found for %s with data_shape %d' % (network, data_shape)
+ raise NotImplementedError(msg)
+
+def get_symbol_train(network, data_shape, **kwargs):
+ """Wrapper for get symbol for train
+
+ Parameters
+ ----------
+ network : str
+ name for the base network symbol
+ data_shape : int
+ input shape
+ kwargs : dict
+ see symbol_builder.get_symbol_train for more details
+ """
+ if network.startswith('legacy'):
+ logging.warn('Using legacy model.')
+ return symbol_builder.import_module(network).get_symbol_train(**kwargs)
+ config = get_config(network, data_shape, **kwargs).copy()
+ config.update(kwargs)
+ return symbol_builder.get_symbol_train(**config)
+
+def get_symbol(network, data_shape, **kwargs):
+ """Wrapper for get symbol for test
+
+ Parameters
+ ----------
+ network : str
+ name for the base network symbol
+ data_shape : int
+ input shape
+ kwargs : dict
+ see symbol_builder.get_symbol for more details
+ """
+ if network.startswith('legacy'):
+ logging.warn('Using legacy model.')
+ return symbol_builder.import_module(network).get_symbol(**kwargs)
+ config = get_config(network, data_shape, **kwargs).copy()
+ config.update(kwargs)
+ return symbol_builder.get_symbol(**config)
http://git-wip-us.apache.org/repos/asf/incubator-mxnet-test/blob/cc62aded/example/ssd/symbol/symbol_vgg16_ssd_300.py
----------------------------------------------------------------------
diff --git a/example/ssd/symbol/symbol_vgg16_ssd_300.py b/example/ssd/symbol/symbol_vgg16_ssd_300.py
deleted file mode 100644
index e406746..0000000
--- a/example/ssd/symbol/symbol_vgg16_ssd_300.py
+++ /dev/null
@@ -1,189 +0,0 @@
-import mxnet as mx
-from common import conv_act_layer
-from common import multibox_layer
-
-def get_symbol_train(num_classes=20, nms_thresh=0.5, force_suppress=False, nms_topk=400):
- """
- Single-shot multi-box detection with VGG 16 layers ConvNet
- This is a modified version, with fc6/fc7 layers replaced by conv layers
- And the network is slightly smaller than original VGG 16 network
- This is a training network with losses
-
- Parameters:
- ----------
- num_classes: int
- number of object classes not including background
- nms_thresh : float
- non-maximum suppression threshold
- force_suppress : boolean
- whether suppress different class objects
- nms_topk : int
- apply NMS to top K detections
-
- Returns:
- ----------
- mx.Symbol
- """
- data = mx.symbol.Variable(name="data")
- label = mx.symbol.Variable(name="label")
-
- # group 1
- conv1_1 = mx.symbol.Convolution(
- data=data, kernel=(3, 3), pad=(1, 1), num_filter=64, name="conv1_1")
- relu1_1 = mx.symbol.Activation(data=conv1_1, act_type="relu", name="relu1_1")
- conv1_2 = mx.symbol.Convolution(
- data=relu1_1, kernel=(3, 3), pad=(1, 1), num_filter=64, name="conv1_2")
- relu1_2 = mx.symbol.Activation(data=conv1_2, act_type="relu", name="relu1_2")
- pool1 = mx.symbol.Pooling(
- data=relu1_2, pool_type="max", kernel=(2, 2), stride=(2, 2), name="pool1")
- # group 2
- conv2_1 = mx.symbol.Convolution(
- data=pool1, kernel=(3, 3), pad=(1, 1), num_filter=128, name="conv2_1")
- relu2_1 = mx.symbol.Activation(data=conv2_1, act_type="relu", name="relu2_1")
- conv2_2 = mx.symbol.Convolution(
- data=relu2_1, kernel=(3, 3), pad=(1, 1), num_filter=128, name="conv2_2")
- relu2_2 = mx.symbol.Activation(data=conv2_2, act_type="relu", name="relu2_2")
- pool2 = mx.symbol.Pooling(
- data=relu2_2, pool_type="max", kernel=(2, 2), stride=(2, 2), name="pool2")
- # group 3
- conv3_1 = mx.symbol.Convolution(
- data=pool2, kernel=(3, 3), pad=(1, 1), num_filter=256, name="conv3_1")
- relu3_1 = mx.symbol.Activation(data=conv3_1, act_type="relu", name="relu3_1")
- conv3_2 = mx.symbol.Convolution(
- data=relu3_1, kernel=(3, 3), pad=(1, 1), num_filter=256, name="conv3_2")
- relu3_2 = mx.symbol.Activation(data=conv3_2, act_type="relu", name="relu3_2")
- conv3_3 = mx.symbol.Convolution(
- data=relu3_2, kernel=(3, 3), pad=(1, 1), num_filter=256, name="conv3_3")
- relu3_3 = mx.symbol.Activation(data=conv3_3, act_type="relu", name="relu3_3")
- pool3 = mx.symbol.Pooling(
- data=relu3_3, pool_type="max", kernel=(2, 2), stride=(2, 2), \
- pooling_convention="full", name="pool3")
- # group 4
- conv4_1 = mx.symbol.Convolution(
- data=pool3, kernel=(3, 3), pad=(1, 1), num_filter=512, name="conv4_1")
- relu4_1 = mx.symbol.Activation(data=conv4_1, act_type="relu", name="relu4_1")
- conv4_2 = mx.symbol.Convolution(
- data=relu4_1, kernel=(3, 3), pad=(1, 1), num_filter=512, name="conv4_2")
- relu4_2 = mx.symbol.Activation(data=conv4_2, act_type="relu", name="relu4_2")
- conv4_3 = mx.symbol.Convolution(
- data=relu4_2, kernel=(3, 3), pad=(1, 1), num_filter=512, name="conv4_3")
- relu4_3 = mx.symbol.Activation(data=conv4_3, act_type="relu", name="relu4_3")
- pool4 = mx.symbol.Pooling(
- data=relu4_3, pool_type="max", kernel=(2, 2), stride=(2, 2), name="pool4")
- # group 5
- conv5_1 = mx.symbol.Convolution(
- data=pool4, kernel=(3, 3), pad=(1, 1), num_filter=512, name="conv5_1")
- relu5_1 = mx.symbol.Activation(data=conv5_1, act_type="relu", name="relu5_1")
- conv5_2 = mx.symbol.Convolution(
- data=relu5_1, kernel=(3, 3), pad=(1, 1), num_filter=512, name="conv5_2")
- relu5_2 = mx.symbol.Activation(data=conv5_2, act_type="relu", name="relu5_2")
- conv5_3 = mx.symbol.Convolution(
- data=relu5_2, kernel=(3, 3), pad=(1, 1), num_filter=512, name="conv5_3")
- relu5_3 = mx.symbol.Activation(data=conv5_3, act_type="relu", name="relu5_3")
- pool5 = mx.symbol.Pooling(
- data=relu5_3, pool_type="max", kernel=(3, 3), stride=(1, 1),
- pad=(1,1), name="pool5")
- # group 6
- conv6 = mx.symbol.Convolution(
- data=pool5, kernel=(3, 3), pad=(6, 6), dilate=(6, 6),
- num_filter=1024, name="conv6")
- relu6 = mx.symbol.Activation(data=conv6, act_type="relu", name="relu6")
- # drop6 = mx.symbol.Dropout(data=relu6, p=0.5, name="drop6")
- # group 7
- conv7 = mx.symbol.Convolution(
- data=relu6, kernel=(1, 1), pad=(0, 0), num_filter=1024, name="conv7")
- relu7 = mx.symbol.Activation(data=conv7, act_type="relu", name="relu7")
- # drop7 = mx.symbol.Dropout(data=relu7, p=0.5, name="drop7")
-
- ### ssd extra layers ###
- conv8_1, relu8_1 = conv_act_layer(relu7, "8_1", 256, kernel=(1,1), pad=(0,0), \
- stride=(1,1), act_type="relu", use_batchnorm=False)
- conv8_2, relu8_2 = conv_act_layer(relu8_1, "8_2", 512, kernel=(3,3), pad=(1,1), \
- stride=(2,2), act_type="relu", use_batchnorm=False)
- conv9_1, relu9_1 = conv_act_layer(relu8_2, "9_1", 128, kernel=(1,1), pad=(0,0), \
- stride=(1,1), act_type="relu", use_batchnorm=False)
- conv9_2, relu9_2 = conv_act_layer(relu9_1, "9_2", 256, kernel=(3,3), pad=(1,1), \
- stride=(2,2), act_type="relu", use_batchnorm=False)
- conv10_1, relu10_1 = conv_act_layer(relu9_2, "10_1", 128, kernel=(1,1), pad=(0,0), \
- stride=(1,1), act_type="relu", use_batchnorm=False)
- conv10_2, relu10_2 = conv_act_layer(relu10_1, "10_2", 256, kernel=(3,3), pad=(0,0), \
- stride=(1,1), act_type="relu", use_batchnorm=False)
- conv11_1, relu11_1 = conv_act_layer(relu10_2, "11_1", 128, kernel=(1,1), pad=(0,0), \
- stride=(1,1), act_type="relu", use_batchnorm=False)
- conv11_2, relu11_2 = conv_act_layer(relu11_1, "11_2", 256, kernel=(3,3), pad=(0,0), \
- stride=(1,1), act_type="relu", use_batchnorm=False)
-
- # specific parameters for VGG16 network
- from_layers = [relu4_3, relu7, relu8_2, relu9_2, relu10_2, relu11_2]
- sizes = [[.1, .141], [.2,.272], [.37, .447], [.54, .619], [.71, .79], [.88, .961]]
- ratios = [[1,2,.5], [1,2,.5,3,1./3], [1,2,.5,3,1./3], [1,2,.5,3,1./3], \
- [1,2,.5], [1,2,.5]]
- normalizations = [20, -1, -1, -1, -1, -1]
- steps = [ x / 300.0 for x in [8, 16, 32, 64, 100, 300]]
- num_channels = [512]
-
- loc_preds, cls_preds, anchor_boxes = multibox_layer(from_layers, \
- num_classes, sizes=sizes, ratios=ratios, normalization=normalizations, \
- num_channels=num_channels, clip=False, interm_layer=0, steps=steps)
-
- tmp = mx.contrib.symbol.MultiBoxTarget(
- *[anchor_boxes, label, cls_preds], overlap_threshold=.5, \
- ignore_label=-1, negative_mining_ratio=3, minimum_negative_samples=0, \
- negative_mining_thresh=.5, variances=(0.1, 0.1, 0.2, 0.2),
- name="multibox_target")
- loc_target = tmp[0]
- loc_target_mask = tmp[1]
- cls_target = tmp[2]
-
- cls_prob = mx.symbol.SoftmaxOutput(data=cls_preds, label=cls_target, \
- ignore_label=-1, use_ignore=True, grad_scale=1., multi_output=True, \
- normalization='valid', name="cls_prob")
- loc_loss_ = mx.symbol.smooth_l1(name="loc_loss_", \
- data=loc_target_mask * (loc_preds - loc_target), scalar=1.0)
- loc_loss = mx.symbol.MakeLoss(loc_loss_, grad_scale=1., \
- normalization='valid', name="loc_loss")
-
- # monitoring training status
- cls_label = mx.symbol.MakeLoss(data=cls_target, grad_scale=0, name="cls_label")
- det = mx.contrib.symbol.MultiBoxDetection(*[cls_prob, loc_preds, anchor_boxes], \
- name="detection", nms_threshold=nms_thresh, force_suppress=force_suppress,
- variances=(0.1, 0.1, 0.2, 0.2), nms_topk=nms_topk)
- det = mx.symbol.MakeLoss(data=det, grad_scale=0, name="det_out")
-
- # group output
- out = mx.symbol.Group([cls_prob, loc_loss, cls_label, det])
- return out
-
-def get_symbol(num_classes=20, nms_thresh=0.5, force_suppress=False, nms_topk=400):
- """
- Single-shot multi-box detection with VGG 16 layers ConvNet
- This is a modified version, with fc6/fc7 layers replaced by conv layers
- And the network is slightly smaller than original VGG 16 network
- This is the detection network
-
- Parameters:
- ----------
- num_classes: int
- number of object classes not including background
- nms_thresh : float
- threshold of overlap for non-maximum suppression
- force_suppress : boolean
- whether suppress different class objects
- nms_topk : int
- apply NMS to top K detections
-
- Returns:
- ----------
- mx.Symbol
- """
- net = get_symbol_train(num_classes)
- cls_preds = net.get_internals()["multibox_cls_pred_output"]
- loc_preds = net.get_internals()["multibox_loc_pred_output"]
- anchor_boxes = net.get_internals()["multibox_anchors_output"]
-
- cls_prob = mx.symbol.SoftmaxActivation(data=cls_preds, mode='channel', \
- name='cls_prob')
- out = mx.contrib.symbol.MultiBoxDetection(*[cls_prob, loc_preds, anchor_boxes], \
- name="detection", nms_threshold=nms_thresh, force_suppress=force_suppress,
- variances=(0.1, 0.1, 0.2, 0.2), nms_topk=nms_topk)
- return out
http://git-wip-us.apache.org/repos/asf/incubator-mxnet-test/blob/cc62aded/example/ssd/symbol/symbol_vgg16_ssd_512.py
----------------------------------------------------------------------
diff --git a/example/ssd/symbol/symbol_vgg16_ssd_512.py b/example/ssd/symbol/symbol_vgg16_ssd_512.py
deleted file mode 100644
index 1cc243b..0000000
--- a/example/ssd/symbol/symbol_vgg16_ssd_512.py
+++ /dev/null
@@ -1,194 +0,0 @@
-import mxnet as mx
-from common import conv_act_layer
-from common import multibox_layer
-
-def get_symbol_train(num_classes=20, nms_thresh=0.5, force_suppress=False, nms_topk=400):
- """
- Single-shot multi-box detection with VGG 16 layers ConvNet
- This is a modified version, with fc6/fc7 layers replaced by conv layers
- And the network is slightly smaller than original VGG 16 network
- This is a training network with losses
-
- Parameters:
- ----------
- num_classes: int
- number of object classes not including background
- nms_thresh : float
- non-maximum suppression threshold
- force_suppress : boolean
- whether suppress different class objects
- nms_topk : int
- apply NMS to top K detections
-
- Returns:
- ----------
- mx.Symbol
- """
- data = mx.symbol.Variable(name="data")
- label = mx.symbol.Variable(name="label")
-
- # group 1
- conv1_1 = mx.symbol.Convolution(
- data=data, kernel=(3, 3), pad=(1, 1), num_filter=64, name="conv1_1")
- relu1_1 = mx.symbol.Activation(data=conv1_1, act_type="relu", name="relu1_1")
- conv1_2 = mx.symbol.Convolution(
- data=relu1_1, kernel=(3, 3), pad=(1, 1), num_filter=64, name="conv1_2")
- relu1_2 = mx.symbol.Activation(data=conv1_2, act_type="relu", name="relu1_2")
- pool1 = mx.symbol.Pooling(
- data=relu1_2, pool_type="max", kernel=(2, 2), stride=(2, 2), name="pool1")
- # group 2
- conv2_1 = mx.symbol.Convolution(
- data=pool1, kernel=(3, 3), pad=(1, 1), num_filter=128, name="conv2_1")
- relu2_1 = mx.symbol.Activation(data=conv2_1, act_type="relu", name="relu2_1")
- conv2_2 = mx.symbol.Convolution(
- data=relu2_1, kernel=(3, 3), pad=(1, 1), num_filter=128, name="conv2_2")
- relu2_2 = mx.symbol.Activation(data=conv2_2, act_type="relu", name="relu2_2")
- pool2 = mx.symbol.Pooling(
- data=relu2_2, pool_type="max", kernel=(2, 2), stride=(2, 2), name="pool2")
- # group 3
- conv3_1 = mx.symbol.Convolution(
- data=pool2, kernel=(3, 3), pad=(1, 1), num_filter=256, name="conv3_1")
- relu3_1 = mx.symbol.Activation(data=conv3_1, act_type="relu", name="relu3_1")
- conv3_2 = mx.symbol.Convolution(
- data=relu3_1, kernel=(3, 3), pad=(1, 1), num_filter=256, name="conv3_2")
- relu3_2 = mx.symbol.Activation(data=conv3_2, act_type="relu", name="relu3_2")
- conv3_3 = mx.symbol.Convolution(
- data=relu3_2, kernel=(3, 3), pad=(1, 1), num_filter=256, name="conv3_3")
- relu3_3 = mx.symbol.Activation(data=conv3_3, act_type="relu", name="relu3_3")
- pool3 = mx.symbol.Pooling(
- data=relu3_3, pool_type="max", kernel=(2, 2), stride=(2, 2), \
- pooling_convention="full", name="pool3")
- # group 4
- conv4_1 = mx.symbol.Convolution(
- data=pool3, kernel=(3, 3), pad=(1, 1), num_filter=512, name="conv4_1")
- relu4_1 = mx.symbol.Activation(data=conv4_1, act_type="relu", name="relu4_1")
- conv4_2 = mx.symbol.Convolution(
- data=relu4_1, kernel=(3, 3), pad=(1, 1), num_filter=512, name="conv4_2")
- relu4_2 = mx.symbol.Activation(data=conv4_2, act_type="relu", name="relu4_2")
- conv4_3 = mx.symbol.Convolution(
- data=relu4_2, kernel=(3, 3), pad=(1, 1), num_filter=512, name="conv4_3")
- relu4_3 = mx.symbol.Activation(data=conv4_3, act_type="relu", name="relu4_3")
- pool4 = mx.symbol.Pooling(
- data=relu4_3, pool_type="max", kernel=(2, 2), stride=(2, 2), name="pool4")
- # group 5
- conv5_1 = mx.symbol.Convolution(
- data=pool4, kernel=(3, 3), pad=(1, 1), num_filter=512, name="conv5_1")
- relu5_1 = mx.symbol.Activation(data=conv5_1, act_type="relu", name="relu5_1")
- conv5_2 = mx.symbol.Convolution(
- data=relu5_1, kernel=(3, 3), pad=(1, 1), num_filter=512, name="conv5_2")
- relu5_2 = mx.symbol.Activation(data=conv5_2, act_type="relu", name="relu5_2")
- conv5_3 = mx.symbol.Convolution(
- data=relu5_2, kernel=(3, 3), pad=(1, 1), num_filter=512, name="conv5_3")
- relu5_3 = mx.symbol.Activation(data=conv5_3, act_type="relu", name="relu5_3")
- pool5 = mx.symbol.Pooling(
- data=relu5_3, pool_type="max", kernel=(3, 3), stride=(1, 1),
- pad=(1,1), name="pool5")
- # group 6
- conv6 = mx.symbol.Convolution(
- data=pool5, kernel=(3, 3), pad=(6, 6), dilate=(6, 6),
- num_filter=1024, name="conv6")
- relu6 = mx.symbol.Activation(data=conv6, act_type="relu", name="relu6")
- # drop6 = mx.symbol.Dropout(data=relu6, p=0.5, name="drop6")
- # group 7
- conv7 = mx.symbol.Convolution(
- data=relu6, kernel=(1, 1), pad=(0, 0), num_filter=1024, name="conv7")
- relu7 = mx.symbol.Activation(data=conv7, act_type="relu", name="relu7")
- # drop7 = mx.symbol.Dropout(data=relu7, p=0.5, name="drop7")
-
- ### ssd extra layers ###
- conv8_1, relu8_1 = conv_act_layer(relu7, "8_1", 256, kernel=(1,1), pad=(0,0), \
- stride=(1,1), act_type="relu", use_batchnorm=False)
- conv8_2, relu8_2 = conv_act_layer(relu8_1, "8_2", 512, kernel=(3,3), pad=(1,1), \
- stride=(2,2), act_type="relu", use_batchnorm=False)
- conv9_1, relu9_1 = conv_act_layer(relu8_2, "9_1", 128, kernel=(1,1), pad=(0,0), \
- stride=(1,1), act_type="relu", use_batchnorm=False)
- conv9_2, relu9_2 = conv_act_layer(relu9_1, "9_2", 256, kernel=(3,3), pad=(1,1), \
- stride=(2,2), act_type="relu", use_batchnorm=False)
- conv10_1, relu10_1 = conv_act_layer(relu9_2, "10_1", 128, kernel=(1,1), pad=(0,0), \
- stride=(1,1), act_type="relu", use_batchnorm=False)
- conv10_2, relu10_2 = conv_act_layer(relu10_1, "10_2", 256, kernel=(3,3), pad=(1,1), \
- stride=(2,2), act_type="relu", use_batchnorm=False)
- conv11_1, relu11_1 = conv_act_layer(relu10_2, "11_1", 128, kernel=(1,1), pad=(0,0), \
- stride=(1,1), act_type="relu", use_batchnorm=False)
- conv11_2, relu11_2 = conv_act_layer(relu11_1, "11_2", 256, kernel=(3,3), pad=(1,1), \
- stride=(2,2), act_type="relu", use_batchnorm=False)
- conv12_1, relu12_1 = conv_act_layer(relu11_2, "12_1", 128, kernel=(1,1), pad=(0,0), \
- stride=(1,1), act_type="relu", use_batchnorm=False)
- conv12_2, relu12_2 = conv_act_layer(relu12_1, "12_2", 256, kernel=(4,4), pad=(1,1), \
- stride=(1,1), act_type="relu", use_batchnorm=False)
-
- # specific parameters for VGG16 network
- from_layers = [relu4_3, relu7, relu8_2, relu9_2, relu10_2, relu11_2, relu12_2]
- sizes = [[.07, .1025], [.15,.2121], [.3, .3674], [.45, .5196], [.6, .6708], \
- [.75, .8216], [.9, .9721]]
- ratios = [[1,2,.5], [1,2,.5,3,1./3], [1,2,.5,3,1./3], [1,2,.5,3,1./3], \
- [1,2,.5,3,1./3], [1,2,.5], [1,2,.5]]
- normalizations = [20, -1, -1, -1, -1, -1, -1]
- steps = [ x / 512.0 for x in [8, 16, 32, 64, 128, 256, 512]]
- num_channels = [512]
-
- loc_preds, cls_preds, anchor_boxes = multibox_layer(from_layers, \
- num_classes, sizes=sizes, ratios=ratios, normalization=normalizations, \
- num_channels=num_channels, clip=False, interm_layer=0, steps=steps)
-
- tmp = mx.contrib.symbol.MultiBoxTarget(
- *[anchor_boxes, label, cls_preds], overlap_threshold=.5, \
- ignore_label=-1, negative_mining_ratio=3, minimum_negative_samples=0, \
- negative_mining_thresh=.5, variances=(0.1, 0.1, 0.2, 0.2),
- name="multibox_target")
- loc_target = tmp[0]
- loc_target_mask = tmp[1]
- cls_target = tmp[2]
-
- cls_prob = mx.symbol.SoftmaxOutput(data=cls_preds, label=cls_target, \
- ignore_label=-1, use_ignore=True, grad_scale=1., multi_output=True, \
- normalization='valid', name="cls_prob")
- loc_loss_ = mx.symbol.smooth_l1(name="loc_loss_", \
- data=loc_target_mask * (loc_preds - loc_target), scalar=1.0)
- loc_loss = mx.symbol.MakeLoss(loc_loss_, grad_scale=1., \
- normalization='valid', name="loc_loss")
-
- # monitoring training status
- cls_label = mx.symbol.MakeLoss(data=cls_target, grad_scale=0, name="cls_label")
- det = mx.contrib.symbol.MultiBoxDetection(*[cls_prob, loc_preds, anchor_boxes], \
- name="detection", nms_threshold=nms_thresh, force_suppress=force_suppress,
- variances=(0.1, 0.1, 0.2, 0.2), nms_topk=nms_topk)
- det = mx.symbol.MakeLoss(data=det, grad_scale=0, name="det_out")
-
- # group output
- out = mx.symbol.Group([cls_prob, loc_loss, cls_label, det])
- return out
-
-def get_symbol(num_classes=20, nms_thresh=0.5, force_suppress=False, nms_topk=400):
- """
- Single-shot multi-box detection with VGG 16 layers ConvNet
- This is a modified version, with fc6/fc7 layers replaced by conv layers
- And the network is slightly smaller than original VGG 16 network
- This is the detection network
-
- Parameters:
- ----------
- num_classes: int
- number of object classes not including background
- nms_thresh : float
- threshold of overlap for non-maximum suppression
- force_suppress : boolean
- whether suppress different class objects
- nms_topk : int
- apply NMS to top K detections
-
- Returns:
- ----------
- mx.Symbol
- """
- net = get_symbol_train(num_classes)
- cls_preds = net.get_internals()["multibox_cls_pred_output"]
- loc_preds = net.get_internals()["multibox_loc_pred_output"]
- anchor_boxes = net.get_internals()["multibox_anchors_output"]
-
- cls_prob = mx.symbol.SoftmaxActivation(data=cls_preds, mode='channel', \
- name='cls_prob')
- out = mx.contrib.symbol.MultiBoxDetection(*[cls_prob, loc_preds, anchor_boxes], \
- name="detection", nms_threshold=nms_thresh, force_suppress=force_suppress,
- variances=(0.1, 0.1, 0.2, 0.2), nms_topk=nms_topk)
- return out
http://git-wip-us.apache.org/repos/asf/incubator-mxnet-test/blob/cc62aded/example/ssd/symbol/vgg16_reduced.py
----------------------------------------------------------------------
diff --git a/example/ssd/symbol/vgg16_reduced.py b/example/ssd/symbol/vgg16_reduced.py
new file mode 100644
index 0000000..c6a55b1
--- /dev/null
+++ b/example/ssd/symbol/vgg16_reduced.py
@@ -0,0 +1,86 @@
+import mxnet as mx
+
+def get_symbol(num_classes=1000, **kwargs):
+ """
+ VGG 16 layers network
+ This is a modified version, with fc6/fc7 layers replaced by conv layers
+ And the network is slightly smaller than original VGG 16 network
+ """
+ data = mx.symbol.Variable(name="data")
+ label = mx.symbol.Variable(name="label")
+
+ # group 1
+ conv1_1 = mx.symbol.Convolution(
+ data=data, kernel=(3, 3), pad=(1, 1), num_filter=64, name="conv1_1")
+ relu1_1 = mx.symbol.Activation(data=conv1_1, act_type="relu", name="relu1_1")
+ conv1_2 = mx.symbol.Convolution(
+ data=relu1_1, kernel=(3, 3), pad=(1, 1), num_filter=64, name="conv1_2")
+ relu1_2 = mx.symbol.Activation(data=conv1_2, act_type="relu", name="relu1_2")
+ pool1 = mx.symbol.Pooling(
+ data=relu1_2, pool_type="max", kernel=(2, 2), stride=(2, 2), name="pool1")
+ # group 2
+ conv2_1 = mx.symbol.Convolution(
+ data=pool1, kernel=(3, 3), pad=(1, 1), num_filter=128, name="conv2_1")
+ relu2_1 = mx.symbol.Activation(data=conv2_1, act_type="relu", name="relu2_1")
+ conv2_2 = mx.symbol.Convolution(
+ data=relu2_1, kernel=(3, 3), pad=(1, 1), num_filter=128, name="conv2_2")
+ relu2_2 = mx.symbol.Activation(data=conv2_2, act_type="relu", name="relu2_2")
+ pool2 = mx.symbol.Pooling(
+ data=relu2_2, pool_type="max", kernel=(2, 2), stride=(2, 2), name="pool2")
+ # group 3
+ conv3_1 = mx.symbol.Convolution(
+ data=pool2, kernel=(3, 3), pad=(1, 1), num_filter=256, name="conv3_1")
+ relu3_1 = mx.symbol.Activation(data=conv3_1, act_type="relu", name="relu3_1")
+ conv3_2 = mx.symbol.Convolution(
+ data=relu3_1, kernel=(3, 3), pad=(1, 1), num_filter=256, name="conv3_2")
+ relu3_2 = mx.symbol.Activation(data=conv3_2, act_type="relu", name="relu3_2")
+ conv3_3 = mx.symbol.Convolution(
+ data=relu3_2, kernel=(3, 3), pad=(1, 1), num_filter=256, name="conv3_3")
+ relu3_3 = mx.symbol.Activation(data=conv3_3, act_type="relu", name="relu3_3")
+ pool3 = mx.symbol.Pooling(
+ data=relu3_3, pool_type="max", kernel=(2, 2), stride=(2, 2), \
+ pooling_convention="full", name="pool3")
+ # group 4
+ conv4_1 = mx.symbol.Convolution(
+ data=pool3, kernel=(3, 3), pad=(1, 1), num_filter=512, name="conv4_1")
+ relu4_1 = mx.symbol.Activation(data=conv4_1, act_type="relu", name="relu4_1")
+ conv4_2 = mx.symbol.Convolution(
+ data=relu4_1, kernel=(3, 3), pad=(1, 1), num_filter=512, name="conv4_2")
+ relu4_2 = mx.symbol.Activation(data=conv4_2, act_type="relu", name="relu4_2")
+ conv4_3 = mx.symbol.Convolution(
+ data=relu4_2, kernel=(3, 3), pad=(1, 1), num_filter=512, name="conv4_3")
+ relu4_3 = mx.symbol.Activation(data=conv4_3, act_type="relu", name="relu4_3")
+ pool4 = mx.symbol.Pooling(
+ data=relu4_3, pool_type="max", kernel=(2, 2), stride=(2, 2), name="pool4")
+ # group 5
+ conv5_1 = mx.symbol.Convolution(
+ data=pool4, kernel=(3, 3), pad=(1, 1), num_filter=512, name="conv5_1")
+ relu5_1 = mx.symbol.Activation(data=conv5_1, act_type="relu", name="relu5_1")
+ conv5_2 = mx.symbol.Convolution(
+ data=relu5_1, kernel=(3, 3), pad=(1, 1), num_filter=512, name="conv5_2")
+ relu5_2 = mx.symbol.Activation(data=conv5_2, act_type="relu", name="relu5_2")
+ conv5_3 = mx.symbol.Convolution(
+ data=relu5_2, kernel=(3, 3), pad=(1, 1), num_filter=512, name="conv5_3")
+ relu5_3 = mx.symbol.Activation(data=conv5_3, act_type="relu", name="relu5_3")
+ pool5 = mx.symbol.Pooling(
+ data=relu5_3, pool_type="max", kernel=(3, 3), stride=(1, 1),
+ pad=(1,1), name="pool5")
+ # group 6
+ conv6 = mx.symbol.Convolution(
+ data=pool5, kernel=(3, 3), pad=(6, 6), dilate=(6, 6),
+ num_filter=1024, name="fc6")
+ relu6 = mx.symbol.Activation(data=conv6, act_type="relu", name="relu6")
+ # drop6 = mx.symbol.Dropout(data=relu6, p=0.5, name="drop6")
+ # group 7
+ conv7 = mx.symbol.Convolution(
+ data=relu6, kernel=(1, 1), pad=(0, 0), num_filter=1024, name="fc7")
+ relu7 = mx.symbol.Activation(data=conv7, act_type="relu", name="relu7")
+ # drop7 = mx.symbol.Dropout(data=relu7, p=0.5, name="drop7")
+
+ gpool = mx.symbol.Pooling(data=relu7, pool_type='avg', kernel=(7, 7),
+ global_pool=True, name='global_pool')
+ conv8 = mx.symbol.Convolution(data=gpool, num_filter=num_classes, kernel=(1, 1),
+ name='fc8')
+ flat = mx.symbol.Flatten(data=conv8)
+ softmax = mx.symbol.SoftmaxOutput(data=flat, name='softmax')
+ return softmax
http://git-wip-us.apache.org/repos/asf/incubator-mxnet-test/blob/cc62aded/example/ssd/tools/prepare_coco.sh
----------------------------------------------------------------------
diff --git a/example/ssd/tools/prepare_coco.sh b/example/ssd/tools/prepare_coco.sh
new file mode 100644
index 0000000..1b11f93
--- /dev/null
+++ b/example/ssd/tools/prepare_coco.sh
@@ -0,0 +1,4 @@
+#!/usr/bin/env bash
+DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+python $DIR/prepare_dataset.py --dataset coco --set train2014,valminusminival2014 --target $DIR/../data/train.lst --root $DIR/../data/coco
+python $DIR/prepare_dataset.py --dataset coco --set minival2014 --target $DIR/../data/val.lst --shuffle False --root $DIR/../data/coco
http://git-wip-us.apache.org/repos/asf/incubator-mxnet-test/blob/cc62aded/example/ssd/tools/prepare_dataset.py
----------------------------------------------------------------------
diff --git a/example/ssd/tools/prepare_dataset.py b/example/ssd/tools/prepare_dataset.py
index 7bd6968..12b090c 100644
--- a/example/ssd/tools/prepare_dataset.py
+++ b/example/ssd/tools/prepare_dataset.py
@@ -5,6 +5,7 @@ import subprocess
curr_path = os.path.abspath(os.path.dirname(__file__))
sys.path.append(os.path.join(curr_path, '..'))
from dataset.pascal_voc import PascalVoc
+from dataset.mscoco import Coco
from dataset.concat_db import ConcatDB
def load_pascal(image_set, year, devkit_path, shuffle=False):
@@ -46,6 +47,30 @@ def load_pascal(image_set, year, devkit_path, shuffle=False):
else:
return imdbs[0]
+def load_coco(image_set, dirname, shuffle=False):
+ """
+ wrapper function for loading ms coco dataset
+
+ Parameters:
+ ----------
+ image_set : str
+ train2014, val2014, valminusminival2014, minival2014
+ dirname: str
+ root dir for coco
+ shuffle: boolean
+ initial shuffle
+ """
+ anno_files = ['instances_' + y.strip() + '.json' for y in image_set.split(',')]
+ assert anno_files, "No image set specified"
+ imdbs = []
+ for af in anno_files:
+ af_path = os.path.join(dirname, 'annotations', af)
+ imdbs.append(Coco(af_path, dirname, shuffle=shuffle))
+ if len(imdbs) > 1:
+ return ConcatDB(imdbs, shuffle)
+ else:
+ return imdbs[0]
+
def parse_args():
parser = argparse.ArgumentParser(description='Prepare lists for dataset')
parser.add_argument('--dataset', dest='dataset', help='dataset to use',
@@ -69,6 +94,11 @@ if __name__ == '__main__':
args = parse_args()
if args.dataset == 'pascal':
db = load_pascal(args.set, args.year, args.root_path, args.shuffle)
+ print("saving list to disk...")
+ db.save_imglist(args.target, root=args.root_path)
+ elif args.dataset == 'coco':
+ db = load_coco(args.set, args.root_path, args.shuffle)
+ print("saving list to disk...")
db.save_imglist(args.target, root=args.root_path)
else:
raise NotImplementedError("No implementation for dataset: " + args.dataset)
http://git-wip-us.apache.org/repos/asf/incubator-mxnet-test/blob/cc62aded/example/ssd/tools/visualize_net.py
----------------------------------------------------------------------
diff --git a/example/ssd/tools/visualize_net.py b/example/ssd/tools/visualize_net.py
index e619c23..f72d6a6 100644
--- a/example/ssd/tools/visualize_net.py
+++ b/example/ssd/tools/visualize_net.py
@@ -1,13 +1,14 @@
from __future__ import print_function
import find_mxnet
import mxnet as mx
-import importlib
import argparse
-import sys
+import sys, os
+sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'symbol'))
+import symbol_factory
+
parser = argparse.ArgumentParser(description='network visualization')
-parser.add_argument('--network', type=str, default='vgg16_ssd_300',
- choices = ['vgg16_ssd_300', 'vgg16_ssd_512'],
+parser.add_argument('--network', type=str, default='vgg16_reduced',
help = 'the cnn to use')
parser.add_argument('--num-classes', type=int, default=20,
help='the number of classes')
@@ -16,13 +17,11 @@ parser.add_argument('--data-shape', type=int, default=300,
parser.add_argument('--train', action='store_true', default=False, help='show train net')
args = parser.parse_args()
-sys.path.append('../symbol')
-
if not args.train:
- net = importlib.import_module("symbol_" + args.network).get_symbol(args.num_classes)
+ net = symbol_factory.get_symbol(args.network, args.data_shape, num_classes=args.num_classes)
a = mx.viz.plot_network(net, shape={"data":(1,3,args.data_shape,args.data_shape)}, \
node_attrs={"shape":'rect', "fixedsize":'false'})
- a.render("ssd_" + args.network)
+ a.render("ssd_" + args.network + '_' + str(args.data_shape))
else:
- net = importlib.import_module("symbol_" + args.network).get_symbol_train(args.num_classes)
+ net = symbol_factory.get_symbol_train(args.network, args.data_shape, num_classes=args.num_classes)
print(net.tojson())
http://git-wip-us.apache.org/repos/asf/incubator-mxnet-test/blob/cc62aded/example/ssd/train.py
----------------------------------------------------------------------
diff --git a/example/ssd/train.py b/example/ssd/train.py
index fcd5fb9..62ad2c4 100644
--- a/example/ssd/train.py
+++ b/example/ssd/train.py
@@ -15,8 +15,8 @@ def parse_args():
default=os.path.join(os.getcwd(), 'data', 'val.rec'), type=str)
parser.add_argument('--val-list', dest='val_list', help='validation list to use',
default="", type=str)
- parser.add_argument('--network', dest='network', type=str, default='vgg16_ssd_300',
- choices=['vgg16_ssd_300', 'vgg16_ssd_512'], help='which network to use')
+ parser.add_argument('--network', dest='network', type=str, default='vgg16_reduced',
+ help='which network to use')
parser.add_argument('--batch-size', dest='batch_size', type=int, default=32,
help='training batch size')
parser.add_argument('--resume', dest='resume', type=int, default=-1,
@@ -41,7 +41,7 @@ def parse_args():
help='set image shape')
parser.add_argument('--label-width', dest='label_width', type=int, default=350,
help='force padding label width to sync across train and validation')
- parser.add_argument('--lr', dest='learning_rate', type=float, default=0.004,
+ parser.add_argument('--lr', dest='learning_rate', type=float, default=0.002,
help='learning rate')
parser.add_argument('--momentum', dest='momentum', type=float, default=0.9,
help='momentum')
@@ -53,7 +53,7 @@ def parse_args():
help='green mean value')
parser.add_argument('--mean-b', dest='mean_b', type=float, default=104,
help='blue mean value')
- parser.add_argument('--lr-steps', dest='lr_refactor_step', type=str, default='150, 200',
+ parser.add_argument('--lr-steps', dest='lr_refactor_step', type=str, default='80, 160',
help='refactor learning rate at specified epochs')
parser.add_argument('--lr-factor', dest='lr_refactor_ratio', type=str, default=0.1,
help='ratio to refactor learning rate')
@@ -92,9 +92,9 @@ def parse_class_names(args):
num_class = args.num_class
if len(args.class_names) > 0:
if os.path.isfile(args.class_names):
- # try to open it to read class names
- with open(args.class_names, 'r') as f:
- class_names = [l.strip() for l in f.readlines()]
+ # try to open it to read class names
+ with open(args.class_names, 'r') as f:
+ class_names = [l.strip() for l in f.readlines()]
else:
class_names = [c.strip() for c in args.class_names.split(',')]
assert len(class_names) == num_class, str(len(class_names))
http://git-wip-us.apache.org/repos/asf/incubator-mxnet-test/blob/cc62aded/example/ssd/train/train_net.py
----------------------------------------------------------------------
diff --git a/example/ssd/train/train_net.py b/example/ssd/train/train_net.py
index 3f73ee8..54cae05 100644
Binary files a/example/ssd/train/train_net.py and b/example/ssd/train/train_net.py differ
http://git-wip-us.apache.org/repos/asf/incubator-mxnet-test/blob/cc62aded/src/io/image_det_aug_default.cc
----------------------------------------------------------------------
diff --git a/src/io/image_det_aug_default.cc b/src/io/image_det_aug_default.cc
index b5d4209..cb7966d 100644
--- a/src/io/image_det_aug_default.cc
+++ b/src/io/image_det_aug_default.cc
@@ -255,9 +255,9 @@ class ImageDetLabel {
obj.right = *(it++);
obj.bottom = *(it++);
obj.extra.assign(it, it - 5 + object_width_);
- objects_.push_back(obj);
- CHECK_GT(obj.right, obj.left);
- CHECK_GT(obj.bottom, obj.top);
+ if (obj.right > obj.left && obj.bottom > obj.top) {
+ objects_.push_back(obj);
+ }
}
}