You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mxnet.apache.org by lx...@apache.org on 2017/07/07 15:58:15 UTC
[02/50] [abbrv] incubator-mxnet-test git commit: Better ssd (#6827)

http://git-wip-us.apache.org/repos/asf/incubator-mxnet-test/blob/cc62aded/example/ssd/symbol/legacy_vgg16_ssd_512.py
----------------------------------------------------------------------
diff --git a/example/ssd/symbol/legacy_vgg16_ssd_512.py b/example/ssd/symbol/legacy_vgg16_ssd_512.py
new file mode 100644
index 0000000..15424f9
--- /dev/null
+++ b/example/ssd/symbol/legacy_vgg16_ssd_512.py
@@ -0,0 +1,194 @@
+import mxnet as mx
+from common import legacy_conv_act_layer
+from common import multibox_layer
+
+def get_symbol_train(num_classes=20, nms_thresh=0.5, force_suppress=False, nms_topk=400):
+    """
+    Single-shot multi-box detection with VGG 16 layers ConvNet
+    This is a modified version, with fc6/fc7 layers replaced by conv layers
+    And the network is slightly smaller than original VGG 16 network
+    This is a training network with losses
+
+    Parameters:
+    ----------
+    num_classes: int
+        number of object classes not including background
+    nms_thresh : float
+        non-maximum suppression threshold
+    force_suppress : boolean
+        whether to suppress different class objects
+    nms_topk : int
+        apply NMS to top K detections
+
+    Returns:
+    ----------
+    mx.Symbol grouping [cls_prob, loc_loss, cls_label, det_out]
+    """
+    data = mx.symbol.Variable(name="data")
+    label = mx.symbol.Variable(name="label")
+
+    # group 1
+    conv1_1 = mx.symbol.Convolution(
+        data=data, kernel=(3, 3), pad=(1, 1), num_filter=64, name="conv1_1")
+    relu1_1 = mx.symbol.Activation(data=conv1_1, act_type="relu", name="relu1_1")
+    conv1_2 = mx.symbol.Convolution(
+        data=relu1_1, kernel=(3, 3), pad=(1, 1), num_filter=64, name="conv1_2")
+    relu1_2 = mx.symbol.Activation(data=conv1_2, act_type="relu", name="relu1_2")
+    pool1 = mx.symbol.Pooling(
+        data=relu1_2, pool_type="max", kernel=(2, 2), stride=(2, 2), name="pool1")
+    # group 2
+    conv2_1 = mx.symbol.Convolution(
+        data=pool1, kernel=(3, 3), pad=(1, 1), num_filter=128, name="conv2_1")
+    relu2_1 = mx.symbol.Activation(data=conv2_1, act_type="relu", name="relu2_1")
+    conv2_2 = mx.symbol.Convolution(
+        data=relu2_1, kernel=(3, 3), pad=(1, 1), num_filter=128, name="conv2_2")
+    relu2_2 = mx.symbol.Activation(data=conv2_2, act_type="relu", name="relu2_2")
+    pool2 = mx.symbol.Pooling(
+        data=relu2_2, pool_type="max", kernel=(2, 2), stride=(2, 2), name="pool2")
+    # group 3
+    conv3_1 = mx.symbol.Convolution(
+        data=pool2, kernel=(3, 3), pad=(1, 1), num_filter=256, name="conv3_1")
+    relu3_1 = mx.symbol.Activation(data=conv3_1, act_type="relu", name="relu3_1")
+    conv3_2 = mx.symbol.Convolution(
+        data=relu3_1, kernel=(3, 3), pad=(1, 1), num_filter=256, name="conv3_2")
+    relu3_2 = mx.symbol.Activation(data=conv3_2, act_type="relu", name="relu3_2")
+    conv3_3 = mx.symbol.Convolution(
+        data=relu3_2, kernel=(3, 3), pad=(1, 1), num_filter=256, name="conv3_3")
+    relu3_3 = mx.symbol.Activation(data=conv3_3, act_type="relu", name="relu3_3")
+    pool3 = mx.symbol.Pooling(
+        data=relu3_3, pool_type="max", kernel=(2, 2), stride=(2, 2), \
+        pooling_convention="full", name="pool3")
+    # group 4
+    conv4_1 = mx.symbol.Convolution(
+        data=pool3, kernel=(3, 3), pad=(1, 1), num_filter=512, name="conv4_1")
+    relu4_1 = mx.symbol.Activation(data=conv4_1, act_type="relu", name="relu4_1")
+    conv4_2 = mx.symbol.Convolution(
+        data=relu4_1, kernel=(3, 3), pad=(1, 1), num_filter=512, name="conv4_2")
+    relu4_2 = mx.symbol.Activation(data=conv4_2, act_type="relu", name="relu4_2")
+    conv4_3 = mx.symbol.Convolution(
+        data=relu4_2, kernel=(3, 3), pad=(1, 1), num_filter=512, name="conv4_3")
+    relu4_3 = mx.symbol.Activation(data=conv4_3, act_type="relu", name="relu4_3")
+    pool4 = mx.symbol.Pooling(
+        data=relu4_3, pool_type="max", kernel=(2, 2), stride=(2, 2), name="pool4")
+    # group 5
+    conv5_1 = mx.symbol.Convolution(
+        data=pool4, kernel=(3, 3), pad=(1, 1), num_filter=512, name="conv5_1")
+    relu5_1 = mx.symbol.Activation(data=conv5_1, act_type="relu", name="relu5_1")
+    conv5_2 = mx.symbol.Convolution(
+        data=relu5_1, kernel=(3, 3), pad=(1, 1), num_filter=512, name="conv5_2")
+    relu5_2 = mx.symbol.Activation(data=conv5_2, act_type="relu", name="relu5_2")
+    conv5_3 = mx.symbol.Convolution(
+        data=relu5_2, kernel=(3, 3), pad=(1, 1), num_filter=512, name="conv5_3")
+    relu5_3 = mx.symbol.Activation(data=conv5_3, act_type="relu", name="relu5_3")
+    pool5 = mx.symbol.Pooling(
+        data=relu5_3, pool_type="max", kernel=(3, 3), stride=(1, 1),
+        pad=(1,1), name="pool5")
+    # group 6
+    conv6 = mx.symbol.Convolution(
+        data=pool5, kernel=(3, 3), pad=(6, 6), dilate=(6, 6),
+        num_filter=1024, name="conv6")
+    relu6 = mx.symbol.Activation(data=conv6, act_type="relu", name="relu6")
+    # drop6 = mx.symbol.Dropout(data=relu6, p=0.5, name="drop6")
+    # group 7
+    conv7 = mx.symbol.Convolution(
+        data=relu6, kernel=(1, 1), pad=(0, 0), num_filter=1024, name="conv7")
+    relu7 = mx.symbol.Activation(data=conv7, act_type="relu", name="relu7")
+    # drop7 = mx.symbol.Dropout(data=relu7, p=0.5, name="drop7")
+
+    ### ssd extra layers ###
+    conv8_1, relu8_1 = legacy_conv_act_layer(relu7, "8_1", 256, kernel=(1,1), pad=(0,0), \
+        stride=(1,1), act_type="relu", use_batchnorm=False)
+    conv8_2, relu8_2 = legacy_conv_act_layer(relu8_1, "8_2", 512, kernel=(3,3), pad=(1,1), \
+        stride=(2,2), act_type="relu", use_batchnorm=False)
+    conv9_1, relu9_1 = legacy_conv_act_layer(relu8_2, "9_1", 128, kernel=(1,1), pad=(0,0), \
+        stride=(1,1), act_type="relu", use_batchnorm=False)
+    conv9_2, relu9_2 = legacy_conv_act_layer(relu9_1, "9_2", 256, kernel=(3,3), pad=(1,1), \
+        stride=(2,2), act_type="relu", use_batchnorm=False)
+    conv10_1, relu10_1 = legacy_conv_act_layer(relu9_2, "10_1", 128, kernel=(1,1), pad=(0,0), \
+        stride=(1,1), act_type="relu", use_batchnorm=False)
+    conv10_2, relu10_2 = legacy_conv_act_layer(relu10_1, "10_2", 256, kernel=(3,3), pad=(1,1), \
+        stride=(2,2), act_type="relu", use_batchnorm=False)
+    conv11_1, relu11_1 = legacy_conv_act_layer(relu10_2, "11_1", 128, kernel=(1,1), pad=(0,0), \
+        stride=(1,1), act_type="relu", use_batchnorm=False)
+    conv11_2, relu11_2 = legacy_conv_act_layer(relu11_1, "11_2", 256, kernel=(3,3), pad=(1,1), \
+        stride=(2,2), act_type="relu", use_batchnorm=False)
+    conv12_1, relu12_1 = legacy_conv_act_layer(relu11_2, "12_1", 128, kernel=(1,1), pad=(0,0), \
+        stride=(1,1), act_type="relu", use_batchnorm=False)
+    conv12_2, relu12_2 = legacy_conv_act_layer(relu12_1, "12_2", 256, kernel=(4,4), pad=(1,1), \
+        stride=(1,1), act_type="relu", use_batchnorm=False)
+
+    # per-layer anchor settings, one entry for each of the seven layers in from_layers
+    from_layers = [relu4_3, relu7, relu8_2, relu9_2, relu10_2, relu11_2, relu12_2]
+    sizes = [[.07, .1025], [.15,.2121], [.3, .3674], [.45, .5196], [.6, .6708], \
+        [.75, .8216], [.9, .9721]]
+    ratios = [[1,2,.5], [1,2,.5,3,1./3], [1,2,.5,3,1./3], [1,2,.5,3,1./3], \
+        [1,2,.5,3,1./3], [1,2,.5], [1,2,.5]]
+    normalizations = [20, -1, -1, -1, -1, -1, -1]  # -1 presumably disables normalization; confirm in multibox_layer
+    steps = [ x / 512.0 for x in [8, 16, 32, 64, 128, 256, 512]]  # pixel steps divided by 512 (presumably the input size)
+    num_channels = [512]
+
+    loc_preds, cls_preds, anchor_boxes = multibox_layer(from_layers, \
+        num_classes, sizes=sizes, ratios=ratios, normalization=normalizations, \
+        num_channels=num_channels, clip=False, interm_layer=0, steps=steps)
+
+    tmp = mx.contrib.symbol.MultiBoxTarget(
+        *[anchor_boxes, label, cls_preds], overlap_threshold=.5, \
+        ignore_label=-1, negative_mining_ratio=3, minimum_negative_samples=0, \
+        negative_mining_thresh=.5, variances=(0.1, 0.1, 0.2, 0.2),
+        name="multibox_target")
+    loc_target = tmp[0]
+    loc_target_mask = tmp[1]
+    cls_target = tmp[2]
+
+    cls_prob = mx.symbol.SoftmaxOutput(data=cls_preds, label=cls_target, \
+        ignore_label=-1, use_ignore=True, grad_scale=1., multi_output=True, \
+        normalization='valid', name="cls_prob")
+    loc_loss_ = mx.symbol.smooth_l1(name="loc_loss_", \
+        data=loc_target_mask * (loc_preds - loc_target), scalar=1.0)
+    loc_loss = mx.symbol.MakeLoss(loc_loss_, grad_scale=1., \
+        normalization='valid', name="loc_loss")
+
+    # monitoring training status (grad_scale=0, so these two outputs are for inspection only)
+    cls_label = mx.symbol.MakeLoss(data=cls_target, grad_scale=0, name="cls_label")
+    det = mx.contrib.symbol.MultiBoxDetection(*[cls_prob, loc_preds, anchor_boxes], \
+        name="detection", nms_threshold=nms_thresh, force_suppress=force_suppress,
+        variances=(0.1, 0.1, 0.2, 0.2), nms_topk=nms_topk)
+    det = mx.symbol.MakeLoss(data=det, grad_scale=0, name="det_out")
+
+    # group output
+    out = mx.symbol.Group([cls_prob, loc_loss, cls_label, det])
+    return out
+
+def get_symbol(num_classes=20, nms_thresh=0.5, force_suppress=False, nms_topk=400):
+    """
+    Single-shot multi-box detection with VGG 16 layers ConvNet
+    This is a modified version, with fc6/fc7 layers replaced by conv layers
+    And the network is slightly smaller than original VGG 16 network
+    This is the detection network
+
+    Parameters:
+    ----------
+    num_classes: int
+        number of object classes not including background
+    nms_thresh : float
+        threshold of overlap for non-maximum suppression
+    force_suppress : boolean
+        whether to suppress different class objects
+    nms_topk : int
+        apply NMS to top K detections
+
+    Returns:
+    ----------
+    mx.Symbol : the MultiBoxDetection output named "detection"
+    """
+    net = get_symbol_train(num_classes)  # built only to look up the three internal outputs below
+    cls_preds = net.get_internals()["multibox_cls_pred_output"]
+    loc_preds = net.get_internals()["multibox_loc_pred_output"]
+    anchor_boxes = net.get_internals()["multibox_anchors_output"]
+
+    cls_prob = mx.symbol.SoftmaxActivation(data=cls_preds, mode='channel', \
+        name='cls_prob')
+    out = mx.contrib.symbol.MultiBoxDetection(*[cls_prob, loc_preds, anchor_boxes], \
+        name="detection", nms_threshold=nms_thresh, force_suppress=force_suppress,
+        variances=(0.1, 0.1, 0.2, 0.2), nms_topk=nms_topk)
+    return out

http://git-wip-us.apache.org/repos/asf/incubator-mxnet-test/blob/cc62aded/example/ssd/symbol/resnet.py
----------------------------------------------------------------------
diff --git a/example/ssd/symbol/resnet.py b/example/ssd/symbol/resnet.py
new file mode 100644
index 0000000..9c121e8
--- /dev/null
+++ b/example/ssd/symbol/resnet.py
@@ -0,0 +1,169 @@
+'''
+Adapted from https://github.com/tornadomeet/ResNet/blob/master/symbol_resnet.py
+Original author Wei Wu
+
+Implemented the following paper:
+
+Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun. "Identity Mappings in Deep Residual Networks"
+'''
+import mxnet as mx
+
+def residual_unit(data, num_filter, stride, dim_match, name, bottle_neck=True, bn_mom=0.9, workspace=256, memonger=False):
+    """Return ResNet Unit symbol for building ResNet
+    Parameters
+    ----------
+    data : mx.Symbol
+        Input data
+    num_filter : int
+        Number of output channels
+    bottle_neck : bool
+        True builds the 1x1-3x3-1x1 unit whose first two convs use int(num_filter*0.25) channels
+    stride : tuple
+        Stride used in convolution
+    dim_match : bool
+        True adds the input itself as shortcut; False adds a strided 1x1 conv of the pre-activated input
+    name : str
+        Base name of the operators
+    workspace : int
+        Workspace used in convolution operator
+    """
+    if bottle_neck:
+        # the same as https://github.com/facebook/fb.resnet.torch#notes, a bit difference with origin paper
+        bn1 = mx.sym.BatchNorm(data=data, fix_gamma=False, eps=2e-5, momentum=bn_mom, name=name + '_bn1')
+        act1 = mx.sym.Activation(data=bn1, act_type='relu', name=name + '_relu1')
+        conv1 = mx.sym.Convolution(data=act1, num_filter=int(num_filter*0.25), kernel=(1,1), stride=(1,1), pad=(0,0),
+                                   no_bias=True, workspace=workspace, name=name + '_conv1')
+        bn2 = mx.sym.BatchNorm(data=conv1, fix_gamma=False, eps=2e-5, momentum=bn_mom, name=name + '_bn2')
+        act2 = mx.sym.Activation(data=bn2, act_type='relu', name=name + '_relu2')
+        conv2 = mx.sym.Convolution(data=act2, num_filter=int(num_filter*0.25), kernel=(3,3), stride=stride, pad=(1,1),
+                                   no_bias=True, workspace=workspace, name=name + '_conv2')
+        bn3 = mx.sym.BatchNorm(data=conv2, fix_gamma=False, eps=2e-5, momentum=bn_mom, name=name + '_bn3')
+        act3 = mx.sym.Activation(data=bn3, act_type='relu', name=name + '_relu3')
+        conv3 = mx.sym.Convolution(data=act3, num_filter=num_filter, kernel=(1,1), stride=(1,1), pad=(0,0), no_bias=True,
+                                   workspace=workspace, name=name + '_conv3')
+        if dim_match:
+            shortcut = data
+        else:
+            shortcut = mx.sym.Convolution(data=act1, num_filter=num_filter, kernel=(1,1), stride=stride, no_bias=True,
+                                            workspace=workspace, name=name+'_sc')
+        if memonger:
+            shortcut._set_attr(mirror_stage='True')
+        return conv3 + shortcut
+    else:
+        bn1 = mx.sym.BatchNorm(data=data, fix_gamma=False, momentum=bn_mom, eps=2e-5, name=name + '_bn1')
+        act1 = mx.sym.Activation(data=bn1, act_type='relu', name=name + '_relu1')
+        conv1 = mx.sym.Convolution(data=act1, num_filter=num_filter, kernel=(3,3), stride=stride, pad=(1,1),
+                                      no_bias=True, workspace=workspace, name=name + '_conv1')
+        bn2 = mx.sym.BatchNorm(data=conv1, fix_gamma=False, momentum=bn_mom, eps=2e-5, name=name + '_bn2')
+        act2 = mx.sym.Activation(data=bn2, act_type='relu', name=name + '_relu2')
+        conv2 = mx.sym.Convolution(data=act2, num_filter=num_filter, kernel=(3,3), stride=(1,1), pad=(1,1),
+                                      no_bias=True, workspace=workspace, name=name + '_conv2')
+        if dim_match:
+            shortcut = data
+        else:
+            shortcut = mx.sym.Convolution(data=act1, num_filter=num_filter, kernel=(1,1), stride=stride, no_bias=True,
+                                            workspace=workspace, name=name+'_sc')
+        if memonger:
+            shortcut._set_attr(mirror_stage='True')
+        return conv2 + shortcut
+
+def resnet(units, num_stages, filter_list, num_classes, image_shape, bottle_neck=True, bn_mom=0.9, workspace=256, memonger=False):
+    """Return the full ResNet classification symbol (ends in SoftmaxOutput)
+    Parameters
+    ----------
+    units : list
+        Number of units in each stage; len(units) must equal num_stages
+    num_stages : int
+        Number of stages
+    filter_list : list
+        Channel size of the first conv followed by each stage (index i+1 is stage i)
+    num_classes : int
+        Output size of symbol
+    image_shape : sequence
+        (channels, height, width); height <= 32 selects the 3x3 stride-1 first conv
+    bottle_neck, bn_mom, workspace, memonger
+        Forwarded to every residual_unit; bn_mom and workspace also apply to layers built here
+    """
+    num_unit = len(units)
+    assert(num_unit == num_stages)
+    data = mx.sym.Variable(name='data')
+    data = mx.sym.identity(data=data, name='id')
+    data = mx.sym.BatchNorm(data=data, fix_gamma=True, eps=2e-5, momentum=bn_mom, name='bn_data')
+    (nchannel, height, width) = image_shape
+    if height <= 32:            # such as cifar10
+        body = mx.sym.Convolution(data=data, num_filter=filter_list[0], kernel=(3, 3), stride=(1,1), pad=(1, 1),
+                                  no_bias=True, name="conv0", workspace=workspace)
+    else:                       # often expected to be 224 such as imagenet
+        body = mx.sym.Convolution(data=data, num_filter=filter_list[0], kernel=(7, 7), stride=(2,2), pad=(3, 3),
+                                  no_bias=True, name="conv0", workspace=workspace)
+        body = mx.sym.BatchNorm(data=body, fix_gamma=False, eps=2e-5, momentum=bn_mom, name='bn0')
+        body = mx.sym.Activation(data=body, act_type='relu', name='relu0')
+        body = mx.symbol.Pooling(data=body, kernel=(3, 3), stride=(2,2), pad=(1,1), pool_type='max')
+
+    for i in range(num_stages):  # bn_mom is passed on so the units do not fall back to their own 0.9 default
+        body = residual_unit(body, filter_list[i+1], (1 if i==0 else 2, 1 if i==0 else 2), False,
+                             name='stage%d_unit%d' % (i + 1, 1), bottle_neck=bottle_neck, workspace=workspace,
+                             bn_mom=bn_mom, memonger=memonger)
+        for j in range(units[i]-1):
+            body = residual_unit(body, filter_list[i+1], (1,1), True, name='stage%d_unit%d' % (i + 1, j + 2),
+                                 bottle_neck=bottle_neck, bn_mom=bn_mom, workspace=workspace, memonger=memonger)
+    bn1 = mx.sym.BatchNorm(data=body, fix_gamma=False, eps=2e-5, momentum=bn_mom, name='bn1')
+    relu1 = mx.sym.Activation(data=bn1, act_type='relu', name='relu1')
+    # Although kernel is not used here when global_pool=True, we should put one
+    pool1 = mx.symbol.Pooling(data=relu1, global_pool=True, kernel=(7, 7), pool_type='avg', name='pool1')
+    flat = mx.symbol.Flatten(data=pool1)
+    fc1 = mx.symbol.FullyConnected(data=flat, num_hidden=num_classes, name='fc1')
+    return mx.symbol.SoftmaxOutput(data=fc1, name='softmax')
+
+def get_symbol(num_classes, num_layers, image_shape, conv_workspace=256, **kwargs):
+    """Pick the per-stage layout for num_layers and build the ResNet symbol; extra **kwargs are ignored.
+    Adapted from https://github.com/tornadomeet/ResNet/blob/master/train_resnet.py
+    Original author Wei Wu
+    """
+    image_shape = [int(l) for l in image_shape.split(',')]  # image_shape arrives as a "c,h,w" string
+    (nchannel, height, width) = image_shape
+    if height <= 28:  # small inputs: three stages with the same number of units each
+        num_stages = 3
+        if (num_layers-2) % 9 == 0 and num_layers >= 164:
+            per_unit = [(num_layers-2)//9]
+            filter_list = [16, 64, 128, 256]
+            bottle_neck = True
+        elif (num_layers-2) % 6 == 0 and num_layers < 164:
+            per_unit = [(num_layers-2)//6]
+            filter_list = [16, 16, 32, 64]
+            bottle_neck = False
+        else:
+            raise ValueError("no experiments done on num_layers {}, you can do it yourself".format(num_layers))
+        units = per_unit * num_stages
+    else:  # larger inputs: four stages, unit counts looked up by depth
+        if num_layers >= 50:
+            filter_list = [64, 256, 512, 1024, 2048]
+            bottle_neck = True
+        else:
+            filter_list = [64, 64, 128, 256, 512]
+            bottle_neck = False
+        num_stages = 4
+        if num_layers == 18:
+            units = [2, 2, 2, 2]
+        elif num_layers == 34:
+            units = [3, 4, 6, 3]
+        elif num_layers == 50:
+            units = [3, 4, 6, 3]
+        elif num_layers == 101:
+            units = [3, 4, 23, 3]
+        elif num_layers == 152:
+            units = [3, 8, 36, 3]
+        elif num_layers == 200:
+            units = [3, 24, 36, 3]
+        elif num_layers == 269:
+            units = [3, 30, 48, 8]
+        else:
+            raise ValueError("no experiments done on num_layers {}, you can do it yourself".format(num_layers))
+
+    return resnet(units       = units,
+                  num_stages  = num_stages,
+                  filter_list = filter_list,
+                  num_classes = num_classes,
+                  image_shape = image_shape,
+                  bottle_neck = bottle_neck,
+                  workspace   = conv_workspace)

http://git-wip-us.apache.org/repos/asf/incubator-mxnet-test/blob/cc62aded/example/ssd/symbol/symbol_builder.py
----------------------------------------------------------------------
diff --git a/example/ssd/symbol/symbol_builder.py b/example/ssd/symbol/symbol_builder.py
new file mode 100644
index 0000000..48c9d1d
--- /dev/null
+++ b/example/ssd/symbol/symbol_builder.py
@@ -0,0 +1,166 @@
+import mxnet as mx
+from common import multi_layer_feature, multibox_layer
+
+
+def import_module(module_name):
+    """Import module_name after making this file's directory importable (added to sys.path once)."""
+    import sys, os, importlib
+    if os.path.dirname(__file__) not in sys.path:  # avoid growing sys.path on every call
+        sys.path.append(os.path.dirname(__file__))
+    return importlib.import_module(module_name)
+
+def get_symbol_train(network, num_classes, from_layers, num_filters, strides, pads,
+                     sizes, ratios, normalizations=-1, steps=[], min_filter=128,
+                     nms_thresh=0.5, force_suppress=False, nms_topk=400, **kwargs):
+    """Build network symbol for training SSD
+
+    Parameters
+    ----------
+    network : str
+        base network symbol name
+    num_classes : int
+        number of object classes not including background
+    from_layers : list of str
+        feature extraction layers, use '' to add extra layers
+        For example:
+        from_layers = ['relu4_3', 'fc7', '', '', '', '']
+        which means extract feature from relu4_3 and fc7, adding 4 extra layers
+        on top of fc7
+    num_filters : list of int
+        number of filters for extra layers, you can use -1 for extracted features,
+        however, if normalization and scale are applied, the number of filters for
+        that layer must be provided.
+        For example:
+        num_filters = [512, -1, 512, 256, 256, 256]
+    strides : list of int
+        strides for the 3x3 convolution appended, -1 can be used for extracted
+        feature layers
+    pads : list of int
+        paddings for the 3x3 convolution, -1 can be used for extracted layers
+    sizes : list or list of list
+        [min_size, max_size] for all layers or [[], [], []...] for specific layers
+    ratios : list or list of list
+        [ratio1, ratio2...] for all layers or [[], [], ...] for specific layers
+    normalizations : int or list of int
+        use normalizations value for all layers or [...] for specific layers,
+        -1 indicates no normalization and scale
+    steps : list
+        specify steps for each MultiBoxPrior layer, leave empty to have them
+        calculated according to layer dimensions
+    min_filter : int
+        minimum number of filters used in 1x1 convolution
+    nms_thresh : float
+        non-maximum suppression threshold
+    force_suppress : boolean
+        whether to suppress different class objects
+    nms_topk : int
+        apply NMS to top K detections
+
+    Returns
+    -------
+    mx.Symbol grouping [cls_prob, loc_loss, cls_label, det_out]
+
+    """
+    label = mx.sym.Variable('label')
+    body = import_module(network).get_symbol(num_classes, **kwargs)  # extra kwargs go to the base network
+    layers = multi_layer_feature(body, from_layers, num_filters, strides, pads,
+        min_filter=min_filter)
+
+    loc_preds, cls_preds, anchor_boxes = multibox_layer(layers, \
+        num_classes, sizes=sizes, ratios=ratios, normalization=normalizations, \
+        num_channels=num_filters, clip=False, interm_layer=0, steps=steps)
+
+    tmp = mx.contrib.symbol.MultiBoxTarget(
+        *[anchor_boxes, label, cls_preds], overlap_threshold=.5, \
+        ignore_label=-1, negative_mining_ratio=3, minimum_negative_samples=0, \
+        negative_mining_thresh=.5, variances=(0.1, 0.1, 0.2, 0.2),
+        name="multibox_target")
+    loc_target = tmp[0]
+    loc_target_mask = tmp[1]
+    cls_target = tmp[2]
+
+    cls_prob = mx.symbol.SoftmaxOutput(data=cls_preds, label=cls_target, \
+        ignore_label=-1, use_ignore=True, grad_scale=1., multi_output=True, \
+        normalization='valid', name="cls_prob")
+    loc_loss_ = mx.symbol.smooth_l1(name="loc_loss_", \
+        data=loc_target_mask * (loc_preds - loc_target), scalar=1.0)
+    loc_loss = mx.symbol.MakeLoss(loc_loss_, grad_scale=1., \
+        normalization='valid', name="loc_loss")
+
+    # monitoring training status (grad_scale=0, so these two outputs are for inspection only)
+    cls_label = mx.symbol.MakeLoss(data=cls_target, grad_scale=0, name="cls_label")
+    det = mx.contrib.symbol.MultiBoxDetection(*[cls_prob, loc_preds, anchor_boxes], \
+        name="detection", nms_threshold=nms_thresh, force_suppress=force_suppress,
+        variances=(0.1, 0.1, 0.2, 0.2), nms_topk=nms_topk)
+    det = mx.symbol.MakeLoss(data=det, grad_scale=0, name="det_out")
+
+    # group output
+    out = mx.symbol.Group([cls_prob, loc_loss, cls_label, det])
+    return out
+
+def get_symbol(network, num_classes, from_layers, num_filters, sizes, ratios,
+               strides, pads, normalizations=-1, steps=[], min_filter=128,
+               nms_thresh=0.5, force_suppress=False, nms_topk=400, **kwargs):
+    """Build network for testing SSD
+
+    Parameters
+    ----------
+    network : str
+        base network symbol name
+    num_classes : int
+        number of object classes not including background
+    from_layers : list of str
+        feature extraction layers, use '' to add extra layers
+        For example:
+        from_layers = ['relu4_3', 'fc7', '', '', '', '']
+        which means extract feature from relu4_3 and fc7, adding 4 extra layers
+        on top of fc7
+    num_filters : list of int
+        number of filters for extra layers, you can use -1 for extracted features,
+        however, if normalization and scale are applied, the number of filters for
+        that layer must be provided.
+        For example:
+        num_filters = [512, -1, 512, 256, 256, 256]
+    strides : list of int
+        strides for the 3x3 convolution appended, -1 can be used for extracted
+        feature layers
+    pads : list of int
+        paddings for the 3x3 convolution, -1 can be used for extracted layers
+    sizes : list or list of list
+        [min_size, max_size] for all layers or [[], [], []...] for specific layers
+    ratios : list or list of list
+        [ratio1, ratio2...] for all layers or [[], [], ...] for specific layers
+    normalizations : int or list of int
+        use normalizations value for all layers or [...] for specific layers,
+        -1 indicates no normalization and scale
+    steps : list
+        specify steps for each MultiBoxPrior layer, leave empty to have them
+        calculated according to layer dimensions
+    min_filter : int
+        minimum number of filters used in 1x1 convolution
+    nms_thresh : float
+        non-maximum suppression threshold
+    force_suppress : boolean
+        whether to suppress different class objects
+    nms_topk : int
+        apply NMS to top K detections
+
+    Returns
+    -------
+    mx.Symbol : the MultiBoxDetection output named "detection"
+
+    """
+    body = import_module(network).get_symbol(num_classes, **kwargs)  # extra kwargs go to the base network
+    layers = multi_layer_feature(body, from_layers, num_filters, strides, pads,
+        min_filter=min_filter)
+
+    loc_preds, cls_preds, anchor_boxes = multibox_layer(layers, \
+        num_classes, sizes=sizes, ratios=ratios, normalization=normalizations, \
+        num_channels=num_filters, clip=False, interm_layer=0, steps=steps)
+
+    cls_prob = mx.symbol.SoftmaxActivation(data=cls_preds, mode='channel', \
+        name='cls_prob')
+    out = mx.contrib.symbol.MultiBoxDetection(*[cls_prob, loc_preds, anchor_boxes], \
+        name="detection", nms_threshold=nms_thresh, force_suppress=force_suppress,
+        variances=(0.1, 0.1, 0.2, 0.2), nms_topk=nms_topk)
+    return out

http://git-wip-us.apache.org/repos/asf/incubator-mxnet-test/blob/cc62aded/example/ssd/symbol/symbol_factory.py
----------------------------------------------------------------------
diff --git a/example/ssd/symbol/symbol_factory.py b/example/ssd/symbol/symbol_factory.py
new file mode 100644
index 0000000..ef156f1
--- /dev/null
+++ b/example/ssd/symbol/symbol_factory.py
@@ -0,0 +1,122 @@
+"""Presets for various network configurations"""
+import logging
+import symbol_builder
+
+def get_config(network, data_shape, **kwargs):
+    """Return the preset SSD configuration for a base network as a dict (this function's locals())
+
+    Parameters
+    ----------
+    network : str
+        base network name: vgg16_reduced, inceptionv3, resnet50 or resnet101
+    data_shape : int
+        input data dimension
+    kwargs : dict
+        extra arguments; not read here, but returned under the 'kwargs' key
+    """
+    if network == 'vgg16_reduced':
+        if data_shape >= 448:
+            from_layers = ['relu4_3', 'relu7', '', '', '', '', '']
+            num_filters = [512, -1, 512, 256, 256, 256, 256]
+            strides = [-1, -1, 2, 2, 2, 2, 1]
+            pads = [-1, -1, 1, 1, 1, 1, 1]
+            sizes = [[.07, .1025], [.15,.2121], [.3, .3674], [.45, .5196], [.6, .6708], \
+                [.75, .8216], [.9, .9721]]
+            ratios = [[1,2,.5], [1,2,.5,3,1./3], [1,2,.5,3,1./3], [1,2,.5,3,1./3], \
+                [1,2,.5,3,1./3], [1,2,.5], [1,2,.5]]
+            normalizations = [20, -1, -1, -1, -1, -1, -1]
+            steps = [] if data_shape != 512 else [x / 512.0 for x in
+                [8, 16, 32, 64, 128, 256, 512]]
+        else:
+            from_layers = ['relu4_3', 'relu7', '', '', '', '']
+            num_filters = [512, -1, 512, 256, 256, 256]
+            strides = [-1, -1, 2, 2, 1, 1]
+            pads = [-1, -1, 1, 1, 0, 0]
+            sizes = [[.1, .141], [.2,.272], [.37, .447], [.54, .619], [.71, .79], [.88, .961]]
+            ratios = [[1,2,.5], [1,2,.5,3,1./3], [1,2,.5,3,1./3], [1,2,.5,3,1./3], \
+                [1,2,.5], [1,2,.5]]
+            normalizations = [20, -1, -1, -1, -1, -1]
+            steps = [] if data_shape != 300 else [x / 300.0 for x in [8, 16, 32, 64, 100, 300]]
+        if not (data_shape == 300 or data_shape == 512):
+            logging.warning('data_shape %d was not tested, use with caution.', data_shape)
+        return locals()  # every local name (incl. network, data_shape, kwargs) becomes a config key
+    elif network == 'inceptionv3':
+        from_layers = ['ch_concat_mixed_7_chconcat', 'ch_concat_mixed_10_chconcat', '', '', '', '']
+        num_filters = [-1, -1, 512, 256, 256, 128]
+        strides = [-1, -1, 2, 2, 2, 2]
+        pads = [-1, -1, 1, 1, 1, 1]
+        sizes = [[.1, .141], [.2,.272], [.37, .447], [.54, .619], [.71, .79], [.88, .961]]
+        ratios = [[1,2,.5], [1,2,.5,3,1./3], [1,2,.5,3,1./3], [1,2,.5,3,1./3], \
+            [1,2,.5], [1,2,.5]]
+        normalizations = -1
+        steps = []
+        return locals()
+    elif network == 'resnet50':
+        num_layers = 50
+        image_shape = '3,224,224'  # resnet require it as shape check
+        network = 'resnet'  # rebinds the returned 'network' entry to the shared resnet name
+        from_layers = ['_plus12', '_plus15', '', '', '', '']
+        num_filters = [-1, -1, 512, 256, 256, 128]
+        strides = [-1, -1, 2, 2, 2, 2]
+        pads = [-1, -1, 1, 1, 1, 1]
+        sizes = [[.1, .141], [.2,.272], [.37, .447], [.54, .619], [.71, .79], [.88, .961]]
+        ratios = [[1,2,.5], [1,2,.5,3,1./3], [1,2,.5,3,1./3], [1,2,.5,3,1./3], \
+            [1,2,.5], [1,2,.5]]
+        normalizations = -1
+        steps = []
+        return locals()
+    elif network == 'resnet101':
+        num_layers = 101
+        image_shape = '3,224,224'
+        network = 'resnet'  # rebinds the returned 'network' entry to the shared resnet name
+        from_layers = ['_plus12', '_plus15', '', '', '', '']
+        num_filters = [-1, -1, 512, 256, 256, 128]
+        strides = [-1, -1, 2, 2, 2, 2]
+        pads = [-1, -1, 1, 1, 1, 1]
+        sizes = [[.1, .141], [.2,.272], [.37, .447], [.54, .619], [.71, .79], [.88, .961]]
+        ratios = [[1,2,.5], [1,2,.5,3,1./3], [1,2,.5,3,1./3], [1,2,.5,3,1./3], \
+            [1,2,.5], [1,2,.5]]
+        normalizations = -1
+        steps = []
+        return locals()
+    else:
+        msg = 'No configuration found for %s with data_shape %d' % (network, data_shape)
+        raise NotImplementedError(msg)
+
+def get_symbol_train(network, data_shape, **kwargs):
+    """Build the training symbol for a named network, from a legacy module or a get_config preset
+
+    Parameters
+    ----------
+    network : str
+        name for the base network symbol
+    data_shape : int
+        input shape
+    kwargs : dict
+        see symbol_builder.get_symbol_train for more details
+    """
+    if network.startswith('legacy'):
+        logging.warning('Using legacy model.')
+        return symbol_builder.import_module(network).get_symbol_train(**kwargs)  # data_shape is not used here
+    config = get_config(network, data_shape, **kwargs).copy()
+    config.update(kwargs)  # caller-supplied values override the preset
+    return symbol_builder.get_symbol_train(**config)
+
+def get_symbol(network, data_shape, **kwargs):
+    """Build the test symbol for a named network, from a legacy module or a get_config preset
+
+    Parameters
+    ----------
+    network : str
+        name for the base network symbol
+    data_shape : int
+        input shape
+    kwargs : dict
+        see symbol_builder.get_symbol for more details
+    """
+    if network.startswith('legacy'):
+        logging.warning('Using legacy model.')
+        return symbol_builder.import_module(network).get_symbol(**kwargs)  # data_shape is not used here
+    config = get_config(network, data_shape, **kwargs).copy()
+    config.update(kwargs)  # caller-supplied values override the preset
+    return symbol_builder.get_symbol(**config)

http://git-wip-us.apache.org/repos/asf/incubator-mxnet-test/blob/cc62aded/example/ssd/symbol/symbol_vgg16_ssd_300.py
----------------------------------------------------------------------
diff --git a/example/ssd/symbol/symbol_vgg16_ssd_300.py b/example/ssd/symbol/symbol_vgg16_ssd_300.py
deleted file mode 100644
index e406746..0000000
--- a/example/ssd/symbol/symbol_vgg16_ssd_300.py
+++ /dev/null
@@ -1,189 +0,0 @@
-import mxnet as mx
-from common import conv_act_layer
-from common import multibox_layer
-
-def get_symbol_train(num_classes=20, nms_thresh=0.5, force_suppress=False, nms_topk=400):
-    """
-    Single-shot multi-box detection with VGG 16 layers ConvNet
-    This is a modified version, with fc6/fc7 layers replaced by conv layers
-    And the network is slightly smaller than original VGG 16 network
-    This is a training network with losses
-
-    Parameters:
-    ----------
-    num_classes: int
-        number of object classes not including background
-    nms_thresh : float
-        non-maximum suppression threshold
-    force_suppress : boolean
-        whether suppress different class objects
-    nms_topk : int
-        apply NMS to top K detections
-
-    Returns:
-    ----------
-    mx.Symbol
-    """
-    data = mx.symbol.Variable(name="data")
-    label = mx.symbol.Variable(name="label")
-
-    # group 1
-    conv1_1 = mx.symbol.Convolution(
-        data=data, kernel=(3, 3), pad=(1, 1), num_filter=64, name="conv1_1")
-    relu1_1 = mx.symbol.Activation(data=conv1_1, act_type="relu", name="relu1_1")
-    conv1_2 = mx.symbol.Convolution(
-        data=relu1_1, kernel=(3, 3), pad=(1, 1), num_filter=64, name="conv1_2")
-    relu1_2 = mx.symbol.Activation(data=conv1_2, act_type="relu", name="relu1_2")
-    pool1 = mx.symbol.Pooling(
-        data=relu1_2, pool_type="max", kernel=(2, 2), stride=(2, 2), name="pool1")
-    # group 2
-    conv2_1 = mx.symbol.Convolution(
-        data=pool1, kernel=(3, 3), pad=(1, 1), num_filter=128, name="conv2_1")
-    relu2_1 = mx.symbol.Activation(data=conv2_1, act_type="relu", name="relu2_1")
-    conv2_2 = mx.symbol.Convolution(
-        data=relu2_1, kernel=(3, 3), pad=(1, 1), num_filter=128, name="conv2_2")
-    relu2_2 = mx.symbol.Activation(data=conv2_2, act_type="relu", name="relu2_2")
-    pool2 = mx.symbol.Pooling(
-        data=relu2_2, pool_type="max", kernel=(2, 2), stride=(2, 2), name="pool2")
-    # group 3
-    conv3_1 = mx.symbol.Convolution(
-        data=pool2, kernel=(3, 3), pad=(1, 1), num_filter=256, name="conv3_1")
-    relu3_1 = mx.symbol.Activation(data=conv3_1, act_type="relu", name="relu3_1")
-    conv3_2 = mx.symbol.Convolution(
-        data=relu3_1, kernel=(3, 3), pad=(1, 1), num_filter=256, name="conv3_2")
-    relu3_2 = mx.symbol.Activation(data=conv3_2, act_type="relu", name="relu3_2")
-    conv3_3 = mx.symbol.Convolution(
-        data=relu3_2, kernel=(3, 3), pad=(1, 1), num_filter=256, name="conv3_3")
-    relu3_3 = mx.symbol.Activation(data=conv3_3, act_type="relu", name="relu3_3")
-    pool3 = mx.symbol.Pooling(
-        data=relu3_3, pool_type="max", kernel=(2, 2), stride=(2, 2), \
-        pooling_convention="full", name="pool3")
-    # group 4
-    conv4_1 = mx.symbol.Convolution(
-        data=pool3, kernel=(3, 3), pad=(1, 1), num_filter=512, name="conv4_1")
-    relu4_1 = mx.symbol.Activation(data=conv4_1, act_type="relu", name="relu4_1")
-    conv4_2 = mx.symbol.Convolution(
-        data=relu4_1, kernel=(3, 3), pad=(1, 1), num_filter=512, name="conv4_2")
-    relu4_2 = mx.symbol.Activation(data=conv4_2, act_type="relu", name="relu4_2")
-    conv4_3 = mx.symbol.Convolution(
-        data=relu4_2, kernel=(3, 3), pad=(1, 1), num_filter=512, name="conv4_3")
-    relu4_3 = mx.symbol.Activation(data=conv4_3, act_type="relu", name="relu4_3")
-    pool4 = mx.symbol.Pooling(
-        data=relu4_3, pool_type="max", kernel=(2, 2), stride=(2, 2), name="pool4")
-    # group 5
-    conv5_1 = mx.symbol.Convolution(
-        data=pool4, kernel=(3, 3), pad=(1, 1), num_filter=512, name="conv5_1")
-    relu5_1 = mx.symbol.Activation(data=conv5_1, act_type="relu", name="relu5_1")
-    conv5_2 = mx.symbol.Convolution(
-        data=relu5_1, kernel=(3, 3), pad=(1, 1), num_filter=512, name="conv5_2")
-    relu5_2 = mx.symbol.Activation(data=conv5_2, act_type="relu", name="relu5_2")
-    conv5_3 = mx.symbol.Convolution(
-        data=relu5_2, kernel=(3, 3), pad=(1, 1), num_filter=512, name="conv5_3")
-    relu5_3 = mx.symbol.Activation(data=conv5_3, act_type="relu", name="relu5_3")
-    pool5 = mx.symbol.Pooling(
-        data=relu5_3, pool_type="max", kernel=(3, 3), stride=(1, 1),
-        pad=(1,1), name="pool5")
-    # group 6
-    conv6 = mx.symbol.Convolution(
-        data=pool5, kernel=(3, 3), pad=(6, 6), dilate=(6, 6),
-        num_filter=1024, name="conv6")
-    relu6 = mx.symbol.Activation(data=conv6, act_type="relu", name="relu6")
-    # drop6 = mx.symbol.Dropout(data=relu6, p=0.5, name="drop6")
-    # group 7
-    conv7 = mx.symbol.Convolution(
-        data=relu6, kernel=(1, 1), pad=(0, 0), num_filter=1024, name="conv7")
-    relu7 = mx.symbol.Activation(data=conv7, act_type="relu", name="relu7")
-    # drop7 = mx.symbol.Dropout(data=relu7, p=0.5, name="drop7")
-
-    ### ssd extra layers ###
-    conv8_1, relu8_1 = conv_act_layer(relu7, "8_1", 256, kernel=(1,1), pad=(0,0), \
-        stride=(1,1), act_type="relu", use_batchnorm=False)
-    conv8_2, relu8_2 = conv_act_layer(relu8_1, "8_2", 512, kernel=(3,3), pad=(1,1), \
-        stride=(2,2), act_type="relu", use_batchnorm=False)
-    conv9_1, relu9_1 = conv_act_layer(relu8_2, "9_1", 128, kernel=(1,1), pad=(0,0), \
-        stride=(1,1), act_type="relu", use_batchnorm=False)
-    conv9_2, relu9_2 = conv_act_layer(relu9_1, "9_2", 256, kernel=(3,3), pad=(1,1), \
-        stride=(2,2), act_type="relu", use_batchnorm=False)
-    conv10_1, relu10_1 = conv_act_layer(relu9_2, "10_1", 128, kernel=(1,1), pad=(0,0), \
-        stride=(1,1), act_type="relu", use_batchnorm=False)
-    conv10_2, relu10_2 = conv_act_layer(relu10_1, "10_2", 256, kernel=(3,3), pad=(0,0), \
-        stride=(1,1), act_type="relu", use_batchnorm=False)
-    conv11_1, relu11_1 = conv_act_layer(relu10_2, "11_1", 128, kernel=(1,1), pad=(0,0), \
-        stride=(1,1), act_type="relu", use_batchnorm=False)
-    conv11_2, relu11_2 = conv_act_layer(relu11_1, "11_2", 256, kernel=(3,3), pad=(0,0), \
-        stride=(1,1), act_type="relu", use_batchnorm=False)
-
-    # specific parameters for VGG16 network
-    from_layers = [relu4_3, relu7, relu8_2, relu9_2, relu10_2, relu11_2]
-    sizes = [[.1, .141], [.2,.272], [.37, .447], [.54, .619], [.71, .79], [.88, .961]]
-    ratios = [[1,2,.5], [1,2,.5,3,1./3], [1,2,.5,3,1./3], [1,2,.5,3,1./3], \
-        [1,2,.5], [1,2,.5]]
-    normalizations = [20, -1, -1, -1, -1, -1]
-    steps = [ x / 300.0 for x in [8, 16, 32, 64, 100, 300]]
-    num_channels = [512]
-
-    loc_preds, cls_preds, anchor_boxes = multibox_layer(from_layers, \
-        num_classes, sizes=sizes, ratios=ratios, normalization=normalizations, \
-        num_channels=num_channels, clip=False, interm_layer=0, steps=steps)
-
-    tmp = mx.contrib.symbol.MultiBoxTarget(
-        *[anchor_boxes, label, cls_preds], overlap_threshold=.5, \
-        ignore_label=-1, negative_mining_ratio=3, minimum_negative_samples=0, \
-        negative_mining_thresh=.5, variances=(0.1, 0.1, 0.2, 0.2),
-        name="multibox_target")
-    loc_target = tmp[0]
-    loc_target_mask = tmp[1]
-    cls_target = tmp[2]
-
-    cls_prob = mx.symbol.SoftmaxOutput(data=cls_preds, label=cls_target, \
-        ignore_label=-1, use_ignore=True, grad_scale=1., multi_output=True, \
-        normalization='valid', name="cls_prob")
-    loc_loss_ = mx.symbol.smooth_l1(name="loc_loss_", \
-        data=loc_target_mask * (loc_preds - loc_target), scalar=1.0)
-    loc_loss = mx.symbol.MakeLoss(loc_loss_, grad_scale=1., \
-        normalization='valid', name="loc_loss")
-
-    # monitoring training status
-    cls_label = mx.symbol.MakeLoss(data=cls_target, grad_scale=0, name="cls_label")
-    det = mx.contrib.symbol.MultiBoxDetection(*[cls_prob, loc_preds, anchor_boxes], \
-        name="detection", nms_threshold=nms_thresh, force_suppress=force_suppress,
-        variances=(0.1, 0.1, 0.2, 0.2), nms_topk=nms_topk)
-    det = mx.symbol.MakeLoss(data=det, grad_scale=0, name="det_out")
-
-    # group output
-    out = mx.symbol.Group([cls_prob, loc_loss, cls_label, det])
-    return out
-
-def get_symbol(num_classes=20, nms_thresh=0.5, force_suppress=False, nms_topk=400):
-    """
-    Single-shot multi-box detection with VGG 16 layers ConvNet
-    This is a modified version, with fc6/fc7 layers replaced by conv layers
-    And the network is slightly smaller than original VGG 16 network
-    This is the detection network
-
-    Parameters:
-    ----------
-    num_classes: int
-        number of object classes not including background
-    nms_thresh : float
-        threshold of overlap for non-maximum suppression
-    force_suppress : boolean
-        whether suppress different class objects
-    nms_topk : int
-        apply NMS to top K detections
-
-    Returns:
-    ----------
-    mx.Symbol
-    """
-    net = get_symbol_train(num_classes)
-    cls_preds = net.get_internals()["multibox_cls_pred_output"]
-    loc_preds = net.get_internals()["multibox_loc_pred_output"]
-    anchor_boxes = net.get_internals()["multibox_anchors_output"]
-
-    cls_prob = mx.symbol.SoftmaxActivation(data=cls_preds, mode='channel', \
-        name='cls_prob')
-    out = mx.contrib.symbol.MultiBoxDetection(*[cls_prob, loc_preds, anchor_boxes], \
-        name="detection", nms_threshold=nms_thresh, force_suppress=force_suppress,
-        variances=(0.1, 0.1, 0.2, 0.2), nms_topk=nms_topk)
-    return out

http://git-wip-us.apache.org/repos/asf/incubator-mxnet-test/blob/cc62aded/example/ssd/symbol/symbol_vgg16_ssd_512.py
----------------------------------------------------------------------
diff --git a/example/ssd/symbol/symbol_vgg16_ssd_512.py b/example/ssd/symbol/symbol_vgg16_ssd_512.py
deleted file mode 100644
index 1cc243b..0000000
--- a/example/ssd/symbol/symbol_vgg16_ssd_512.py
+++ /dev/null
@@ -1,194 +0,0 @@
-import mxnet as mx
-from common import conv_act_layer
-from common import multibox_layer
-
-def get_symbol_train(num_classes=20, nms_thresh=0.5, force_suppress=False, nms_topk=400):
-    """
-    Single-shot multi-box detection with VGG 16 layers ConvNet
-    This is a modified version, with fc6/fc7 layers replaced by conv layers
-    And the network is slightly smaller than original VGG 16 network
-    This is a training network with losses
-
-    Parameters:
-    ----------
-    num_classes: int
-        number of object classes not including background
-    nms_thresh : float
-        non-maximum suppression threshold
-    force_suppress : boolean
-        whether suppress different class objects
-    nms_topk : int
-        apply NMS to top K detections
-
-    Returns:
-    ----------
-    mx.Symbol
-    """
-    data = mx.symbol.Variable(name="data")
-    label = mx.symbol.Variable(name="label")
-
-    # group 1
-    conv1_1 = mx.symbol.Convolution(
-        data=data, kernel=(3, 3), pad=(1, 1), num_filter=64, name="conv1_1")
-    relu1_1 = mx.symbol.Activation(data=conv1_1, act_type="relu", name="relu1_1")
-    conv1_2 = mx.symbol.Convolution(
-        data=relu1_1, kernel=(3, 3), pad=(1, 1), num_filter=64, name="conv1_2")
-    relu1_2 = mx.symbol.Activation(data=conv1_2, act_type="relu", name="relu1_2")
-    pool1 = mx.symbol.Pooling(
-        data=relu1_2, pool_type="max", kernel=(2, 2), stride=(2, 2), name="pool1")
-    # group 2
-    conv2_1 = mx.symbol.Convolution(
-        data=pool1, kernel=(3, 3), pad=(1, 1), num_filter=128, name="conv2_1")
-    relu2_1 = mx.symbol.Activation(data=conv2_1, act_type="relu", name="relu2_1")
-    conv2_2 = mx.symbol.Convolution(
-        data=relu2_1, kernel=(3, 3), pad=(1, 1), num_filter=128, name="conv2_2")
-    relu2_2 = mx.symbol.Activation(data=conv2_2, act_type="relu", name="relu2_2")
-    pool2 = mx.symbol.Pooling(
-        data=relu2_2, pool_type="max", kernel=(2, 2), stride=(2, 2), name="pool2")
-    # group 3
-    conv3_1 = mx.symbol.Convolution(
-        data=pool2, kernel=(3, 3), pad=(1, 1), num_filter=256, name="conv3_1")
-    relu3_1 = mx.symbol.Activation(data=conv3_1, act_type="relu", name="relu3_1")
-    conv3_2 = mx.symbol.Convolution(
-        data=relu3_1, kernel=(3, 3), pad=(1, 1), num_filter=256, name="conv3_2")
-    relu3_2 = mx.symbol.Activation(data=conv3_2, act_type="relu", name="relu3_2")
-    conv3_3 = mx.symbol.Convolution(
-        data=relu3_2, kernel=(3, 3), pad=(1, 1), num_filter=256, name="conv3_3")
-    relu3_3 = mx.symbol.Activation(data=conv3_3, act_type="relu", name="relu3_3")
-    pool3 = mx.symbol.Pooling(
-        data=relu3_3, pool_type="max", kernel=(2, 2), stride=(2, 2), \
-        pooling_convention="full", name="pool3")
-    # group 4
-    conv4_1 = mx.symbol.Convolution(
-        data=pool3, kernel=(3, 3), pad=(1, 1), num_filter=512, name="conv4_1")
-    relu4_1 = mx.symbol.Activation(data=conv4_1, act_type="relu", name="relu4_1")
-    conv4_2 = mx.symbol.Convolution(
-        data=relu4_1, kernel=(3, 3), pad=(1, 1), num_filter=512, name="conv4_2")
-    relu4_2 = mx.symbol.Activation(data=conv4_2, act_type="relu", name="relu4_2")
-    conv4_3 = mx.symbol.Convolution(
-        data=relu4_2, kernel=(3, 3), pad=(1, 1), num_filter=512, name="conv4_3")
-    relu4_3 = mx.symbol.Activation(data=conv4_3, act_type="relu", name="relu4_3")
-    pool4 = mx.symbol.Pooling(
-        data=relu4_3, pool_type="max", kernel=(2, 2), stride=(2, 2), name="pool4")
-    # group 5
-    conv5_1 = mx.symbol.Convolution(
-        data=pool4, kernel=(3, 3), pad=(1, 1), num_filter=512, name="conv5_1")
-    relu5_1 = mx.symbol.Activation(data=conv5_1, act_type="relu", name="relu5_1")
-    conv5_2 = mx.symbol.Convolution(
-        data=relu5_1, kernel=(3, 3), pad=(1, 1), num_filter=512, name="conv5_2")
-    relu5_2 = mx.symbol.Activation(data=conv5_2, act_type="relu", name="relu5_2")
-    conv5_3 = mx.symbol.Convolution(
-        data=relu5_2, kernel=(3, 3), pad=(1, 1), num_filter=512, name="conv5_3")
-    relu5_3 = mx.symbol.Activation(data=conv5_3, act_type="relu", name="relu5_3")
-    pool5 = mx.symbol.Pooling(
-        data=relu5_3, pool_type="max", kernel=(3, 3), stride=(1, 1),
-        pad=(1,1), name="pool5")
-    # group 6
-    conv6 = mx.symbol.Convolution(
-        data=pool5, kernel=(3, 3), pad=(6, 6), dilate=(6, 6),
-        num_filter=1024, name="conv6")
-    relu6 = mx.symbol.Activation(data=conv6, act_type="relu", name="relu6")
-    # drop6 = mx.symbol.Dropout(data=relu6, p=0.5, name="drop6")
-    # group 7
-    conv7 = mx.symbol.Convolution(
-        data=relu6, kernel=(1, 1), pad=(0, 0), num_filter=1024, name="conv7")
-    relu7 = mx.symbol.Activation(data=conv7, act_type="relu", name="relu7")
-    # drop7 = mx.symbol.Dropout(data=relu7, p=0.5, name="drop7")
-
-    ### ssd extra layers ###
-    conv8_1, relu8_1 = conv_act_layer(relu7, "8_1", 256, kernel=(1,1), pad=(0,0), \
-        stride=(1,1), act_type="relu", use_batchnorm=False)
-    conv8_2, relu8_2 = conv_act_layer(relu8_1, "8_2", 512, kernel=(3,3), pad=(1,1), \
-        stride=(2,2), act_type="relu", use_batchnorm=False)
-    conv9_1, relu9_1 = conv_act_layer(relu8_2, "9_1", 128, kernel=(1,1), pad=(0,0), \
-        stride=(1,1), act_type="relu", use_batchnorm=False)
-    conv9_2, relu9_2 = conv_act_layer(relu9_1, "9_2", 256, kernel=(3,3), pad=(1,1), \
-        stride=(2,2), act_type="relu", use_batchnorm=False)
-    conv10_1, relu10_1 = conv_act_layer(relu9_2, "10_1", 128, kernel=(1,1), pad=(0,0), \
-        stride=(1,1), act_type="relu", use_batchnorm=False)
-    conv10_2, relu10_2 = conv_act_layer(relu10_1, "10_2", 256, kernel=(3,3), pad=(1,1), \
-        stride=(2,2), act_type="relu", use_batchnorm=False)
-    conv11_1, relu11_1 = conv_act_layer(relu10_2, "11_1", 128, kernel=(1,1), pad=(0,0), \
-        stride=(1,1), act_type="relu", use_batchnorm=False)
-    conv11_2, relu11_2 = conv_act_layer(relu11_1, "11_2", 256, kernel=(3,3), pad=(1,1), \
-        stride=(2,2), act_type="relu", use_batchnorm=False)
-    conv12_1, relu12_1 = conv_act_layer(relu11_2, "12_1", 128, kernel=(1,1), pad=(0,0), \
-        stride=(1,1), act_type="relu", use_batchnorm=False)
-    conv12_2, relu12_2 = conv_act_layer(relu12_1, "12_2", 256, kernel=(4,4), pad=(1,1), \
-        stride=(1,1), act_type="relu", use_batchnorm=False)
-
-    # specific parameters for VGG16 network
-    from_layers = [relu4_3, relu7, relu8_2, relu9_2, relu10_2, relu11_2, relu12_2]
-    sizes = [[.07, .1025], [.15,.2121], [.3, .3674], [.45, .5196], [.6, .6708], \
-        [.75, .8216], [.9, .9721]]
-    ratios = [[1,2,.5], [1,2,.5,3,1./3], [1,2,.5,3,1./3], [1,2,.5,3,1./3], \
-        [1,2,.5,3,1./3], [1,2,.5], [1,2,.5]]
-    normalizations = [20, -1, -1, -1, -1, -1, -1]
-    steps = [ x / 512.0 for x in [8, 16, 32, 64, 128, 256, 512]]
-    num_channels = [512]
-
-    loc_preds, cls_preds, anchor_boxes = multibox_layer(from_layers, \
-        num_classes, sizes=sizes, ratios=ratios, normalization=normalizations, \
-        num_channels=num_channels, clip=False, interm_layer=0, steps=steps)
-
-    tmp = mx.contrib.symbol.MultiBoxTarget(
-        *[anchor_boxes, label, cls_preds], overlap_threshold=.5, \
-        ignore_label=-1, negative_mining_ratio=3, minimum_negative_samples=0, \
-        negative_mining_thresh=.5, variances=(0.1, 0.1, 0.2, 0.2),
-        name="multibox_target")
-    loc_target = tmp[0]
-    loc_target_mask = tmp[1]
-    cls_target = tmp[2]
-
-    cls_prob = mx.symbol.SoftmaxOutput(data=cls_preds, label=cls_target, \
-        ignore_label=-1, use_ignore=True, grad_scale=1., multi_output=True, \
-        normalization='valid', name="cls_prob")
-    loc_loss_ = mx.symbol.smooth_l1(name="loc_loss_", \
-        data=loc_target_mask * (loc_preds - loc_target), scalar=1.0)
-    loc_loss = mx.symbol.MakeLoss(loc_loss_, grad_scale=1., \
-        normalization='valid', name="loc_loss")
-
-    # monitoring training status
-    cls_label = mx.symbol.MakeLoss(data=cls_target, grad_scale=0, name="cls_label")
-    det = mx.contrib.symbol.MultiBoxDetection(*[cls_prob, loc_preds, anchor_boxes], \
-        name="detection", nms_threshold=nms_thresh, force_suppress=force_suppress,
-        variances=(0.1, 0.1, 0.2, 0.2), nms_topk=nms_topk)
-    det = mx.symbol.MakeLoss(data=det, grad_scale=0, name="det_out")
-
-    # group output
-    out = mx.symbol.Group([cls_prob, loc_loss, cls_label, det])
-    return out
-
-def get_symbol(num_classes=20, nms_thresh=0.5, force_suppress=False, nms_topk=400):
-    """
-    Single-shot multi-box detection with VGG 16 layers ConvNet
-    This is a modified version, with fc6/fc7 layers replaced by conv layers
-    And the network is slightly smaller than original VGG 16 network
-    This is the detection network
-
-    Parameters:
-    ----------
-    num_classes: int
-        number of object classes not including background
-    nms_thresh : float
-        threshold of overlap for non-maximum suppression
-    force_suppress : boolean
-        whether suppress different class objects
-    nms_topk : int
-        apply NMS to top K detections
-
-    Returns:
-    ----------
-    mx.Symbol
-    """
-    net = get_symbol_train(num_classes)
-    cls_preds = net.get_internals()["multibox_cls_pred_output"]
-    loc_preds = net.get_internals()["multibox_loc_pred_output"]
-    anchor_boxes = net.get_internals()["multibox_anchors_output"]
-
-    cls_prob = mx.symbol.SoftmaxActivation(data=cls_preds, mode='channel', \
-        name='cls_prob')
-    out = mx.contrib.symbol.MultiBoxDetection(*[cls_prob, loc_preds, anchor_boxes], \
-        name="detection", nms_threshold=nms_thresh, force_suppress=force_suppress,
-        variances=(0.1, 0.1, 0.2, 0.2), nms_topk=nms_topk)
-    return out

http://git-wip-us.apache.org/repos/asf/incubator-mxnet-test/blob/cc62aded/example/ssd/symbol/vgg16_reduced.py
----------------------------------------------------------------------
diff --git a/example/ssd/symbol/vgg16_reduced.py b/example/ssd/symbol/vgg16_reduced.py
new file mode 100644
index 0000000..c6a55b1
--- /dev/null
+++ b/example/ssd/symbol/vgg16_reduced.py
@@ -0,0 +1,86 @@
+import mxnet as mx
+
+def get_symbol(num_classes=1000, **kwargs):
+    """
+    VGG 16 layers classification network with fc6/fc7 replaced by conv layers
+    (reduced version, slightly smaller than the original VGG 16 network).
+    num_classes sets the fc8 filter count; extra kwargs are accepted but unused.
+    """
+    data = mx.symbol.Variable(name="data")
+    label = mx.symbol.Variable(name="label")  # NOTE(review): not referenced below
+
+    # group 1: two 3x3 convs (64 filters) + 2x2 max pool
+    conv1_1 = mx.symbol.Convolution(
+        data=data, kernel=(3, 3), pad=(1, 1), num_filter=64, name="conv1_1")
+    relu1_1 = mx.symbol.Activation(data=conv1_1, act_type="relu", name="relu1_1")
+    conv1_2 = mx.symbol.Convolution(
+        data=relu1_1, kernel=(3, 3), pad=(1, 1), num_filter=64, name="conv1_2")
+    relu1_2 = mx.symbol.Activation(data=conv1_2, act_type="relu", name="relu1_2")
+    pool1 = mx.symbol.Pooling(
+        data=relu1_2, pool_type="max", kernel=(2, 2), stride=(2, 2), name="pool1")
+    # group 2: two 3x3 convs (128 filters) + 2x2 max pool
+    conv2_1 = mx.symbol.Convolution(
+        data=pool1, kernel=(3, 3), pad=(1, 1), num_filter=128, name="conv2_1")
+    relu2_1 = mx.symbol.Activation(data=conv2_1, act_type="relu", name="relu2_1")
+    conv2_2 = mx.symbol.Convolution(
+        data=relu2_1, kernel=(3, 3), pad=(1, 1), num_filter=128, name="conv2_2")
+    relu2_2 = mx.symbol.Activation(data=conv2_2, act_type="relu", name="relu2_2")
+    pool2 = mx.symbol.Pooling(
+        data=relu2_2, pool_type="max", kernel=(2, 2), stride=(2, 2), name="pool2")
+    # group 3: three 3x3 convs (256 filters); pool3 uses the "full" pooling convention
+    conv3_1 = mx.symbol.Convolution(
+        data=pool2, kernel=(3, 3), pad=(1, 1), num_filter=256, name="conv3_1")
+    relu3_1 = mx.symbol.Activation(data=conv3_1, act_type="relu", name="relu3_1")
+    conv3_2 = mx.symbol.Convolution(
+        data=relu3_1, kernel=(3, 3), pad=(1, 1), num_filter=256, name="conv3_2")
+    relu3_2 = mx.symbol.Activation(data=conv3_2, act_type="relu", name="relu3_2")
+    conv3_3 = mx.symbol.Convolution(
+        data=relu3_2, kernel=(3, 3), pad=(1, 1), num_filter=256, name="conv3_3")
+    relu3_3 = mx.symbol.Activation(data=conv3_3, act_type="relu", name="relu3_3")
+    pool3 = mx.symbol.Pooling(
+        data=relu3_3, pool_type="max", kernel=(2, 2), stride=(2, 2), \
+        pooling_convention="full", name="pool3")
+    # group 4: three 3x3 convs (512 filters) + 2x2 max pool
+    conv4_1 = mx.symbol.Convolution(
+        data=pool3, kernel=(3, 3), pad=(1, 1), num_filter=512, name="conv4_1")
+    relu4_1 = mx.symbol.Activation(data=conv4_1, act_type="relu", name="relu4_1")
+    conv4_2 = mx.symbol.Convolution(
+        data=relu4_1, kernel=(3, 3), pad=(1, 1), num_filter=512, name="conv4_2")
+    relu4_2 = mx.symbol.Activation(data=conv4_2, act_type="relu", name="relu4_2")
+    conv4_3 = mx.symbol.Convolution(
+        data=relu4_2, kernel=(3, 3), pad=(1, 1), num_filter=512, name="conv4_3")
+    relu4_3 = mx.symbol.Activation(data=conv4_3, act_type="relu", name="relu4_3")
+    pool4 = mx.symbol.Pooling(
+        data=relu4_3, pool_type="max", kernel=(2, 2), stride=(2, 2), name="pool4")
+    # group 5: three 3x3 convs (512 filters); pool5 is 3x3 with stride 1 and pad 1
+    conv5_1 = mx.symbol.Convolution(
+        data=pool4, kernel=(3, 3), pad=(1, 1), num_filter=512, name="conv5_1")
+    relu5_1 = mx.symbol.Activation(data=conv5_1, act_type="relu", name="relu5_1")
+    conv5_2 = mx.symbol.Convolution(
+        data=relu5_1, kernel=(3, 3), pad=(1, 1), num_filter=512, name="conv5_2")
+    relu5_2 = mx.symbol.Activation(data=conv5_2, act_type="relu", name="relu5_2")
+    conv5_3 = mx.symbol.Convolution(
+        data=relu5_2, kernel=(3, 3), pad=(1, 1), num_filter=512, name="conv5_3")
+    relu5_3 = mx.symbol.Activation(data=conv5_3, act_type="relu", name="relu5_3")
+    pool5 = mx.symbol.Pooling(
+        data=relu5_3, pool_type="max", kernel=(3, 3), stride=(1, 1),
+        pad=(1,1), name="pool5")
+    # group 6: "fc6" as a dilated 3x3 convolution (1024 filters)
+    conv6 = mx.symbol.Convolution(
+        data=pool5, kernel=(3, 3), pad=(6, 6), dilate=(6, 6),
+        num_filter=1024, name="fc6")
+    relu6 = mx.symbol.Activation(data=conv6, act_type="relu", name="relu6")
+    # drop6 = mx.symbol.Dropout(data=relu6, p=0.5, name="drop6")
+    # group 7: "fc7" as a 1x1 convolution (1024 filters)
+    conv7 = mx.symbol.Convolution(
+        data=relu6, kernel=(1, 1), pad=(0, 0), num_filter=1024, name="fc7")
+    relu7 = mx.symbol.Activation(data=conv7, act_type="relu", name="relu7")
+    # drop7 = mx.symbol.Dropout(data=relu7, p=0.5, name="drop7")
+
+    gpool = mx.symbol.Pooling(data=relu7, pool_type='avg', kernel=(7, 7),
+        global_pool=True, name='global_pool')
+    conv8 = mx.symbol.Convolution(data=gpool, num_filter=num_classes, kernel=(1, 1),
+        name='fc8')
+    flat = mx.symbol.Flatten(data=conv8)
+    softmax = mx.symbol.SoftmaxOutput(data=flat, name='softmax')
+    return softmax

http://git-wip-us.apache.org/repos/asf/incubator-mxnet-test/blob/cc62aded/example/ssd/tools/prepare_coco.sh
----------------------------------------------------------------------
diff --git a/example/ssd/tools/prepare_coco.sh b/example/ssd/tools/prepare_coco.sh
new file mode 100644
index 0000000..1b11f93
--- /dev/null
+++ b/example/ssd/tools/prepare_coco.sh
@@ -0,0 +1,4 @@
+#!/usr/bin/env bash
+DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"  # dir of this script; quoted below so paths with spaces work
+python "$DIR/prepare_dataset.py" --dataset coco --set train2014,valminusminival2014 --target "$DIR/../data/train.lst" --root "$DIR/../data/coco"
+python "$DIR/prepare_dataset.py" --dataset coco --set minival2014 --target "$DIR/../data/val.lst" --shuffle False --root "$DIR/../data/coco"

http://git-wip-us.apache.org/repos/asf/incubator-mxnet-test/blob/cc62aded/example/ssd/tools/prepare_dataset.py
----------------------------------------------------------------------
diff --git a/example/ssd/tools/prepare_dataset.py b/example/ssd/tools/prepare_dataset.py
index 7bd6968..12b090c 100644
--- a/example/ssd/tools/prepare_dataset.py
+++ b/example/ssd/tools/prepare_dataset.py
@@ -5,6 +5,7 @@ import subprocess
 curr_path = os.path.abspath(os.path.dirname(__file__))
 sys.path.append(os.path.join(curr_path, '..'))
 from dataset.pascal_voc import PascalVoc
+from dataset.mscoco import Coco
 from dataset.concat_db import ConcatDB
 
 def load_pascal(image_set, year, devkit_path, shuffle=False):
@@ -46,6 +47,30 @@ def load_pascal(image_set, year, devkit_path, shuffle=False):
     else:
         return imdbs[0]
 
+def load_coco(image_set, dirname, shuffle=False):
+    """
+    wrapper function for loading ms coco dataset
+
+    Parameters:
+    ----------
+    image_set : str
+        comma separated sets: train2014, val2014, valminusminival2014, minival2014
+    dirname: str
+        root dir for coco, annotations are read from annotations/instances_<set>.json
+    shuffle: boolean
+        initial shuffle
+    """
+    # drop empty names: ''.split(',') is [''], which used to become 'instances_.json'
+    set_names = [y.strip() for y in image_set.split(',') if y.strip()]
+    assert set_names, "No image set specified"
+    imdbs = []
+    for name in set_names:
+        af_path = os.path.join(dirname, 'annotations', 'instances_' + name + '.json')
+        imdbs.append(Coco(af_path, dirname, shuffle=shuffle))
+    if len(imdbs) > 1:
+        return ConcatDB(imdbs, shuffle)
+    return imdbs[0]
+
 def parse_args():
     parser = argparse.ArgumentParser(description='Prepare lists for dataset')
     parser.add_argument('--dataset', dest='dataset', help='dataset to use',
@@ -69,6 +94,11 @@ if __name__ == '__main__':
     args = parse_args()
     if args.dataset == 'pascal':
         db = load_pascal(args.set, args.year, args.root_path, args.shuffle)
+        print("saving list to disk...")
+        db.save_imglist(args.target, root=args.root_path)
+    elif args.dataset == 'coco':
+        db = load_coco(args.set, args.root_path, args.shuffle)
+        print("saving list to disk...")
         db.save_imglist(args.target, root=args.root_path)
     else:
         raise NotImplementedError("No implementation for dataset: " + args.dataset)

http://git-wip-us.apache.org/repos/asf/incubator-mxnet-test/blob/cc62aded/example/ssd/tools/visualize_net.py
----------------------------------------------------------------------
diff --git a/example/ssd/tools/visualize_net.py b/example/ssd/tools/visualize_net.py
index e619c23..f72d6a6 100644
--- a/example/ssd/tools/visualize_net.py
+++ b/example/ssd/tools/visualize_net.py
@@ -1,13 +1,14 @@
 from __future__ import print_function
 import find_mxnet
 import mxnet as mx
-import importlib
 import argparse
-import sys
+import sys, os
+sys.path.append(os.path.join(os.path.dirname(__file__), '..', 'symbol'))
+import symbol_factory
+
 
 parser = argparse.ArgumentParser(description='network visualization')
-parser.add_argument('--network', type=str, default='vgg16_ssd_300',
-                    choices = ['vgg16_ssd_300', 'vgg16_ssd_512'],
+parser.add_argument('--network', type=str, default='vgg16_reduced',
                     help = 'the cnn to use')
 parser.add_argument('--num-classes', type=int, default=20,
                     help='the number of classes')
@@ -16,13 +17,11 @@ parser.add_argument('--data-shape', type=int, default=300,
 parser.add_argument('--train', action='store_true', default=False, help='show train net')
 args = parser.parse_args()
 
-sys.path.append('../symbol')
-
 if not args.train:
-    net = importlib.import_module("symbol_" + args.network).get_symbol(args.num_classes)
+    net = symbol_factory.get_symbol(args.network, args.data_shape, num_classes=args.num_classes)
     a = mx.viz.plot_network(net, shape={"data":(1,3,args.data_shape,args.data_shape)}, \
         node_attrs={"shape":'rect', "fixedsize":'false'})
-    a.render("ssd_" + args.network)
+    a.render("ssd_" + args.network + '_' + str(args.data_shape))
 else:
-    net = importlib.import_module("symbol_" + args.network).get_symbol_train(args.num_classes)
+    net = symbol_factory.get_symbol_train(args.network, args.data_shape, num_classes=args.num_classes)
     print(net.tojson())

http://git-wip-us.apache.org/repos/asf/incubator-mxnet-test/blob/cc62aded/example/ssd/train.py
----------------------------------------------------------------------
diff --git a/example/ssd/train.py b/example/ssd/train.py
index fcd5fb9..62ad2c4 100644
--- a/example/ssd/train.py
+++ b/example/ssd/train.py
@@ -15,8 +15,8 @@ def parse_args():
                         default=os.path.join(os.getcwd(), 'data', 'val.rec'), type=str)
     parser.add_argument('--val-list', dest='val_list', help='validation list to use',
                         default="", type=str)
-    parser.add_argument('--network', dest='network', type=str, default='vgg16_ssd_300',
-                        choices=['vgg16_ssd_300', 'vgg16_ssd_512'], help='which network to use')
+    parser.add_argument('--network', dest='network', type=str, default='vgg16_reduced',
+                        help='which network to use')
     parser.add_argument('--batch-size', dest='batch_size', type=int, default=32,
                         help='training batch size')
     parser.add_argument('--resume', dest='resume', type=int, default=-1,
@@ -41,7 +41,7 @@ def parse_args():
                         help='set image shape')
     parser.add_argument('--label-width', dest='label_width', type=int, default=350,
                         help='force padding label width to sync across train and validation')
-    parser.add_argument('--lr', dest='learning_rate', type=float, default=0.004,
+    parser.add_argument('--lr', dest='learning_rate', type=float, default=0.002,
                         help='learning rate')
     parser.add_argument('--momentum', dest='momentum', type=float, default=0.9,
                         help='momentum')
@@ -53,7 +53,7 @@ def parse_args():
                         help='green mean value')
     parser.add_argument('--mean-b', dest='mean_b', type=float, default=104,
                         help='blue mean value')
-    parser.add_argument('--lr-steps', dest='lr_refactor_step', type=str, default='150, 200',
+    parser.add_argument('--lr-steps', dest='lr_refactor_step', type=str, default='80, 160',
                         help='refactor learning rate at specified epochs')
     parser.add_argument('--lr-factor', dest='lr_refactor_ratio', type=str, default=0.1,
                         help='ratio to refactor learning rate')
@@ -92,9 +92,9 @@ def parse_class_names(args):
     num_class = args.num_class
     if len(args.class_names) > 0:
         if os.path.isfile(args.class_names):
-                # try to open it to read class names
-                with open(args.class_names, 'r') as f:
-                    class_names = [l.strip() for l in f.readlines()]
+            # try to open it to read class names
+            with open(args.class_names, 'r') as f:
+                class_names = [l.strip() for l in f.readlines()]
         else:
             class_names = [c.strip() for c in args.class_names.split(',')]
         assert len(class_names) == num_class, str(len(class_names))

http://git-wip-us.apache.org/repos/asf/incubator-mxnet-test/blob/cc62aded/example/ssd/train/train_net.py
----------------------------------------------------------------------
diff --git a/example/ssd/train/train_net.py b/example/ssd/train/train_net.py
index 3f73ee8..54cae05 100644
Binary files a/example/ssd/train/train_net.py and b/example/ssd/train/train_net.py differ

http://git-wip-us.apache.org/repos/asf/incubator-mxnet-test/blob/cc62aded/src/io/image_det_aug_default.cc
----------------------------------------------------------------------
diff --git a/src/io/image_det_aug_default.cc b/src/io/image_det_aug_default.cc
index b5d4209..cb7966d 100644
--- a/src/io/image_det_aug_default.cc
+++ b/src/io/image_det_aug_default.cc
@@ -255,9 +255,9 @@ class ImageDetLabel {
       obj.right = *(it++);
       obj.bottom = *(it++);
       obj.extra.assign(it, it - 5 + object_width_);
-      objects_.push_back(obj);
-      CHECK_GT(obj.right, obj.left);
-      CHECK_GT(obj.bottom, obj.top);
+      if (obj.right > obj.left && obj.bottom > obj.top) {
+        objects_.push_back(obj);
+      }
     }
   }