Posted to commits@singa.apache.org by wa...@apache.org on 2016/08/09 16:02:55 UTC

[2/2] incubator-singa git commit: SINGA-231 Batch-normalized VGG model for cifar-10

SINGA-231 Batch-normalized VGG model for cifar-10

Merge the training of vgg and alexnet into train.py.
The validation accuracy of vgg reaches about 0.89.
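
Usage (a sketch, assuming the script is run from examples/cifar10 after the
cifar-10-batches-py dataset has been downloaded; both arguments of train.py
are positional, per the argparse setup in the diff below):

    python train.py alexnet cifar-10-batches-py
    python train.py vgg cifar-10-batches-py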


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/28678ae8
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/28678ae8
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/28678ae8

Branch: refs/heads/dev
Commit: 28678ae8329112ca1f11086b52ded7149ec9ab2c
Parents: bc3b74b
Author: Wei Wang <wa...@comp.nus.edu.sg>
Authored: Tue Aug 9 20:06:29 2016 +0800
Committer: Wei Wang <wa...@comp.nus.edu.sg>
Committed: Wed Aug 10 00:01:03 2016 +0800

----------------------------------------------------------------------
 examples/cifar10/alexnet.py           |  16 ++-
 examples/cifar10/predict.py           |  14 ++-
 examples/cifar10/run-parallel.sh      |   1 +
 examples/cifar10/train.py             |  63 +++++++----
 examples/cifar10/train_vgg_cifar10.py | 162 -----------------------------
 examples/cifar10/vgg-parallel.cc      |  24 ++---
 examples/cifar10/vgg.py               |  66 ++++++++++--
 7 files changed, 138 insertions(+), 208 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/28678ae8/examples/cifar10/alexnet.py
----------------------------------------------------------------------
diff --git a/examples/cifar10/alexnet.py b/examples/cifar10/alexnet.py
index 4b3daec..96c339a 100644
--- a/examples/cifar10/alexnet.py
+++ b/examples/cifar10/alexnet.py
@@ -14,15 +14,21 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # =============================================================================
+''' This model is created following the structure from
+https://code.google.com/p/cuda-convnet/source/browse/trunk/example-layers/layers-18pct.cfg
+With the same hyper-parameter settings and data pre-processing, the final
+validation accuracy is about 82%.
+'''
+
 import sys
 import os
 
 sys.path.append(os.path.join(os.path.dirname(__file__), '../../build/python'))
 from singa import layer
+from singa import initializer
 from singa import metric
 from singa import loss
 from singa import net as ffnet
-from singa.proto import core_pb2
 
 
 def create_net():
@@ -44,4 +50,12 @@ def create_net():
     net.add(layer.MaxPooling2D('pool3', 3, 2, pad=1))
     net.add(layer.Flatten('flat'))
     net.add(layer.Dense('dense', 10, W_specs=W2_specs.copy(), b_specs=b_specs.copy()))
+    for (p, specs) in zip(net.param_values(), net.param_specs()):
+        filler = specs.filler
+        if filler.type == 'gaussian':
+            initializer.gaussian(p, filler.mean, filler.std)
+        else:
+            p.set_value(0)
+        print specs.name, filler.type, p.l1()
+
     return net

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/28678ae8/examples/cifar10/predict.py
----------------------------------------------------------------------
diff --git a/examples/cifar10/predict.py b/examples/cifar10/predict.py
index d083d0b..07b1145 100644
--- a/examples/cifar10/predict.py
+++ b/examples/cifar10/predict.py
@@ -14,7 +14,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # =============================================================================
-
+import cPickle as pickle
 import numpy as np
 import sys
 import os
@@ -27,6 +27,15 @@ import net as ffnet
 
 
 def predict(net, images, cuda, topk=5):
+    '''Predict the label of each image.
+
+    Args:
+        net: a pretrained neural net
+        images: a batch of images [batch_size, 3, 32, 32], which have been
+            pre-processed
+        cuda: the cuda device
+        topk: return the topk labels for each image.
+    '''
     x = tensor.from_numpy(images.astype(np.float32))
     x.to_device(cuda)
     y = net.predict(x)
@@ -40,7 +49,7 @@ def predict(net, images, cuda, topk=5):
 def load_dataset(filepath):
     print 'Loading data file %s' % filepath
     with open(filepath, 'rb') as fd:
-        cifar10 = cPickle.load(fd)
+        cifar10 = pickle.load(fd)
     image = cifar10['data'].astype(dtype=np.uint8)
     image = image.reshape((-1, 3, 32, 32))
     label = np.asarray(cifar10['labels'], dtype=np.uint8)
@@ -79,4 +88,5 @@ if __name__ == '__main__':
 
     mean = compute_image_mean('cifar-10-batches-py')
     test_images, _ = load_test_data('cifar-10-batches-py')
+    # subtracting the mean is for alexnet; vgg uses a different pre-processing strategy
     print predict(model, test_images - mean, cuda)
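
For reference, selecting the topk labels from the predicted probabilities can
be done with a numpy argsort. A minimal sketch, assuming the output tensor y
has been converted to a numpy array; the names prob and topk_labels here are
hypothetical:

    import numpy as np

    prob = np.random.rand(2, 10)  # stand-in for the converted net.predict output
    # sort class ids by descending probability, keep the first 5 per image
    topk_labels = np.argsort(-prob, axis=1)[:, :5]
    print topk_labels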

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/28678ae8/examples/cifar10/run-parallel.sh
----------------------------------------------------------------------
diff --git a/examples/cifar10/run-parallel.sh b/examples/cifar10/run-parallel.sh
index 6a9109a..18193db 100755
--- a/examples/cifar10/run-parallel.sh
+++ b/examples/cifar10/run-parallel.sh
@@ -1,2 +1,3 @@
 #!/usr/bin/env sh
 ../../build/bin/alexnet-parallel -epoch 4
+#../../build/bin/vgg-parallel -epoch 4

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/28678ae8/examples/cifar10/train.py
----------------------------------------------------------------------
diff --git a/examples/cifar10/train.py b/examples/cifar10/train.py
index f4caca4..cb4110d 100644
--- a/examples/cifar10/train.py
+++ b/examples/cifar10/train.py
@@ -23,9 +23,9 @@ import cPickle
 import numpy as np
 import os
 import sys
+import argparse
 
 sys.path.append(os.path.join(os.path.dirname(__file__), '../../build/python'))
-from singa import initializer
 from singa import utils
 from singa import optimizer
 from singa import device
@@ -33,6 +33,7 @@ from singa import tensor
 from singa.proto import core_pb2
 
 import alexnet
+import vgg
 
 
 def load_dataset(filepath):
@@ -65,7 +66,28 @@ def load_test_data(dir_path):
     return np.array(images,  dtype=np.float32), np.array(labels, dtype=np.int32)
 
 
-def get_lr(epoch):
+def normalize_for_vgg(train_x, test_x):
+    mean = train_x.mean()
+    std = train_x.std()
+    train_x -= mean
+    test_x -= mean
+    train_x /= std
+    test_x /= std
+    return train_x, test_x
+
+
+def normalize_for_alexnet(train_x, test_x):
+    mean = np.average(train_x, axis=0)
+    train_x -= mean
+    test_x -= mean
+    return train_x, test_x
+
+
+def vgg_lr(epoch):
+    return 0.01 / float(1 << ((epoch / 30)))
+
+
+def alexnet_lr(epoch):
     if epoch < 120:
         return 0.001
     elif epoch < 130:
@@ -74,32 +96,21 @@ def get_lr(epoch):
         return 0.00001
 
 
-def train(data_dir, net, num_epoch=140, batch_size=100):
+def train(data, net, max_epoch, get_lr, weight_decay, batch_size=100):
     print 'Start initialization............'
     cuda = device.create_cuda_gpu()
     net.to_device(cuda)
     opt = optimizer.SGD(momentum=0.9, weight_decay=0.004)
     for (p, specs) in zip(net.param_values(), net.param_specs()):
-        filler = specs.filler
-        if filler.type == 'gaussian':
-            initializer.gaussian(p, filler.mean, filler.std)
-        else:
-            p.set_value(0)
         opt.register(p, specs)
-        print specs.name, filler.type, p.l1()
-    print 'Loading data ..................'
-    train_x, train_y = load_train_data(data_dir)
-    test_x, test_y = load_test_data(data_dir)
-    mean = np.average(train_x, axis=0)
-    train_x -= mean
-    test_x -= mean
 
     tx = tensor.Tensor((batch_size, 3, 32, 32), cuda)
     ty = tensor.Tensor((batch_size,), cuda, core_pb2.kInt)
+    train_x, train_y, test_x, test_y = data
     num_train_batch = train_x.shape[0] / batch_size
     num_test_batch = test_x.shape[0] / batch_size
     idx = np.arange(train_x.shape[0], dtype=np.int32)
-    for epoch in range(num_epoch):
+    for epoch in range(max_epoch):
         np.random.shuffle(idx)
         loss, acc = 0.0, 0.0
         print 'Epoch %d' % epoch
@@ -135,8 +146,20 @@ def train(data_dir, net, num_epoch=140, batch_size=100):
     net.save('model.bin')  # save model params into checkpoint file
 
 if __name__ == '__main__':
-    data_dir = 'cifar-10-batches-py'
-    assert os.path.exists(data_dir), \
+    parser = argparse.ArgumentParser(description='Train vgg/alexnet for cifar10')
+    parser.add_argument('model', choices=['vgg', 'alexnet'], default='alexnet')
+    parser.add_argument('data', default='cifar-10-batches-py')
+    args = parser.parse_args()
+    assert os.path.exists(args.data), \
         'Please download the cifar10 dataset via "download_data.py py"'
-    net = alexnet.create_net()
-    train(data_dir, net)
+    print 'Loading data ..................'
+    train_x, train_y = load_train_data(args.data)
+    test_x, test_y = load_test_data(args.data)
+    if args.model == 'alexnet':
+        train_x, test_x = normalize_for_alexnet(train_x, test_x)
+        net = alexnet.create_net()
+        train((train_x, train_y, test_x, test_y), net, 140, alexnet_lr, 0.004)
+    else:
+        train_x, test_x = normalize_for_vgg(train_x, test_x)
+        net = vgg.create_net()
+        train((train_x, train_y, test_x, test_y), net, 250, vgg_lr, 0.0005)
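
A note on the two schedules above: alexnet_lr is piecewise constant, while
vgg_lr halves the learning rate every 30 epochs (epoch / 30 is integer
division under Python 2). A quick check of the vgg schedule:

    def vgg_lr(epoch):
        return 0.01 / float(1 << ((epoch / 30)))

    for epoch in (0, 29, 30, 60, 240):
        print epoch, vgg_lr(epoch)
    # -> 0.01, 0.01, 0.005, 0.0025, 3.90625e-05

The pre-processing also differs: normalize_for_vgg standardizes with the
global mean and std of the training set, while normalize_for_alexnet only
subtracts the per-pixel mean.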

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/28678ae8/examples/cifar10/train_vgg_cifar10.py
----------------------------------------------------------------------
diff --git a/examples/cifar10/train_vgg_cifar10.py b/examples/cifar10/train_vgg_cifar10.py
deleted file mode 100644
index e9df04e..0000000
--- a/examples/cifar10/train_vgg_cifar10.py
+++ /dev/null
@@ -1,162 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# =============================================================================
-""" CIFAR10 dataset is at https://www.cs.toronto.edu/~kriz/cifar.html.
-It includes 5 binary dataset, each contains 10000 images. 1 row (1 image)
-includes 1 label & 3072 pixels.  3072 pixels are 3 channels of a 32x32 image
-"""
-
-import cPickle
-import numpy as np
-import os
-import sys
-import math
-
-sys.path.append(os.path.join(os.path.dirname(__file__), '../../build/python'))
-from singa import initializer
-from singa import utils
-from singa import optimizer
-from singa import device
-from singa import tensor
-from singa.proto import core_pb2
-
-import vgg
-
-
-def load_dataset(filepath):
-    print 'Loading data file %s' % filepath
-    with open(filepath, 'rb') as fd:
-        cifar10 = cPickle.load(fd)
-    image = cifar10['data'].astype(dtype=np.uint8)
-    image = image.reshape((-1, 3, 32, 32))
-    label = np.asarray(cifar10['labels'], dtype=np.uint8)
-    label = label.reshape(label.size, 1)
-    return image, label
-
-
-def load_train_data(dir_path, num_batches=5):
-    labels = []
-    batchsize = 10000
-    images = np.empty((num_batches * batchsize, 3, 32, 32), dtype=np.uint8)
-    for did in range(1, num_batches + 1):
-        fname_train_data = dir_path + "/data_batch_{}".format(did)
-        image, label = load_dataset(fname_train_data)
-        images[(did - 1) * batchsize:did * batchsize] = image
-        labels.extend(label)
-    images = np.array(images, dtype=np.float32)
-    labels = np.array(labels, dtype=np.int32)
-    return images, labels
-
-
-def load_test_data(dir_path):
-    images, labels = load_dataset(dir_path + "/test_batch")
-    return np.array(images,  dtype=np.float32), np.array(labels, dtype=np.int32)
-
-
-def get_lr(epoch):
-    return 0.01 / float(1 << ((epoch / 30)))
-    #if epoch < 100:
-    #    return 0.01
-    #elif epoch < 150:
-    #    return 0.005
-    #elif epoch < 200:
-    #    return 0.001
-    #elif epoch < 250:
-    #    return 0.0001
-
-
-def train(data_dir, net, num_epoch=250, batch_size=128):
-    print 'Creating Device............'
-    cuda = device.create_cuda_gpus(2)[1]
-    net.to_device(cuda)
-    print 'Start intialization............'
-    opt = optimizer.SGD(momentum=0.9, weight_decay=0.0005)
-    for (p, name) in zip(net.param_values(), net.param_names()):
-        print name, p.shape
-        if len(p.shape) > 1:
-            if 'mean' in name  or 'beta' in name:
-                p.set_value(0.0)
-            elif 'var' in name:
-                p.set_value(1.0)
-            elif 'gamma' in name:
-                initializer.uniform(p, 0, 1)
-            elif 'conv' in name:
-                initializer.gaussian(p, 0, math.sqrt(2.0/(9.0 * p.shape[0])))
-            else:
-                initializer.gaussian(p, 0, 0.02)
-
-                #stdv = 1.0/math.sqrt(p.shape[1])
-                #initializer.uniform(p, -stdv, stdv)
-        else:
-            p.set_value(0)
-        #print specs.name, filler.type, p.l1()
-        print name, p.l1()
-    print 'Loading data ..................'
-    train_x, train_y = load_train_data(data_dir)
-    test_x, test_y = load_test_data(data_dir)
-    mean = train_x.mean()
-    std = train_x.std()
-    train_x -= mean
-    test_x -= mean
-    train_x /= std
-    test_x /= std
-
-    tx = tensor.Tensor((batch_size, 3, 32, 32), cuda)
-    ty = tensor.Tensor((batch_size,), cuda, core_pb2.kInt)
-    num_train_batch = train_x.shape[0] / batch_size
-    num_test_batch = test_x.shape[0] / batch_size
-    idx = np.arange(train_x.shape[0], dtype=np.int32)
-    for epoch in range(num_epoch):
-        np.random.shuffle(idx)
-        loss, acc = 0.0, 0.0
-        print 'Epoch %d' % epoch
-        for b in range(num_train_batch):
-            x = train_x[idx[b * batch_size: (b + 1) * batch_size]]
-            y = train_y[idx[b * batch_size: (b + 1) * batch_size]]
-            tx.copy_from_numpy(x)
-            ty.copy_from_numpy(y)
-            grads, (l, a) = net.train(tx, ty)
-            loss += l
-            acc += a
-            for (s, p, g) in zip(net.param_specs(), net.param_values(), grads):
-                opt.apply_with_lr(epoch, get_lr(epoch), g, p, str(s.name))
-            # update progress bar
-            utils.update_progress(b * 1.0 / num_train_batch,
-                                  'training loss = %f, accuracy = %f' % (l, a))
-        info = '\ntraining loss = %f, training accuracy = %f' \
-            % (loss / num_train_batch, acc / num_train_batch)
-        print info
-
-        loss, acc = 0.0, 0.0
-        for b in range(num_test_batch):
-            x = test_x[b * batch_size: (b + 1) * batch_size]
-            y = test_y[b * batch_size: (b + 1) * batch_size]
-            tx.copy_from_numpy(x)
-            ty.copy_from_numpy(y)
-            l, a = net.evaluate(tx, ty)
-            loss += l
-            acc += a
-
-        print 'test loss = %f, test accuracy = %f' \
-            % (loss / num_test_batch, acc / num_test_batch)
-    net.save('model.bin')  # save model params into checkpoint file
-
-if __name__ == '__main__':
-    data_dir = 'cifar-10-batches-py'
-    assert os.path.exists(data_dir), \
-        'Pls download the cifar10 dataset via "download_data.py py"'
-    net = vgg.create_net()
-    train(data_dir, net)

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/28678ae8/examples/cifar10/vgg-parallel.cc
----------------------------------------------------------------------
diff --git a/examples/cifar10/vgg-parallel.cc b/examples/cifar10/vgg-parallel.cc
index ba308e9..c6b7fa1 100644
--- a/examples/cifar10/vgg-parallel.cc
+++ b/examples/cifar10/vgg-parallel.cc
@@ -32,7 +32,7 @@
 #include "../../src/model/layer/cudnn_activation.h"
 #include "../../src/model/layer/cudnn_pooling.h"
 #include "../../src/model/layer/cudnn_lrn.h"
-#include "../../src/model/layer/cudnn_dropout.h"
+#include "../../src/model/layer/dropout.h"
 #include "../../src/model/layer/cudnn_batchnorm.h"
 #include "../../src/model/layer/dense.h"
 #include "../../src/model/layer/flatten.h"
@@ -155,7 +155,7 @@ LayerConf GenBatchNormConf(string name) {
 LayerConf GenDropoutConf(string name, float dropout_ratio) {
   LayerConf conf;
   conf.set_name(name);
-  conf.set_type("CudnnDropout");
+  conf.set_type("Dropout");
   DropoutConf *dropout = conf.mutable_dropout_conf();
   dropout->set_dropout_ratio(dropout_ratio);
 
@@ -172,37 +172,37 @@ FeedForwardNet CreateNet() {
   FeedForwardNet net;
   Shape s{3, 32, 32};
   ConvBNReLU(net, "conv1_1", 64, &s);
-  net.Add(new CudnnDropout(), GenDropoutConf("drop1", 0.3));
+  net.Add(new Dropout(), GenDropoutConf("drop1", 0.3));
   ConvBNReLU(net, "conv1_2", 64);
   net.Add(new CudnnPooling(), GenPoolingConf("pool1", true, 2, 2, 0));
   ConvBNReLU(net, "conv2_1", 128);
-  net.Add(new CudnnDropout(), GenDropoutConf("drop2", 0.4));
+  net.Add(new Dropout(), GenDropoutConf("drop2", 0.4));
   ConvBNReLU(net, "conv2_2", 128);
   net.Add(new CudnnPooling(), GenPoolingConf("pool2", true, 2, 2, 0));
   ConvBNReLU(net, "conv3_1", 256);
-  net.Add(new CudnnDropout(), GenDropoutConf("drop3_1", 0.4));
+  net.Add(new Dropout(), GenDropoutConf("drop3_1", 0.4));
   ConvBNReLU(net, "conv3_2", 256);
-  net.Add(new CudnnDropout(), GenDropoutConf("drop3_2", 0.4));
+  net.Add(new Dropout(), GenDropoutConf("drop3_2", 0.4));
   ConvBNReLU(net, "conv3_3", 256);
   net.Add(new CudnnPooling(), GenPoolingConf("pool3", true, 2, 2, 0));
   ConvBNReLU(net, "conv4_1", 512);
-  net.Add(new CudnnDropout(), GenDropoutConf("drop4_1", 0.4));
+  net.Add(new Dropout(), GenDropoutConf("drop4_1", 0.4));
   ConvBNReLU(net, "conv4_2", 512);
-  net.Add(new CudnnDropout(), GenDropoutConf("drop4_2", 0.4));
+  net.Add(new Dropout(), GenDropoutConf("drop4_2", 0.4));
   ConvBNReLU(net, "conv4_3", 512);
   net.Add(new CudnnPooling(), GenPoolingConf("pool4", true, 2, 2, 0));
   ConvBNReLU(net, "conv5_1", 512);
-  net.Add(new CudnnDropout(), GenDropoutConf("drop5_1", 0.4));
+  net.Add(new Dropout(), GenDropoutConf("drop5_1", 0.4));
   ConvBNReLU(net, "conv5_2", 512);
-  net.Add(new CudnnDropout(), GenDropoutConf("drop5_2", 0.4));
+  net.Add(new Dropout(), GenDropoutConf("drop5_2", 0.4));
   ConvBNReLU(net, "conv5_3", 512);
   net.Add(new CudnnPooling(), GenPoolingConf("pool5", true, 2, 2, 0));
   net.Add(new Flatten(), GenFlattenConf("flat"));
-  net.Add(new CudnnDropout(), GenDropoutConf("flat_drop", 0.5));
+  net.Add(new Dropout(), GenDropoutConf("flat_drop", 0.5));
   net.Add(new Dense(), GenDenseConf("ip1", 512, 0.02));
   net.Add(new CudnnBatchNorm(), GenBatchNormConf("ip1_bn"));
   net.Add(new CudnnActivation(), GenReLUConf("ip1_relu"));
-  net.Add(new CudnnDropout(), GenDropoutConf("ip1_drop", 0.5));
+  net.Add(new Dropout(), GenDropoutConf("ip1_drop", 0.5));
   net.Add(new Dense(), GenDenseConf("ip2", 10, 0.02));
 
   return net;

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/28678ae8/examples/cifar10/vgg.py
----------------------------------------------------------------------
diff --git a/examples/cifar10/vgg.py b/examples/cifar10/vgg.py
index 8063307..0b9bb56 100644
--- a/examples/cifar10/vgg.py
+++ b/examples/cifar10/vgg.py
@@ -1,12 +1,37 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# =============================================================================
+""" The VGG model is adapted from http://torch.ch/blog/2015/07/30/cifar.html.
+The best validation accuracy we achieved is about 89% without data augmentation.
+The performance could be improved by tuning hyper-parameters such as the
+learning rate, weight decay, max_epoch, and parameter initialization.
+"""
+
 import sys
 import os
+import math
 
 sys.path.append(os.path.join(os.path.dirname(__file__), '../../build/python'))
+
 from singa import layer
+from singa import initializer
 from singa import metric
 from singa import loss
 from singa import net as ffnet
-from singa.proto import core_pb2
+
 
 def ConvBnReLU(net, name, nb_filters, sample_shape=None):
     net.add(layer.Conv2D(name + '_1', nb_filters, 3, 1, pad=1,
@@ -14,39 +39,58 @@ def ConvBnReLU(net, name, nb_filers, sample_shape=None):
     net.add(layer.BatchNormalization(name + '_2'))
     net.add(layer.Activation(name + '_3'))
 
+
 def create_net():
     net = ffnet.FeedForwardNet(loss.SoftmaxCrossEntropy(), metric.Accuracy())
     ConvBnReLU(net, 'conv1_1', 64, (3, 32, 32))
-    net.add(layer.Dropout('drop1', 0.3, engine='cudnn'))
+    net.add(layer.Dropout('drop1', 0.3, engine='cuda'))
     ConvBnReLU(net, 'conv1_2', 64)
     net.add(layer.MaxPooling2D('pool1', 2, 2, border_mode='valid'))
     ConvBnReLU(net, 'conv2_1', 128)
-    net.add(layer.Dropout('drop2_1', 0.4, engine='cudnn'))
+    net.add(layer.Dropout('drop2_1', 0.4, engine='cuda'))
     ConvBnReLU(net, 'conv2_2', 128)
     net.add(layer.MaxPooling2D('pool2', 2, 2, border_mode='valid'))
     ConvBnReLU(net, 'conv3_1', 256)
-    net.add(layer.Dropout('drop3_1', 0.4, engine='cudnn'))
+    net.add(layer.Dropout('drop3_1', 0.4, engine='cuda'))
     ConvBnReLU(net, 'conv3_2', 256)
-    net.add(layer.Dropout('drop3_2', 0.4, engine='cudnn'))
+    net.add(layer.Dropout('drop3_2', 0.4, engine='cuda'))
     ConvBnReLU(net, 'conv3_3', 256)
     net.add(layer.MaxPooling2D('pool3', 2, 2, border_mode='valid'))
     ConvBnReLU(net, 'conv4_1', 512)
-    net.add(layer.Dropout('drop4_1', 0.4, engine='cudnn'))
+    net.add(layer.Dropout('drop4_1', 0.4, engine='cuda'))
     ConvBnReLU(net, 'conv4_2', 512)
-    net.add(layer.Dropout('drop4_2', 0.4, engine='cudnn'))
+    net.add(layer.Dropout('drop4_2', 0.4, engine='cuda'))
     ConvBnReLU(net, 'conv4_3', 512)
     net.add(layer.MaxPooling2D('pool4', 2, 2, border_mode='valid'))
     ConvBnReLU(net, 'conv5_1', 512)
-    net.add(layer.Dropout('drop5_1', 0.4, engine='cudnn'))
+    net.add(layer.Dropout('drop5_1', 0.4, engine='cuda'))
     ConvBnReLU(net, 'conv5_2', 512)
-    net.add(layer.Dropout('drop5_2', 0.4, engine='cudnn'))
+    net.add(layer.Dropout('drop5_2', 0.4, engine='cuda'))
     ConvBnReLU(net, 'conv5_3', 512)
     net.add(layer.MaxPooling2D('pool5', 2, 2, border_mode='valid'))
     net.add(layer.Flatten('flat'))
-    net.add(layer.Dropout('drop_flat', 0.5, engine='cudnn'))
+    net.add(layer.Dropout('drop_flat', 0.5, engine='cuda'))
     net.add(layer.Dense('ip1', 512))
     net.add(layer.BatchNormalization('batchnorm_ip1'))
     net.add(layer.Activation('relu_ip1'))
-    net.add(layer.Dropout('drop_ip2', 0.5, engine='cudnn'))
+    net.add(layer.Dropout('drop_ip2', 0.5, engine='cuda'))
     net.add(layer.Dense('ip2', 10))
+    print 'Start initialization............'
+    for (p, name) in zip(net.param_values(), net.param_names()):
+        print name, p.shape
+        if len(p.shape) > 1:
+            if 'mean' in name or 'beta' in name:
+                p.set_value(0.0)
+            elif 'var' in name:
+                p.set_value(1.0)
+            elif 'gamma' in name:
+                initializer.uniform(p, 0, 1)
+            elif 'conv' in name:
+                initializer.gaussian(p, 0, math.sqrt(2.0/(9.0 * p.shape[0])))
+            else:
+                initializer.gaussian(p, 0, 0.02)
+        else:
+            p.set_value(0)
+        print name, p.l1()
+
     return net
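
The loop above initializes the multi-dimensional parameters by name:
batch-norm means and betas to 0, variances to 1, gammas uniformly in [0, 1],
dense weights from gaussian(0, 0.02), and conv weights from a He-style
gaussian with std = sqrt(2 / (9 * p.shape[0])), i.e. a 3x3 kernel with
p.shape[0] used as the fan term; all remaining parameters are set to 0. A
quick check of the resulting conv stds (a sketch):

    import math

    for n in (64, 128, 256, 512):
        print n, round(math.sqrt(2.0 / (9.0 * n)), 4)
    # -> 0.0589, 0.0417, 0.0295, 0.0208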