You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@singa.apache.org by wa...@apache.org on 2016/08/09 16:02:55 UTC
[2/2] incubator-singa git commit: SINGA-231 Batch-normalized VGG model
for cifar-10
SINGA-231 Batch-normalized VGG model for cifar-10
Merge the training of vgg and alexnet into train.py
The validation accuracy of vgg could reach 0.89
Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/28678ae8
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/28678ae8
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/28678ae8
Branch: refs/heads/dev
Commit: 28678ae8329112ca1f11086b52ded7149ec9ab2c
Parents: bc3b74b
Author: Wei Wang <wa...@comp.nus.edu.sg>
Authored: Tue Aug 9 20:06:29 2016 +0800
Committer: Wei Wang <wa...@comp.nus.edu.sg>
Committed: Wed Aug 10 00:01:03 2016 +0800
----------------------------------------------------------------------
examples/cifar10/alexnet.py | 16 ++-
examples/cifar10/predict.py | 14 ++-
examples/cifar10/run-parallel.sh | 1 +
examples/cifar10/train.py | 63 +++++++----
examples/cifar10/train_vgg_cifar10.py | 162 -----------------------------
examples/cifar10/vgg-parallel.cc | 24 ++---
examples/cifar10/vgg.py | 66 ++++++++++--
7 files changed, 138 insertions(+), 208 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/28678ae8/examples/cifar10/alexnet.py
----------------------------------------------------------------------
diff --git a/examples/cifar10/alexnet.py b/examples/cifar10/alexnet.py
index 4b3daec..96c339a 100644
--- a/examples/cifar10/alexnet.py
+++ b/examples/cifar10/alexnet.py
@@ -14,15 +14,21 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# =============================================================================
+''' This model is created following the structure from
+https://code.google.com/p/cuda-convnet/source/browse/trunk/example-layers/layers-18pct.cfg
+Following the same setting for hyper-parameters and data pre-processing, the final
+validation accuracy would be about 82%.
+'''
+
import sys
import os
sys.path.append(os.path.join(os.path.dirname(__file__), '../../build/python'))
from singa import layer
+from singa import initializer
from singa import metric
from singa import loss
from singa import net as ffnet
-from singa.proto import core_pb2
def create_net():
@@ -44,4 +50,12 @@ def create_net():
net.add(layer.MaxPooling2D('pool3', 3, 2, pad=1))
net.add(layer.Flatten('flat'))
net.add(layer.Dense('dense', 10, W_specs=W2_specs.copy(), b_specs=b_specs.copy()))
+ for (p, specs) in zip(net.param_values(), net.param_specs()):
+ filler = specs.filler
+ if filler.type == 'gaussian':
+ initializer.gaussian(p, filler.mean, filler.std)
+ else:
+ p.set_value(0)
+ print specs.name, filler.type, p.l1()
+
return net
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/28678ae8/examples/cifar10/predict.py
----------------------------------------------------------------------
diff --git a/examples/cifar10/predict.py b/examples/cifar10/predict.py
index d083d0b..07b1145 100644
--- a/examples/cifar10/predict.py
+++ b/examples/cifar10/predict.py
@@ -14,7 +14,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# =============================================================================
-
+import cPickle as pickle
import numpy as np
import sys
import os
@@ -27,6 +27,15 @@ import net as ffnet
def predict(net, images, cuda, topk=5):
+ '''Predict the label of each image.
+
+ Args:
+ net, a pretrained neural net
+ images, a batch of images [batch_size, 3, 32, 32], which have been
+ pre-processed
+ cuda, the cuda device
+ topk, return the topk labels for each image.
+ '''
x = tensor.from_numpy(images.astype(np.float32))
x.to_device(cuda)
y = net.predict(x)
@@ -40,7 +49,7 @@ def predict(net, images, cuda, topk=5):
def load_dataset(filepath):
print 'Loading data file %s' % filepath
with open(filepath, 'rb') as fd:
- cifar10 = cPickle.load(fd)
+ cifar10 = pickle.load(fd)
image = cifar10['data'].astype(dtype=np.uint8)
image = image.reshape((-1, 3, 32, 32))
label = np.asarray(cifar10['labels'], dtype=np.uint8)
@@ -79,4 +88,5 @@ if __name__ == '__main__':
mean = compute_image_mean('cifar-10-batches-py')
test_images, _ = load_test_data('cifar-10-batches-py')
+ # minus mean is for alexnet; vgg uses a different pre-processing strategy
print predict(model, test_images - mean, cuda)
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/28678ae8/examples/cifar10/run-parallel.sh
----------------------------------------------------------------------
diff --git a/examples/cifar10/run-parallel.sh b/examples/cifar10/run-parallel.sh
index 6a9109a..18193db 100755
--- a/examples/cifar10/run-parallel.sh
+++ b/examples/cifar10/run-parallel.sh
@@ -1,2 +1,3 @@
#!/usr/bin/env sh
../../build/bin/alexnet-parallel -epoch 4
+#../../build/bin/vgg-parallel -epoch 4
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/28678ae8/examples/cifar10/train.py
----------------------------------------------------------------------
diff --git a/examples/cifar10/train.py b/examples/cifar10/train.py
index f4caca4..cb4110d 100644
--- a/examples/cifar10/train.py
+++ b/examples/cifar10/train.py
@@ -23,9 +23,9 @@ import cPickle
import numpy as np
import os
import sys
+import argparse
sys.path.append(os.path.join(os.path.dirname(__file__), '../../build/python'))
-from singa import initializer
from singa import utils
from singa import optimizer
from singa import device
@@ -33,6 +33,7 @@ from singa import tensor
from singa.proto import core_pb2
import alexnet
+import vgg
def load_dataset(filepath):
@@ -65,7 +66,28 @@ def load_test_data(dir_path):
return np.array(images, dtype=np.float32), np.array(labels, dtype=np.int32)
-def get_lr(epoch):
+def normalize_for_vgg(train_x, test_x):
+ mean = train_x.mean()
+ std = train_x.std()
+ train_x -= mean
+ test_x -= mean
+ train_x /= std
+ test_x /= std
+ return train_x, test_x
+
+
+def normalize_for_alexnet(train_x, test_x):
+ mean = np.average(train_x, axis=0)
+ train_x -= mean
+ test_x -= mean
+ return train_x, test_x
+
+
+def vgg_lr(epoch):
+ return 0.01 / float(1 << ((epoch / 30)))
+
+
+def alexnet_lr(epoch):
if epoch < 120:
return 0.001
elif epoch < 130:
@@ -74,32 +96,21 @@ def get_lr(epoch):
return 0.00001
-def train(data_dir, net, num_epoch=140, batch_size=100):
+def train(data, net, max_epoch, get_lr, weight_decay, batch_size=100):
print 'Start intialization............'
cuda = device.create_cuda_gpu()
net.to_device(cuda)
opt = optimizer.SGD(momentum=0.9, weight_decay=0.004)
for (p, specs) in zip(net.param_values(), net.param_specs()):
- filler = specs.filler
- if filler.type == 'gaussian':
- initializer.gaussian(p, filler.mean, filler.std)
- else:
- p.set_value(0)
opt.register(p, specs)
- print specs.name, filler.type, p.l1()
- print 'Loading data ..................'
- train_x, train_y = load_train_data(data_dir)
- test_x, test_y = load_test_data(data_dir)
- mean = np.average(train_x, axis=0)
- train_x -= mean
- test_x -= mean
tx = tensor.Tensor((batch_size, 3, 32, 32), cuda)
ty = tensor.Tensor((batch_size,), cuda, core_pb2.kInt)
+ train_x, train_y, test_x, test_y = data
num_train_batch = train_x.shape[0] / batch_size
num_test_batch = test_x.shape[0] / batch_size
idx = np.arange(train_x.shape[0], dtype=np.int32)
- for epoch in range(num_epoch):
+ for epoch in range(max_epoch):
np.random.shuffle(idx)
loss, acc = 0.0, 0.0
print 'Epoch %d' % epoch
@@ -135,8 +146,20 @@ def train(data_dir, net, num_epoch=140, batch_size=100):
net.save('model.bin') # save model params into checkpoint file
if __name__ == '__main__':
- data_dir = 'cifar-10-batches-py'
- assert os.path.exists(data_dir), \
+ parser = argparse.ArgumentParser(description='Train vgg/alexnet for cifar10')
+ parser.add_argument('model', choices=['vgg', 'alexnet'], default='alexnet')
+ parser.add_argument('data', default='cifar-10-batches-py')
+ args = parser.parse_args()
+ assert os.path.exists(args.data), \
'Pls download the cifar10 dataset via "download_data.py py"'
- net = alexnet.create_net()
- train(data_dir, net)
+ print 'Loading data ..................'
+ train_x, train_y = load_train_data(args.data)
+ test_x, test_y = load_test_data(args.data)
+ if args.model == 'alexnet':
+ train_x, test_x = normalize_for_alexnet(train_x, test_x)
+ net = alexnet.create_net()
+ train((train_x, train_y, test_x, test_y), net, 140, alexnet_lr, 0.004)
+ else:
+ train_x, test_x = normalize_for_vgg(train_x, test_x)
+ net = vgg.create_net()
+ train((train_x, train_y, test_x, test_y), net, 250, vgg_lr, 0.0005)
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/28678ae8/examples/cifar10/train_vgg_cifar10.py
----------------------------------------------------------------------
diff --git a/examples/cifar10/train_vgg_cifar10.py b/examples/cifar10/train_vgg_cifar10.py
deleted file mode 100644
index e9df04e..0000000
--- a/examples/cifar10/train_vgg_cifar10.py
+++ /dev/null
@@ -1,162 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# =============================================================================
-""" CIFAR10 dataset is at https://www.cs.toronto.edu/~kriz/cifar.html.
-It includes 5 binary dataset, each contains 10000 images. 1 row (1 image)
-includes 1 label & 3072 pixels. 3072 pixels are 3 channels of a 32x32 image
-"""
-
-import cPickle
-import numpy as np
-import os
-import sys
-import math
-
-sys.path.append(os.path.join(os.path.dirname(__file__), '../../build/python'))
-from singa import initializer
-from singa import utils
-from singa import optimizer
-from singa import device
-from singa import tensor
-from singa.proto import core_pb2
-
-import vgg
-
-
-def load_dataset(filepath):
- print 'Loading data file %s' % filepath
- with open(filepath, 'rb') as fd:
- cifar10 = cPickle.load(fd)
- image = cifar10['data'].astype(dtype=np.uint8)
- image = image.reshape((-1, 3, 32, 32))
- label = np.asarray(cifar10['labels'], dtype=np.uint8)
- label = label.reshape(label.size, 1)
- return image, label
-
-
-def load_train_data(dir_path, num_batches=5):
- labels = []
- batchsize = 10000
- images = np.empty((num_batches * batchsize, 3, 32, 32), dtype=np.uint8)
- for did in range(1, num_batches + 1):
- fname_train_data = dir_path + "/data_batch_{}".format(did)
- image, label = load_dataset(fname_train_data)
- images[(did - 1) * batchsize:did * batchsize] = image
- labels.extend(label)
- images = np.array(images, dtype=np.float32)
- labels = np.array(labels, dtype=np.int32)
- return images, labels
-
-
-def load_test_data(dir_path):
- images, labels = load_dataset(dir_path + "/test_batch")
- return np.array(images, dtype=np.float32), np.array(labels, dtype=np.int32)
-
-
-def get_lr(epoch):
- return 0.01 / float(1 << ((epoch / 30)))
- #if epoch < 100:
- # return 0.01
- #elif epoch < 150:
- # return 0.005
- #elif epoch < 200:
- # return 0.001
- #elif epoch < 250:
- # return 0.0001
-
-
-def train(data_dir, net, num_epoch=250, batch_size=128):
- print 'Creating Device............'
- cuda = device.create_cuda_gpus(2)[1]
- net.to_device(cuda)
- print 'Start intialization............'
- opt = optimizer.SGD(momentum=0.9, weight_decay=0.0005)
- for (p, name) in zip(net.param_values(), net.param_names()):
- print name, p.shape
- if len(p.shape) > 1:
- if 'mean' in name or 'beta' in name:
- p.set_value(0.0)
- elif 'var' in name:
- p.set_value(1.0)
- elif 'gamma' in name:
- initializer.uniform(p, 0, 1)
- elif 'conv' in name:
- initializer.gaussian(p, 0, math.sqrt(2.0/(9.0 * p.shape[0])))
- else:
- initializer.gaussian(p, 0, 0.02)
-
- #stdv = 1.0/math.sqrt(p.shape[1])
- #initializer.uniform(p, -stdv, stdv)
- else:
- p.set_value(0)
- #print specs.name, filler.type, p.l1()
- print name, p.l1()
- print 'Loading data ..................'
- train_x, train_y = load_train_data(data_dir)
- test_x, test_y = load_test_data(data_dir)
- mean = train_x.mean()
- std = train_x.std()
- train_x -= mean
- test_x -= mean
- train_x /= std
- test_x /= std
-
- tx = tensor.Tensor((batch_size, 3, 32, 32), cuda)
- ty = tensor.Tensor((batch_size,), cuda, core_pb2.kInt)
- num_train_batch = train_x.shape[0] / batch_size
- num_test_batch = test_x.shape[0] / batch_size
- idx = np.arange(train_x.shape[0], dtype=np.int32)
- for epoch in range(num_epoch):
- np.random.shuffle(idx)
- loss, acc = 0.0, 0.0
- print 'Epoch %d' % epoch
- for b in range(num_train_batch):
- x = train_x[idx[b * batch_size: (b + 1) * batch_size]]
- y = train_y[idx[b * batch_size: (b + 1) * batch_size]]
- tx.copy_from_numpy(x)
- ty.copy_from_numpy(y)
- grads, (l, a) = net.train(tx, ty)
- loss += l
- acc += a
- for (s, p, g) in zip(net.param_specs(), net.param_values(), grads):
- opt.apply_with_lr(epoch, get_lr(epoch), g, p, str(s.name))
- # update progress bar
- utils.update_progress(b * 1.0 / num_train_batch,
- 'training loss = %f, accuracy = %f' % (l, a))
- info = '\ntraining loss = %f, training accuracy = %f' \
- % (loss / num_train_batch, acc / num_train_batch)
- print info
-
- loss, acc = 0.0, 0.0
- for b in range(num_test_batch):
- x = test_x[b * batch_size: (b + 1) * batch_size]
- y = test_y[b * batch_size: (b + 1) * batch_size]
- tx.copy_from_numpy(x)
- ty.copy_from_numpy(y)
- l, a = net.evaluate(tx, ty)
- loss += l
- acc += a
-
- print 'test loss = %f, test accuracy = %f' \
- % (loss / num_test_batch, acc / num_test_batch)
- net.save('model.bin') # save model params into checkpoint file
-
-if __name__ == '__main__':
- data_dir = 'cifar-10-batches-py'
- assert os.path.exists(data_dir), \
- 'Pls download the cifar10 dataset via "download_data.py py"'
- net = vgg.create_net()
- train(data_dir, net)
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/28678ae8/examples/cifar10/vgg-parallel.cc
----------------------------------------------------------------------
diff --git a/examples/cifar10/vgg-parallel.cc b/examples/cifar10/vgg-parallel.cc
index ba308e9..c6b7fa1 100644
--- a/examples/cifar10/vgg-parallel.cc
+++ b/examples/cifar10/vgg-parallel.cc
@@ -32,7 +32,7 @@
#include "../../src/model/layer/cudnn_activation.h"
#include "../../src/model/layer/cudnn_pooling.h"
#include "../../src/model/layer/cudnn_lrn.h"
-#include "../../src/model/layer/cudnn_dropout.h"
+#include "../../src/model/layer/dropout.h"
#include "../../src/model/layer/cudnn_batchnorm.h"
#include "../../src/model/layer/dense.h"
#include "../../src/model/layer/flatten.h"
@@ -155,7 +155,7 @@ LayerConf GenBatchNormConf(string name) {
LayerConf GenDropoutConf(string name, float dropout_ratio) {
LayerConf conf;
conf.set_name(name);
- conf.set_type("CudnnDropout");
+ conf.set_type("Dropout");
DropoutConf *dropout = conf.mutable_dropout_conf();
dropout->set_dropout_ratio(dropout_ratio);
@@ -172,37 +172,37 @@ FeedForwardNet CreateNet() {
FeedForwardNet net;
Shape s{3, 32, 32};
ConvBNReLU(net, "conv1_1", 64, &s);
- net.Add(new CudnnDropout(), GenDropoutConf("drop1", 0.3));
+ net.Add(new Dropout(), GenDropoutConf("drop1", 0.3));
ConvBNReLU(net, "conv1_2", 64);
net.Add(new CudnnPooling(), GenPoolingConf("pool1", true, 2, 2, 0));
ConvBNReLU(net, "conv2_1", 128);
- net.Add(new CudnnDropout(), GenDropoutConf("drop2", 0.4));
+ net.Add(new Dropout(), GenDropoutConf("drop2", 0.4));
ConvBNReLU(net, "conv2_2", 128);
net.Add(new CudnnPooling(), GenPoolingConf("pool2", true, 2, 2, 0));
ConvBNReLU(net, "conv3_1", 256);
- net.Add(new CudnnDropout(), GenDropoutConf("drop3_1", 0.4));
+ net.Add(new Dropout(), GenDropoutConf("drop3_1", 0.4));
ConvBNReLU(net, "conv3_2", 256);
- net.Add(new CudnnDropout(), GenDropoutConf("drop3_2", 0.4));
+ net.Add(new Dropout(), GenDropoutConf("drop3_2", 0.4));
ConvBNReLU(net, "conv3_3", 256);
net.Add(new CudnnPooling(), GenPoolingConf("pool3", true, 2, 2, 0));
ConvBNReLU(net, "conv4_1", 512);
- net.Add(new CudnnDropout(), GenDropoutConf("drop4_1", 0.4));
+ net.Add(new Dropout(), GenDropoutConf("drop4_1", 0.4));
ConvBNReLU(net, "conv4_2", 512);
- net.Add(new CudnnDropout(), GenDropoutConf("drop4_2", 0.4));
+ net.Add(new Dropout(), GenDropoutConf("drop4_2", 0.4));
ConvBNReLU(net, "conv4_3", 512);
net.Add(new CudnnPooling(), GenPoolingConf("pool4", true, 2, 2, 0));
ConvBNReLU(net, "conv5_1", 512);
- net.Add(new CudnnDropout(), GenDropoutConf("drop5_1", 0.4));
+ net.Add(new Dropout(), GenDropoutConf("drop5_1", 0.4));
ConvBNReLU(net, "conv5_2", 512);
- net.Add(new CudnnDropout(), GenDropoutConf("drop5_2", 0.4));
+ net.Add(new Dropout(), GenDropoutConf("drop5_2", 0.4));
ConvBNReLU(net, "conv5_3", 512);
net.Add(new CudnnPooling(), GenPoolingConf("pool5", true, 2, 2, 0));
net.Add(new Flatten(), GenFlattenConf("flat"));
- net.Add(new CudnnDropout(), GenDropoutConf("flat_drop", 0.5));
+ net.Add(new Dropout(), GenDropoutConf("flat_drop", 0.5));
net.Add(new Dense(), GenDenseConf("ip1", 512, 0.02));
net.Add(new CudnnBatchNorm(), GenBatchNormConf("ip1_bn"));
net.Add(new CudnnActivation(), GenReLUConf("ip1_relu"));
- net.Add(new CudnnDropout(), GenDropoutConf("ip1_drop", 0.5));
+ net.Add(new Dropout(), GenDropoutConf("ip1_drop", 0.5));
net.Add(new Dense(), GenDenseConf("ip2", 10, 0.02));
return net;
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/28678ae8/examples/cifar10/vgg.py
----------------------------------------------------------------------
diff --git a/examples/cifar10/vgg.py b/examples/cifar10/vgg.py
index 8063307..0b9bb56 100644
--- a/examples/cifar10/vgg.py
+++ b/examples/cifar10/vgg.py
@@ -1,12 +1,37 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# =============================================================================
+""" The VGG model is adapted from http://torch.ch/blog/2015/07/30/cifar.html.
+The best validation accuracy we achieved is about 89% without data augmentation.
+The performance could be improved by tuning some hyper-parameters, including
+learning rate, weight decay, max_epoch, parameter initialization, etc.
+"""
+
import sys
import os
+import math
sys.path.append(os.path.join(os.path.dirname(__file__), '../../build/python'))
+
from singa import layer
+from singa import initializer
from singa import metric
from singa import loss
from singa import net as ffnet
-from singa.proto import core_pb2
+
def ConvBnReLU(net, name, nb_filers, sample_shape=None):
net.add(layer.Conv2D(name + '_1', nb_filers, 3, 1, pad=1,
@@ -14,39 +39,58 @@ def ConvBnReLU(net, name, nb_filers, sample_shape=None):
net.add(layer.BatchNormalization(name + '_2'))
net.add(layer.Activation(name + '_3'))
+
def create_net():
net = ffnet.FeedForwardNet(loss.SoftmaxCrossEntropy(), metric.Accuracy())
ConvBnReLU(net, 'conv1_1', 64, (3, 32, 32))
- net.add(layer.Dropout('drop1', 0.3, engine='cudnn'))
+ net.add(layer.Dropout('drop1', 0.3, engine='cuda'))
ConvBnReLU(net, 'conv1_2', 64)
net.add(layer.MaxPooling2D('pool1', 2, 2, border_mode='valid'))
ConvBnReLU(net, 'conv2_1', 128)
- net.add(layer.Dropout('drop2_1', 0.4, engine='cudnn'))
+ net.add(layer.Dropout('drop2_1', 0.4, engine='cuda'))
ConvBnReLU(net, 'conv2_2', 128)
net.add(layer.MaxPooling2D('pool2', 2, 2, border_mode='valid'))
ConvBnReLU(net, 'conv3_1', 256)
- net.add(layer.Dropout('drop3_1', 0.4, engine='cudnn'))
+ net.add(layer.Dropout('drop3_1', 0.4, engine='cuda'))
ConvBnReLU(net, 'conv3_2', 256)
- net.add(layer.Dropout('drop3_2', 0.4, engine='cudnn'))
+ net.add(layer.Dropout('drop3_2', 0.4, engine='cuda'))
ConvBnReLU(net, 'conv3_3', 256)
net.add(layer.MaxPooling2D('pool3', 2, 2, border_mode='valid'))
ConvBnReLU(net, 'conv4_1', 512)
- net.add(layer.Dropout('drop4_1', 0.4, engine='cudnn'))
+ net.add(layer.Dropout('drop4_1', 0.4, engine='cuda'))
ConvBnReLU(net, 'conv4_2', 512)
- net.add(layer.Dropout('drop4_2', 0.4, engine='cudnn'))
+ net.add(layer.Dropout('drop4_2', 0.4, engine='cuda'))
ConvBnReLU(net, 'conv4_3', 512)
net.add(layer.MaxPooling2D('pool4', 2, 2, border_mode='valid'))
ConvBnReLU(net, 'conv5_1', 512)
- net.add(layer.Dropout('drop5_1', 0.4, engine='cudnn'))
+ net.add(layer.Dropout('drop5_1', 0.4, engine='cuda'))
ConvBnReLU(net, 'conv5_2', 512)
- net.add(layer.Dropout('drop5_2', 0.4, engine='cudnn'))
+ net.add(layer.Dropout('drop5_2', 0.4, engine='cuda'))
ConvBnReLU(net, 'conv5_3', 512)
net.add(layer.MaxPooling2D('pool5', 2, 2, border_mode='valid'))
net.add(layer.Flatten('flat'))
- net.add(layer.Dropout('drop_flat', 0.5, engine='cudnn'))
+ net.add(layer.Dropout('drop_flat', 0.5, engine='cuda'))
net.add(layer.Dense('ip1', 512))
net.add(layer.BatchNormalization('batchnorm_ip1'))
net.add(layer.Activation('relu_ip1'))
- net.add(layer.Dropout('drop_ip2', 0.5, engine='cudnn'))
+ net.add(layer.Dropout('drop_ip2', 0.5, engine='cuda'))
net.add(layer.Dense('ip2', 10))
+ print 'Start intialization............'
+ for (p, name) in zip(net.param_values(), net.param_names()):
+ print name, p.shape
+ if len(p.shape) > 1:
+ if 'mean' in name or 'beta' in name:
+ p.set_value(0.0)
+ elif 'var' in name:
+ p.set_value(1.0)
+ elif 'gamma' in name:
+ initializer.uniform(p, 0, 1)
+ elif 'conv' in name:
+ initializer.gaussian(p, 0, math.sqrt(2.0/(9.0 * p.shape[0])))
+ else:
+ initializer.gaussian(p, 0, 0.02)
+ else:
+ p.set_value(0)
+ print name, p.l1()
+
return net