Posted to commits@singa.apache.org by wa...@apache.org on 2016/08/09 16:02:54 UTC
[1/2] incubator-singa git commit: SINGA-231 Batch-normalized VGG model for cifar-10
Repository: incubator-singa
Updated Branches:
refs/heads/dev db5478efa -> 28678ae83
SINGA-231 Batch-normalized VGG model for cifar-10
In this ticket, we implemented a batch-normalized VGG model for the cifar10
dataset (refer to http://torch.ch/blog/2015/07/30/cifar.html).
* +vgg-parallel.cc for parallel training
* +vgg.py using the python language
* fix a bug in the ResetLike() method (implemented in tensor.cc), which
previously did not reset the shape.
* fix a bug in local_updater.cc, which could cause a race condition when
multiple threads initialize mutexes concurrently.
* revise the batch normalization layer to support 2D tensor input; a rough
sketch of the idea follows below.
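For background on the last item: 2D support is implemented by viewing a
dense-layer output of shape (batch, channels) as a 4D tensor with 1x1
spatial dimensions before the batch-norm computation, then reshaping back
(see the batchnorm.cc and cudnn_batchnorm.cc diffs below). A rough numpy
sketch of the idea, not SINGA code:

    import numpy as np

    def batchnorm_2d(x, gamma, beta, eps=1e-5):
        # view the (batch, channels) input as (batch, channels, 1, 1)
        x4 = x.reshape(x.shape[0], x.shape[1], 1, 1)
        mean = x4.mean(axis=(0, 2, 3), keepdims=True)
        var = x4.var(axis=(0, 2, 3), keepdims=True)
        y4 = (x4 - mean) / np.sqrt(var + eps)
        y4 = gamma.reshape(1, -1, 1, 1) * y4 + beta.reshape(1, -1, 1, 1)
        return y4.reshape(x.shape)  # back to (batch, channels)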
Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/bc3b74b3
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/bc3b74b3
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/bc3b74b3
Branch: refs/heads/dev
Commit: bc3b74b3662230f867c42344f0600498368f4785
Parents: db5478e
Author: WANG Ji <ij...@gmail.com>
Authored: Sat Aug 6 17:36:28 2016 +0800
Committer: WANG Ji <ij...@gmail.com>
Committed: Mon Aug 8 11:44:01 2016 +0800
----------------------------------------------------------------------
examples/cifar10/CMakeLists.txt | 5 +
examples/cifar10/train_vgg_cifar10.py | 162 ++++++++++++++
examples/cifar10/vgg-parallel.cc | 333 +++++++++++++++++++++++++++++
examples/cifar10/vgg.py | 52 +++++
src/core/tensor/tensor.cc | 2 +-
src/model/layer/batchnorm.cc | 25 ++-
src/model/layer/batchnorm.h | 3 +-
src/model/layer/cudnn_batchnorm.cc | 31 ++-
src/model/updater/local_updater.cc | 1 +
src/python/singa/layer.py | 10 +-
src/python/singa/net.py | 6 +-
11 files changed, 613 insertions(+), 17 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/bc3b74b3/examples/cifar10/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/examples/cifar10/CMakeLists.txt b/examples/cifar10/CMakeLists.txt
index 92f884c..76c0b73 100644
--- a/examples/cifar10/CMakeLists.txt
+++ b/examples/cifar10/CMakeLists.txt
@@ -10,4 +10,9 @@ ADD_EXECUTABLE(alexnet-parallel alexnet-parallel.cc)
ADD_DEPENDENCIES(alexnet-parallel singa_core singa_model singa_utils)
TARGET_LINK_LIBRARIES(alexnet-parallel singa_core singa_utils singa_model protobuf ${SINGA_LIBKER_LIBS})
SET_TARGET_PROPERTIES(alexnet-parallel PROPERTIES LINK_FLAGS "${LINK_FLAGS} -pthread")
+
+ADD_EXECUTABLE(vgg-parallel vgg-parallel.cc)
+ADD_DEPENDENCIES(vgg-parallel singa_core singa_model singa_utils)
+TARGET_LINK_LIBRARIES(vgg-parallel singa_core singa_utils singa_model protobuf ${SINGA_LIBKER_LIBS})
+SET_TARGET_PROPERTIES(vgg-parallel PROPERTIES LINK_FLAGS "${LINK_FLAGS} -pthread")
ENDIF(USE_CUDNN)
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/bc3b74b3/examples/cifar10/train_vgg_cifar10.py
----------------------------------------------------------------------
diff --git a/examples/cifar10/train_vgg_cifar10.py b/examples/cifar10/train_vgg_cifar10.py
new file mode 100644
index 0000000..e9df04e
--- /dev/null
+++ b/examples/cifar10/train_vgg_cifar10.py
@@ -0,0 +1,162 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# =============================================================================
+""" CIFAR10 dataset is at https://www.cs.toronto.edu/~kriz/cifar.html.
+It includes 5 binary dataset, each contains 10000 images. 1 row (1 image)
+includes 1 label & 3072 pixels. 3072 pixels are 3 channels of a 32x32 image
+"""
+
+import cPickle
+import numpy as np
+import os
+import sys
+import math
+
+sys.path.append(os.path.join(os.path.dirname(__file__), '../../build/python'))
+from singa import initializer
+from singa import utils
+from singa import optimizer
+from singa import device
+from singa import tensor
+from singa.proto import core_pb2
+
+import vgg
+
+
+def load_dataset(filepath):
+ print 'Loading data file %s' % filepath
+ with open(filepath, 'rb') as fd:
+ cifar10 = cPickle.load(fd)
+ image = cifar10['data'].astype(dtype=np.uint8)
+ image = image.reshape((-1, 3, 32, 32))
+ label = np.asarray(cifar10['labels'], dtype=np.uint8)
+ label = label.reshape(label.size, 1)
+ return image, label
+
+
+def load_train_data(dir_path, num_batches=5):
+ labels = []
+ batchsize = 10000
+ images = np.empty((num_batches * batchsize, 3, 32, 32), dtype=np.uint8)
+ for did in range(1, num_batches + 1):
+ fname_train_data = dir_path + "/data_batch_{}".format(did)
+ image, label = load_dataset(fname_train_data)
+ images[(did - 1) * batchsize:did * batchsize] = image
+ labels.extend(label)
+ images = np.array(images, dtype=np.float32)
+ labels = np.array(labels, dtype=np.int32)
+ return images, labels
+
+
+def load_test_data(dir_path):
+ images, labels = load_dataset(dir_path + "/test_batch")
+ return np.array(images, dtype=np.float32), np.array(labels, dtype=np.int32)
+
+
+def get_lr(epoch):
+ return 0.01 / float(1 << ((epoch / 30)))
+ #if epoch < 100:
+ # return 0.01
+ #elif epoch < 150:
+ # return 0.005
+ #elif epoch < 200:
+ # return 0.001
+ #elif epoch < 250:
+ # return 0.0001
+
+
+def train(data_dir, net, num_epoch=250, batch_size=128):
+ print 'Creating Device............'
+ cuda = device.create_cuda_gpus(2)[1]
+ net.to_device(cuda)
+ print 'Start initialization............'
+ opt = optimizer.SGD(momentum=0.9, weight_decay=0.0005)
+ for (p, name) in zip(net.param_values(), net.param_names()):
+ print name, p.shape
+ if len(p.shape) > 1:
+ if 'mean' in name or 'beta' in name:
+ p.set_value(0.0)
+ elif 'var' in name:
+ p.set_value(1.0)
+ elif 'gamma' in name:
+ initializer.uniform(p, 0, 1)
+ elif 'conv' in name:
+ initializer.gaussian(p, 0, math.sqrt(2.0/(9.0 * p.shape[0])))
+ else:
+ initializer.gaussian(p, 0, 0.02)
+
+ #stdv = 1.0/math.sqrt(p.shape[1])
+ #initializer.uniform(p, -stdv, stdv)
+ else:
+ p.set_value(0)
+ #print specs.name, filler.type, p.l1()
+ print name, p.l1()
+ print 'Loading data ..................'
+ train_x, train_y = load_train_data(data_dir)
+ test_x, test_y = load_test_data(data_dir)
+ mean = train_x.mean()
+ std = train_x.std()
+ train_x -= mean
+ test_x -= mean
+ train_x /= std
+ test_x /= std
+
+ tx = tensor.Tensor((batch_size, 3, 32, 32), cuda)
+ ty = tensor.Tensor((batch_size,), cuda, core_pb2.kInt)
+ num_train_batch = train_x.shape[0] / batch_size
+ num_test_batch = test_x.shape[0] / batch_size
+ idx = np.arange(train_x.shape[0], dtype=np.int32)
+ for epoch in range(num_epoch):
+ np.random.shuffle(idx)
+ loss, acc = 0.0, 0.0
+ print 'Epoch %d' % epoch
+ for b in range(num_train_batch):
+ x = train_x[idx[b * batch_size: (b + 1) * batch_size]]
+ y = train_y[idx[b * batch_size: (b + 1) * batch_size]]
+ tx.copy_from_numpy(x)
+ ty.copy_from_numpy(y)
+ grads, (l, a) = net.train(tx, ty)
+ loss += l
+ acc += a
+ for (s, p, g) in zip(net.param_specs(), net.param_values(), grads):
+ opt.apply_with_lr(epoch, get_lr(epoch), g, p, str(s.name))
+ # update progress bar
+ utils.update_progress(b * 1.0 / num_train_batch,
+ 'training loss = %f, accuracy = %f' % (l, a))
+ info = '\ntraining loss = %f, training accuracy = %f' \
+ % (loss / num_train_batch, acc / num_train_batch)
+ print info
+
+ loss, acc = 0.0, 0.0
+ for b in range(num_test_batch):
+ x = test_x[b * batch_size: (b + 1) * batch_size]
+ y = test_y[b * batch_size: (b + 1) * batch_size]
+ tx.copy_from_numpy(x)
+ ty.copy_from_numpy(y)
+ l, a = net.evaluate(tx, ty)
+ loss += l
+ acc += a
+
+ print 'test loss = %f, test accuracy = %f' \
+ % (loss / num_test_batch, acc / num_test_batch)
+ net.save('model.bin') # save model params into checkpoint file
+
+if __name__ == '__main__':
+ data_dir = 'cifar-10-batches-py'
+ assert os.path.exists(data_dir), \
+ 'Pls download the cifar10 dataset via "download_data.py py"'
+ net = vgg.create_net()
+ train(data_dir, net)
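As a quick sanity check of the get_lr() schedule above: with Python 2
integer division, 0.01 / float(1 << (epoch / 30)) halves the learning rate
every 30 epochs:

    >>> [0.01 / float(1 << (e / 30)) for e in (0, 29, 30, 59, 60, 90)]
    [0.01, 0.01, 0.005, 0.005, 0.0025, 0.00125]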
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/bc3b74b3/examples/cifar10/vgg-parallel.cc
----------------------------------------------------------------------
diff --git a/examples/cifar10/vgg-parallel.cc b/examples/cifar10/vgg-parallel.cc
new file mode 100644
index 0000000..ba308e9
--- /dev/null
+++ b/examples/cifar10/vgg-parallel.cc
@@ -0,0 +1,333 @@
+/************************************************************
+*
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements. See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership. The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an
+* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+* KIND, either express or implied. See the License for the
+* specific language governing permissions and limitations
+* under the License.
+*
+*************************************************************/
+
+#include "cifar10.h"
+#include "singa/model/feed_forward_net.h"
+#include "singa/model/optimizer.h"
+#include "singa/model/updater.h"
+#include "singa/model/initializer.h"
+#include "singa/model/metric.h"
+#include "singa/utils/channel.h"
+#include "singa/utils/string.h"
+#include "singa/core/memory.h"
+#include "../../src/model/layer/cudnn_convolution.h"
+#include "../../src/model/layer/cudnn_activation.h"
+#include "../../src/model/layer/cudnn_pooling.h"
+#include "../../src/model/layer/cudnn_lrn.h"
+#include "../../src/model/layer/cudnn_dropout.h"
+#include "../../src/model/layer/cudnn_batchnorm.h"
+#include "../../src/model/layer/dense.h"
+#include "../../src/model/layer/flatten.h"
+#include <thread>
+#include <memory>
+#include <cmath>
+
+namespace singa {
+
+const float default_wd = 0.0005f;
+
+LayerConf GenConvConf(string name, int nb_filter, int kernel, int stride,
+ int pad, float std = .02f, float bias = .0f) {
+ LayerConf conf;
+ conf.set_name(name);
+ conf.set_type("CudnnConvolution");
+ ConvolutionConf *conv = conf.mutable_convolution_conf();
+ conv->set_num_output(nb_filter);
+ conv->add_kernel_size(kernel);
+ conv->add_stride(stride);
+ conv->add_pad(pad);
+ conv->set_bias_term(true);
+
+ ParamSpec *wspec = conf.add_param();
+ wspec->set_name(name + "_weight");
+ auto wfill = wspec->mutable_filler();
+ wfill->set_type("Gaussian");
+ wfill->set_std(sqrt(2.0f/(nb_filter*9.0f)));
+
+ ParamSpec *bspec = conf.add_param();
+ bspec->set_name(name + "_bias");
+ auto bfill = bspec->mutable_filler();
+ bfill->set_value(bias);
+ // bspec->set_lr_mult(2);
+ // bspec->set_decay_mult(0);
+ return conf;
+}
+
+LayerConf GenPoolingConf(string name, bool max_pool, int kernel, int stride,
+ int pad) {
+ LayerConf conf;
+ conf.set_name(name);
+ conf.set_type("CudnnPooling");
+ PoolingConf *pool = conf.mutable_pooling_conf();
+ pool->set_kernel_size(kernel);
+ pool->set_stride(stride);
+ pool->set_pad(pad);
+ if (!max_pool) pool->set_pool(PoolingConf_PoolMethod_AVE);
+ return conf;
+}
+
+LayerConf GenReLUConf(string name) {
+ LayerConf conf;
+ conf.set_name(name);
+ conf.set_type("RELU");
+ return conf;
+}
+
+LayerConf GenDenseConf(string name, int num_output, float std, float wd = default_wd) {
+ LayerConf conf;
+ conf.set_name(name);
+ conf.set_type("Dense");
+ DenseConf *dense = conf.mutable_dense_conf();
+ dense->set_num_output(num_output);
+
+ ParamSpec *wspec = conf.add_param();
+ wspec->set_name(name + "_weight");
+ wspec->set_decay_mult(wd);
+ auto wfill = wspec->mutable_filler();
+ wfill->set_type("Gaussian");
+ wfill->set_std(std);
+
+ ParamSpec *bspec = conf.add_param();
+ bspec->set_name(name + "_bias");
+ bspec->set_lr_mult(2);
+ bspec->set_decay_mult(0);
+
+ return conf;
+}
+
+LayerConf GenFlattenConf(string name) {
+ LayerConf conf;
+ conf.set_name(name);
+ conf.set_type("Flatten");
+ return conf;
+}
+
+LayerConf GenBatchNormConf(string name) {
+ LayerConf conf;
+ conf.set_name(name);
+ conf.set_type("CudnnBatchNorm");
+ ParamSpec *gammaspec = conf.add_param();
+ gammaspec->set_name(name + "_gamma");
+ auto gammafill = gammaspec->mutable_filler();
+ gammafill->set_type("uniform");
+ gammafill->set_min(0);
+ gammafill->set_max(1);
+
+ ParamSpec *betaspec = conf.add_param();
+ betaspec->set_name(name + "_beta");
+ auto betafill = betaspec->mutable_filler();
+ betafill->set_type("constant");
+ betafill->set_value(0);
+
+ ParamSpec *meanspec = conf.add_param();
+ meanspec->set_name(name + "_mean");
+ auto meanfill = meanspec->mutable_filler();
+ meanfill->set_type("constant");
+ meanfill->set_value(0);
+
+ ParamSpec *varspec = conf.add_param();
+ varspec->set_name(name + "_var");
+ auto varfill = varspec->mutable_filler();
+ varfill->set_type("constant");
+ varfill->set_value(1);
+
+ return conf;
+}
+
+LayerConf GenDropoutConf(string name, float dropout_ratio) {
+ LayerConf conf;
+ conf.set_name(name);
+ conf.set_type("CudnnDropout");
+ DropoutConf *dropout = conf.mutable_dropout_conf();
+ dropout->set_dropout_ratio(dropout_ratio);
+
+ return conf;
+}
+
+void ConvBNReLU(FeedForwardNet& net, string name, int nb_filter, Shape* shape = nullptr) {
+ net.Add(new CudnnConvolution(), GenConvConf(name+"_conv", nb_filter, 3, 1, 1), shape);
+ net.Add(new CudnnBatchNorm(), GenBatchNormConf(name+"_bn"));
+ net.Add(new CudnnActivation(), GenReLUConf(name+"_relu"));
+}
+
+FeedForwardNet CreateNet() {
+ FeedForwardNet net;
+ Shape s{3, 32, 32};
+ ConvBNReLU(net, "conv1_1", 64, &s);
+ net.Add(new CudnnDropout(), GenDropoutConf("drop1", 0.3));
+ ConvBNReLU(net, "conv1_2", 64);
+ net.Add(new CudnnPooling(), GenPoolingConf("pool1", true, 2, 2, 0));
+ ConvBNReLU(net, "conv2_1", 128);
+ net.Add(new CudnnDropout(), GenDropoutConf("drop2", 0.4));
+ ConvBNReLU(net, "conv2_2", 128);
+ net.Add(new CudnnPooling(), GenPoolingConf("pool2", true, 2, 2, 0));
+ ConvBNReLU(net, "conv3_1", 256);
+ net.Add(new CudnnDropout(), GenDropoutConf("drop3_1", 0.4));
+ ConvBNReLU(net, "conv3_2", 256);
+ net.Add(new CudnnDropout(), GenDropoutConf("drop3_2", 0.4));
+ ConvBNReLU(net, "conv3_3", 256);
+ net.Add(new CudnnPooling(), GenPoolingConf("pool3", true, 2, 2, 0));
+ ConvBNReLU(net, "conv4_1", 512);
+ net.Add(new CudnnDropout(), GenDropoutConf("drop4_1", 0.4));
+ ConvBNReLU(net, "conv4_2", 512);
+ net.Add(new CudnnDropout(), GenDropoutConf("drop4_2", 0.4));
+ ConvBNReLU(net, "conv4_3", 512);
+ net.Add(new CudnnPooling(), GenPoolingConf("pool4", true, 2, 2, 0));
+ ConvBNReLU(net, "conv5_1", 512);
+ net.Add(new CudnnDropout(), GenDropoutConf("drop5_1", 0.4));
+ ConvBNReLU(net, "conv5_2", 512);
+ net.Add(new CudnnDropout(), GenDropoutConf("drop5_2", 0.4));
+ ConvBNReLU(net, "conv5_3", 512);
+ net.Add(new CudnnPooling(), GenPoolingConf("pool5", true, 2, 2, 0));
+ net.Add(new Flatten(), GenFlattenConf("flat"));
+ net.Add(new CudnnDropout(), GenDropoutConf("flat_drop", 0.5));
+ net.Add(new Dense(), GenDenseConf("ip1", 512, 0.02));
+ net.Add(new CudnnBatchNorm(), GenBatchNormConf("ip1_bn"));
+ net.Add(new CudnnActivation(), GenReLUConf("ip1_relu"));
+ net.Add(new CudnnDropout(), GenDropoutConf("ip1_drop", 0.5));
+ net.Add(new Dense(), GenDenseConf("ip2", 10, 0.02));
+
+ return net;
+}
+
+void Train(float lr, int num_epoch, string data_dir) {
+ Cifar10 data(data_dir);
+ Tensor train_x, train_y, test_x, test_y;
+ Tensor train_x_1, train_x_2, train_y_1, train_y_2;
+ {
+ auto train = data.ReadTrainData();
+ size_t nsamples = train.first.shape(0);
+ auto mtrain =
+ Reshape(train.first, Shape{nsamples, train.first.Size() / nsamples});
+ const Tensor &mean = Average(mtrain, 0);
+ SubRow(mean, &mtrain);
+ Tensor std = Square(mtrain);
+ std = Average(std, 0);
+ std = Sqrt(std);
+ std += 1e-6f;
+ DivRow(std, &mtrain);
+
+ train_x = Reshape(mtrain, train.first.shape());
+ train_y = train.second;
+
+ LOG(INFO) << "Slicing training data...";
+ train_x_1.Reshape(Shape{nsamples / 2, train.first.shape(1),
+ train.first.shape(2), train.first.shape(3)});
+ LOG(INFO) << "Copying first data slice...";
+ CopyDataToFrom(&train_x_1, train_x, train_x.Size() / 2);
+ train_x_2.Reshape(Shape{nsamples / 2, train.first.shape(1),
+ train.first.shape(2), train.first.shape(3)});
+ LOG(INFO) << "Copying second data slice...";
+ CopyDataToFrom(&train_x_2, train_x, train_x.Size() / 2, 0,
+ train_x.Size() / 2);
+ train_y_1.Reshape(Shape{nsamples / 2});
+ train_y_1.AsType(kInt);
+ LOG(INFO) << "Copying first label slice...";
+ CopyDataToFrom(&train_y_1, train_y, train_y.Size() / 2);
+ train_y_2.Reshape(Shape{nsamples / 2});
+ train_y_2.AsType(kInt);
+ LOG(INFO) << "Copying second label slice...";
+ CopyDataToFrom(&train_y_2, train_y, train_y.Size() / 2, 0,
+ train_y.Size() / 2);
+
+ auto test = data.ReadTestData();
+ nsamples = test.first.shape(0);
+ auto mtest =
+ Reshape(test.first, Shape{nsamples, test.first.Size() / nsamples});
+ SubRow(mean, &mtest);
+ DivRow(std, &mtest);
+ test_x = Reshape(mtest, test.first.shape());
+ test_y = test.second;
+ }
+
+ CHECK_EQ(train_x.shape(0), train_y.shape(0));
+ CHECK_EQ(test_x.shape(0), test_y.shape(0));
+ LOG(INFO) << "Total Training samples = " << train_y.shape(0)
+ << ", Total Test samples = " << test_y.shape(0);
+ CHECK_EQ(train_x_1.shape(0), train_y_1.shape(0));
+ LOG(INFO) << "On net 1, Training samples = " << train_y_1.shape(0)
+ << ", Test samples = " << test_y.shape(0);
+ CHECK_EQ(train_x_2.shape(0), train_y_2.shape(0));
+ LOG(INFO) << "On net 2, Training samples = " << train_y_2.shape(0);
+
+ auto net_1 = CreateNet();
+ auto net_2 = CreateNet();
+
+ SGD sgd;
+ OptimizerConf opt_conf;
+ opt_conf.set_momentum(0.9);
+ auto reg = opt_conf.mutable_regularizer();
+ reg->set_coefficient(0.0005);
+ sgd.Setup(opt_conf);
+ sgd.SetLearningRateGenerator([lr](int epoch) {
+ return 0.01f / static_cast<float>(1u << (epoch/30));
+ });
+
+ SoftmaxCrossEntropy loss_1, loss_2;
+ Accuracy acc_1, acc_2;
+ /// Create updater aggregating gradient on CPU
+ std::shared_ptr<Updater> updater = std::make_shared<LocalUpdater>(2, &sgd);
+
+ /// Only need to register parameter once.
+ net_1.Compile(true, true, updater, &loss_1, &acc_1);
+ net_2.Compile(true, false, updater, &loss_2, &acc_2);
+
+ MemPoolConf mem_conf;
+ mem_conf.add_device(0);
+ mem_conf.add_device(1);
+ std::shared_ptr<DeviceMemPool> mem_pool(new CnMemPool(mem_conf));
+ std::shared_ptr<CudaGPU> cuda_1(new CudaGPU(0, mem_pool));
+ std::shared_ptr<CudaGPU> cuda_2(new CudaGPU(1, mem_pool));
+ net_1.ToDevice(cuda_1);
+ net_2.ToDevice(cuda_2);
+
+ train_x_1.ToDevice(cuda_1);
+ train_y_1.ToDevice(cuda_1);
+ test_x.ToDevice(cuda_1);
+ test_y.ToDevice(cuda_1);
+ train_x_2.ToDevice(cuda_2);
+ train_y_2.ToDevice(cuda_2);
+
+ LOG(INFO) << "Launching thread...";
+ std::thread t1 =
+ net_1.TrainThread(50, num_epoch, train_x_1, train_y_1, test_x, test_y);
+ std::thread t2 = net_2.TrainThread(50, num_epoch, train_x_2, train_y_2);
+ t1.join();
+ t2.join();
+}
+}
+
+int main(int argc, char **argv) {
+ singa::InitChannel(nullptr);
+ int pos = singa::ArgPos(argc, argv, "-epoch");
+ int nEpoch = 1;
+ if (pos != -1) nEpoch = atoi(argv[pos + 1]);
+ pos = singa::ArgPos(argc, argv, "-lr");
+ float lr = 0.001;
+ if (pos != -1) lr = atof(argv[pos + 1]);
+ pos = singa::ArgPos(argc, argv, "-data");
+ string data = "cifar-10-batches-bin";
+ if (pos != -1) data = argv[pos + 1];
+
+ LOG(INFO) << "Start training";
+ singa::Train(lr, nEpoch, data);
+ LOG(INFO) << "End training";
+}
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/bc3b74b3/examples/cifar10/vgg.py
----------------------------------------------------------------------
diff --git a/examples/cifar10/vgg.py b/examples/cifar10/vgg.py
new file mode 100644
index 0000000..8063307
--- /dev/null
+++ b/examples/cifar10/vgg.py
@@ -0,0 +1,52 @@
+import sys
+import os
+
+sys.path.append(os.path.join(os.path.dirname(__file__), '../../build/python'))
+from singa import layer
+from singa import metric
+from singa import loss
+from singa import net as ffnet
+from singa.proto import core_pb2
+
+def ConvBnReLU(net, name, nb_filters, sample_shape=None):
+ net.add(layer.Conv2D(name + '_1', nb_filters, 3, 1, pad=1,
+ input_sample_shape=sample_shape))
+ net.add(layer.BatchNormalization(name + '_2'))
+ net.add(layer.Activation(name + '_3'))
+
+def create_net():
+ net = ffnet.FeedForwardNet(loss.SoftmaxCrossEntropy(), metric.Accuracy())
+ ConvBnReLU(net, 'conv1_1', 64, (3, 32, 32))
+ net.add(layer.Dropout('drop1', 0.3, engine='cudnn'))
+ ConvBnReLU(net, 'conv1_2', 64)
+ net.add(layer.MaxPooling2D('pool1', 2, 2, border_mode='valid'))
+ ConvBnReLU(net, 'conv2_1', 128)
+ net.add(layer.Dropout('drop2_1', 0.4, engine='cudnn'))
+ ConvBnReLU(net, 'conv2_2', 128)
+ net.add(layer.MaxPooling2D('pool2', 2, 2, border_mode='valid'))
+ ConvBnReLU(net, 'conv3_1', 256)
+ net.add(layer.Dropout('drop3_1', 0.4, engine='cudnn'))
+ ConvBnReLU(net, 'conv3_2', 256)
+ net.add(layer.Dropout('drop3_2', 0.4, engine='cudnn'))
+ ConvBnReLU(net, 'conv3_3', 256)
+ net.add(layer.MaxPooling2D('pool3', 2, 2, border_mode='valid'))
+ ConvBnReLU(net, 'conv4_1', 512)
+ net.add(layer.Dropout('drop4_1', 0.4, engine='cudnn'))
+ ConvBnReLU(net, 'conv4_2', 512)
+ net.add(layer.Dropout('drop4_2', 0.4, engine='cudnn'))
+ ConvBnReLU(net, 'conv4_3', 512)
+ net.add(layer.MaxPooling2D('pool4', 2, 2, border_mode='valid'))
+ ConvBnReLU(net, 'conv5_1', 512)
+ net.add(layer.Dropout('drop5_1', 0.4, engine='cudnn'))
+ ConvBnReLU(net, 'conv5_2', 512)
+ net.add(layer.Dropout('drop5_2', 0.4, engine='cudnn'))
+ ConvBnReLU(net, 'conv5_3', 512)
+ net.add(layer.MaxPooling2D('pool5', 2, 2, border_mode='valid'))
+ net.add(layer.Flatten('flat'))
+ net.add(layer.Dropout('drop_flat', 0.5, engine='cudnn'))
+ net.add(layer.Dense('ip1', 512))
+ net.add(layer.BatchNormalization('batchnorm_ip1'))
+ net.add(layer.Activation('relu_ip1'))
+ net.add(layer.Dropout('drop_ip2', 0.5, engine='cudnn'))
+ net.add(layer.Dense('ip2', 10))
+ return net
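For reference, a minimal sketch of how this module is consumed, mirroring
the training script above (device.create_cuda_gpu() is the single-GPU call
used by the merged train.py in the follow-up commit):

    import vgg
    from singa import device

    net = vgg.create_net()
    cuda = device.create_cuda_gpu()
    net.to_device(cuda)
    # batches are then fed as tensors: grads, (l, a) = net.train(tx, ty)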
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/bc3b74b3/src/core/tensor/tensor.cc
----------------------------------------------------------------------
diff --git a/src/core/tensor/tensor.cc b/src/core/tensor/tensor.cc
index 4972a86..c16bd29 100644
--- a/src/core/tensor/tensor.cc
+++ b/src/core/tensor/tensor.cc
@@ -80,11 +80,11 @@ void Tensor::ResetLike(const Tensor &in) {
if (block_ == nullptr || device_ != in.device_ || MemSize() != in.MemSize()) {
if (block_ != nullptr && block_->DecRefCount() == 0)
device_->FreeBlock(block_);
- shape_ = in.shape_;
device_ = in.device_;
data_type_ = in.data_type_;
block_ = device_->NewBlock(in.MemSize());
}
+ shape_ = in.shape_;
}
void Tensor::Reshape(const Shape &shape) {
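The ResetLike() fix above matters when the target tensor already owns a
buffer of the right byte size but a different shape (e.g. resetting a
Shape{64} tensor to look like a Shape{1, 64} one): the old code skipped
the shape assignment whenever the buffer was reused. A loose Python
analogy of the corrected control flow (hypothetical class, not SINGA's
API):

    class Tensor(object):
        def __init__(self, shape):
            self.shape, self.block = shape, None

        def mem_size(self):
            n = 1
            for d in self.shape:
                n *= d
            return 4 * n  # assume float32 elements

        def reset_like(self, other):
            if self.block is None or self.mem_size() != other.mem_size():
                self.block = bytearray(other.mem_size())  # reallocate
            # the fix: always adopt the source shape, even when the
            # existing buffer is reused because the byte sizes match
            self.shape = other.shape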
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/bc3b74b3/src/model/layer/batchnorm.cc
----------------------------------------------------------------------
diff --git a/src/model/layer/batchnorm.cc b/src/model/layer/batchnorm.cc
index b6edc9e..6ea9f2a 100644
--- a/src/model/layer/batchnorm.cc
+++ b/src/model/layer/batchnorm.cc
@@ -27,8 +27,18 @@ void BatchNorm::Setup(const Shape& in_sample, const LayerConf& conf) {
out_sample_shape_ = in_sample;
factor_ = conf.batchnorm_conf().factor();
channels_ = in_sample.at(0);
- height_ = in_sample.at(1);
- width_ = in_sample.at(2);
+ if (in_sample.size() == 3u)
+ height_ = in_sample.at(1);
+ else
+ height_ = 1;
+ if (in_sample.size() == 3u)
+ width_ = in_sample.at(2);
+ else
+ width_ = 1;
+ if (in_sample.size() == 1u)
+ is_2d_ = true;
+ else
+ is_2d_ = false;
bnScale_.Reshape(Shape{channels_ * height_ * width_});
bnBias_.ResetLike(bnScale_);
@@ -92,7 +102,8 @@ const Tensor BatchNorm::Forward(int flag, const Tensor& input) {
AddRow(bnBias_, &output);
}
- output.Reshape(Shape{output.shape(0), channels_, height_, width_});
+ if (!is_2d_)
+ output.Reshape(Shape{output.shape(0), channels_, height_, width_});
return output;
}
@@ -170,10 +181,16 @@ const std::pair<Tensor, vector<Tensor>> BatchNorm::Backward(
SumRows(dy, &dbnBias_);
param_grad.push_back(dbnScale_);
param_grad.push_back(dbnBias_);
+ Tensor dummy;
+ dummy.ResetLike(runningMean_);
+ dummy.SetValue(.0f);
+ param_grad.push_back(dummy);
+ param_grad.push_back(dummy);
} else {
LOG(ERROR) << "Do not call backward for evaluation phase";
}
- dx.Reshape(Shape{dx.shape(0), channels_, height_, width_});
+ if (!is_2d_)
+ dx.Reshape(Shape{dx.shape(0), channels_, height_, width_});
return std::make_pair(dx, param_grad);
}
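The two dummy zero tensors pushed above exist because the running mean and
variance are now registered as parameters (see the layer.py diff below),
while they are maintained by the layer itself rather than by the
optimizer. Callers pair parameters with gradients positionally, so every
parameter needs a gradient slot; the training loop in
train_vgg_cifar10.py above relies on exactly this alignment:

    # specs, values and grads are zipped positionally, so len(grads)
    # must equal len(net.param_values())
    for (s, p, g) in zip(net.param_specs(), net.param_values(), grads):
        opt.apply_with_lr(epoch, get_lr(epoch), g, p, str(s.name))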
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/bc3b74b3/src/model/layer/batchnorm.h
----------------------------------------------------------------------
diff --git a/src/model/layer/batchnorm.h b/src/model/layer/batchnorm.h
index 6ff818b..f3d83ab 100644
--- a/src/model/layer/batchnorm.h
+++ b/src/model/layer/batchnorm.h
@@ -44,7 +44,7 @@ class BatchNorm : public Layer {
/// \copydoc Layer::Backward(int, const Tensor&, const Tensor&);
const std::pair<Tensor, vector<Tensor>> Backward(
int flag, const Tensor& grad) override;
- const std::vector<Tensor> param_values() override {
+ virtual const std::vector<Tensor> param_values() override {
return std::vector<Tensor> { bnScale_, bnBias_, runningMean_,
runningVariance_ };
}
@@ -77,6 +77,7 @@ class BatchNorm : public Layer {
protected:
float factor_;
size_t channels_, height_, width_;
+ bool is_2d_ = false;
Tensor bnScale_, bnBias_;
Tensor dbnScale_, dbnBias_;
Tensor runningMean_, runningVariance_;
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/bc3b74b3/src/model/layer/cudnn_batchnorm.cc
----------------------------------------------------------------------
diff --git a/src/model/layer/cudnn_batchnorm.cc b/src/model/layer/cudnn_batchnorm.cc
index 9e1e892..461f1b6 100644
--- a/src/model/layer/cudnn_batchnorm.cc
+++ b/src/model/layer/cudnn_batchnorm.cc
@@ -75,14 +75,20 @@ const Tensor CudnnBatchNorm::Forward(int flag, const Tensor& input) {
auto shape = input.shape();
auto dtype = input.data_type();
Tensor output;
+ Tensor x;
+ if(is_2d_)
+ x = Reshape(input, Shape{shape.at(0), shape.at(1), 1, 1});
+ else
+ x = input;
+ shape = x.shape();
if (!has_init_cudnn_)
InitCudnn(shape, dtype);
// TODO(wangji): check device id of input and params
- output.ResetLike(input);
+ output.ResetLike(x);
if ((flag & kTrain) == kTrain) {
output.device()->Exec(
[=](Context* ctx) {
- Block *inBlock = input.block(), *outBlock = output.block(),
+ Block *inBlock = x.block(), *outBlock = output.block(),
*saveMeanBlock = resultSaveMean_.block(),
*saveVarBlock = resultSaveVariance_.block(),
*runningMeanBlock = runningMean_.block(),
@@ -110,7 +116,7 @@ const Tensor CudnnBatchNorm::Forward(int flag, const Tensor& input) {
saveMeanBlock->mutable_data(),
saveVarBlock->mutable_data()));
},
- {input.block(),
+ {x.block(),
bnScale_.block(),
bnBias_.block()},
{output.block(),
@@ -118,11 +124,11 @@ const Tensor CudnnBatchNorm::Forward(int flag, const Tensor& input) {
runningVariance_.block(),
resultSaveMean_.block(),
resultSaveVariance_.block()});
- buf_.push(input);
+ buf_.push(x);
} else {
output.device()->Exec(
[=](Context* ctx) {
- Block *inBlock = input.block(), *outBlock = output.block(),
+ Block *inBlock = x.block(), *outBlock = output.block(),
*runningMeanBlock = runningMean_.block(),
*runningVarBlock = runningVariance_.block(),
*bnScaleBlock = bnScale_.block(),
@@ -145,13 +151,15 @@ const Tensor CudnnBatchNorm::Forward(int flag, const Tensor& input) {
runningVarBlock->data(),
epsilon));
},
- {input.block(),
+ {x.block(),
bnScale_.block(),
bnBias_.block(),
runningMean_.block(),
runningVariance_.block()},
{output.block()});
}
+ if (is_2d_)
+ output.Reshape(Shape{shape.at(0), shape.at(1)});
return output;
}
@@ -160,13 +168,13 @@ const std::pair<Tensor, vector<Tensor>> CudnnBatchNorm::Backward(
vector <Tensor> param_grad;
Tensor dx;
if ((flag & kTrain) == kTrain) {
- Tensor input = buf_.top();
+ Tensor x = buf_.top();
buf_.pop();
dx.ResetLike(grad);
dx.device()->Exec(
[=](Context* ctx) {
Block *dyblock = grad.block(), *dxblock = dx.block(),
- *xblock = input.block(),
+ *xblock = x.block(),
*bnScaleBlock = bnScale_.block(),
*dbnScaleBlock = dbnScale_.block(),
*dbnBiasBlock = dbnBias_.block(),
@@ -208,6 +216,13 @@ const std::pair<Tensor, vector<Tensor>> CudnnBatchNorm::Backward(
}
param_grad.push_back(dbnScale_);
param_grad.push_back(dbnBias_);
+ Tensor dummy;
+ dummy.ResetLike(dbnScale_);
+ dummy.SetValue(.0f);
+ param_grad.push_back(dummy);
+ param_grad.push_back(dummy);
+ if (is_2d_)
+ dx.Reshape(Shape{dx.shape().at(0), dx.shape().at(1)});
return std::make_pair(dx, param_grad);
}
} // namespace
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/bc3b74b3/src/model/updater/local_updater.cc
----------------------------------------------------------------------
diff --git a/src/model/updater/local_updater.cc b/src/model/updater/local_updater.cc
index eab4a7c..c3c6793 100644
--- a/src/model/updater/local_updater.cc
+++ b/src/model/updater/local_updater.cc
@@ -33,6 +33,7 @@ void LocalUpdater::Register(const string& name, const ParamSpec& specs) {
}
dev_index_[name] = 0;
to_updater_finished_[name] = 0;
+ mtx_[name];
}
void LocalUpdater::Apply(int step, const string& name, Tensor& grad,
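The added line mtx_[name]; pre-creates the per-parameter mutex while
Register() still runs single-threaded, so later concurrent Apply() calls
never race on a first-touch insertion into the map. A loose Python analogy
of the idiom (hypothetical names, not SINGA's API):

    import threading

    class LocalUpdater(object):
        def __init__(self):
            self.mtx = {}  # param name -> Lock

        def register(self, name):
            # create the lock up front, mirroring mtx_[name];
            self.mtx.setdefault(name, threading.Lock())

        def apply(self, name, grad):
            with self.mtx[name]:  # safe: no concurrent first insertion
                pass  # aggregate grad here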
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/bc3b74b3/src/python/singa/layer.py
----------------------------------------------------------------------
diff --git a/src/python/singa/layer.py b/src/python/singa/layer.py
index 937a7e1..a443e1a 100644
--- a/src/python/singa/layer.py
+++ b/src/python/singa/layer.py
@@ -327,10 +327,16 @@ class BatchNormalization(Layer):
beta_specs['name'] = name + '_beta'
if 'name' not in gamma_specs:
gamma_specs['name'] = name + '_gamma'
- self.conf.param.extend([_construct_param_specs_from_dict(beta_specs)])
+ mean_specs = {'init': 'constant', 'value': 0, 'name': name+'_mean'}
+ var_specs = {'init': 'constant', 'value': 1, 'name': name+'_var'}
self.conf.param.extend([_construct_param_specs_from_dict(gamma_specs)])
- self.param_specs.append(_construct_param_specs_from_dict(beta_specs))
+ self.conf.param.extend([_construct_param_specs_from_dict(beta_specs)])
+ self.conf.param.extend([_construct_param_specs_from_dict(mean_specs)])
+ self.conf.param.extend([_construct_param_specs_from_dict(var_specs)])
self.param_specs.append(_construct_param_specs_from_dict(gamma_specs))
+ self.param_specs.append(_construct_param_specs_from_dict(beta_specs))
+ self.param_specs.append(_construct_param_specs_from_dict(mean_specs))
+ self.param_specs.append(_construct_param_specs_from_dict(var_specs))
_check_engine(engine, ['cudnn'])
self.layer = _create_layer(engine, 'BatchNorm')
if input_sample_shape is not None:
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/bc3b74b3/src/python/singa/net.py
----------------------------------------------------------------------
diff --git a/src/python/singa/net.py b/src/python/singa/net.py
index 084db4b..c0ba61d 100644
--- a/src/python/singa/net.py
+++ b/src/python/singa/net.py
@@ -64,6 +64,9 @@ class FeedForwardNet(object):
specs.extend(lyr.param_specs)
return specs
+ def param_names(self):
+ return [spec.name for spec in self.param_specs()]
+
def train(self, x, y):
out = self.forward(kTrain, x)
l = self.loss.forward(kTrain, out, y)
@@ -89,9 +92,10 @@ class FeedForwardNet(object):
return tensor.softmax(xx)
def forward(self, flag, x):
+ #print x.l1()
for lyr in self.layers:
x = lyr.forward(flag, x)
- # print lyr.name, x.l1()
+ # print lyr.name, x.l1()
return x
def backward(self, flag=kTrain):
[2/2] incubator-singa git commit: SINGA-231 Batch-normalized VGG model for cifar-10
Posted by wa...@apache.org.
SINGA-231 Batch-normalized VGG model for cifar-10
Merge the training of vgg and alexnet into train.py
The validation accuracy of vgg could reach 0.89
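With the merge, both models share a single entry point; based on the
argparse setup in the train.py diff below, one would run, for example,
"python train.py vgg cifar-10-batches-py" or
"python train.py alexnet cifar-10-batches-py".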
Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/28678ae8
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/28678ae8
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/28678ae8
Branch: refs/heads/dev
Commit: 28678ae8329112ca1f11086b52ded7149ec9ab2c
Parents: bc3b74b
Author: Wei Wang <wa...@comp.nus.edu.sg>
Authored: Tue Aug 9 20:06:29 2016 +0800
Committer: Wei Wang <wa...@comp.nus.edu.sg>
Committed: Wed Aug 10 00:01:03 2016 +0800
----------------------------------------------------------------------
examples/cifar10/alexnet.py | 16 ++-
examples/cifar10/predict.py | 14 ++-
examples/cifar10/run-parallel.sh | 1 +
examples/cifar10/train.py | 63 +++++++----
examples/cifar10/train_vgg_cifar10.py | 162 -----------------------------
examples/cifar10/vgg-parallel.cc | 24 ++---
examples/cifar10/vgg.py | 66 ++++++++++--
7 files changed, 138 insertions(+), 208 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/28678ae8/examples/cifar10/alexnet.py
----------------------------------------------------------------------
diff --git a/examples/cifar10/alexnet.py b/examples/cifar10/alexnet.py
index 4b3daec..96c339a 100644
--- a/examples/cifar10/alexnet.py
+++ b/examples/cifar10/alexnet.py
@@ -14,15 +14,21 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# =============================================================================
+''' This model is created following the structure from
+https://code.google.com/p/cuda-convnet/source/browse/trunk/example-layers/layers-18pct.cfg
+Following the same settings for hyper-parameters and data pre-processing, the final
+validation accuracy would be about 82%.
+'''
+
import sys
import os
sys.path.append(os.path.join(os.path.dirname(__file__), '../../build/python'))
from singa import layer
+from singa import initializer
from singa import metric
from singa import loss
from singa import net as ffnet
-from singa.proto import core_pb2
def create_net():
@@ -44,4 +50,12 @@ def create_net():
net.add(layer.MaxPooling2D('pool3', 3, 2, pad=1))
net.add(layer.Flatten('flat'))
net.add(layer.Dense('dense', 10, W_specs=W2_specs.copy(), b_specs=b_specs.copy()))
+ for (p, specs) in zip(net.param_values(), net.param_specs()):
+ filler = specs.filler
+ if filler.type == 'gaussian':
+ initializer.gaussian(p, filler.mean, filler.std)
+ else:
+ p.set_value(0)
+ print specs.name, filler.type, p.l1()
+
return net
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/28678ae8/examples/cifar10/predict.py
----------------------------------------------------------------------
diff --git a/examples/cifar10/predict.py b/examples/cifar10/predict.py
index d083d0b..07b1145 100644
--- a/examples/cifar10/predict.py
+++ b/examples/cifar10/predict.py
@@ -14,7 +14,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# =============================================================================
-
+import cPickle as pickle
import numpy as np
import sys
import os
@@ -27,6 +27,15 @@ import net as ffnet
def predict(net, images, cuda, topk=5):
+ '''Predict the label of each image.
+
+ Args:
+ net, a pretrained neural net
+ images, a batch of images [batch_size, 3, 32, 32], which have been
+ pre-processed
+ cuda, the cuda device
+ topk, return the topk labels for each image.
+ '''
x = tensor.from_numpy(images.astype(np.float32))
x.to_device(cuda)
y = net.predict(x)
@@ -40,7 +49,7 @@ def predict(net, images, cuda, topk=5):
def load_dataset(filepath):
print 'Loading data file %s' % filepath
with open(filepath, 'rb') as fd:
- cifar10 = cPickle.load(fd)
+ cifar10 = pickle.load(fd)
image = cifar10['data'].astype(dtype=np.uint8)
image = image.reshape((-1, 3, 32, 32))
label = np.asarray(cifar10['labels'], dtype=np.uint8)
@@ -79,4 +88,5 @@ if __name__ == '__main__':
mean = compute_image_mean('cifar-10-batches-py')
test_images, _ = load_test_data('cifar-10-batches-py')
+ # minus mean is for alexnet; vgg uses a different pre-processing strategy
print predict(model, test_images - mean, cuda)
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/28678ae8/examples/cifar10/run-parallel.sh
----------------------------------------------------------------------
diff --git a/examples/cifar10/run-parallel.sh b/examples/cifar10/run-parallel.sh
index 6a9109a..18193db 100755
--- a/examples/cifar10/run-parallel.sh
+++ b/examples/cifar10/run-parallel.sh
@@ -1,2 +1,3 @@
#!/usr/bin/env sh
../../build/bin/alexnet-parallel -epoch 4
+#../../build/bin/vgg-parallel -epoch 4
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/28678ae8/examples/cifar10/train.py
----------------------------------------------------------------------
diff --git a/examples/cifar10/train.py b/examples/cifar10/train.py
index f4caca4..cb4110d 100644
--- a/examples/cifar10/train.py
+++ b/examples/cifar10/train.py
@@ -23,9 +23,9 @@ import cPickle
import numpy as np
import os
import sys
+import argparse
sys.path.append(os.path.join(os.path.dirname(__file__), '../../build/python'))
-from singa import initializer
from singa import utils
from singa import optimizer
from singa import device
@@ -33,6 +33,7 @@ from singa import tensor
from singa.proto import core_pb2
import alexnet
+import vgg
def load_dataset(filepath):
@@ -65,7 +66,28 @@ def load_test_data(dir_path):
return np.array(images, dtype=np.float32), np.array(labels, dtype=np.int32)
-def get_lr(epoch):
+def normalize_for_vgg(train_x, test_x):
+ mean = train_x.mean()
+ std = train_x.std()
+ train_x -= mean
+ test_x -= mean
+ train_x /= std
+ test_x /= std
+ return train_x, test_x
+
+
+def normalize_for_alexnet(train_x, test_x):
+ mean = np.average(train_x, axis=0)
+ train_x -= mean
+ test_x -= mean
+ return train_x, test_x
+
+
+def vgg_lr(epoch):
+ return 0.01 / float(1 << ((epoch / 30)))
+
+
+def alexnet_lr(epoch):
if epoch < 120:
return 0.001
elif epoch < 130:
@@ -74,32 +96,21 @@ def get_lr(epoch):
return 0.00001
-def train(data_dir, net, num_epoch=140, batch_size=100):
+def train(data, net, max_epoch, get_lr, weight_decay, batch_size=100):
 print 'Start initialization............'
cuda = device.create_cuda_gpu()
net.to_device(cuda)
opt = optimizer.SGD(momentum=0.9, weight_decay=0.004)
for (p, specs) in zip(net.param_values(), net.param_specs()):
- filler = specs.filler
- if filler.type == 'gaussian':
- initializer.gaussian(p, filler.mean, filler.std)
- else:
- p.set_value(0)
opt.register(p, specs)
- print specs.name, filler.type, p.l1()
- print 'Loading data ..................'
- train_x, train_y = load_train_data(data_dir)
- test_x, test_y = load_test_data(data_dir)
- mean = np.average(train_x, axis=0)
- train_x -= mean
- test_x -= mean
tx = tensor.Tensor((batch_size, 3, 32, 32), cuda)
ty = tensor.Tensor((batch_size,), cuda, core_pb2.kInt)
+ train_x, train_y, test_x, test_y = data
num_train_batch = train_x.shape[0] / batch_size
num_test_batch = test_x.shape[0] / batch_size
idx = np.arange(train_x.shape[0], dtype=np.int32)
- for epoch in range(num_epoch):
+ for epoch in range(max_epoch):
np.random.shuffle(idx)
loss, acc = 0.0, 0.0
print 'Epoch %d' % epoch
@@ -135,8 +146,20 @@ def train(data_dir, net, num_epoch=140, batch_size=100):
net.save('model.bin') # save model params into checkpoint file
if __name__ == '__main__':
- data_dir = 'cifar-10-batches-py'
- assert os.path.exists(data_dir), \
+ parser = argparse.ArgumentParser(description='Train vgg/alexnet for cifar10')
+ parser.add_argument('model', choices=['vgg', 'alexnet'], default='alexnet')
+ parser.add_argument('data', default='cifar-10-batches-py')
+ args = parser.parse_args()
+ assert os.path.exists(args.data), \
'Pls download the cifar10 dataset via "download_data.py py"'
- net = alexnet.create_net()
- train(data_dir, net)
+ print 'Loading data ..................'
+ train_x, train_y = load_train_data(args.data)
+ test_x, test_y = load_test_data(args.data)
+ if args.model == 'alexnet':
+ train_x, test_x = normalize_for_alexnet(train_x, test_x)
+ net = alexnet.create_net()
+ train((train_x, train_y, test_x, test_y), net, 140, alexnet_lr, 0.004)
+ else:
+ train_x, test_x = normalize_for_vgg(train_x, test_x)
+ net = vgg.create_net()
+ train((train_x, train_y, test_x, test_y), net, 250, vgg_lr, 0.0005)
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/28678ae8/examples/cifar10/train_vgg_cifar10.py
----------------------------------------------------------------------
diff --git a/examples/cifar10/train_vgg_cifar10.py b/examples/cifar10/train_vgg_cifar10.py
deleted file mode 100644
index e9df04e..0000000
--- a/examples/cifar10/train_vgg_cifar10.py
+++ /dev/null
@@ -1,162 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# =============================================================================
-""" CIFAR10 dataset is at https://www.cs.toronto.edu/~kriz/cifar.html.
-It includes 5 binary dataset, each contains 10000 images. 1 row (1 image)
-includes 1 label & 3072 pixels. 3072 pixels are 3 channels of a 32x32 image
-"""
-
-import cPickle
-import numpy as np
-import os
-import sys
-import math
-
-sys.path.append(os.path.join(os.path.dirname(__file__), '../../build/python'))
-from singa import initializer
-from singa import utils
-from singa import optimizer
-from singa import device
-from singa import tensor
-from singa.proto import core_pb2
-
-import vgg
-
-
-def load_dataset(filepath):
- print 'Loading data file %s' % filepath
- with open(filepath, 'rb') as fd:
- cifar10 = cPickle.load(fd)
- image = cifar10['data'].astype(dtype=np.uint8)
- image = image.reshape((-1, 3, 32, 32))
- label = np.asarray(cifar10['labels'], dtype=np.uint8)
- label = label.reshape(label.size, 1)
- return image, label
-
-
-def load_train_data(dir_path, num_batches=5):
- labels = []
- batchsize = 10000
- images = np.empty((num_batches * batchsize, 3, 32, 32), dtype=np.uint8)
- for did in range(1, num_batches + 1):
- fname_train_data = dir_path + "/data_batch_{}".format(did)
- image, label = load_dataset(fname_train_data)
- images[(did - 1) * batchsize:did * batchsize] = image
- labels.extend(label)
- images = np.array(images, dtype=np.float32)
- labels = np.array(labels, dtype=np.int32)
- return images, labels
-
-
-def load_test_data(dir_path):
- images, labels = load_dataset(dir_path + "/test_batch")
- return np.array(images, dtype=np.float32), np.array(labels, dtype=np.int32)
-
-
-def get_lr(epoch):
- return 0.01 / float(1 << ((epoch / 30)))
- #if epoch < 100:
- # return 0.01
- #elif epoch < 150:
- # return 0.005
- #elif epoch < 200:
- # return 0.001
- #elif epoch < 250:
- # return 0.0001
-
-
-def train(data_dir, net, num_epoch=250, batch_size=128):
- print 'Creating Device............'
- cuda = device.create_cuda_gpus(2)[1]
- net.to_device(cuda)
- print 'Start initialization............'
- opt = optimizer.SGD(momentum=0.9, weight_decay=0.0005)
- for (p, name) in zip(net.param_values(), net.param_names()):
- print name, p.shape
- if len(p.shape) > 1:
- if 'mean' in name or 'beta' in name:
- p.set_value(0.0)
- elif 'var' in name:
- p.set_value(1.0)
- elif 'gamma' in name:
- initializer.uniform(p, 0, 1)
- elif 'conv' in name:
- initializer.gaussian(p, 0, math.sqrt(2.0/(9.0 * p.shape[0])))
- else:
- initializer.gaussian(p, 0, 0.02)
-
- #stdv = 1.0/math.sqrt(p.shape[1])
- #initializer.uniform(p, -stdv, stdv)
- else:
- p.set_value(0)
- #print specs.name, filler.type, p.l1()
- print name, p.l1()
- print 'Loading data ..................'
- train_x, train_y = load_train_data(data_dir)
- test_x, test_y = load_test_data(data_dir)
- mean = train_x.mean()
- std = train_x.std()
- train_x -= mean
- test_x -= mean
- train_x /= std
- test_x /= std
-
- tx = tensor.Tensor((batch_size, 3, 32, 32), cuda)
- ty = tensor.Tensor((batch_size,), cuda, core_pb2.kInt)
- num_train_batch = train_x.shape[0] / batch_size
- num_test_batch = test_x.shape[0] / batch_size
- idx = np.arange(train_x.shape[0], dtype=np.int32)
- for epoch in range(num_epoch):
- np.random.shuffle(idx)
- loss, acc = 0.0, 0.0
- print 'Epoch %d' % epoch
- for b in range(num_train_batch):
- x = train_x[idx[b * batch_size: (b + 1) * batch_size]]
- y = train_y[idx[b * batch_size: (b + 1) * batch_size]]
- tx.copy_from_numpy(x)
- ty.copy_from_numpy(y)
- grads, (l, a) = net.train(tx, ty)
- loss += l
- acc += a
- for (s, p, g) in zip(net.param_specs(), net.param_values(), grads):
- opt.apply_with_lr(epoch, get_lr(epoch), g, p, str(s.name))
- # update progress bar
- utils.update_progress(b * 1.0 / num_train_batch,
- 'training loss = %f, accuracy = %f' % (l, a))
- info = '\ntraining loss = %f, training accuracy = %f' \
- % (loss / num_train_batch, acc / num_train_batch)
- print info
-
- loss, acc = 0.0, 0.0
- for b in range(num_test_batch):
- x = test_x[b * batch_size: (b + 1) * batch_size]
- y = test_y[b * batch_size: (b + 1) * batch_size]
- tx.copy_from_numpy(x)
- ty.copy_from_numpy(y)
- l, a = net.evaluate(tx, ty)
- loss += l
- acc += a
-
- print 'test loss = %f, test accuracy = %f' \
- % (loss / num_test_batch, acc / num_test_batch)
- net.save('model.bin') # save model params into checkpoint file
-
-if __name__ == '__main__':
- data_dir = 'cifar-10-batches-py'
- assert os.path.exists(data_dir), \
- 'Pls download the cifar10 dataset via "download_data.py py"'
- net = vgg.create_net()
- train(data_dir, net)
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/28678ae8/examples/cifar10/vgg-parallel.cc
----------------------------------------------------------------------
diff --git a/examples/cifar10/vgg-parallel.cc b/examples/cifar10/vgg-parallel.cc
index ba308e9..c6b7fa1 100644
--- a/examples/cifar10/vgg-parallel.cc
+++ b/examples/cifar10/vgg-parallel.cc
@@ -32,7 +32,7 @@
#include "../../src/model/layer/cudnn_activation.h"
#include "../../src/model/layer/cudnn_pooling.h"
#include "../../src/model/layer/cudnn_lrn.h"
-#include "../../src/model/layer/cudnn_dropout.h"
+#include "../../src/model/layer/dropout.h"
#include "../../src/model/layer/cudnn_batchnorm.h"
#include "../../src/model/layer/dense.h"
#include "../../src/model/layer/flatten.h"
@@ -155,7 +155,7 @@ LayerConf GenBatchNormConf(string name) {
LayerConf GenDropoutConf(string name, float dropout_ratio) {
LayerConf conf;
conf.set_name(name);
- conf.set_type("CudnnDropout");
+ conf.set_type("Dropout");
DropoutConf *dropout = conf.mutable_dropout_conf();
dropout->set_dropout_ratio(dropout_ratio);
@@ -172,37 +172,37 @@ FeedForwardNet CreateNet() {
FeedForwardNet net;
Shape s{3, 32, 32};
ConvBNReLU(net, "conv1_1", 64, &s);
- net.Add(new CudnnDropout(), GenDropoutConf("drop1", 0.3));
+ net.Add(new Dropout(), GenDropoutConf("drop1", 0.3));
ConvBNReLU(net, "conv1_2", 64);
net.Add(new CudnnPooling(), GenPoolingConf("pool1", true, 2, 2, 0));
ConvBNReLU(net, "conv2_1", 128);
- net.Add(new CudnnDropout(), GenDropoutConf("drop2", 0.4));
+ net.Add(new Dropout(), GenDropoutConf("drop2", 0.4));
ConvBNReLU(net, "conv2_2", 128);
net.Add(new CudnnPooling(), GenPoolingConf("pool2", true, 2, 2, 0));
ConvBNReLU(net, "conv3_1", 256);
- net.Add(new CudnnDropout(), GenDropoutConf("drop3_1", 0.4));
+ net.Add(new Dropout(), GenDropoutConf("drop3_1", 0.4));
ConvBNReLU(net, "conv3_2", 256);
- net.Add(new CudnnDropout(), GenDropoutConf("drop3_2", 0.4));
+ net.Add(new Dropout(), GenDropoutConf("drop3_2", 0.4));
ConvBNReLU(net, "conv3_3", 256);
net.Add(new CudnnPooling(), GenPoolingConf("pool3", true, 2, 2, 0));
ConvBNReLU(net, "conv4_1", 512);
- net.Add(new CudnnDropout(), GenDropoutConf("drop4_1", 0.4));
+ net.Add(new Dropout(), GenDropoutConf("drop4_1", 0.4));
ConvBNReLU(net, "conv4_2", 512);
- net.Add(new CudnnDropout(), GenDropoutConf("drop4_2", 0.4));
+ net.Add(new Dropout(), GenDropoutConf("drop4_2", 0.4));
ConvBNReLU(net, "conv4_3", 512);
net.Add(new CudnnPooling(), GenPoolingConf("pool4", true, 2, 2, 0));
ConvBNReLU(net, "conv5_1", 512);
- net.Add(new CudnnDropout(), GenDropoutConf("drop5_1", 0.4));
+ net.Add(new Dropout(), GenDropoutConf("drop5_1", 0.4));
ConvBNReLU(net, "conv5_2", 512);
- net.Add(new CudnnDropout(), GenDropoutConf("drop5_2", 0.4));
+ net.Add(new Dropout(), GenDropoutConf("drop5_2", 0.4));
ConvBNReLU(net, "conv5_3", 512);
net.Add(new CudnnPooling(), GenPoolingConf("pool5", true, 2, 2, 0));
net.Add(new Flatten(), GenFlattenConf("flat"));
- net.Add(new CudnnDropout(), GenDropoutConf("flat_drop", 0.5));
+ net.Add(new Dropout(), GenDropoutConf("flat_drop", 0.5));
net.Add(new Dense(), GenDenseConf("ip1", 512, 0.02));
net.Add(new CudnnBatchNorm(), GenBatchNormConf("ip1_bn"));
net.Add(new CudnnActivation(), GenReLUConf("ip1_relu"));
- net.Add(new CudnnDropout(), GenDropoutConf("ip1_drop", 0.5));
+ net.Add(new Dropout(), GenDropoutConf("ip1_drop", 0.5));
net.Add(new Dense(), GenDenseConf("ip2", 10, 0.02));
return net;
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/28678ae8/examples/cifar10/vgg.py
----------------------------------------------------------------------
diff --git a/examples/cifar10/vgg.py b/examples/cifar10/vgg.py
index 8063307..0b9bb56 100644
--- a/examples/cifar10/vgg.py
+++ b/examples/cifar10/vgg.py
@@ -1,12 +1,37 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# =============================================================================
+""" The VGG model is adapted from http://torch.ch/blog/2015/07/30/cifar.html.
+The best validation accuracy we achieved is about 89% without data augmentation.
+The performance could be improved by tuning some hyper-parameters, including
+learning rate, weight decay, max_epoch, parameter initialization, etc.
+"""
+
import sys
import os
+import math
sys.path.append(os.path.join(os.path.dirname(__file__), '../../build/python'))
+
from singa import layer
+from singa import initializer
from singa import metric
from singa import loss
from singa import net as ffnet
-from singa.proto import core_pb2
+
def ConvBnReLU(net, name, nb_filters, sample_shape=None):
net.add(layer.Conv2D(name + '_1', nb_filters, 3, 1, pad=1,
@@ -14,39 +14,58 @@ def ConvBnReLU(net, name, nb_filters, sample_shape=None):
net.add(layer.BatchNormalization(name + '_2'))
net.add(layer.Activation(name + '_3'))
+
def create_net():
net = ffnet.FeedForwardNet(loss.SoftmaxCrossEntropy(), metric.Accuracy())
ConvBnReLU(net, 'conv1_1', 64, (3, 32, 32))
- net.add(layer.Dropout('drop1', 0.3, engine='cudnn'))
+ net.add(layer.Dropout('drop1', 0.3, engine='cuda'))
ConvBnReLU(net, 'conv1_2', 64)
net.add(layer.MaxPooling2D('pool1', 2, 2, border_mode='valid'))
ConvBnReLU(net, 'conv2_1', 128)
- net.add(layer.Dropout('drop2_1', 0.4, engine='cudnn'))
+ net.add(layer.Dropout('drop2_1', 0.4, engine='cuda'))
ConvBnReLU(net, 'conv2_2', 128)
net.add(layer.MaxPooling2D('pool2', 2, 2, border_mode='valid'))
ConvBnReLU(net, 'conv3_1', 256)
- net.add(layer.Dropout('drop3_1', 0.4, engine='cudnn'))
+ net.add(layer.Dropout('drop3_1', 0.4, engine='cuda'))
ConvBnReLU(net, 'conv3_2', 256)
- net.add(layer.Dropout('drop3_2', 0.4, engine='cudnn'))
+ net.add(layer.Dropout('drop3_2', 0.4, engine='cuda'))
ConvBnReLU(net, 'conv3_3', 256)
net.add(layer.MaxPooling2D('pool3', 2, 2, border_mode='valid'))
ConvBnReLU(net, 'conv4_1', 512)
- net.add(layer.Dropout('drop4_1', 0.4, engine='cudnn'))
+ net.add(layer.Dropout('drop4_1', 0.4, engine='cuda'))
ConvBnReLU(net, 'conv4_2', 512)
- net.add(layer.Dropout('drop4_2', 0.4, engine='cudnn'))
+ net.add(layer.Dropout('drop4_2', 0.4, engine='cuda'))
ConvBnReLU(net, 'conv4_3', 512)
net.add(layer.MaxPooling2D('pool4', 2, 2, border_mode='valid'))
ConvBnReLU(net, 'conv5_1', 512)
- net.add(layer.Dropout('drop5_1', 0.4, engine='cudnn'))
+ net.add(layer.Dropout('drop5_1', 0.4, engine='cuda'))
ConvBnReLU(net, 'conv5_2', 512)
- net.add(layer.Dropout('drop5_2', 0.4, engine='cudnn'))
+ net.add(layer.Dropout('drop5_2', 0.4, engine='cuda'))
ConvBnReLU(net, 'conv5_3', 512)
net.add(layer.MaxPooling2D('pool5', 2, 2, border_mode='valid'))
net.add(layer.Flatten('flat'))
- net.add(layer.Dropout('drop_flat', 0.5, engine='cudnn'))
+ net.add(layer.Dropout('drop_flat', 0.5, engine='cuda'))
net.add(layer.Dense('ip1', 512))
net.add(layer.BatchNormalization('batchnorm_ip1'))
net.add(layer.Activation('relu_ip1'))
- net.add(layer.Dropout('drop_ip2', 0.5, engine='cudnn'))
+ net.add(layer.Dropout('drop_ip2', 0.5, engine='cuda'))
net.add(layer.Dense('ip2', 10))
+ print 'Start initialization............'
+ for (p, name) in zip(net.param_values(), net.param_names()):
+ print name, p.shape
+ if len(p.shape) > 1:
+ if 'mean' in name or 'beta' in name:
+ p.set_value(0.0)
+ elif 'var' in name:
+ p.set_value(1.0)
+ elif 'gamma' in name:
+ initializer.uniform(p, 0, 1)
+ elif 'conv' in name:
+ initializer.gaussian(p, 0, math.sqrt(2.0/(9.0 * p.shape[0])))
+ else:
+ initializer.gaussian(p, 0, 0.02)
+ else:
+ p.set_value(0)
+ print name, p.l1()
+
return net
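The convolution filler above (and the GenConvConf filler in
vgg-parallel.cc) matches the MSR-style (He) initialization used in the
referenced torch recipe: std = sqrt(2/n) with n = kW*kH*num_output, i.e.
9 * p.shape[0] for these 3x3 kernels. A quick check of the resulting std:

    import math
    # std for a 3x3 conv layer with 64 output channels (p.shape[0] == 64)
    print math.sqrt(2.0 / (9.0 * 64))  # ~0.0589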