You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@singa.apache.org by wa...@apache.org on 2016/01/05 17:59:16 UTC
[3/3] incubator-singa git commit: SINGA-98 Add Support for AlexNet
ImageNet Classification Model
SINGA-98 Add Support for AlexNet ImageNet Classification Model
Update the CudnnActivationLayer to share the data and grad blob with conv layer for memory space reduction.
It is controlled by the share_src_blobs field in the job config file.
The loss reduces after 3000 iterations using 256 mini-batch like Caffe.
cpplint check; update job conf for cpu training;
Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/bb75a0be
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/bb75a0be
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/bb75a0be
Branch: refs/heads/master
Commit: bb75a0be5f1bf00d24552fb943b5fc40453b5855
Parents: 6e815db
Author: Wei Wang <wa...@comp.nus.edu.sg>
Authored: Tue Dec 29 19:10:00 2015 +0800
Committer: Wei Wang <wa...@comp.nus.edu.sg>
Committed: Wed Jan 6 00:58:18 2016 +0800
----------------------------------------------------------------------
examples/alexnet/cudnn.conf | 18 ++++++++++--
examples/alexnet/im2rec.cc | 35 +++++++++++++++++++-----
examples/alexnet/job.conf | 50 ++++++++++++++++++----------------
examples/alexnet/rec2im_test.cc | 39 ++++++++++++++++++++------
src/neuralnet/layer.cc | 2 +-
src/neuralnet/neuron_layer/lrn.cc | 5 ++--
src/proto/job.proto | 3 ++
src/utils/updater.cc | 2 +-
8 files changed, 108 insertions(+), 46 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/bb75a0be/examples/alexnet/cudnn.conf
----------------------------------------------------------------------
diff --git a/examples/alexnet/cudnn.conf b/examples/alexnet/cudnn.conf
index eef20f9..6324185 100644
--- a/examples/alexnet/cudnn.conf
+++ b/examples/alexnet/cudnn.conf
@@ -1,7 +1,7 @@
name: "alexnet"
train_steps: 450000
-#test_steps: 500
-#test_freq: 1000
+test_steps: 500
+test_freq: 1000
disp_freq: 20
checkpoint_freq: 100000
checkpoint_after: 100000
@@ -423,6 +423,20 @@ neuralnet {
}
srclayers: "ip8"
srclayers: "data"
+ include: kTrain
+ }
+ layer {
+ name : "softmax"
+ type: kCudnnSoftmax
+ srclayers: "ip8"
+ include: kTest
+ }
+ layer {
+ name : "accuracy"
+ type: kAccuracy
+ srclayers: "softmax"
+ srclayers: "data"
+ include: kTest
}
}
cluster {
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/bb75a0be/examples/alexnet/im2rec.cc
----------------------------------------------------------------------
diff --git a/examples/alexnet/im2rec.cc b/examples/alexnet/im2rec.cc
index cf6eedf..58ee44f 100644
--- a/examples/alexnet/im2rec.cc
+++ b/examples/alexnet/im2rec.cc
@@ -1,4 +1,27 @@
+/************************************************************
+*
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements. See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership. The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an
+* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+* KIND, either express or implied. See the License for the
+* specific language governing permissions and limitations
+* under the License.
+*
+*************************************************************/
+
+
#include <glog/logging.h>
+#include <opencv2/opencv.hpp>
#include <algorithm>
#include <random>
#include <chrono>
@@ -7,7 +30,6 @@
#include <cstdint>
#include <iostream>
#include <vector>
-#include <opencv2/opencv.hpp>
#include "singa/io/store.h"
#include "singa/proto/common.pb.h"
@@ -20,8 +42,7 @@ const int kImageNBytes = 256*256*3;
void create_data(const string& image_list,
const string& input_folder,
const string& output_folder,
- const string& backend = "kvfile")
-{
+ const string& backend = "kvfile") {
singa::RecordProto image;
image.add_shape(3);
image.add_shape(kImageSize);
@@ -49,12 +70,12 @@ void create_data(const string& image_list,
string rec_buf;
cv::Mat img, res;
std::vector<std::pair<string, int>> file_list;
- while(image_list_file >> image_file_name >> label)
+ while (image_list_file >> image_file_name >> label)
file_list.push_back(std::make_pair(image_file_name, label));
LOG(INFO) << "Data Shuffling";
unsigned seed = std::chrono::system_clock::now().time_since_epoch().count();
std::shuffle(file_list.begin(), file_list.end()
- ,std::default_random_engine());
+ , std::default_random_engine());
LOG(INFO) << "Total number of images is " << file_list.size();
int ImageNum = file_list.size();
@@ -120,8 +141,8 @@ void create_data(const string& image_list,
int main(int argc, char** argv) {
if (argc < 4) {
- std::cout << "Create Datashard for ImageNet dataset.\n"
- << "Usage: <image_list> <input_folder> <output_folder>"
+ std::cout << "Create data stores for ImageNet dataset.\n"
+ << "Usage: <image_list_file> <input_image_folder> <output_folder>"
<< " <Optional: backend {lmdb, kvfile} default: kvfile>\n";
} else {
google::InitGoogleLogging(argv[0]);
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/bb75a0be/examples/alexnet/job.conf
----------------------------------------------------------------------
diff --git a/examples/alexnet/job.conf b/examples/alexnet/job.conf
index 1898a09..3b7eaf4 100644
--- a/examples/alexnet/job.conf
+++ b/examples/alexnet/job.conf
@@ -1,10 +1,10 @@
name: "alexnet"
-train_steps: 100
-test_steps: 0
-test_freq: 300
-disp_freq: 5
-#debug: true
-#checkpoint_path: "examples/alexnet/checkpoint/step10000-worker0"
+train_steps: 450000
+test_steps: 500
+test_freq: 1000
+disp_freq: 20
+checkpoint_freq: 100000
+checkpoint_after: 100000
train_one_batch {
alg: kBP
}
@@ -13,8 +13,12 @@ updater{
weight_decay: 0.0005
momentum: 0.9
learning_rate {
- type: kFixed
+ type: kStep
base_lr: 0.01
+ step_conf {
+ gamma: 0.1
+ change_freq: 100000
+ }
}
}
neuralnet {
@@ -25,22 +29,22 @@ neuralnet {
backend: "kvfile"
path :"/data/dataset/imagenet/train_record.bin"
mean_file: "/data/dataset/imagenet/image_mean.bin"
- batchsize: 32
- #random_skip: 5000
+ batchsize: 256
+ #random_skip: 1000
shape: 3
shape: 256
shape: 256
}
- include: kTrain
+ include: kTrain
}
layer{
name: "data"
type: kRecordInput
store_conf {
backend: "kvfile"
- path :"/data/dataset/val_record.bin"
- mean_file: "/data/dataset/image_mean.bin"
- batchsize: 256
+ path :"/data/dataset/imagenet/val_record.bin"
+ mean_file: "/data/dataset/imagenet/image_mean.bin"
+ batchsize: 100
shape: 3
shape: 256
shape: 256
@@ -59,7 +63,7 @@ neuralnet {
}
layer{
name: "conv1"
- type: kCConvolution
+ type: kConvolution
srclayers: "image"
convolution_conf {
num_filters: 96
@@ -92,7 +96,7 @@ neuralnet {
}
layer {
name: "pool1"
- type: kCPooling
+ type: kPooling
pooling_conf {
pool: MAX
kernel: 3
@@ -108,7 +112,7 @@ neuralnet {
local_size: 5
alpha: 0.0001
beta: 0.75
- knorm: 2
+ knorm: 1
}
srclayers: "pool1"
# partition_dim: 0
@@ -116,7 +120,7 @@ neuralnet {
layer{
name: "conv2"
- type: kCConvolution
+ type: kConvolution
srclayers: "norm1"
convolution_conf {
num_filters: 256
@@ -149,7 +153,7 @@ neuralnet {
}
layer {
name: "pool2"
- type: kCPooling
+ type: kPooling
pooling_conf {
pool: MAX
kernel: 3
@@ -166,14 +170,14 @@ neuralnet {
local_size: 5
alpha: 0.0001
beta: 0.75
- knorm: 2
+ knorm: 1
}
srclayers: "pool2"
# partition_dim: 0
}
layer{
name: "conv3"
- type: kCConvolution
+ type: kConvolution
srclayers: "norm2"
convolution_conf {
num_filters: 384
@@ -206,7 +210,7 @@ neuralnet {
}
layer{
name: "conv4"
- type: kCConvolution
+ type: kConvolution
srclayers: "relu3"
convolution_conf {
num_filters: 384
@@ -239,7 +243,7 @@ neuralnet {
}
layer{
name: "conv5"
- type: kCConvolution
+ type: kConvolution
srclayers: "relu4"
convolution_conf {
num_filters: 256
@@ -272,7 +276,7 @@ neuralnet {
}
layer {
name: "pool5"
- type: kCPooling
+ type: kPooling
pooling_conf {
pool: MAX
kernel: 3
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/bb75a0be/examples/alexnet/rec2im_test.cc
----------------------------------------------------------------------
diff --git a/examples/alexnet/rec2im_test.cc b/examples/alexnet/rec2im_test.cc
index 0fa3505..bb92d95 100644
--- a/examples/alexnet/rec2im_test.cc
+++ b/examples/alexnet/rec2im_test.cc
@@ -1,4 +1,28 @@
+/************************************************************
+*
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements. See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership. The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an
+* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+* KIND, either express or implied. See the License for the
+* specific language governing permissions and limitations
+* under the License.
+*
+*************************************************************/
+
+
#include <glog/logging.h>
+#include <opencv2/opencv.hpp>
+
#include <algorithm>
#include <random>
#include <chrono>
@@ -7,7 +31,6 @@
#include <cstdint>
#include <iostream>
#include <vector>
-#include <opencv2/opencv.hpp>
#include "singa/io/store.h"
#include "singa/proto/common.pb.h"
@@ -19,8 +42,7 @@ const int kImageNBytes = 256*256*3;
void generate_image(const string& output_folder,
const string& key,
- const string& val)
-{
+ const string& val) {
float image_buf[kImageNBytes];
singa::RecordProto image;
image.ParseFromString(val);
@@ -38,6 +60,7 @@ void generate_image(const string& output_folder,
static_cast<uchar>(
static_cast<uint8_t>(
pixel[(c * kImageSize + h) * kImageSize + w]));
+ }
}
cv::imwrite(image_name, img);
@@ -45,8 +68,7 @@ void generate_image(const string& output_folder,
void visualize(const string& input_file,
const string& output_folder,
- const string& id_list)
-{
+ const string& id_list) {
auto store = singa::io::OpenStore("kvfile", input_file,
singa::io::kRead);
@@ -55,7 +77,7 @@ void visualize(const string& input_file,
std::ifstream id_list_file(id_list.c_str(), std::ios::in);
CHECK(id_list_file.is_open()) << "Unable to open image id list";
string id_;
- while(id_list_file >> id_) {
+ while (id_list_file >> id_) {
int x;
x = std::stoi(id_);
image_id_list.push_back(x);
@@ -80,10 +102,9 @@ void visualize(const string& input_file,
}
}
-int main(int argc, char** argv)
-{
+int main(int argc, char** argv) {
if (argc != 4) {
- std::cout << "Visualize images from binary kvfile record.\n"
+ std::cout << "Visualize images from binary kvfile records.\n"
<< "Usage: <input_file> <output_folder> <id_list>\n";
} else {
google::InitGoogleLogging(argv[0]);
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/bb75a0be/src/neuralnet/layer.cc
----------------------------------------------------------------------
diff --git a/src/neuralnet/layer.cc b/src/neuralnet/layer.cc
index ac673dd..cb1f3b8 100644
--- a/src/neuralnet/layer.cc
+++ b/src/neuralnet/layer.cc
@@ -56,7 +56,7 @@ const std::string Layer::ToString(bool debug, int flag) {
if ((flag & kBackward) == kBackward && grad_.count() != 0) {
ret += StringPrintf("grad:%e ", Asum(grad_));
for (Param* p : GetParams())
- ret += StringPrintf("%13.9f ",
+ ret += StringPrintf("%s:%13.9f ",
p->name().c_str(), Asum(p->grad()));
}
return ret;
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/bb75a0be/src/neuralnet/neuron_layer/lrn.cc
----------------------------------------------------------------------
diff --git a/src/neuralnet/neuron_layer/lrn.cc b/src/neuralnet/neuron_layer/lrn.cc
index ce96d11..b199b9a 100644
--- a/src/neuralnet/neuron_layer/lrn.cc
+++ b/src/neuralnet/neuron_layer/lrn.cc
@@ -64,12 +64,11 @@ void LRNLayer::ComputeGradient(int flag, const vector<Layer*>& srclayers) {
auto grad = Tensor4(&grad_);
auto gsrc = Tensor4(srclayers[0]->mutable_grad(this));
- gsrc = grad * expr::F<op::power>(norm, -beta_ );
+ gsrc = grad * expr::F<op::power>(norm, -beta_);
Tensor<cpu, 4> tmp(gsrc.shape);
AllocSpace(tmp);
tmp = gsrc * src / norm;
- gsrc += ( - 2.0f * beta_ * salpha ) * expr::chpool<red::sum>(tmp, lsize_ )
- * src;
+ gsrc += (- 2.0f * beta_ * salpha) * expr::chpool<red::sum>(tmp, lsize_) * src;
FreeSpace(tmp);
}
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/bb75a0be/src/proto/job.proto
----------------------------------------------------------------------
diff --git a/src/proto/job.proto b/src/proto/job.proto
index 03ee327..db55987 100644
--- a/src/proto/job.proto
+++ b/src/proto/job.proto
@@ -192,6 +192,9 @@ message LayerProto {
optional LayerType type = 20 [default = kUserLayer];
// type of user layer
optional string user_type = 21;
+ // share data and grad blob with the single src layer, e.g., relu layer can
+ // share blobs from conv layer. It is useful for saving memory space.
+ optional bool share_src_blobs = 22 [default = false];
// overrides the partition dimension for neural net
optional int32 partition_dim = 60 [default = -1];
// names of parameters shared from other layers
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/bb75a0be/src/utils/updater.cc
----------------------------------------------------------------------
diff --git a/src/utils/updater.cc b/src/utils/updater.cc
index bb055c2..21608fa 100644
--- a/src/utils/updater.cc
+++ b/src/utils/updater.cc
@@ -60,7 +60,7 @@ float StepLRGen::Get(int step) {
// do not cast int to float
int freq = proto_.step_conf().change_freq();
float lr = proto_.base_lr() * pow(proto_.step_conf().gamma(), step / freq);
- LOG_IF(ERROR, step % freq == 0) << "Update learning rate to " << lr
+ LOG_IF(INFO, step % freq == 0) << "Update learning rate to " << lr
<< " @ step " << step;
return lr;
}