You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@singa.apache.org by wa...@apache.org on 2015/05/09 18:05:50 UTC
incubator-singa git commit: update example of training MLP on MNIST
dataset (replacing lmdb with DataShard)
Repository: incubator-singa
Updated Branches:
refs/heads/master 654d733ba -> 48b8fea59
update example of training MLP on MNIST dataset (replacing lmdb with DataShard)
Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/48b8fea5
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/48b8fea5
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/48b8fea5
Branch: refs/heads/master
Commit: 48b8fea5949ff1ba0baee72deb455136c177525e
Parents: 654d733
Author: wang wei <wa...@comp.nus.edu.sg>
Authored: Sat May 9 22:50:29 2015 +0800
Committer: wang wei <wa...@comp.nus.edu.sg>
Committed: Sat May 9 22:50:29 2015 +0800
----------------------------------------------------------------------
examples/mnist/cluster.conf | 4 +-
examples/mnist/create_shard.cc | 115 +++++++++++++++++++
examples/mnist/mlp-lmdb.conf | 223 ++++++++++++++++++++++++++++++++++++
examples/mnist/mlp.conf | 10 +-
4 files changed, 345 insertions(+), 7 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/48b8fea5/examples/mnist/cluster.conf
----------------------------------------------------------------------
diff --git a/examples/mnist/cluster.conf b/examples/mnist/cluster.conf
index 1953d1d..6b8a8e6 100644
--- a/examples/mnist/cluster.conf
+++ b/examples/mnist/cluster.conf
@@ -1,3 +1,5 @@
nworker_groups: 1
nserver_groups: 1
-workspace: "/data1/wangwei/singa/data/mnist"
+nservers_per_group: 1
+nworkers_per_group: 1
+workspace: "examples/mnist/"
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/48b8fea5/examples/mnist/create_shard.cc
----------------------------------------------------------------------
diff --git a/examples/mnist/create_shard.cc b/examples/mnist/create_shard.cc
new file mode 100644
index 0000000..f545b80
--- /dev/null
+++ b/examples/mnist/create_shard.cc
@@ -0,0 +1,115 @@
+//
+// This code creates a DataShard for the MNIST dataset.
+// It is adapted from the convert_mnist_data tool in Caffe.
+//
+// Usage:
+// create_shard.bin input_image_file input_label_file output_folder
+// The MNIST dataset could be downloaded at
+// http://yann.lecun.com/exdb/mnist/
+
+#include <gflags/gflags.h>
+#include <glog/logging.h>
+#include <cstdint>
+#include <iostream>
+
+#include <fstream>
+#include <string>
+
+#include "utils/data_shard.h"
+#include "utils/common.h"
+#include "proto/model.pb.h"
+
+using singa::DataShard;
+using singa::WriteProtoToBinaryFile;
+using std::string;
+
+uint32_t swap_endian(uint32_t val) {  // returns val with its 4 bytes reversed
+  val = ((val & 0x00FF00FF) << 8) | ((val & 0xFF00FF00) >> 8);
+  return (val >> 16) | (val << 16);
+}
+
+// Reads an MNIST image file and its label file and writes one Record per image
+// (keyed by the zero-padded item index) into a DataShard created at `output`.
+void create_shard(const char* image_filename, const char* label_filename,
+    const char* output) {
+  // Open files
+  std::ifstream image_file(image_filename, std::ios::in | std::ios::binary);
+  std::ifstream label_file(label_filename, std::ios::in | std::ios::binary);
+  CHECK(image_file) << "Unable to open file " << image_filename;
+  CHECK(label_file) << "Unable to open file " << label_filename;
+  // Read the magic and the meta data; each header word is byte-swapped on read.
+  uint32_t magic = 0;
+  uint32_t num_items = 0;
+  uint32_t num_labels = 0;
+  uint32_t rows = 0;
+  uint32_t cols = 0;
+  image_file.read(reinterpret_cast<char*>(&magic), 4);
+  magic = swap_endian(magic);
+  CHECK_EQ(magic, 2051) << "Incorrect image file magic.";
+  label_file.read(reinterpret_cast<char*>(&magic), 4);
+  magic = swap_endian(magic);
+  CHECK_EQ(magic, 2049) << "Incorrect label file magic.";
+  image_file.read(reinterpret_cast<char*>(&num_items), 4);
+  num_items = swap_endian(num_items);
+  label_file.read(reinterpret_cast<char*>(&num_labels), 4);
+  num_labels = swap_endian(num_labels);
+  CHECK_EQ(num_items, num_labels);
+  image_file.read(reinterpret_cast<char*>(&rows), 4);
+  rows = swap_endian(rows);
+  image_file.read(reinterpret_cast<char*>(&cols), 4);
+  cols = swap_endian(cols);
+  CHECK(image_file && label_file) << "Truncated MNIST header.";
+
+  DataShard shard(output, DataShard::kCreate);
+  char label;
+  // A std::string owns the pixel buffer, so no manual delete[] is needed.
+  string pixels(static_cast<size_t>(rows) * cols, '\0');
+  const int kMaxKeyLength = 10;
+  char key[kMaxKeyLength];
+  singa::Record record;
+  singa::SingleLabelImageRecord* image = record.mutable_image();
+  image->add_shape(rows);
+  image->add_shape(cols);
+  LOG(INFO) << "A total of " << num_items << " items.";
+  LOG(INFO) << "Rows: " << rows << " Cols: " << cols;
+  for (uint32_t item_id = 0; item_id < num_items; ++item_id) {
+    image_file.read(&pixels[0], pixels.size());
+    label_file.read(&label, 1);
+    CHECK(image_file && label_file) << "Truncated data at item " << item_id;
+    image->set_pixel(pixels.data(), pixels.size());
+    image->set_label(label);
+    snprintf(key, kMaxKeyLength, "%08u", item_id);
+    shard.Insert(string(key), record);
+  }
+  shard.Flush();
+}
+
+int main(int argc, char** argv) {
+/*
+#ifndef GFLAGS_GFLAGS_H_
+ namespace gflags = google;
+#endif
+ gflags::SetUsageMessage("This program create a DataShard for a MNIST dataset\n"
+ "Usage:\n"
+ " create_shard.bin input_image_file input_label_file output_db_file\n"
+ "The MNIST dataset could be downloaded at\n"
+ " http://yann.lecun.com/exdb/mnist/\n"
+ "You should gunzip them after downloading.");
+ gflags::ParseCommandLineFlags(&argc, &argv, true);
+ gflags::ShowUsageWithFlagsRestrict(argv[0],
+ "examples/mnist/create_shard.bin");
+*/
+  // Exactly three arguments are required; otherwise print usage and fail.
+  if (argc != 4) {
+    std::cout << "This program creates a DataShard for a MNIST dataset\n"
+        "Usage:\n"
+        "    create_shard.bin input_image_file input_label_file output_folder\n"
+        "The MNIST dataset could be downloaded at\n"
+        "    http://yann.lecun.com/exdb/mnist/\n"
+        "You should gunzip them after downloading.\n";
+    return 1;
+  }
+  google::InitGoogleLogging(argv[0]);
+  create_shard(argv[1], argv[2], argv[3]);
+  return 0;
+}
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/48b8fea5/examples/mnist/mlp-lmdb.conf
----------------------------------------------------------------------
diff --git a/examples/mnist/mlp-lmdb.conf b/examples/mnist/mlp-lmdb.conf
new file mode 100644
index 0000000..d0ed08f
--- /dev/null
+++ b/examples/mnist/mlp-lmdb.conf
@@ -0,0 +1,223 @@
+name: "deep-big-simple-mlp"
+train_steps: 10000
+test_steps:10
+test_frequency:60
+display_frequency:30
+checkpoint_frequency:120
+updater{
+ base_learning_rate: 0.001
+ learning_rate_change_method: kStep
+ learning_rate_change_frequency: 60
+ gamma: 0.997
+ param_type: "Param"
+}
+
+neuralnet {
+layer {
+ name: "data"
+ type: "kLMDBData"
+ data_param {
+ path: "/home/wangwei/program/singa/examples/mnist/mnist_train_lmdb"
+ batchsize: 1000
+ random_skip: 10000
+ }
+ exclude: kTest
+}
+
+layer {
+ name: "data"
+ type: "kLMDBData"
+ data_param {
+ path: "/home/wangwei/program/singa/examples/mnist/mnist_test_lmdb"
+ batchsize: 1000
+ }
+ exclude: kTrain
+}
+
+layer{
+ name:"mnist"
+ type: "kMnistImage"
+ srclayers: "data"
+ mnist_param {
+# sigma: 6
+# alpha: 38
+# gamma: 15
+# kernel: 21
+# elastic_freq:100
+# beta:15
+# resize: 29
+ norm_a: 127.5
+ norm_b: 1
+ }
+}
+
+
+layer{
+ name: "label"
+ type: "kLabel"
+ srclayers: "data"
+}
+
+layer{
+ name: "fc1"
+ type: "kInnerProduct"
+ srclayers:"mnist"
+ inner_product_param{
+ num_output: 2500
+ }
+ param{
+ name: "weight"
+ init_method: kUniform
+ low:-0.05
+ high:0.05
+ }
+ param{
+ name: "bias"
+ init_method: kUniform
+ low: -0.05
+ high:0.05
+ }
+}
+
+layer{
+ name: "tanh1"
+ type:"kTanh"
+ srclayers:"fc1"
+}
+layer{
+ name: "fc2"
+ type: "kInnerProduct"
+ srclayers:"tanh1"
+ inner_product_param{
+ num_output: 2000
+ }
+ param{
+ name: "weight"
+ init_method: kUniform
+ low:-0.05
+ high:0.05
+ }
+ param{
+ name: "bias"
+ init_method: kUniform
+ low: -0.05
+ high:0.05
+ }
+}
+
+layer{
+ name: "tanh2"
+ type:"kTanh"
+ srclayers:"fc2"
+}
+layer{
+ name: "fc3"
+ type: "kInnerProduct"
+ srclayers:"tanh2"
+ inner_product_param{
+ num_output: 1500
+ }
+ param{
+ name: "weight"
+ init_method: kUniform
+ low:-0.05
+ high:0.05
+ }
+ param{
+ name: "bias"
+ init_method: kUniform
+ low: -0.05
+ high:0.05
+ }
+
+}
+
+layer{
+ name: "tanh3"
+ type:"kTanh"
+ srclayers:"fc3"
+}
+layer{
+ name: "fc4"
+ type: "kInnerProduct"
+ srclayers:"tanh3"
+ inner_product_param{
+ num_output: 1000
+ }
+ param{
+ name: "weight"
+ init_method: kUniform
+ low:-0.05
+ high:0.05
+ }
+ param{
+ name: "bias"
+ init_method: kUniform
+ low: -0.05
+ high:0.05
+ }
+
+}
+
+layer{
+ name: "tanh4"
+ type:"kTanh"
+ srclayers:"fc4"
+}
+layer{
+ name: "fc5"
+ type: "kInnerProduct"
+ srclayers:"tanh4"
+ inner_product_param{
+ num_output: 500
+ }
+ param{
+ name: "weight"
+ init_method: kUniform
+ low:-0.05
+ high:0.05
+ }
+ param{
+ name: "bias"
+ init_method: kUniform
+ low: -0.05
+ high:0.05
+ }
+
+}
+
+layer{
+ name: "tanh5"
+ type:"kTanh"
+ srclayers:"fc5"
+}
+layer{
+ name: "fc6"
+ type: "kInnerProduct"
+ srclayers:"tanh5"
+ inner_product_param{
+ num_output: 10
+ }
+ param{
+ name: "weight"
+ init_method: kUniform
+ low:-0.05
+ high:0.05
+ }
+ param{
+ name: "bias"
+ init_method: kUniform
+ low: -0.05
+ high:0.05
+ }
+}
+layer{
+ name: "loss"
+ type:"kSoftmaxLoss"
+ softmaxloss_param{
+ topk:1
+ }
+ srclayers:"fc6"
+ srclayers:"label"
+}
+}
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/48b8fea5/examples/mnist/mlp.conf
----------------------------------------------------------------------
diff --git a/examples/mnist/mlp.conf b/examples/mnist/mlp.conf
index d0ed08f..9eeb1c6 100644
--- a/examples/mnist/mlp.conf
+++ b/examples/mnist/mlp.conf
@@ -3,7 +3,6 @@ train_steps: 10000
test_steps:10
test_frequency:60
display_frequency:30
-checkpoint_frequency:120
updater{
base_learning_rate: 0.001
learning_rate_change_method: kStep
@@ -15,20 +14,19 @@ updater{
neuralnet {
layer {
name: "data"
- type: "kLMDBData"
+ type: "kShardData"
data_param {
- path: "/home/wangwei/program/singa/examples/mnist/mnist_train_lmdb"
+ path: "examples/mnist/mnist_train_shard"
batchsize: 1000
- random_skip: 10000
}
exclude: kTest
}
layer {
name: "data"
- type: "kLMDBData"
+ type: "kShardData"
data_param {
- path: "/home/wangwei/program/singa/examples/mnist/mnist_test_lmdb"
+ path: "examples/mnist/mnist_test_shard"
batchsize: 1000
}
exclude: kTrain