You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@singa.apache.org by wa...@apache.org on 2015/05/09 18:05:50 UTC

incubator-singa git commit: update example of training MLP on MNIST dataset (replacing lmdb with DataShard)

Repository: incubator-singa
Updated Branches:
  refs/heads/master 654d733ba -> 48b8fea59


update example of training MLP on MNIST dataset (replacing lmdb with DataShard)


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/48b8fea5
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/48b8fea5
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/48b8fea5

Branch: refs/heads/master
Commit: 48b8fea5949ff1ba0baee72deb455136c177525e
Parents: 654d733
Author: wang wei <wa...@comp.nus.edu.sg>
Authored: Sat May 9 22:50:29 2015 +0800
Committer: wang wei <wa...@comp.nus.edu.sg>
Committed: Sat May 9 22:50:29 2015 +0800

----------------------------------------------------------------------
 examples/mnist/cluster.conf    |   4 +-
 examples/mnist/create_shard.cc | 115 +++++++++++++++++++
 examples/mnist/mlp-lmdb.conf   | 223 ++++++++++++++++++++++++++++++++++++
 examples/mnist/mlp.conf        |  10 +-
 4 files changed, 345 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/48b8fea5/examples/mnist/cluster.conf
----------------------------------------------------------------------
diff --git a/examples/mnist/cluster.conf b/examples/mnist/cluster.conf
index 1953d1d..6b8a8e6 100644
--- a/examples/mnist/cluster.conf
+++ b/examples/mnist/cluster.conf
@@ -1,3 +1,5 @@
 nworker_groups: 1
 nserver_groups: 1
-workspace: "/data1/wangwei/singa/data/mnist"
+nservers_per_group: 1
+nworkers_per_group: 1
+workspace: "examples/cifar10/"

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/48b8fea5/examples/mnist/create_shard.cc
----------------------------------------------------------------------
diff --git a/examples/mnist/create_shard.cc b/examples/mnist/create_shard.cc
new file mode 100644
index 0000000..f545b80
--- /dev/null
+++ b/examples/mnist/create_shard.cc
@@ -0,0 +1,115 @@
+//
+// This code creates DataShard for MNIST dataset.
+// It is adapted from the convert_mnist_data tool in Caffe.
+//
+// Usage:
+//    create_shard.bin input_image_file input_label_file output_folder
+// The MNIST dataset could be downloaded at
+//    http://yann.lecun.com/exdb/mnist/
+
+#include <gflags/gflags.h>
+#include <glog/logging.h>
+#include <cstdint>
+#include <iostream>
+
+#include <fstream>
+#include <string>
+
+#include "utils/data_shard.h"
+#include "utils/common.h"
+#include "proto/model.pb.h"
+
+using singa::DataShard;
+using singa::WriteProtoToBinaryFile;
+using std::string;
+
+uint32_t swap_endian(uint32_t val) {  // returns val with its 4 bytes reversed
+    return (val >> 24) | ((val >> 8) & 0xFF00) |
+           ((val << 8) & 0xFF0000) | (val << 24);
+}
+
+// Reads a 32-bit header field from `in` and byte-swaps it; aborts on a short
+// read.
+static uint32_t read_header_field(std::ifstream* in) {
+  uint32_t field = 0;
+  in->read(reinterpret_cast<char*>(&field), 4);
+  CHECK(*in) << "Unexpected end of file while reading the header.";
+  return swap_endian(field);
+}
+
+// Converts an MNIST image/label file pair into a DataShard.
+//   image_filename: MNIST image file (magic number 2051)
+//   label_filename: MNIST label file (magic number 2049)
+//   output:         location passed to DataShard, opened in kCreate mode
+// Every image is inserted as one singa::Record keyed by its zero-padded index.
+// A missing, malformed or truncated input aborts the program through CHECK.
+void create_shard(const char* image_filename, const char* label_filename,
+        const char* output) {
+  std::ifstream image_file(image_filename, std::ios::in | std::ios::binary);
+  std::ifstream label_file(label_filename, std::ios::in | std::ios::binary);
+  CHECK(image_file) << "Unable to open file " << image_filename;
+  CHECK(label_file) << "Unable to open file " << label_filename;
+  // Read the magic and the meta data.
+  CHECK_EQ(read_header_field(&image_file), 2051)
+      << "Incorrect image file magic.";
+  CHECK_EQ(read_header_field(&label_file), 2049)
+      << "Incorrect label file magic.";
+  const uint32_t num_items = read_header_field(&image_file);
+  const uint32_t num_labels = read_header_field(&label_file);
+  CHECK_EQ(num_items, num_labels);
+  const uint32_t rows = read_header_field(&image_file);
+  const uint32_t cols = read_header_field(&image_file);
+
+  DataShard shard(output, DataShard::kCreate);
+  char label = 0;
+  // A string owns the pixel buffer, replacing new[] paired with scalar delete.
+  string pixels(rows * cols, '\0');
+  const int kMaxKeyLength = 10;
+  char key[kMaxKeyLength];
+  singa::Record record;
+  singa::SingleLabelImageRecord* image = record.mutable_image();
+  image->add_shape(rows);
+  image->add_shape(cols);
+  LOG(INFO) << "A total of " << num_items << " items.";
+  LOG(INFO) << "Rows: " << rows << " Cols: " << cols;
+  for (uint32_t item_id = 0; item_id < num_items; ++item_id) {
+    image_file.read(&pixels[0], pixels.size());
+    label_file.read(&label, 1);
+    // Stop instead of silently storing stale pixels from a short file.
+    CHECK(image_file && label_file) << "Truncated input at item " << item_id;
+    image->set_pixel(pixels.data(), pixels.size());
+    image->set_label(label);
+    snprintf(key, kMaxKeyLength, "%08u", item_id);
+    shard.Insert(string(key), record);
+  }
+  shard.Flush();
+}
+
+// Prints usage and exits with status 1 unless exactly 3 arguments are given.
+int main(int argc, char** argv) {
+/*
+#ifndef GFLAGS_GFLAGS_H_
+  namespace gflags = google;
+#endif
+  gflags::SetUsageMessage("This program create a DataShard for a MNIST dataset\n"
+        "Usage:\n"
+        "    create_shard.bin  input_image_file input_label_file output_db_file\n"
+        "The MNIST dataset could be downloaded at\n"
+        "    http://yann.lecun.com/exdb/mnist/\n"
+        "You should gunzip them after downloading.");
+  gflags::ParseCommandLineFlags(&argc, &argv, true);
+    gflags::ShowUsageWithFlagsRestrict(argv[0],
+        "examples/mnist/create_shard.bin");
+*/
+  if (argc != 4) {
+    std::cout<<"This program create a DataShard for a MNIST dataset\n"
+        "Usage:\n"
+        "    create_shard.bin  input_image_file input_label_file output_db_file\n"
+        "The MNIST dataset could be downloaded at\n"
+        "    http://yann.lecun.com/exdb/mnist/\n"
+        "You should gunzip them after downloading.\n";
+    return 1;  // non-zero status so calling scripts can detect the misuse
+  }
+  google::InitGoogleLogging(argv[0]);
+  create_shard(argv[1], argv[2], argv[3]);
+  return 0;
+}

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/48b8fea5/examples/mnist/mlp-lmdb.conf
----------------------------------------------------------------------
diff --git a/examples/mnist/mlp-lmdb.conf b/examples/mnist/mlp-lmdb.conf
new file mode 100644
index 0000000..d0ed08f
--- /dev/null
+++ b/examples/mnist/mlp-lmdb.conf
@@ -0,0 +1,223 @@
+name: "deep-big-simple-mlp"
+train_steps: 10000
+test_steps:10
+test_frequency:60
+display_frequency:30
+checkpoint_frequency:120
+updater{
+  base_learning_rate: 0.001
+  learning_rate_change_method: kStep
+  learning_rate_change_frequency: 60
+  gamma: 0.997
+  param_type: "Param"
+}
+
+neuralnet {
+layer {
+  name: "data"
+  type: "kLMDBData"
+  data_param {
+    path: "/home/wangwei/program/singa/examples/mnist/mnist_train_lmdb"
+    batchsize: 1000
+    random_skip: 10000
+  }
+  exclude: kTest
+}
+
+layer {
+  name: "data"
+  type: "kLMDBData"
+  data_param {
+    path: "/home/wangwei/program/singa/examples/mnist/mnist_test_lmdb"
+    batchsize: 1000
+  }
+  exclude: kTrain
+}
+
+layer{
+  name:"mnist"
+  type: "kMnistImage"
+  srclayers: "data"
+  mnist_param {
+#    sigma: 6
+#    alpha: 38
+#    gamma: 15
+#    kernel: 21
+#    elastic_freq:100
+#    beta:15
+#    resize: 29
+    norm_a: 127.5
+    norm_b: 1
+  }
+}
+
+
+layer{
+  name: "label"
+  type: "kLabel"
+  srclayers: "data"
+}
+
+layer{
+  name: "fc1"
+  type: "kInnerProduct"
+  srclayers:"mnist"
+  inner_product_param{
+    num_output: 2500
+  }
+  param{
+    name: "weight"
+    init_method: kUniform
+    low:-0.05
+    high:0.05
+  }
+  param{
+    name: "bias"
+    init_method: kUniform
+    low: -0.05
+    high:0.05
+  }
+}
+
+layer{
+  name: "tanh1"
+  type:"kTanh"
+  srclayers:"fc1"
+}
+layer{
+  name: "fc2"
+  type: "kInnerProduct"
+  srclayers:"tanh1"
+  inner_product_param{
+    num_output: 2000
+  }
+  param{
+    name: "weight"
+    init_method: kUniform
+    low:-0.05
+    high:0.05
+  }
+  param{
+    name: "bias"
+    init_method: kUniform
+    low: -0.05
+    high:0.05
+  }
+}
+
+layer{
+  name: "tanh2"
+  type:"kTanh"
+  srclayers:"fc2"
+}
+layer{
+  name: "fc3"
+  type: "kInnerProduct"
+  srclayers:"tanh2"
+  inner_product_param{
+    num_output: 1500
+  }
+  param{
+    name: "weight"
+    init_method: kUniform
+    low:-0.05
+    high:0.05
+  }
+  param{
+    name: "bias"
+    init_method: kUniform
+    low: -0.05
+    high:0.05
+  }
+
+}
+
+layer{
+  name: "tanh3"
+  type:"kTanh"
+  srclayers:"fc3"
+}
+layer{
+  name: "fc4"
+  type: "kInnerProduct"
+  srclayers:"tanh3"
+  inner_product_param{
+    num_output: 1000
+  }
+  param{
+    name: "weight"
+    init_method: kUniform
+    low:-0.05
+    high:0.05
+  }
+  param{
+    name: "bias"
+    init_method: kUniform
+    low: -0.05
+    high:0.05
+  }
+
+}
+
+layer{
+  name: "tanh4"
+  type:"kTanh"
+  srclayers:"fc4"
+}
+layer{
+  name: "fc5"
+  type: "kInnerProduct"
+  srclayers:"tanh4"
+  inner_product_param{
+    num_output: 500
+  }
+  param{
+    name: "weight"
+    init_method: kUniform
+    low:-0.05
+    high:0.05
+  }
+  param{
+    name: "bias"
+    init_method: kUniform
+    low: -0.05
+    high:0.05
+  }
+
+}
+
+layer{
+  name: "tanh5"
+  type:"kTanh"
+  srclayers:"fc5"
+}
+layer{
+  name: "fc6"
+  type: "kInnerProduct"
+  srclayers:"tanh5"
+  inner_product_param{
+    num_output: 10
+  }
+  param{
+    name: "weight"
+    init_method: kUniform
+    low:-0.05
+    high:0.05
+  }
+  param{
+    name: "bias"
+    init_method: kUniform
+    low: -0.05
+    high:0.05
+  }
+}
+layer{
+  name: "loss"
+  type:"kSoftmaxLoss"
+  softmaxloss_param{
+    topk:1
+  }
+  srclayers:"fc6"
+  srclayers:"label"
+}
+}

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/48b8fea5/examples/mnist/mlp.conf
----------------------------------------------------------------------
diff --git a/examples/mnist/mlp.conf b/examples/mnist/mlp.conf
index d0ed08f..9eeb1c6 100644
--- a/examples/mnist/mlp.conf
+++ b/examples/mnist/mlp.conf
@@ -3,7 +3,6 @@ train_steps: 10000
 test_steps:10
 test_frequency:60
 display_frequency:30
-checkpoint_frequency:120
 updater{
   base_learning_rate: 0.001
   learning_rate_change_method: kStep
@@ -15,20 +14,19 @@ updater{
 neuralnet {
 layer {
   name: "data"
-  type: "kLMDBData"
+  type: "kShardData"
   data_param {
-    path: "/home/wangwei/program/singa/examples/mnist/mnist_train_lmdb"
+    path: "examples/mnist/mnist_train_shard"
     batchsize: 1000
-    random_skip: 10000
   }
   exclude: kTest
 }
 
 layer {
   name: "data"
-  type: "kLMDBData"
+  type: "kShardData"
   data_param {
-    path: "/home/wangwei/program/singa/examples/mnist/mnist_test_lmdb"
+    path: "examples/mnist/mnist_test_shard"
     batchsize: 1000
   }
   exclude: kTrain