Posted to commits@singa.apache.org by wa...@apache.org on 2016/01/02 16:20:02 UTC

[1/7] incubator-singa git commit: SINGA-97 Add HDFS Store

Repository: incubator-singa
Updated Branches:
  refs/heads/master 9ff176c30 -> 714fd2cb3


SINGA-97 Add HDFS Store

Check with cpplint
Remove 'make compile' option in Makefile.example
Add check for unrecognized backend type


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/714fd2cb
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/714fd2cb
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/714fd2cb

Branch: refs/heads/master
Commit: 714fd2cb35484a682dec8c01b9f80c88339b767d
Parents: befe5ca
Author: WANG Sheng <wa...@gmail.com>
Authored: Sat Jan 2 19:22:31 2016 +0800
Committer: WANG Sheng <wa...@gmail.com>
Committed: Sat Jan 2 19:58:14 2016 +0800

----------------------------------------------------------------------
 examples/cifar10/Makefile.example |  8 +-------
 examples/cifar10/create_data.cc   |  3 ++-
 examples/mnist/Makefile.example   |  8 +-------
 examples/mnist/create_data.cc     |  5 +++--
 include/singa/io/kvfile_store.h   |  2 +-
 include/singa/io/textfile_store.h |  2 +-
 src/io/kvfile_store.cc            |  2 +-
 src/io/store.cc                   | 16 ++++++++++------
 8 files changed, 20 insertions(+), 26 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/714fd2cb/examples/cifar10/Makefile.example
----------------------------------------------------------------------
diff --git a/examples/cifar10/Makefile.example b/examples/cifar10/Makefile.example
index 16dc052..72c326b 100644
--- a/examples/cifar10/Makefile.example
+++ b/examples/cifar10/Makefile.example
@@ -30,11 +30,6 @@ cifar-10-binary-bin:
 	wget http://www.cs.toronto.edu/~kriz/cifar-10-binary.tar.gz
 	tar xf cifar-10-binary.tar.gz
 
-compile:
-	$(CXX) create_data.cc -std=c++11 -lsinga -lprotobuf -lglog -lhdfs3 \
-		-I../../include -L../../.libs/ -Wl,-unresolved-symbols=ignore-in-shared-libs \
-		-Wl,-rpath=../../.libs/  -o create_data.bin
-
 create:
 	$(CXX) create_data.cc -std=c++11 -lsinga -lprotobuf -lglog \
 		-I../../include -L../../.libs/ -Wl,-unresolved-symbols=ignore-in-shared-libs \
@@ -42,8 +37,7 @@ create:
 	./create_data.bin cifar-10-batches-bin .
 
 create_hdfs:
-	$(CXX) create_data.cc -std=c++11 -lsinga -lprotobuf -lglog \
+	$(CXX) create_data.cc -std=c++11 -lsinga -lprotobuf -lglog -lhdfs3 \
 		-I../../include -L../../.libs/ -Wl,-unresolved-symbols=ignore-in-shared-libs \
 		-Wl,-rpath=../../.libs/  -o create_data.bin
 	./create_data.bin cifar-10-batches-bin $(HDFS_DIR) 
-

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/714fd2cb/examples/cifar10/create_data.cc
----------------------------------------------------------------------
diff --git a/examples/cifar10/create_data.cc b/examples/cifar10/create_data.cc
index 37c58f6..5564c38 100644
--- a/examples/cifar10/create_data.cc
+++ b/examples/cifar10/create_data.cc
@@ -69,7 +69,8 @@ void create_data(const string& input_folder, const string& output_folder) {
   for (int i = 0; i < kCIFARImageNBytes; i++)
     mean.add_data(0.f);
 
-  string store_backend = (output_folder.find("hdfs")!=-1) ? "hdfsfile" : "kvfile";  
+  string store_backend = output_folder.find("hdfs") !=-1 ?
+                         "hdfsfile" : "kvfile";
   auto store = singa::io::CreateStore(store_backend);
   CHECK(store->Open(output_folder + "/train_data.bin", singa::io::kCreate));
   LOG(INFO) << "Preparing training data";

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/714fd2cb/examples/mnist/Makefile.example
----------------------------------------------------------------------
diff --git a/examples/mnist/Makefile.example b/examples/mnist/Makefile.example
index 8c1c838..7b516e7 100644
--- a/examples/mnist/Makefile.example
+++ b/examples/mnist/Makefile.example
@@ -26,7 +26,6 @@ libs :=singa glog protobuf
 HDFS_MNIST_TRAIN := hdfs://node0:9000/examples/mnist/train_data.bin
 HDFS_MNIST_TEST := hdfs://node0:9000/examples/mnist/test_data.bin
 
-
 download: mnist
 
 mnist:
@@ -37,11 +36,6 @@ mnist:
 	gunzip train-images-idx3-ubyte.gz && gunzip train-labels-idx1-ubyte.gz
 	gunzip t10k-images-idx3-ubyte.gz && gunzip t10k-labels-idx1-ubyte.gz
 
-compile:
-	$(CXX) create_data.cc -std=c++11 -lsinga -lprotobuf -lglog -lhdfs3 -I../../include \
-		-L../../.libs/ -Wl,-unresolved-symbols=ignore-in-shared-libs -Wl,-rpath=../../.libs/ \
-		-o create_data.bin
-
 create:
 	$(CXX) create_data.cc -std=c++11 -lsinga -lprotobuf -lglog -I../../include \
 		-L../../.libs/ -Wl,-unresolved-symbols=ignore-in-shared-libs -Wl,-rpath=../../.libs/ \
@@ -50,7 +44,7 @@ create:
 	./create_data.bin t10k-images-idx3-ubyte t10k-labels-idx1-ubyte test_data.bin
 
 create_hdfs:
-	$(CXX) create_data.cc -std=c++11 -lsinga -lprotobuf -lglog -I../../include \
+	$(CXX) create_data.cc -std=c++11 -lsinga -lprotobuf -lglog -lhdfs3 -I../../include \
 		-L../../.libs/ -Wl,-unresolved-symbols=ignore-in-shared-libs -Wl,-rpath=../../.libs/ \
 		-o create_data.bin
 	./create_data.bin train-images-idx3-ubyte train-labels-idx1-ubyte $(HDFS_MNIST_TRAIN)

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/714fd2cb/examples/mnist/create_data.cc
----------------------------------------------------------------------
diff --git a/examples/mnist/create_data.cc b/examples/mnist/create_data.cc
index ff166b4..34c287f 100644
--- a/examples/mnist/create_data.cc
+++ b/examples/mnist/create_data.cc
@@ -79,8 +79,9 @@ void create_data(const char* image_filename, const char* label_filename,
   cols = swap_endian(cols);
 
   // read backend from the job.conf
-  string store_backend = (string(output).find("hdfs")!=-1) ? "hdfsfile" : "kvfile";  
-	auto store = singa::io::OpenStore(store_backend, output, singa::io::kCreate);
+  string store_backend = string(output).find("hdfs") != -1 ?
+                         "hdfsfile" : "kvfile";
+  auto store = singa::io::OpenStore(store_backend, output, singa::io::kCreate);
   char label;
   char* pixels = new char[rows * cols];
   int count = 0;

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/714fd2cb/include/singa/io/kvfile_store.h
----------------------------------------------------------------------
diff --git a/include/singa/io/kvfile_store.h b/include/singa/io/kvfile_store.h
index 73e4127..50b8f4f 100644
--- a/include/singa/io/kvfile_store.h
+++ b/include/singa/io/kvfile_store.h
@@ -41,7 +41,7 @@ class KVFileStore : public Store {
   void Close() override;
   bool Read(std::string* key, std::string* value) override;
   void SeekToFirst() override;
-  void Seek(int offset) override; 
+  void Seek(int offset) override;
   bool Write(const std::string& key, const std::string& value) override;
   void Flush() override;
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/714fd2cb/include/singa/io/textfile_store.h
----------------------------------------------------------------------
diff --git a/include/singa/io/textfile_store.h b/include/singa/io/textfile_store.h
index 2cc6571..83bcbfa 100644
--- a/include/singa/io/textfile_store.h
+++ b/include/singa/io/textfile_store.h
@@ -41,7 +41,7 @@ class TextFileStore : public Store {
   void Close() override;
   bool Read(std::string* key, std::string* value) override;
   void SeekToFirst() override;
-  void Seek(int offset) override; 
+  void Seek(int offset) override;
   bool Write(const std::string& key, const std::string& value) override;
   void Flush() override;
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/714fd2cb/src/io/kvfile_store.cc
----------------------------------------------------------------------
diff --git a/src/io/kvfile_store.cc b/src/io/kvfile_store.cc
index e77f49b..a2a40cd 100644
--- a/src/io/kvfile_store.cc
+++ b/src/io/kvfile_store.cc
@@ -56,7 +56,7 @@ void KVFileStore::SeekToFirst() {
   file_->SeekToFirst();
 }
 
-void KVFileStore::Seek(int offset){
+void KVFileStore::Seek(int offset) {
   LOG(FATAL) << "Operation not supported.";
 }
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/714fd2cb/src/io/store.cc
----------------------------------------------------------------------
diff --git a/src/io/store.cc b/src/io/store.cc
index f2a4404..4621772 100644
--- a/src/io/store.cc
+++ b/src/io/store.cc
@@ -20,13 +20,15 @@
 *************************************************************/
 
 #include "singa/io/store.h"
+#include <glog/logging.h>
 #include "singa/io/kvfile_store.h"
 #include "singa/io/textfile_store.h"
 #ifdef USE_HDFS
 #include "singa/io/hdfs_store.h"
 #endif
 
-namespace singa { namespace io {
+namespace singa {
+namespace io {
 
 Store* CreateStore(const std::string& backend) {
   Store *store = nullptr;
@@ -38,21 +40,23 @@ Store* CreateStore(const std::string& backend) {
 
 #ifdef USE_LMDB
   if (backend == "lmdb") {
-    return new LMDBStore();
+    store = new LMDBStore();
   }
 #endif
 
 #ifdef USE_OPENCV
   if (backend == "imagefolder") {
-    store =  new ImageFolderStore();
+    store = new ImageFolderStore();
   }
 #endif
 
 #ifdef USE_HDFS
   if (backend == "hdfsfile") {
-    store =  new HDFSStore();
+    store = new HDFSStore();
   }
 #endif
+
+  CHECK(store) << "Backend type (" << backend << ") not recognized";
   return store;
 }
 
@@ -61,6 +65,6 @@ Store* OpenStore(const string& backend, const string& path, Mode mode) {
   store->Open(path, mode);
   return store;
 }
-} /* io */
 
-} /* singa */
+}  // namespace io
+}  // namespace singa


[3/7] incubator-singa git commit: SINGA-97 Add HDFS Store

Posted by wa...@apache.org.
SINGA-97 Add HDFS Store

Revert changes to the examples directory, assuming that users upload data
to HDFS manually.


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/374f11d6
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/374f11d6
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/374f11d6

Branch: refs/heads/master
Commit: 374f11d62e83df6b681c36f9557b6825ab8c236a
Parents: 8a07a29
Author: Anh Dinh <ug...@gmail.com>
Authored: Mon Dec 28 15:51:40 2015 +0800
Committer: WANG Sheng <wa...@gmail.com>
Committed: Sat Jan 2 19:58:14 2016 +0800

----------------------------------------------------------------------
 Makefile.am                       |  2 +-
 examples/cifar10/Makefile.example |  5 -----
 examples/cifar10/create_data.cc   | 30 ++++++++++++------------------
 examples/mnist/Makefile.example   |  5 -----
 examples/mnist/create_data.cc     | 13 +------------
 include/singa/io/hdfsfile.h       |  2 +-
 src/io/kvfile_store.cc            |  4 +++-
 7 files changed, 18 insertions(+), 43 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/374f11d6/Makefile.am
----------------------------------------------------------------------
diff --git a/Makefile.am b/Makefile.am
index d78a150..cc308f7 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -33,7 +33,7 @@ CFLAGS += $(DEBUG)
 CXXFLAGS += $(DEBUG)
 AC_CXXFLAGS = $(DEBUG)
 
-INCLUDES = -I$(top_srcdir)/include -I/usr/local/include/hdfs
+INCLUDES = -I$(top_srcdir)/include -I/usr/local/include
 
 PROTOS := $(top_srcdir)/src/proto/singa.proto \
           $(top_srcdir)/src/proto/job.proto \

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/374f11d6/examples/cifar10/Makefile.example
----------------------------------------------------------------------
diff --git a/examples/cifar10/Makefile.example b/examples/cifar10/Makefile.example
index 775e165..dd65d7d 100644
--- a/examples/cifar10/Makefile.example
+++ b/examples/cifar10/Makefile.example
@@ -28,11 +28,6 @@ cifar-10-binary-bin:
 	wget http://www.cs.toronto.edu/~kriz/cifar-10-binary.tar.gz
 	tar xf cifar-10-binary.tar.gz
 
-compile:
-	$(CXX) create_data.cc -std=c++11 -lsinga -lprotobuf -lglog -lhdfs3 \
-		-I../../include -L../../.libs/ -Wl,-unresolved-symbols=ignore-in-shared-libs \
-		-Wl,-rpath=../../.libs/  -o create_data.bin
-
 create:
 	$(CXX) create_data.cc -std=c++11 -lsinga -lprotobuf -lglog \
 		-I../../include -L../../.libs/ -Wl,-unresolved-symbols=ignore-in-shared-libs \

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/374f11d6/examples/cifar10/create_data.cc
----------------------------------------------------------------------
diff --git a/examples/cifar10/create_data.cc b/examples/cifar10/create_data.cc
index 05169d8..5873c0e 100644
--- a/examples/cifar10/create_data.cc
+++ b/examples/cifar10/create_data.cc
@@ -19,16 +19,16 @@
 *
 *************************************************************/
 
-
-/**
- * Create training and test DataShard for CIFAR dataset. 
- * It is adapted from convert_cifar_data from Caffe. 
- *    create_shard.bin <input> <output_folder> 
- * 
- * Read from JobConf object the option to use KVfile, HDFS or other (1st layer
- * store_conf object). 
- * To load to HDFS, specify "hdfs://namenode/examples" as the output folder
- */
+//
+// This code creates training and test DataShard for CIFAR dataset.
+// It is adapted from the convert_cifar_data from Caffe
+//
+// Usage:
+//    create_shard.bin input_folder output_folder
+//
+// The CIFAR dataset could be downloaded at
+//    http://www.cs.toronto.edu/~kriz/cifar.html
+//
 
 #include <glog/logging.h>
 #include <fstream>
@@ -38,8 +38,6 @@
 
 #include "singa/io/store.h"
 #include "singa/proto/common.pb.h"
-#include "singa/utils/common.h"
-#include "singa/proto/job.pb.h"
 
 using std::string;
 
@@ -47,7 +45,6 @@ const int kCIFARSize = 32;
 const int kCIFARImageNBytes = 3072;
 const int kCIFARBatchSize = 10000;
 const int kCIFARTrainBatches = 5;
-const char JOB_CONFIG[] = "job.conf";
 
 void read_image(std::ifstream* file, int* label, char* buffer) {
   char label_char;
@@ -61,6 +58,7 @@ void create_data(const string& input_folder, const string& output_folder) {
   int label;
   char str_buffer[kCIFARImageNBytes];
   string rec_buf;
+
   singa::RecordProto image;
   image.add_shape(3);
   image.add_shape(kCIFARSize);
@@ -71,11 +69,7 @@ void create_data(const string& input_folder, const string& output_folder) {
   for (int i = 0; i < kCIFARImageNBytes; i++)
     mean.add_data(0.f);
 
-  singa::JobProto job_proto;
-  singa::ReadProtoFromTextFile(JOB_CONFIG, &job_proto);
-  string store_backend =
-        job_proto.neuralnet().layer(0).store_conf().backend();
-  auto store = singa::io::CreateStore(store_backend);
+  auto store = singa::io::CreateStore("kvfile");
   CHECK(store->Open(output_folder + "/train_data.bin", singa::io::kCreate));
   LOG(INFO) << "Preparing training data";
   int count = 0;

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/374f11d6/examples/mnist/Makefile.example
----------------------------------------------------------------------
diff --git a/examples/mnist/Makefile.example b/examples/mnist/Makefile.example
index ba2308b..733633d 100644
--- a/examples/mnist/Makefile.example
+++ b/examples/mnist/Makefile.example
@@ -33,11 +33,6 @@ mnist:
 	gunzip train-images-idx3-ubyte.gz && gunzip train-labels-idx1-ubyte.gz
 	gunzip t10k-images-idx3-ubyte.gz && gunzip t10k-labels-idx1-ubyte.gz
 
-compile:
-	$(CXX) create_data.cc -std=c++11 -lsinga -lprotobuf -lglog -lhdfs3 -I../../include \
-		-L../../.libs/ -Wl,-unresolved-symbols=ignore-in-shared-libs -Wl,-rpath=../../.libs/ \
-		-o create_data.bin
-
 create:
 	$(CXX) create_data.cc -std=c++11 -lsinga -lprotobuf -lglog -I../../include \
 		-L../../.libs/ -Wl,-unresolved-symbols=ignore-in-shared-libs -Wl,-rpath=../../.libs/ \

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/374f11d6/examples/mnist/create_data.cc
----------------------------------------------------------------------
diff --git a/examples/mnist/create_data.cc b/examples/mnist/create_data.cc
index 66a4905..5e51e97 100644
--- a/examples/mnist/create_data.cc
+++ b/examples/mnist/create_data.cc
@@ -38,19 +38,14 @@
 #include "singa/io/store.h"
 #include "singa/utils/common.h"
 #include "singa/proto/common.pb.h"
-#include "singa/proto/job.pb.h"
 
 using std::string;
 
-const char JOB_CONFIG[] = "job.conf";
-
 uint32_t swap_endian(uint32_t val) {
     val = ((val << 8) & 0xFF00FF00) | ((val >> 8) & 0xFF00FF);
     return (val << 16) | (val >> 16);
 }
 
-// output is the full path, unlike create_data in CIFAR with only
-// specifies the directory
 void create_data(const char* image_filename, const char* label_filename,
         const char* output) {
   // Open files
@@ -81,13 +76,7 @@ void create_data(const char* image_filename, const char* label_filename,
   image_file.read(reinterpret_cast<char*>(&cols), 4);
   cols = swap_endian(cols);
 
-  // read backend from the job.conf
-  singa::JobProto job_proto;
-  singa::ReadProtoFromTextFile(JOB_CONFIG.c_str(), &job_proto);
-  string store_backend =
-    job_proto.neuralnet().layer(0).store_conf().backend();
-
-  auto store = singa::io::OpenStore(store_backend, output, singa::io::kCreate);
+  auto store = singa::io::OpenStore("kvfile", output, singa::io::kCreate);
   char label;
   char* pixels = new char[rows * cols];
   int count = 0;

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/374f11d6/include/singa/io/hdfsfile.h
----------------------------------------------------------------------
diff --git a/include/singa/io/hdfsfile.h b/include/singa/io/hdfsfile.h
index f92910e..cd3ded3 100644
--- a/include/singa/io/hdfsfile.h
+++ b/include/singa/io/hdfsfile.h
@@ -33,7 +33,7 @@
 #include <google/protobuf/message.h>
 #endif
 
-#include <hdfs.h>
+#include <hdfs/hdfs.h>
 
 namespace singa {
 namespace io {

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/374f11d6/src/io/kvfile_store.cc
----------------------------------------------------------------------
diff --git a/src/io/kvfile_store.cc b/src/io/kvfile_store.cc
index 79e2a40..e77f49b 100644
--- a/src/io/kvfile_store.cc
+++ b/src/io/kvfile_store.cc
@@ -56,7 +56,9 @@ void KVFileStore::SeekToFirst() {
   file_->SeekToFirst();
 }
 
-void KVFileStore::Seek(int offset){}
+void KVFileStore::Seek(int offset){
+  LOG(FATAL) << "Operation not supported.";
+}
 
 bool KVFileStore::Write(const std::string& key, const std::string& value) {
   CHECK_NE(mode_, kRead);


[7/7] incubator-singa git commit: SINGA-97 Add HDFS Store

Posted by wa...@apache.org.
SINGA-97 Add HDFS Store

Minor change in the Makefile so that compiling without HDFS works correctly.


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/4cfe8137
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/4cfe8137
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/4cfe8137

Branch: refs/heads/master
Commit: 4cfe81373f25b4e4e6f76daf8983ebac3995388a
Parents: 9fbc8ee
Author: WANG Sheng <wa...@gmail.com>
Authored: Fri Jan 1 13:42:12 2016 +0800
Committer: WANG Sheng <wa...@gmail.com>
Committed: Sat Jan 2 19:58:14 2016 +0800

----------------------------------------------------------------------
 Makefile.am     | 2 --
 src/io/store.cc | 4 ++--
 2 files changed, 2 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/4cfe8137/Makefile.am
----------------------------------------------------------------------
diff --git a/Makefile.am b/Makefile.am
index a4315c8..3c282e3 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -149,8 +149,6 @@ SINGA_HDRS := include/singa.h \
               include/singa/io/kvfile_store.h \
               include/singa/io/textfile_store.h \
               include/mshadow/cxxnet_op.h \
-              include/singa/io/hdfsfile.h \
-              include/singa/io/hdfsfile_store.h \
               include/mshadow/tensor_expr.h \
               include/mshadow/tensor_container.h \
               include/mshadow/tensor_expr_ext.h \

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/4cfe8137/src/io/store.cc
----------------------------------------------------------------------
diff --git a/src/io/store.cc b/src/io/store.cc
index 1e5a17f..f2a4404 100644
--- a/src/io/store.cc
+++ b/src/io/store.cc
@@ -22,9 +22,9 @@
 #include "singa/io/store.h"
 #include "singa/io/kvfile_store.h"
 #include "singa/io/textfile_store.h"
+#ifdef USE_HDFS
 #include "singa/io/hdfs_store.h"
-
-#define USE_HDFS 1
+#endif
 
 namespace singa { namespace io {
 


[5/7] incubator-singa git commit: SINGA-97 Add HDFS Store

Posted by wa...@apache.org.
SINGA-97 Add HDFS Store

Modify the build files. A user can now build SINGA with HDFS support by running:
	./configure --enable-hdfs --with-libhdfs=/PATH/TO/HDFS3
--with-libhdfs is optional; by default the path is /usr/local/.


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/9fbc8ee7
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/9fbc8ee7
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/9fbc8ee7

Branch: refs/heads/master
Commit: 9fbc8ee7aabbbdc2f76cdcccdf346e14d4544f1a
Parents: 374f11d
Author: xiezl <xi...@comp.nus.edu.sg>
Authored: Thu Dec 31 15:25:01 2015 +0800
Committer: WANG Sheng <wa...@gmail.com>
Committed: Sat Jan 2 19:58:14 2016 +0800

----------------------------------------------------------------------
 Makefile.am  | 36 ++++++++++++++++++++++++++----------
 configure.ac | 50 +++++++++++++++++++++++++++++++++++++++++++-------
 2 files changed, 69 insertions(+), 17 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9fbc8ee7/Makefile.am
----------------------------------------------------------------------
diff --git a/Makefile.am b/Makefile.am
index cc308f7..a4315c8 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -29,11 +29,11 @@ MSHADOW_FLAGS = -DMSHADOW_USE_CUDA=0 -DMSHADOW_USE_CBLAS=1 -DMSHADOW_USE_MKL=0
 DEFAULT_FLAGS = -Wall -pthread -fPIC -std=c++11 -Wno-unknown-pragmas \
               $(MSHADOW_FLAGS) -funroll-loops -DTHREADED
 
-CFLAGS += $(DEBUG)
-CXXFLAGS += $(DEBUG)
-AC_CXXFLAGS = $(DEBUG)
+CFLAGS = $(DEBUG)
+CXXFLAGS = $(DEBUG)
+#AC_CXXFLAGS = $(DEBUG)
 
-INCLUDES = -I$(top_srcdir)/include -I/usr/local/include
+INCLUDES = -I$(top_srcdir)/include 
 
 PROTOS := $(top_srcdir)/src/proto/singa.proto \
           $(top_srcdir)/src/proto/job.proto \
@@ -62,6 +62,11 @@ CUDNN_SRCS := src/neuralnet/loss_layer/cudnn_softmaxloss.cc \
 PY_SRCS := tool/python/singa/driver_wrap.cxx \
 		   src/driver.cc
 
+HDFS_SRCS := src/io/hdfsfile.cc \
+			 src/io/hdfsfile_store.cc 
+HDFS_HDRS := include/singa/io/hdfsfile.h \
+			 include/singa/io/hdfsfile_store.h 
+
 SINGA_SRCS := src/driver.cc \
               src/server.cc \
               src/worker.cc \
@@ -102,8 +107,6 @@ SINGA_SRCS := src/driver.cc \
               src/io/kvfile_store.cc \
               src/io/textfile_store.cc \
               src/io/store.cc \
-              src/io/hdfsfile.cc \
-              src/io/hdfsfile_store.cc \
               src/utils/cluster.cc \
               src/utils/cluster_rt.cc \
               src/utils/graph.cc \
@@ -202,6 +205,11 @@ libsinga_la_CXXFLAGS += $(CUDNN_CFLAGS)
 libsinga_la_LDFLAGS += $(CUDNN_LDFLAGS) $(CUDNN_LIBS)
 endif
 
+if DHDFS
+libsinga_la_SOURCES += $(HDFS_SRCS)
+libsinga_la_CXXFLAGS += $(HDFS_CFLAGS)
+libsinga_la_LDFLAGS += $(HDFS_LDFLAGS) $(HDFS_LIBS)
+endif
 
 #bin_PROGRAMS = singa
 singa_SOURCES = src/main.cc
@@ -213,8 +221,7 @@ singa_LDFLAGS = -lsinga \
                 -lopenblas \
                 -lzmq \
                 -lczmq \
-                -lzookeeper_mt \
-                -lhdfs3
+                -lzookeeper_mt 
 if LMDB
 singa_LDFLAGS += -llmdb
 endif
@@ -231,6 +238,11 @@ singa_CXXFLAGS += $(CUDNN_CFLAGS)
 singa_LDFLAGS += $(CUDNN_LDFLAGS) $(CUDNN_LIBS)
 endif
 
+if DHDFS
+singa_SOURCES += $(HDFS_SRCS)
+singa_CXXFLAGS += $(HDFS_CFLAGS)
+singa_LDFLAGS += $(HDFS_LDFLAGS) $(HDFS_LIBS)
+endif
 #bin_PROGRAMS += singatool
 singatool_SOURCES = src/utils/tool.cc #$(CUDA_SRCS) $(CUDA_HDRS) $(CUDNN_SRCS)
 singatool_CXXFLAGS = -Wall -pthread -fPIC -std=c++11 -MMD -Wno-unknown-pragmas \
@@ -238,8 +250,7 @@ singatool_CXXFLAGS = -Wall -pthread -fPIC -std=c++11 -MMD -Wno-unknown-pragmas \
 singatool_LDFLAGS = -lsinga \
                     -lglog  \
                     -lprotobuf \
-                    -lzookeeper_mt \
-                    -lhdfs3
+                    -lzookeeper_mt 
 
 #if DCUDA
 #singatool_SOURCES += $(CUDA_SRCS) $(CUDA_HDRS)  
@@ -253,6 +264,11 @@ singatool_LDFLAGS = -lsinga \
 #singatool_LDFLAGS += $(CUDNN_LDFLAGS) $(CUDNN_LIBS)
 #endif
 
+if DHDFS
+singatool_SOURCES += $(HDFS_SRCS)
+singatool_CXXFLAGS += $(HDFS_CFLAGS)
+singatool_LDFLAGS += $(HDFS_LDFLAGS) $(HDFS_LIBS)
+endif
 #lib_LTLIBRARIES += libgtest.la
 libgtest_la_SOURCES = $(GTEST_HDRS) $(GTEST_SRCS)
 libgtest_la_CXXFLAGS = $(DEFAULT_FLAGS) -msse3 -fpermissive -I$(top_srcdir)/include

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/9fbc8ee7/configure.ac
----------------------------------------------------------------------
diff --git a/configure.ac b/configure.ac
index bde1d8e..82db55a 100644
--- a/configure.ac
+++ b/configure.ac
@@ -55,7 +55,6 @@ if test "$cuda_prefix" == "yes"; then
     fi
 fi
 
-
 if test x"$cudaval" = x"yes"; then
     AC_MSG_CHECKING([nvcc in $cuda_prefix/bin])
     if test -x "$cuda_prefix/bin/nvcc"; then
@@ -158,23 +157,60 @@ if test x"$enable_lmdb" = x"yes"; then
   AC_DEFINE(LMDB, 1, [Enable Option layer])
 fi
 
+PROGS=''
+LTLIBS=''
+AC_ARG_ENABLE(hdfs,
+  AS_HELP_STRING([--enable-hdfs],[enable hdfs support]),
+  [enable_hdfs=yes],[enable_hdfs=no])
+AM_CONDITIONAL(DHDFS, test "$enable_hdfs" = yes)
+
+AC_ARG_WITH([libhdfs],
+    [AS_HELP_STRING([--with-libhdfs=PATH], [prefix where libhdfs is installed])],
+    [hdfs_prefix=$withval], [hdfs_prefix="/usr/local"])
+if test "$hdfs_prefix" == "yes"; then
+    if test "$withval" == "yes"; then
+        cudnn_prefix="/usr/local"
+    fi
+fi
+
+if test x"$enable_hdfs" != x"no"; then
+  HDFS_CFLAGS="-I$hdfs_prefix/include"
+  HDFS_LDFLAGS="-L$hdfs_prefix/lib"
+  HDFS_LIBS="-lhdfs3"
+  LIBS="$LIBS $HDFS_LIBS"
+  LDFLAGS="$LDFLAGS $HDFS_LDFLAGS"
+  DEBUG+=" -DUSE_HDFS"
+  AC_DEFINE(DHDFS,[1],[Defined if HDFS should be used])
+  AC_CHECK_LIB([hdfs3], [main], [], [
+      AC_MSG_ERROR([unable to find hdfs3 library])
+      ])
+else
+  HDFS_CFLAGS=""
+  HDFS_LDFLAGS=""
+  HDFS_LIBS=""
+fi
+
+AC_SUBST(HDFS_CFLAGS)
+AC_SUBST(HDFS_LDFLAGS)
+AC_SUBST(HDFS_LIBS)
+
 AC_ARG_ENABLE(test,
   AS_HELP_STRING([--enable-test],[enable singa test]),
   [enable_test=yes],[enable_test=no])
 AM_CONDITIONAL(SINGATEST, test "$enable_test" = yes)
 if test x"$enable_test" != x"no"; then
-  PROGS='singatest test '
-  LTLIBS='libgtest.la '
+  PROGS+='singatest test '
+  LTLIBS+='libgtest.la '
 else
-  PROGS=''
-  LTLIBS=''
+  PROGS+=''
+  LTLIBS+=''
 fi
 
 AC_ARG_ENABLE(debug,
   AS_HELP_STRING([--enable-debug],[enable debug mode]),
   [enable_debug=yes],[enable_debug=no])
-AM_CONDITIONAL(DEBUG, test "$enable_debug" = yes)
-if test x"$enable_debug" != x"no"; then
+AM_CONDITIONAL(SINGADEBUG, [test "$enable_debug" = yes])
+if test x"$enable_debug" == x"yes"; then
   DEBUG+=' -g'
 else
   DEBUG+=' -O2'


[4/7] incubator-singa git commit: SINGA-97 Add HDFS Store

Posted by wa...@apache.org.
SINGA-97 Add HDFS Store

This ticket implements an HDFS Store for reading data from HDFS. It complements
the existing CSV Store, which reads data from CSV files. HDFS is a popular
distributed file system with high (sequential) I/O throughput, so supporting
it is necessary for SINGA to scale.

HDFS usage in SINGA differs from that in standard MapReduce applications.
Specifically, each SINGA worker may train on sequences of records that do not
lie within block boundaries, whereas in MapReduce each Mapper processes a number
of complete blocks. In MapReduce, the runtime engine may fetch and cache the
entire block over the network, knowing that the block will be processed
entirely. In SINGA, such a pre-fetching and caching strategy would be sub-optimal
because it wastes I/O and network bandwidth on data records that are not used.

We defer I/O optimization to a future ticket.

For the implementation, we choose `libhdfs3` from Pivotal as the C++ HDFS
client. This library is built natively for C++, so it is more optimized and
easier to deploy than the original `libhdfs` library shipped with Hadoop.
libhdfs3 makes extensive use of short-circuit reads to speed up local reads,
and it often complains when that option is not set.

Finally, we test the implementation in a distributed environment set up from a
number of Docker containers. We test with both CIFAR and MNIST examples.
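
For illustration only (not part of the commit), a minimal sketch of how a worker
might read records back through the new Store interface; the HDFS path and the
offset passed to Seek() are assumptions made for this example:

    #include <string>
    #include "singa/io/store.h"

    // Read one partition of an HDFS-backed store. The "hdfsfile" backend
    // resolves to HDFSStore when SINGA is compiled with --enable-hdfs.
    void ReadPartition(int offset) {
      auto store = singa::io::OpenStore(
          "hdfsfile",
          "hdfs://node0:9000/examples/cifar10/train_data.bin",  // assumed path
          singa::io::kRead);
      // Concurrent workers can start from different positions; HDFSStore::Seek
      // forwards the offset to hdfsSeek.
      store->Seek(offset);
      std::string key, value;
      while (store->Read(&key, &value)) {
        // each value holds one record stored as <length><content>
      }
      store->Close();
    }

KVFileStore and TextFileStore leave Seek() unsupported or as a no-op, so this
offset-based pattern applies to the HDFS backend only.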


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/8a07a294
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/8a07a294
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/8a07a294

Branch: refs/heads/master
Commit: 8a07a29462c6d8ad1d2da17da4a018dfc327c121
Parents: 9ff176c
Author: Anh Dinh <ug...@gmail.com>
Authored: Thu Nov 26 17:44:15 2015 +0800
Committer: WANG Sheng <wa...@gmail.com>
Committed: Sat Jan 2 19:58:14 2016 +0800

----------------------------------------------------------------------
 Makefile.am                       |  12 ++-
 examples/cifar10/Makefile.example |   5 ++
 examples/cifar10/create_data.cc   |  30 +++++---
 examples/mnist/Makefile.example   |   5 ++
 examples/mnist/create_data.cc     |  13 +++-
 include/singa/io/hdfs_store.h     |  38 +++++++++-
 include/singa/io/hdfsfile.h       | 131 ++++++++++++++++++++++++++++++++
 include/singa/io/kvfile_store.h   |   1 +
 include/singa/io/store.h          |   6 ++
 include/singa/io/textfile_store.h |   1 +
 src/io/hdfsfile.cc                | 135 +++++++++++++++++++++++++++++++++
 src/io/hdfsfile_store.cc          |  75 ++++++++++++++++++
 src/io/kvfile_store.cc            |   3 +
 src/io/store.cc                   |  10 ++-
 src/io/textfile_store.cc          |   3 +
 15 files changed, 448 insertions(+), 20 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/8a07a294/Makefile.am
----------------------------------------------------------------------
diff --git a/Makefile.am b/Makefile.am
index 6466f92..d78a150 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -33,7 +33,7 @@ CFLAGS += $(DEBUG)
 CXXFLAGS += $(DEBUG)
 AC_CXXFLAGS = $(DEBUG)
 
-INCLUDES = -I$(top_srcdir)/include
+INCLUDES = -I$(top_srcdir)/include -I/usr/local/include/hdfs
 
 PROTOS := $(top_srcdir)/src/proto/singa.proto \
           $(top_srcdir)/src/proto/job.proto \
@@ -102,6 +102,8 @@ SINGA_SRCS := src/driver.cc \
               src/io/kvfile_store.cc \
               src/io/textfile_store.cc \
               src/io/store.cc \
+              src/io/hdfsfile.cc \
+              src/io/hdfsfile_store.cc \
               src/utils/cluster.cc \
               src/utils/cluster_rt.cc \
               src/utils/graph.cc \
@@ -144,6 +146,8 @@ SINGA_HDRS := include/singa.h \
               include/singa/io/kvfile_store.h \
               include/singa/io/textfile_store.h \
               include/mshadow/cxxnet_op.h \
+              include/singa/io/hdfsfile.h \
+              include/singa/io/hdfsfile_store.h \
               include/mshadow/tensor_expr.h \
               include/mshadow/tensor_container.h \
               include/mshadow/tensor_expr_ext.h \
@@ -209,7 +213,8 @@ singa_LDFLAGS = -lsinga \
                 -lopenblas \
                 -lzmq \
                 -lczmq \
-                -lzookeeper_mt
+                -lzookeeper_mt \
+                -lhdfs3
 if LMDB
 singa_LDFLAGS += -llmdb
 endif
@@ -233,7 +238,8 @@ singatool_CXXFLAGS = -Wall -pthread -fPIC -std=c++11 -MMD -Wno-unknown-pragmas \
 singatool_LDFLAGS = -lsinga \
                     -lglog  \
                     -lprotobuf \
-                    -lzookeeper_mt 
+                    -lzookeeper_mt \
+                    -lhdfs3
 
 #if DCUDA
 #singatool_SOURCES += $(CUDA_SRCS) $(CUDA_HDRS)  

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/8a07a294/examples/cifar10/Makefile.example
----------------------------------------------------------------------
diff --git a/examples/cifar10/Makefile.example b/examples/cifar10/Makefile.example
index dd65d7d..775e165 100644
--- a/examples/cifar10/Makefile.example
+++ b/examples/cifar10/Makefile.example
@@ -28,6 +28,11 @@ cifar-10-binary-bin:
 	wget http://www.cs.toronto.edu/~kriz/cifar-10-binary.tar.gz
 	tar xf cifar-10-binary.tar.gz
 
+compile:
+	$(CXX) create_data.cc -std=c++11 -lsinga -lprotobuf -lglog -lhdfs3 \
+		-I../../include -L../../.libs/ -Wl,-unresolved-symbols=ignore-in-shared-libs \
+		-Wl,-rpath=../../.libs/  -o create_data.bin
+
 create:
 	$(CXX) create_data.cc -std=c++11 -lsinga -lprotobuf -lglog \
 		-I../../include -L../../.libs/ -Wl,-unresolved-symbols=ignore-in-shared-libs \

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/8a07a294/examples/cifar10/create_data.cc
----------------------------------------------------------------------
diff --git a/examples/cifar10/create_data.cc b/examples/cifar10/create_data.cc
index 5873c0e..05169d8 100644
--- a/examples/cifar10/create_data.cc
+++ b/examples/cifar10/create_data.cc
@@ -19,16 +19,16 @@
 *
 *************************************************************/
 
-//
-// This code creates training and test DataShard for CIFAR dataset.
-// It is adapted from the convert_cifar_data from Caffe
-//
-// Usage:
-//    create_shard.bin input_folder output_folder
-//
-// The CIFAR dataset could be downloaded at
-//    http://www.cs.toronto.edu/~kriz/cifar.html
-//
+
+/**
+ * Create training and test DataShard for CIFAR dataset. 
+ * It is adapted from convert_cifar_data from Caffe. 
+ *    create_shard.bin <input> <output_folder> 
+ * 
+ * Read from JobConf object the option to use KVfile, HDFS or other (1st layer
+ * store_conf object). 
+ * To load to HDFS, specify "hdfs://namenode/examples" as the output folder
+ */
 
 #include <glog/logging.h>
 #include <fstream>
@@ -38,6 +38,8 @@
 
 #include "singa/io/store.h"
 #include "singa/proto/common.pb.h"
+#include "singa/utils/common.h"
+#include "singa/proto/job.pb.h"
 
 using std::string;
 
@@ -45,6 +47,7 @@ const int kCIFARSize = 32;
 const int kCIFARImageNBytes = 3072;
 const int kCIFARBatchSize = 10000;
 const int kCIFARTrainBatches = 5;
+const char JOB_CONFIG[] = "job.conf";
 
 void read_image(std::ifstream* file, int* label, char* buffer) {
   char label_char;
@@ -58,7 +61,6 @@ void create_data(const string& input_folder, const string& output_folder) {
   int label;
   char str_buffer[kCIFARImageNBytes];
   string rec_buf;
-
   singa::RecordProto image;
   image.add_shape(3);
   image.add_shape(kCIFARSize);
@@ -69,7 +71,11 @@ void create_data(const string& input_folder, const string& output_folder) {
   for (int i = 0; i < kCIFARImageNBytes; i++)
     mean.add_data(0.f);
 
-  auto store = singa::io::CreateStore("kvfile");
+  singa::JobProto job_proto;
+  singa::ReadProtoFromTextFile(JOB_CONFIG, &job_proto);
+  string store_backend =
+        job_proto.neuralnet().layer(0).store_conf().backend();
+  auto store = singa::io::CreateStore(store_backend);
   CHECK(store->Open(output_folder + "/train_data.bin", singa::io::kCreate));
   LOG(INFO) << "Preparing training data";
   int count = 0;

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/8a07a294/examples/mnist/Makefile.example
----------------------------------------------------------------------
diff --git a/examples/mnist/Makefile.example b/examples/mnist/Makefile.example
index 733633d..ba2308b 100644
--- a/examples/mnist/Makefile.example
+++ b/examples/mnist/Makefile.example
@@ -33,6 +33,11 @@ mnist:
 	gunzip train-images-idx3-ubyte.gz && gunzip train-labels-idx1-ubyte.gz
 	gunzip t10k-images-idx3-ubyte.gz && gunzip t10k-labels-idx1-ubyte.gz
 
+compile:
+	$(CXX) create_data.cc -std=c++11 -lsinga -lprotobuf -lglog -lhdfs3 -I../../include \
+		-L../../.libs/ -Wl,-unresolved-symbols=ignore-in-shared-libs -Wl,-rpath=../../.libs/ \
+		-o create_data.bin
+
 create:
 	$(CXX) create_data.cc -std=c++11 -lsinga -lprotobuf -lglog -I../../include \
 		-L../../.libs/ -Wl,-unresolved-symbols=ignore-in-shared-libs -Wl,-rpath=../../.libs/ \

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/8a07a294/examples/mnist/create_data.cc
----------------------------------------------------------------------
diff --git a/examples/mnist/create_data.cc b/examples/mnist/create_data.cc
index 5e51e97..66a4905 100644
--- a/examples/mnist/create_data.cc
+++ b/examples/mnist/create_data.cc
@@ -38,14 +38,19 @@
 #include "singa/io/store.h"
 #include "singa/utils/common.h"
 #include "singa/proto/common.pb.h"
+#include "singa/proto/job.pb.h"
 
 using std::string;
 
+const char JOB_CONFIG[] = "job.conf";
+
 uint32_t swap_endian(uint32_t val) {
     val = ((val << 8) & 0xFF00FF00) | ((val >> 8) & 0xFF00FF);
     return (val << 16) | (val >> 16);
 }
 
+// output is the full path, unlike create_data in CIFAR with only
+// specifies the directory
 void create_data(const char* image_filename, const char* label_filename,
         const char* output) {
   // Open files
@@ -76,7 +81,13 @@ void create_data(const char* image_filename, const char* label_filename,
   image_file.read(reinterpret_cast<char*>(&cols), 4);
   cols = swap_endian(cols);
 
-  auto store = singa::io::OpenStore("kvfile", output, singa::io::kCreate);
+  // read backend from the job.conf
+  singa::JobProto job_proto;
+  singa::ReadProtoFromTextFile(JOB_CONFIG.c_str(), &job_proto);
+  string store_backend =
+    job_proto.neuralnet().layer(0).store_conf().backend();
+
+  auto store = singa::io::OpenStore(store_backend, output, singa::io::kCreate);
   char label;
   char* pixels = new char[rows * cols];
   int count = 0;

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/8a07a294/include/singa/io/hdfs_store.h
----------------------------------------------------------------------
diff --git a/include/singa/io/hdfs_store.h b/include/singa/io/hdfs_store.h
index f85615b..1fb9258 100644
--- a/include/singa/io/hdfs_store.h
+++ b/include/singa/io/hdfs_store.h
@@ -19,4 +19,40 @@
 *
 *************************************************************/
 
-// TODO(wangwei) use hdfs as data storage
+#ifndef SINGA_IO_HDFS_STORE_H_
+#define SINGA_IO_HDFS_STORE_H_
+
+#include <string>
+#include "singa/io/store.h"
+#include "singa/io/hdfsfile.h"
+
+namespace singa {
+namespace io {
+
+/**
+ * HDFS implementation of the Store interface. The store manages key-value 
+ * records storing in HDFS files. 
+ *
+ * The store consists of records of the following format:
+ *      [<length><content>] 
+ */
+class HDFSStore : public Store {
+ public:
+  ~HDFSStore() { Close();}
+  bool Open(const std::string& source, Mode mode) override;
+  void Close() override;
+  bool Read(std::string* key, std::string* value) override;
+  void SeekToFirst() override;
+  void Seek(int offset) override;
+  bool Write(const std::string& key, const std::string& value) override;
+  void Flush() override;
+
+ private:
+  HDFSFile* file_ = nullptr;
+  Mode mode_;
+};
+
+}  // namespace io
+}  // namespace singa
+
+#endif  // SINGA_IO_HDFS_STORE_H_

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/8a07a294/include/singa/io/hdfsfile.h
----------------------------------------------------------------------
diff --git a/include/singa/io/hdfsfile.h b/include/singa/io/hdfsfile.h
new file mode 100644
index 0000000..f92910e
--- /dev/null
+++ b/include/singa/io/hdfsfile.h
@@ -0,0 +1,131 @@
+/************************************************************
+*
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements.  See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership.  The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License.  You may obtain a copy of the License at
+*
+*   http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an
+* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+* KIND, either express or implied.  See the License for the
+* specific language governing permissions and limitations
+* under the License.
+*
+*************************************************************/
+
+#ifndef SINGA_IO_HDFSFILE_H_
+#define SINGA_IO_HDFSFILE_H_
+
+#include <fstream>
+#include <string>
+#include <unordered_set>
+
+
+#define USE_PROTOBUF 1
+
+#ifdef USE_PROTOBUF
+#include <google/protobuf/message.h>
+#endif
+
+#include <hdfs.h>
+
+namespace singa {
+namespace io {
+
+/**
+ * HDFSFile represents a specific partition of the HDFS file storing training/validation
+ * or test data. HDFS library maintains its own buffer, so we don't need one. 
+ * 
+ * Each record is of the form: <length><content>
+ */
+class HDFSFile {
+ public:
+  enum Mode {
+    // read only mode used in training
+    kRead = 0,
+    // write mode used in creating HDFSFile (will overwrite previous one)
+    kCreate = 1,
+    // append mode, e.g. used when previous creating crashes
+    kAppend = 2
+  };
+
+  /**
+   * HDFSFile constructor.
+   *
+   * @param path path to file, of the form "hdfs://namenode/file_path"
+   * @param mode HDFSFile::kRead, HDFSFile::kCreate or HDFSFile::kAppend
+   */
+  HDFSFile(const std::string& path, Mode mode);
+  ~HDFSFile();
+
+#ifdef USE_PROTOBUF
+  /**
+   * read next tuple from the HDFSFile.
+   *
+   * @param val Record of type Message
+   * @return false if read unsuccess, e.g., the tuple was not inserted
+   *         completely.
+   */
+  bool Next(google::protobuf::Message* val);
+  /**
+   * Append one record to the HDFSFile.
+   *
+   * @param val
+   * @return false if unsucess, e.g., inserted before
+   */
+  bool Insert(const google::protobuf::Message& tuple);
+#endif
+
+  /**
+   * Read next record from the HDFSFile.
+   *
+   * @param val Record of type string
+   * @return false if unsuccess, e.g. the tuple was not inserted completely.
+   */
+  bool Next(std::string* val);
+  /**
+   * Append record to the KVFile.
+   *
+   * @param key e.g., image path
+   * @param val
+   * @return false if unsucess, e.g., inserted before
+   */
+  bool Insert(const std::string& tuple);
+  /**
+   * Move the read pointer to the head of the KVFile file.
+   * Used for repeated reading.
+   */
+  void Seek(int offset);
+
+  /**
+   * Flush buffered data to disk.
+   * Used only for kCreate or kAppend.
+   */
+  void Flush();
+    /**
+   * @return path to HDFSFile file
+   */
+  inline std::string path() { return path_; }
+
+ private:
+  std::string path_ = "";
+  Mode mode_;
+  // handle to HDFS
+  hdfsFS fs_;
+  // handle to the HDFS open file
+  hdfsFile file_;
+
+  //!< to avoid replicated record
+  std::unordered_set<std::string> keys_;
+};
+}  // namespace io
+
+}  // namespace singa
+
+#endif  // SINGA_IO_HDFSFILE_H_

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/8a07a294/include/singa/io/kvfile_store.h
----------------------------------------------------------------------
diff --git a/include/singa/io/kvfile_store.h b/include/singa/io/kvfile_store.h
index 74ff127..73e4127 100644
--- a/include/singa/io/kvfile_store.h
+++ b/include/singa/io/kvfile_store.h
@@ -41,6 +41,7 @@ class KVFileStore : public Store {
   void Close() override;
   bool Read(std::string* key, std::string* value) override;
   void SeekToFirst() override;
+  void Seek(int offset) override; 
   bool Write(const std::string& key, const std::string& value) override;
   void Flush() override;
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/8a07a294/include/singa/io/store.h
----------------------------------------------------------------------
diff --git a/include/singa/io/store.h b/include/singa/io/store.h
index 15afb6a..a63a981 100644
--- a/include/singa/io/store.h
+++ b/include/singa/io/store.h
@@ -68,6 +68,12 @@ class Store {
    * Seek the read header to the first tuple.
    */
   virtual void SeekToFirst() = 0;
+
+  /**
+   * Seek to an offset. This allows concurrent workers to start reading from
+   * different positions (HDFS). 
+   */
+  virtual void Seek(int offset) = 0; 
   /**
    * Write a tuple.
    *

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/8a07a294/include/singa/io/textfile_store.h
----------------------------------------------------------------------
diff --git a/include/singa/io/textfile_store.h b/include/singa/io/textfile_store.h
index dcc559d..2cc6571 100644
--- a/include/singa/io/textfile_store.h
+++ b/include/singa/io/textfile_store.h
@@ -41,6 +41,7 @@ class TextFileStore : public Store {
   void Close() override;
   bool Read(std::string* key, std::string* value) override;
   void SeekToFirst() override;
+  void Seek(int offset) override; 
   bool Write(const std::string& key, const std::string& value) override;
   void Flush() override;
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/8a07a294/src/io/hdfsfile.cc
----------------------------------------------------------------------
diff --git a/src/io/hdfsfile.cc b/src/io/hdfsfile.cc
new file mode 100644
index 0000000..e093d81
--- /dev/null
+++ b/src/io/hdfsfile.cc
@@ -0,0 +1,135 @@
+/************************************************************
+*
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements.  See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership.  The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License.  You may obtain a copy of the License at
+*
+*   http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an
+* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+* KIND, either express or implied.  See the License for the
+* specific language governing permissions and limitations
+* under the License.
+*
+*************************************************************/
+
+#include "singa/io/hdfsfile.h"
+
+#include <glog/logging.h>
+#include <iostream>
+namespace singa {
+namespace io {
+
+HDFSFile::HDFSFile(const std::string& path, Mode mode): path_(path),
+  mode_(mode) {
+  // check that path starts with hdfs://
+  CHECK_EQ(path.find("hdfs://"), 0);
+
+  // extract namenode from path
+  int path_idx = path.find_first_of("/", 7);
+  int colon_idx = path.find_first_of(":", 7);
+  std::string namenode = path.substr(7, colon_idx-7);
+  int port = atoi(path.substr(colon_idx+1, path_idx-colon_idx-1).c_str());
+  std::string filepath = path.substr(path_idx);
+
+  // connect to HDFS
+  fs_ = hdfsConnect(namenode.c_str(), port);
+  CHECK_NOTNULL(fs_);
+
+  if (mode == HDFSFile::kRead) {
+    file_ = hdfsOpenFile(fs_, filepath.c_str(), O_RDONLY, 0, 0, 0);
+  } else {
+    // check if the directory exists, create it if not.
+    int file_idx = path.find_last_of("/");
+    std::string hdfs_directory_path = path.substr(path_idx, file_idx-path_idx);
+    if (hdfsExists(fs_, hdfs_directory_path.c_str()) == -1)
+      CHECK_EQ(hdfsCreateDirectory(fs_, hdfs_directory_path.c_str()), 0);
+    file_ = hdfsOpenFile(fs_, filepath.c_str(), O_WRONLY, 0, 0, 0);
+  }
+
+  CHECK_NOTNULL(file_);
+}
+
+HDFSFile::~HDFSFile() {
+  if (mode_ != HDFSFile::kRead)
+    Flush();
+  hdfsCloseFile(fs_, file_);
+}
+
+#ifdef USE_PROTOBUF
+bool HDFSFile::Next(google::protobuf::Message* val) {
+  // read from file_, then turns it to a message
+  // red size, then content
+  int size;
+  if (hdfsRead(fs_, file_, &size, sizeof(int)) <= 0)
+    return false;
+  char *temp_buf = reinterpret_cast<char*>(malloc(size*sizeof(char)));
+  CHECK(hdfsRead(fs_, file_, temp_buf, size));
+  val->ParseFromArray(temp_buf, size);
+  free(temp_buf);
+  return true;
+}
+
+bool HDFSFile::Insert(const google::protobuf::Message& val) {
+  std::string str;
+  val.SerializeToString(&str);
+  return Insert(str);
+}
+#endif
+
+bool HDFSFile::Next(std::string* val) {
+  char size_buf[sizeof(int)];
+  // a hack to read across blocks. The first read my return in complete data,
+  // so try the second read.
+  int read_size_size = hdfsRead(fs_, file_, size_buf, sizeof(int));
+
+  if (read_size_size == 0)
+    return false;
+
+  if (read_size_size < (static_cast<int>(sizeof(int))))
+    CHECK_EQ(hdfsRead(fs_, file_, size_buf+read_size_size,
+      sizeof(int)-read_size_size),
+      sizeof(int)-read_size_size);
+  int size;
+  memcpy(&size, size_buf, sizeof(int));
+
+  char *temp_buf = reinterpret_cast<char*>(malloc(size*sizeof(char)));
+
+  int read_size = hdfsRead(fs_, file_, temp_buf, size);
+  if (read_size < size)
+    CHECK_EQ(hdfsRead(fs_, file_, temp_buf+read_size, size-read_size),
+      size-read_size);
+  val->clear();
+  val->append(temp_buf, size);
+  free(temp_buf);
+  return true;
+}
+
+// append one record to the end of the file
+bool HDFSFile::Insert(const std::string& val) {
+  CHECK(mode_ != HDFSFile::kRead);
+  // write length, then content
+  int size = val.length();
+  CHECK_EQ(hdfsWrite(fs_, file_, &size, sizeof(int)), sizeof(int));
+  CHECK_EQ(hdfsWrite(fs_, file_, val.c_str(), val.length()), val.length());
+  return true;
+}
+
+void HDFSFile::Seek(int offset) {
+  CHECK_EQ(mode_, kRead);
+  // seek back to the parition offset
+  CHECK_EQ(hdfsSeek(fs_, file_, offset), 0);
+}
+
+void HDFSFile::Flush() {
+  CHECK_EQ(hdfsFlush(fs_, file_), 0);
+}
+
+}  // namespace io
+}  // namespace singa

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/8a07a294/src/io/hdfsfile_store.cc
----------------------------------------------------------------------
diff --git a/src/io/hdfsfile_store.cc b/src/io/hdfsfile_store.cc
new file mode 100644
index 0000000..9464169
--- /dev/null
+++ b/src/io/hdfsfile_store.cc
@@ -0,0 +1,75 @@
+/************************************************************
+*
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements.  See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership.  The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License.  You may obtain a copy of the License at
+*
+*   http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an
+* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+* KIND, either express or implied.  See the License for the
+* specific language governing permissions and limitations
+* under the License.
+*
+*************************************************************/
+
+#include <glog/logging.h>
+#include "singa/io/hdfs_store.h"
+
+namespace singa {
+namespace io {
+
+bool HDFSStore::Open(const std::string& source, Mode mode) {
+  CHECK(file_ == nullptr);
+  if (mode == kRead)
+    file_ = new HDFSFile(source, HDFSFile::kRead);
+  else if (mode == kCreate)
+    file_ = new HDFSFile(source, HDFSFile::kCreate);
+  else if (mode == kAppend)
+    file_ = new HDFSFile(source, HDFSFile::kAppend);
+  mode_ = mode;
+  return file_ != nullptr;
+}
+
+void HDFSStore::Close() {
+  if (file_ != nullptr)
+    delete file_;
+  file_ = nullptr;
+}
+
+bool HDFSStore::Read(std::string* key, std::string* value) {
+  CHECK_EQ(mode_, kRead);
+  CHECK(file_ != nullptr);
+  return file_->Next(value);
+}
+
+void HDFSStore::SeekToFirst() {
+  CHECK_EQ(mode_, kRead);
+  CHECK(file_ != nullptr);
+  file_->Seek(0);
+}
+
+void HDFSStore::Seek(int offset) {
+  file_->Seek(offset);
+}
+
+bool HDFSStore::Write(const std::string& key, const std::string& value) {
+  CHECK_NE(mode_, kRead);
+  CHECK(file_ != nullptr);
+  return file_->Insert(value);
+}
+
+void HDFSStore::Flush() {
+  CHECK_NE(mode_, kRead);
+  CHECK(file_!= nullptr);
+  file_->Flush();
+}
+
+}  // namespace io
+}  // namespace singa

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/8a07a294/src/io/kvfile_store.cc
----------------------------------------------------------------------
diff --git a/src/io/kvfile_store.cc b/src/io/kvfile_store.cc
index fbf6982..79e2a40 100644
--- a/src/io/kvfile_store.cc
+++ b/src/io/kvfile_store.cc
@@ -55,6 +55,9 @@ void KVFileStore::SeekToFirst() {
   CHECK(file_ != nullptr);
   file_->SeekToFirst();
 }
+
+void KVFileStore::Seek(int offset){}
+
 bool KVFileStore::Write(const std::string& key, const std::string& value) {
   CHECK_NE(mode_, kRead);
   CHECK(file_ != nullptr);

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/8a07a294/src/io/store.cc
----------------------------------------------------------------------
diff --git a/src/io/store.cc b/src/io/store.cc
index 530ca58..1e5a17f 100644
--- a/src/io/store.cc
+++ b/src/io/store.cc
@@ -22,8 +22,12 @@
 #include "singa/io/store.h"
 #include "singa/io/kvfile_store.h"
 #include "singa/io/textfile_store.h"
+#include "singa/io/hdfs_store.h"
+
+#define USE_HDFS 1
 
 namespace singa { namespace io {
+
 Store* CreateStore(const std::string& backend) {
   Store *store = nullptr;
   if (backend.compare("textfile") == 0) {
@@ -40,13 +44,13 @@ Store* CreateStore(const std::string& backend) {
 
 #ifdef USE_OPENCV
   if (backend == "imagefolder") {
-    return new ImageFolderStore();
+    store =  new ImageFolderStore();
   }
 #endif
 
 #ifdef USE_HDFS
-  if (backend == "hdfs") {
-    return new HDFSStore();
+  if (backend == "hdfsfile") {
+    store =  new HDFSStore();
   }
 #endif
   return store;

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/8a07a294/src/io/textfile_store.cc
----------------------------------------------------------------------
diff --git a/src/io/textfile_store.cc b/src/io/textfile_store.cc
index e203517..4c2f1b9 100644
--- a/src/io/textfile_store.cc
+++ b/src/io/textfile_store.cc
@@ -70,6 +70,9 @@ void TextFileStore::SeekToFirst() {
   fs_->seekg(0);
 }
 
+void TextFileStore::Seek(int offset) {
+}
+
 bool TextFileStore::Write(const std::string& key, const std::string& value) {
   CHECK_NE(mode_, kRead);
   CHECK(fs_ != nullptr);


[2/7] incubator-singa git commit: SINGA-97 Add HDFS Store

Posted by wa...@apache.org.
SINGA-97  Add HDFS Store

Change create_data.cc to support "make create_hdfs", which creates and uploads data
to HDFS. The default HDFS directory is "hdfs://node0:9000/examples/cifar10"; it can
be customized with "make create_hdfs HDFS_DIR=xxx".
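
For illustration, a sketch of the backend-selection idea this commit applies in
create_data.cc: use the HDFS backend when the output path looks like an HDFS URI,
otherwise fall back to the local kvfile backend. The prefix test below is a slight
tightening of the substring check in the diff, and the example paths are assumptions:

    #include <string>
    #include "singa/io/store.h"

    // Choose the store backend from the output path, then open it for writing.
    singa::io::Store* OpenOutputStore(const std::string& output) {
      const std::string backend =
          output.find("hdfs://") == 0 ? "hdfsfile" : "kvfile";
      return singa::io::OpenStore(backend, output, singa::io::kCreate);
    }

    // e.g. OpenOutputStore("hdfs://node0:9000/examples/cifar10/train_data.bin");
    //      OpenOutputStore("train_data.bin");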


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/aada3658
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/aada3658
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/aada3658

Branch: refs/heads/master
Commit: aada36581c9ec8965976ef5aa91311ae5f52ad70
Parents: 4cfe813
Author: Anh Dinh <ug...@gmail.com>
Authored: Fri Jan 1 18:46:52 2016 +0800
Committer: WANG Sheng <wa...@gmail.com>
Committed: Sat Jan 2 19:58:14 2016 +0800

----------------------------------------------------------------------
 examples/cifar10/Makefile.example | 14 ++++++++++++++
 examples/cifar10/create_data.cc   | 25 +++++++++++++------------
 examples/mnist/Makefile.example   | 14 ++++++++++++++
 examples/mnist/create_data.cc     |  6 +++++-
 4 files changed, 46 insertions(+), 13 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/aada3658/examples/cifar10/Makefile.example
----------------------------------------------------------------------
diff --git a/examples/cifar10/Makefile.example b/examples/cifar10/Makefile.example
index dd65d7d..16dc052 100644
--- a/examples/cifar10/Makefile.example
+++ b/examples/cifar10/Makefile.example
@@ -22,14 +22,28 @@ libs :=singa glog protobuf
 
 .PHONY: all download create
 
+HDFS_DIR := hdfs://node0:9000/examples/cifar10
+
 download: cifar-10-binary-bin
 
 cifar-10-binary-bin:
 	wget http://www.cs.toronto.edu/~kriz/cifar-10-binary.tar.gz
 	tar xf cifar-10-binary.tar.gz
 
+compile:
+	$(CXX) create_data.cc -std=c++11 -lsinga -lprotobuf -lglog -lhdfs3 \
+		-I../../include -L../../.libs/ -Wl,-unresolved-symbols=ignore-in-shared-libs \
+		-Wl,-rpath=../../.libs/  -o create_data.bin
+
 create:
 	$(CXX) create_data.cc -std=c++11 -lsinga -lprotobuf -lglog \
 		-I../../include -L../../.libs/ -Wl,-unresolved-symbols=ignore-in-shared-libs \
 		-Wl,-rpath=../../.libs/  -o create_data.bin
 	./create_data.bin cifar-10-batches-bin .
+
+create_hdfs:
+	$(CXX) create_data.cc -std=c++11 -lsinga -lprotobuf -lglog \
+		-I../../include -L../../.libs/ -Wl,-unresolved-symbols=ignore-in-shared-libs \
+		-Wl,-rpath=../../.libs/  -o create_data.bin
+	./create_data.bin cifar-10-batches-bin $(HDFS_DIR) 
+

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/aada3658/examples/cifar10/create_data.cc
----------------------------------------------------------------------
diff --git a/examples/cifar10/create_data.cc b/examples/cifar10/create_data.cc
index 5873c0e..37c58f6 100644
--- a/examples/cifar10/create_data.cc
+++ b/examples/cifar10/create_data.cc
@@ -19,16 +19,16 @@
 *
 *************************************************************/
 
-//
-// This code creates training and test DataShard for CIFAR dataset.
-// It is adapted from the convert_cifar_data from Caffe
-//
-// Usage:
-//    create_shard.bin input_folder output_folder
-//
-// The CIFAR dataset could be downloaded at
-//    http://www.cs.toronto.edu/~kriz/cifar.html
-//
+
+/**
+ * Create the training and test DataShard for the CIFAR dataset.
+ * It is adapted from convert_cifar_data in Caffe.
+ *    create_shard.bin <input_folder> <output_folder>
+ *
+ * The store backend (KVFile, HDFS, or other) is the option read from the
+ * JobConf (the first layer's store_conf object).
+ * To upload to HDFS, specify e.g. "hdfs://namenode/examples" as the output folder.
+ */
 
 #include <glog/logging.h>
 #include <fstream>
@@ -38,6 +38,7 @@
 
 #include "singa/io/store.h"
 #include "singa/proto/common.pb.h"
+#include "singa/utils/common.h"
 
 using std::string;
 
@@ -58,7 +59,6 @@ void create_data(const string& input_folder, const string& output_folder) {
   int label;
   char str_buffer[kCIFARImageNBytes];
   string rec_buf;
-
   singa::RecordProto image;
   image.add_shape(3);
   image.add_shape(kCIFARSize);
@@ -69,7 +69,8 @@ void create_data(const string& input_folder, const string& output_folder) {
   for (int i = 0; i < kCIFARImageNBytes; i++)
     mean.add_data(0.f);
 
-  auto store = singa::io::CreateStore("kvfile");
+  string store_backend = (output_folder.find("hdfs")!=-1) ? "hdfsfile" : "kvfile";  
+  auto store = singa::io::CreateStore(store_backend);
   CHECK(store->Open(output_folder + "/train_data.bin", singa::io::kCreate));
   LOG(INFO) << "Preparing training data";
   int count = 0;
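
A small aside on the backend check above: std::string::find() returns
std::string::npos on a miss, and comparing it against -1 only works because
npos is (size_t)-1; it can also trigger signed/unsigned comparison warnings.
The idiomatic spelling (a sketch with the same behaviour, not the committed
line) is:

    std::string store_backend =
        output_folder.find("hdfs") != std::string::npos ? "hdfsfile" : "kvfile";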

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/aada3658/examples/mnist/Makefile.example
----------------------------------------------------------------------
diff --git a/examples/mnist/Makefile.example b/examples/mnist/Makefile.example
index 733633d..48d2fd8 100644
--- a/examples/mnist/Makefile.example
+++ b/examples/mnist/Makefile.example
@@ -23,6 +23,8 @@ libs :=singa glog protobuf
 
 .PHONY: all download create
 
+HDFS_DIR := hdfs://node0:9000/examples/cifar10
+
 download: mnist
 
 mnist:
@@ -33,9 +35,21 @@ mnist:
 	gunzip train-images-idx3-ubyte.gz && gunzip train-labels-idx1-ubyte.gz
 	gunzip t10k-images-idx3-ubyte.gz && gunzip t10k-labels-idx1-ubyte.gz
 
+compile:
+	$(CXX) create_data.cc -std=c++11 -lsinga -lprotobuf -lglog -lhdfs3 -I../../include \
+		-L../../.libs/ -Wl,-unresolved-symbols=ignore-in-shared-libs -Wl,-rpath=../../.libs/ \
+		-o create_data.bin
+
 create:
 	$(CXX) create_data.cc -std=c++11 -lsinga -lprotobuf -lglog -I../../include \
 		-L../../.libs/ -Wl,-unresolved-symbols=ignore-in-shared-libs -Wl,-rpath=../../.libs/ \
 		-o create_data.bin
 	./create_data.bin train-images-idx3-ubyte train-labels-idx1-ubyte train_data.bin
 	./create_data.bin t10k-images-idx3-ubyte t10k-labels-idx1-ubyte test_data.bin
+
+create_hdfs:
+	$(CXX) create_data.cc -std=c++11 -lsinga -lprotobuf -lglog \
+		-I../../include -L../../.libs/ -Wl,-unresolved-symbols=ignore-in-shared-libs \
+		-Wl,-rpath=../../.libs/  -o create_data.bin
+	./create_data.bin cifar-10-batches-bin $(HDFS_DIR) 
+

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/aada3658/examples/mnist/create_data.cc
----------------------------------------------------------------------
diff --git a/examples/mnist/create_data.cc b/examples/mnist/create_data.cc
index 5e51e97..59da860 100644
--- a/examples/mnist/create_data.cc
+++ b/examples/mnist/create_data.cc
@@ -46,6 +46,8 @@ uint32_t swap_endian(uint32_t val) {
     return (val << 16) | (val >> 16);
 }
 
+// output is the full path, unlike create_data for CIFAR, which only
+// specifies the output directory.
 void create_data(const char* image_filename, const char* label_filename,
         const char* output) {
   // Open files
@@ -76,7 +78,9 @@ void create_data(const char* image_filename, const char* label_filename,
   image_file.read(reinterpret_cast<char*>(&cols), 4);
   cols = swap_endian(cols);
 
-  auto store = singa::io::OpenStore("kvfile", output, singa::io::kCreate);
+  // read backend from the job.conf
+  string store_backend = (output_folder.find("hdfs")!=-1) ? "hdfsfile" : "kvfile";  
+  auto store = singa::io::CreateStore(store_backend);
   char label;
   char* pixels = new char[rows * cols];
   int count = 0;
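
Note that the hunk above, as committed, refers to output_folder (the parameter
here is output) and switches from OpenStore to CreateStore without ever opening
the output file; the follow-up commit befe5ca5 later in this thread fixes both.
The corrected shape, written as a sketch with the npos check rather than the
literal hunk:

    std::string store_backend =
        std::string(output).find("hdfs") != std::string::npos ? "hdfsfile" : "kvfile";
    auto store = singa::io::OpenStore(store_backend, output, singa::io::kCreate);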


[6/7] incubator-singa git commit: SINGA-97 Add HDFS Store

Posted by wa...@apache.org.
SINGA-97 Add HDFS Store

Minor change to Makefile.example to get the MNIST HDFS uploader to work.
Use HDFS_MNIST_TRAIN and HDFS_MNIST_TEST to specify the destination paths.

Fixed a minor bug in mnist/create_data.cc.


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/befe5ca5
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/befe5ca5
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/befe5ca5

Branch: refs/heads/master
Commit: befe5ca51c5959fec0f04b4ce06c02a3b9e409e7
Parents: aada365
Author: Anh Dinh <ug...@gmail.com>
Authored: Sat Jan 2 16:16:55 2016 +0800
Committer: WANG Sheng <wa...@gmail.com>
Committed: Sat Jan 2 19:58:14 2016 +0800

----------------------------------------------------------------------
 examples/mnist/Makefile.example | 14 ++++++++------
 examples/mnist/create_data.cc   |  4 ++--
 2 files changed, 10 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/befe5ca5/examples/mnist/Makefile.example
----------------------------------------------------------------------
diff --git a/examples/mnist/Makefile.example b/examples/mnist/Makefile.example
index 48d2fd8..8c1c838 100644
--- a/examples/mnist/Makefile.example
+++ b/examples/mnist/Makefile.example
@@ -23,7 +23,9 @@ libs :=singa glog protobuf
 
 .PHONY: all download create
 
-HDFS_DIR := hdfs://node0:9000/examples/cifar10
+HDFS_MNIST_TRAIN := hdfs://node0:9000/examples/mnist/train_data.bin
+HDFS_MNIST_TEST := hdfs://node0:9000/examples/mnist/test_data.bin
+
 
 download: mnist
 
@@ -48,8 +50,8 @@ create:
 	./create_data.bin t10k-images-idx3-ubyte t10k-labels-idx1-ubyte test_data.bin
 
 create_hdfs:
-	$(CXX) create_data.cc -std=c++11 -lsinga -lprotobuf -lglog \
-		-I../../include -L../../.libs/ -Wl,-unresolved-symbols=ignore-in-shared-libs \
-		-Wl,-rpath=../../.libs/  -o create_data.bin
-	./create_data.bin cifar-10-batches-bin $(HDFS_DIR) 
-
+	$(CXX) create_data.cc -std=c++11 -lsinga -lprotobuf -lglog -I../../include \
+		-L../../.libs/ -Wl,-unresolved-symbols=ignore-in-shared-libs -Wl,-rpath=../../.libs/ \
+		-o create_data.bin
+	./create_data.bin train-images-idx3-ubyte train-labels-idx1-ubyte $(HDFS_MNIST_TRAIN)
+	./create_data.bin t10k-images-idx3-ubyte t10k-labels-idx1-ubyte $(HDFS_MNIST_TEST)

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/befe5ca5/examples/mnist/create_data.cc
----------------------------------------------------------------------
diff --git a/examples/mnist/create_data.cc b/examples/mnist/create_data.cc
index 59da860..ff166b4 100644
--- a/examples/mnist/create_data.cc
+++ b/examples/mnist/create_data.cc
@@ -79,8 +79,8 @@ void create_data(const char* image_filename, const char* label_filename,
   cols = swap_endian(cols);
 
   // read backend from the job.conf
-  string store_backend = (output_folder.find("hdfs")!=-1) ? "hdfsfile" : "kvfile";  
-  auto store = singa::io::CreateStore(store_backend);
+  string store_backend = (string(output).find("hdfs")!=-1) ? "hdfsfile" : "kvfile";  
+	auto store = singa::io::OpenStore(store_backend, output, singa::io::kCreate);
   char label;
   char* pixels = new char[rows * cols];
   int count = 0;
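
With the store opened as above, the create_data tools write serialized
RecordProto messages key by key. A minimal end-to-end sketch of that pattern
(assuming the singa, glog, and protobuf headers; the placeholder record
contents, key format, and the omitted flush/close are illustrative and not
taken from the commits):

    #include <cstdio>
    #include <string>
    #include <glog/logging.h>
    #include "singa/io/store.h"
    #include "singa/proto/common.pb.h"

    // Write n placeholder records to `output`, picking the backend from the path.
    void write_dummy_records(const std::string& output, int n) {
      std::string backend =
          output.find("hdfs") != std::string::npos ? "hdfsfile" : "kvfile";
      auto store = singa::io::OpenStore(backend, output, singa::io::kCreate);
      CHECK(store != nullptr);
      std::string rec_buf;
      char key[16];
      for (int i = 0; i < n; ++i) {
        singa::RecordProto record;
        record.add_shape(1);   // placeholder shape/data, not real CIFAR/MNIST pixels
        record.add_data(0.f);
        record.SerializeToString(&rec_buf);
        std::snprintf(key, sizeof(key), "%05d", i);
        CHECK(store->Write(std::string(key), rec_buf));
      }
      // the real tools also flush and close the store before exiting (omitted here)
    }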