Posted to commits@singa.apache.org by wa...@apache.org on 2016/06/03 07:48:17 UTC

[12/60] incubator-singa git commit: SINGA-163 - Reorganize the project folder layout

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dd1e4afa/include/singa/neuralnet/input_layer.h
----------------------------------------------------------------------
diff --git a/include/singa/neuralnet/input_layer.h b/include/singa/neuralnet/input_layer.h
deleted file mode 100644
index 0499c4b..0000000
--- a/include/singa/neuralnet/input_layer.h
+++ /dev/null
@@ -1,336 +0,0 @@
-/************************************************************
-*
-* Licensed to the Apache Software Foundation (ASF) under one
-* or more contributor license agreements.  See the NOTICE file
-* distributed with this work for additional information
-* regarding copyright ownership.  The ASF licenses this file
-* to you under the Apache License, Version 2.0 (the
-* "License"); you may not use this file except in compliance
-* with the License.  You may obtain a copy of the License at
-*
-*   http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing,
-* software distributed under the License is distributed on an
-* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-* KIND, either express or implied.  See the License for the
-* specific language governing permissions and limitations
-* under the License.
-*
-*************************************************************/
-
-#ifndef SINGA_NEURALNET_INPUT_LAYER_H_
-#define SINGA_NEURALNET_INPUT_LAYER_H_
-
-#include <string>
-#include <vector>
-#include <thread>
-#include "singa/io/store.h"
-#include "singa/io/kvfile.h"
-#include "singa/neuralnet/layer.h"
-
-namespace singa {
-
-/**
- * Base class for loading data from Store.
- */
-class StoreInputLayer : virtual public InputLayer {
- public:
-  ~StoreInputLayer();
-  void Setup(const LayerProto& proto, const vector<Layer*>& srclayers) override;
-  void ComputeFeature(int flag, const vector<Layer*>& srclayers) override;
-
- protected:
-  /**
-   * Helper method for prefetching: reads (key, value) pairs into the
-   * buf_keys_ and buf_vals_ vectors, each of size batchsize_.
-   */
-  void fetch_data();
-  /**
-   * Parse the (key, val) tuple to get the feature (and label).
-   * Subclasses must implement this function.
-   * @param[in] k parse this tuple as the k-th instance of one mini-batch.
-   * @param[in] flag used to guide the parsing, e.g., kDeploy phase should not
-   * parse labels from the tuple.
-   * @param[in] key
-   * @param[in] val
-   */
-  virtual bool Parse(int k, int flag, const string& key, const string& val) = 0;
-
- protected:
-  int batchsize_ = 1;
-  int random_skip_ = 0;
-  io::Store* store_ = nullptr;
-  vector<std::string> buf_keys_, buf_vals_;
-  std::thread *thread_ = nullptr;  // prefetching thread
-};
-
-/**
- * Base layer for parsing a key-value tuple as a feature vector with fixed
- * length. The feature shape is indicated by users in the configuration.
- * Each tuple may have a label.
- */
-class SingleLabelRecordLayer : public StoreInputLayer {
- public:
-  void Setup(const LayerProto& proto, const vector<Layer*>& srclayers) override;
-  void ComputeFeature(int flag, const vector<Layer*>& srclayers) override;
-
- protected:
-  /**
-   * Load a single record (tuple), e.g., the mean or standard deviation vector.
-   */
-  virtual void LoadRecord(const string& backend, const string& path,
-      Blob<float>* to) = 0;
-
- protected:
-  /**
-   * Feature standardization by processing each feature dimension via
-   * @f$ y = (x - mu)/ std @f$
-   * <a href= "http://ufldl.stanford.edu/wiki/index.php/Data_Preprocessing">
-   * UFLDL</a>
-   */
-  Blob<float> mean_, std_;
-};
-/**
- * Specific layer that parses the value string loaded by Store as a line from
- * a CSV file.
- *
- * It assumes the first column is the label, unless has_label_ is configured
- * to false or the data is used in deploy mode.
- */
-class CSVInputLayer : public SingleLabelRecordLayer {
- public:
-  void Setup(const LayerProto& proto, const vector<Layer*>& srclayers) override;
-
- protected:
-  bool Parse(int k, int flag, const string& key, const string& val) override;
-  void LoadRecord(const string& backend,
-                  const string& path,
-                  Blob<float>* to) override;
-
- private:
-  std::string sep_;
-  bool has_label_;
-};
-
-
-/**
- * Specific layer that parses the value string loaded by Store into a
- * RecordProto.
- */
-class RecordInputLayer : public SingleLabelRecordLayer {
- public:
-  void Setup(const LayerProto& proto, const vector<Layer*>& srclayers) override;
-
- protected:
-  /**
-   * Parse key as instance ID and val into RecordProto.
-   * @copydetails StoreInputLayer::Parse()
-   */
-  bool Parse(int k, int flag, const string& key, const string& val) override;
-  void LoadRecord(const string& backend,
-                  const string& path,
-                  Blob<float>* to) override;
-
- private:
-  // TODO(wangwei) decode the image
-  bool encoded_;
-};
-
-/**
- * Preprocess images, including cropping, mirroring, and resizing.
- */
-class ImagePreprocessLayer : public InputLayer {
- public:
-  void Setup(const LayerProto& proto, const vector<Layer*>& srclayers) override;
-  void ComputeFeature(int flag, const vector<Layer*>& srclayers);
-
- private:
-  bool mirror_ = false;
-  int cropsize_ = 0;
-  int resize_ = 0;
-  float scale_ = 1;
-};
-
-class OneHotLayer : public InputLayer {
- public:
-  void Setup(const LayerProto& proto, const vector<Layer*>& srclayers) override;
-  void ComputeFeature(int flag, const vector<Layer*>& srclayers);
-
- private:
-  int batchsize_, dim_;
-};
-
-/**
- * Read the ASCII file as a large string used for RNN models, where each
- * character is a single input to the unrolled RNN layer.
- * The max string length is string::max_size().
- */
-class CharRNNInputLayer : public InputLayer {
- public:
-  void Setup(const LayerProto& proto, const vector<Layer*>& srclayers) override;
-  void ComputeFeature(int flag, const vector<Layer*>& srclayers);
-
- private:
-  int batchsize_ = 0, unroll_len_ = 1;
-  unsigned offset_ = 0;
-  string path_, vocab_path_;
-  string buf_;
-  vector<int> start_;
-  std::unordered_map<char, int> char2index_;
-};
-
-/**
- * Label layer for fetching labels from the src input layer for RNN models.
- * The i-th unrolled layer fetches its label from the input layer via data(i+1).
- * In particular, it shares its data_ Blob with data(i+1) of its src layer.
- */
-class RNNLabelLayer : public InputLayer {
- public:
-  void Setup(const LayerProto& proto, const vector<Layer*>& srclayers);
-  void ComputeFeature(int flag, const vector<Layer*>& srclayers);
-};
-
-
-/****************Deprecated layers******************/
-/**
- * @deprecated please use the StoreInputLayer.
- *
- * Base layer for reading ::Record from local Shard, HDFS, LMDB, etc.
- */
-class DataLayer: virtual public InputLayer {
- public:
-  Blob<float>* mutable_data(const Layer* layer) override { return nullptr; }
-  ConnectionType dst_layer_connection() const override {
-    return kOneToMany;
-  }
-
-  inline int batchsize() const { return batchsize_; }
-  virtual const Record& sample() const {
-    return sample_;
-  }
-  /**
-   * @return the loaded records
-   */
-  virtual const std::vector<Record>& records() const {
-    return records_;
-  }
-
- protected:
-  int random_skip_;
-  int batchsize_;
-  Record sample_;
-  std::vector<Record> records_;
-};
-/**
- * @deprecated Please use the subclasses of StoreInputLayer.
- *
- * Layer for loading Record from DataShard.
- */
-class ShardDataLayer : public DataLayer {
- public:
-  ~ShardDataLayer();
-
-  void Setup(const LayerProto& proto, const vector<Layer*>& srclayers) override;
-  void ComputeFeature(int flag, const vector<Layer*>& srclayers) override;
-
- private:
-  DataShard* shard_;
-};
-/**
- * @deprecated please use the subclasses of StoreInputLayer.
- *
- * Layer for loading Record from LMDB.
- */
-#ifdef USE_LMDB
-#include <lmdb.h>
-class LMDBDataLayer : public DataLayer {
- public:
-  ~LMDBDataLayer();
-
-  void Setup(const LayerProto& proto, const vector<Layer*>& srclayers) override;
-  void OpenLMDB(const std::string& path);
-  void ComputeFeature(int flag, const vector<Layer*>& srclayers) override;
-  void ConvertCaffeDatumToRecord(const CaffeDatum& datum,
-                                 SingleLabelImageRecord* record);
-
- private:
-  MDB_env* mdb_env_;
-  MDB_dbi mdb_dbi_;
-  MDB_txn* mdb_txn_;
-  MDB_cursor* mdb_cursor_;
-  MDB_val mdb_key_, mdb_value_;
-};
-#endif
-
-/******************Parser layers***************/
-/**
- * @deprecated Please use the subclasses of StoreInputLayer which load and parse
- * data in a single layer.
- *
- * Base layer for parsing the input records into Blobs.
- */
-class ParserLayer : public InputLayer {
- public:
-  void ComputeFeature(int flag, const vector<Layer*>& srclayers) override;
-  void ComputeGradient(int flag, const vector<Layer*>& srclayers) override {}
-  ConnectionType dst_layer_connection() const override {
-    return kOneToMany;
-  }
-  /**
-   * Parse records from DataLayer into blob.
-   */
-  virtual void ParseRecords(int flag, const std::vector<Record>& records,
-      Blob<float>* blob) = 0;
-};
-/**
- *
- * @deprecated Please use the SingleLabelRecordLayer which parses both feature
- * and label for each record. Its aux_data() function returns the parsed labels.
- *
- * Derived from ParserLayer to parse the label in SingleLabelImageRecord loaded by
- * ShardDataLayer.
- */
-class LabelLayer : public ParserLayer {
- public:
-  void Setup(const LayerProto& proto, const vector<Layer*>& srclayers) override;
-  void ParseRecords(int flag, const std::vector<Record>& records,
-                    Blob<float>* blob) override;
-};
-
-/**
- * @deprecated Please use the subclasses of StoreInputLayer.
- *
- * Derived from ParserLayer to parse MNIST features from SingleLabelImageRecord.
- */
-class MnistLayer : public ParserLayer {
- public:
-  void Setup(const LayerProto& proto, const vector<Layer*>& srclayers) override;
-  void ParseRecords(int flag, const std::vector<Record>& records,
-                    Blob<float>* blob) override;
-
- protected:
-  float norm_a_, norm_b_;
-};
-/**
- * @deprecated please use the ImagePreprocessLayer which preprocess image
- * feature from data Blob of source layers.
- *
- * Derived from ParserLayer to parse RGB image features from
- * SingleLabelImageRecord.
- */
-class RGBImageLayer : public ParserLayer {
- public:
-  void Setup(const LayerProto& proto, const vector<Layer*>& srclayers) override;
-  void ParseRecords(int flag, const std::vector<Record>& records,
-                    Blob<float>* blob) override;
-
- private:
-  float scale_;
-  int cropsize_;
-  bool mirror_;
-  Blob<float> mean_;
-};
-}  // namespace singa
-
-#endif  // SINGA_NEURALNET_INPUT_LAYER_H_
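
For reference, the parsing contract documented above for CSVInputLayer (the first column is the label unless has_label_ is false, or in deploy mode) can be illustrated with a small standalone C++ sketch. This is not SINGA code; the ParseCSVRecord helper and its signature are hypothetical:

    #include <iostream>
    #include <sstream>
    #include <string>
    #include <vector>

    // Hypothetical standalone illustration of what CSVInputLayer::Parse is
    // documented to do: split one CSV value string into a label (first column,
    // if present) and a feature vector.
    bool ParseCSVRecord(const std::string& val, char sep, bool has_label,
                        int* label, std::vector<float>* feature) {
      std::istringstream line(val);
      std::string field;
      bool first = true;
      while (std::getline(line, field, sep)) {
        if (first && has_label)
          *label = std::stoi(field);          // first column is the label
        else
          feature->push_back(std::stof(field));
        first = false;
      }
      return !feature->empty();
    }

    int main() {
      int label = -1;
      std::vector<float> feature;
      ParseCSVRecord("3,0.5,1.25,-0.75", ',', true, &label, &feature);
      std::cout << "label=" << label << " dims=" << feature.size() << "\n";
      return 0;  // prints: label=3 dims=3
    }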

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dd1e4afa/include/singa/neuralnet/layer.h
----------------------------------------------------------------------
diff --git a/include/singa/neuralnet/layer.h b/include/singa/neuralnet/layer.h
deleted file mode 100644
index c8ea3fc..0000000
--- a/include/singa/neuralnet/layer.h
+++ /dev/null
@@ -1,376 +0,0 @@
-/************************************************************
-*
-* Licensed to the Apache Software Foundation (ASF) under one
-* or more contributor license agreements.  See the NOTICE file
-* distributed with this work for additional information
-* regarding copyright ownership.  The ASF licenses this file
-* to you under the Apache License, Version 2.0 (the
-* "License"); you may not use this file except in compliance
-* with the License.  You may obtain a copy of the License at
-*
-*   http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing,
-* software distributed under the License is distributed on an
-* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-* KIND, either express or implied.  See the License for the
-* specific language governing permissions and limitations
-* under the License.
-*
-*************************************************************/
-
-#ifndef SINGA_NEURALNET_LAYER_H_
-#define SINGA_NEURALNET_LAYER_H_
-
-#include <string>
-#include <vector>
-#include "singa/proto/common.pb.h"
-#include "singa/proto/job.pb.h"
-#include "singa/utils/common.h"
-#include "singa/utils/blob.h"
-#include "singa/utils/param.h"
-
-namespace singa {
-using std::vector;
-using std::string;
-
-// TODO(wangwei) make AuxType a template argument for Layer.
-using AuxType = int;
-
-inline const string AddUnrollingPrefix(int unroll_idx, const string& name) {
-  return std::to_string(unroll_idx) + "#" + name;
-}
-inline const string AddPartitionSuffix(int partition_idx, const string& name) {
-  return name + "@" + std::to_string(partition_idx);
-}
-
-
-inline const string AddPrefixSuffix(int unroll_idx, int partition_idx,
-    const string& name) {
-  return std::to_string(unroll_idx) + "#" + name + "@" +
-    std::to_string(partition_idx);
-}
-/**
- * Base layer class.
- *
- * Subclasses should implement at least
- * Layer::ComputeFeature() and Layer::ComputeGradient()
- * functions in accordance with the NeuralNet::TrainOneBatch function.
- */
-
-class Layer {
- public:
-  /**
-   * Create a sub-layer instance based on proto.type();
-   *
-   * @param proto configuration of the layer instance.
-   * @return pointer to the newly created layer instance.
-   */
-  static Layer* Create(const LayerProto& proto);
-
-  Layer() {}
-  virtual ~Layer() {}
-
-  /**
-   * Create a layer instance for the Python binding (production/test mode).
-   */
-  static Layer* CreateLayer(const string str);
-  static void SetupLayer(Layer* layer, const string str, const vector<Layer*>& srclayers);
-
-  /**
-   * Setup layer properties.
-   *
-   * Set up members, e.g., shapes of Param objects, based on the layer
-   * configuration and connected layers.
-   * It should check the partition setting when setting up the properties.
-   *
-   * @param conf layer configuration.
-   * @param srclayers source layers that connect to this layer.
-   */
-  virtual void Setup(const LayerProto& conf, const vector<Layer*>& srclayers) {
-    layer_conf_ = conf;
-    datavec_.push_back(&data_);
-    gradvec_.push_back(&grad_);
-  }
-
-
-  /**
-   * Compute features of this layer based on connected layers.
-   *
-   * @param[in] flag set by the TrainOneBatch function, e.g., to indicate the
-   * running phase (kForward|kTrain, kForward|kTest, etc).
-   * @param[in] srclayers source layers that connect to this layer.
-   */
-  virtual void ComputeFeature(int flag, const vector<Layer*>& srclayers) = 0;
-  /**
-   * Compute gradients for parameters associated with this layer.
-   * It may also compute the gradients of the loss w.r.t the source layers.
-   *
-   * \copydetails ComputeFeature().
-   */
-  virtual void ComputeGradient(int flag, const vector<Layer*>& srclayers) = 0;
-  /**
-   * Layers that have parameters must override this function to return all Param
-   * objects associated with this layer.
-   *
-   * @return parameters associated with this layer.
-   */
-  virtual const std::vector<Param*> GetParams() const {
-    return std::vector<Param*> {};
-  }
-  virtual void SetParams(std::vector<Param*>) {}
-  /**
-   * Return the connection type between one neuron of this layer and its source
-   * layer.
-   *
-   * Currently supports two connection types: kOneToOne and kOneToAll.
-   * - kOneToOne indicates the neuron depends on only one neuron from src layer.
-   * - kOneToAll indicates the neuron depends on all neurons from src layer.
-   * TODO(wangwei) support kOneToMany.
-   *
-   * @param[in] k index of source layer; currently only k = 0 is supported.
-   * @return connection type.
-   */
-  virtual ConnectionType src_neuron_connection(int k) const {
-    // CHECK_LT(k, srclayers_.size());
-    return kOneToOne;
-  }
-  /**
-   * Return the connection type of this layer and all dst layers.
-   *
-   * Currently supports two connection types: kOneToOne and kOneToMany.
-   * - kOneToOne indicates the users implement the ComputeFeature and
-   * ComputeGradient functions considering only one dst layer. In this case,
-   * a SplitLayer will be added automatically to connect this layer with all
-   * dst layers.
-   * - kOneToMany indicates this layer has already considered multiple dst
-   *   layers in the implementation.
-   *
-   * @return connection type; default is kOneToOne.
-   */
-  virtual ConnectionType dst_layer_connection() const {
-    return kOneToOne;
-  }
-  /**
-   * To display layer info, e.g., aggregated loss/accuracy, or the norm of the feature
-   * vector and norm of parameters.
-   *
-   * @param[in] debug whether to print the debug info
-   * @param[in] flag used to get the calling phase, e.g., forward of training
-   * (kForward | kTrain).
-   * @return info string about this layer, which is printed into the log.
-   */
-  virtual const std::string ToString(bool debug, int flag);
-  /**
-   * @return partition dimension of this layer,
-   * - -1 for no partition.
-   * -  0 for partition on the data dimension, i.e., partitioning the mini-batch
-   *    into sub-mini-batches.
-   * -  1 for partition this layer on feature dimension, i.e., the feature
-   *    vector of each instance is partitioned into sub-vectors.
-   */
-  inline int partition_dim() const {
-    CHECK_LE(layer_conf_.partition_dim(), 1);
-    return layer_conf_.partition_dim();
-  }
-  /**
-   * @return the partition ID (i.e., the worker ID to whom this layer is
-   * dispatched) of this layer, which is a sublayer partitioned from the
-   * original layer.
-   */
-  inline int partition_id() const { return layer_conf_.partition_id(); }
-  /**
-   * @return total number of partitions (i.e., sub-layers) of the original
-   * layer of this layer.
-   */
-  inline int num_partitions() const { return layer_conf_.num_partitions(); }
-  /**
-   * @return the type of this layer, only valid for built-in layer (types).
-   */
-  inline LayerType type() const { return layer_conf_.type(); }
-  /**
-   * @return user-defined layer type.
-   */
-  inline const std::string& user_type() const {
-    return layer_conf_.user_type();
-  }
-  /**
-   * Return the name of this layer.
-   */
-  inline const std::string& name() const { return layer_conf_.name(); }
-  /**
-   * Return the index of the unrolled layer within the unrolling group, which
-   * should be in [0, max_unrolling_length).
-   */
-  inline const int unroll_index() const { return layer_conf_.unroll_index(); }
-
-  /**
-   * @return a const ref for Blob vector storing feature values of this layer.
-   */
-  virtual const vector<Blob<float>*>& data() {
-    return datavec_;
-  }
-
-  /**
-   * @param[in] from pointer to one of the dst layers. For some layers, they have
-   * more than one data Blob. In this case, this argument identifies the layer
-   * that is requesting the data Blob.
-   * @return a const ref for Blob storing feature values of this layer.
-   * @deprecated {This function will be deleted, use
-   * virtual const vector<Blob<float>>& data() const or
-   * virtual const Blob<float>& data(int k) const instead}.
-   */
-  virtual const Blob<float>& data(const Layer* from) {
-    return data_;
-  }
-  /**
-   * @return a const ref for the kth Blob.
-   * TODO(wangwei) if make this function const, there will be a warning
-   * indicating that data(const Layer*) and this function are ambiguous for
-   * data(0).
-   */
-  virtual const Blob<float>& data(int k) {
-    return *datavec_.at(k);
-  }
-
-  /**
-   * @see data().
-   * @return the pointer to the Blob storing feature values of this layer.
-   * @deprecated {This function will be deleted, use
-   * virtual Blob<float>* mutable_data(int k) instead}.
-   */
-  virtual Blob<float>* mutable_data(const Layer* from) {
-    return &data_;
-  }
-  /**
-   * @return the pointer to the kth Blob.
-   */
-  virtual Blob<float>* mutable_data(int k) {
-    return datavec_.at(k);
-  }
-  /**
-   * @return auxiliary data, e.g., image label.
-   */
-  virtual const vector<AuxType>& aux_data(const Layer* from = nullptr) {
-    return aux_data_;
-  }
-  /**
-   * @see data().
-   * @return the const ref of the Blob for the gradient of this layer, mainly
-   * used in BP algorithm.
-   * @deprecated {This function will be deleted, use
-   * virtual const vector<Blob<float>>& grad() const or
-   * virtual const Blob<float>& grad(int k) const instead}.
-   */
-  virtual const Blob<float>& grad(const Layer* from) {
-    return grad_;
-  }
-  /**
-   * @see data().
-   * @return the const ref of the Blob vector for the gradient of this layer.
-   */
-  virtual const vector<Blob<float>*>& grad() const {
-    return gradvec_;
-  }
-  /**
-   * @return the const ref of the kth Blob for the gradient of this layer.
-   */
-  virtual const Blob<float>& grad(int k) const {
-    return *gradvec_.at(k);
-  }
-  /**
-   * @see data().
-   * @return a pointer to the Blob storing gradients of this layer, mainly
-   * used in BP algorithm.
-   */
-  virtual Blob<float>* mutable_grad(const Layer* from) {
-    return &grad_;
-  }
-  /**
-   * @see data().
-   * @return a pointer to the kth Blob storing gradients of this layer, mainly
-   * used in BP algorithm.
-   */
-  virtual Blob<float>* mutable_grad(int k) {
-    return gradvec_.at(k);
-  }
-
- protected:
-  LayerProto layer_conf_;
-  Blob<float> data_, grad_;
-  vector<AuxType> aux_data_;
-  vector<Blob<float>*> datavec_, gradvec_;
-};
-/**************** Layer categories *****************/
-/**
- * Base layer for connecting layers when the neural net is partitioned.
- */
-class ConnectionLayer : virtual public Layer {
-  // defined as a layer category
-};
-
-
-/**
- * Base layer for getting input data. May include layers for loading records
- * and parsing records.
- */
-class InputLayer : virtual public Layer {
- public:
-  void ComputeGradient(int flag, const vector<Layer*>& srclayers) override {}
-  ConnectionType dst_layer_connection() const override { return kOneToMany; }
-  Blob<float>* mutable_grad(const Layer* layer) override {
-    return nullptr;
-    // LOG(FATAL) << "Input layer has no gradient blob";
-  }
-  const Blob<float>& grad(const Layer* from) override {
-    return grad_;
-    // LOG(FATAL) << "Input layer has no gradient blob";
-  }
-};
-
-using SingleLabelImageRecord = RecordProto;
-
-/**
- * Base layer for feature transformation, e.g., ConvolutionLayer, PoolingLayer,
- * etc.
- */
-class NeuronLayer : virtual public Layer {
-  // defined as a layer category
-};
-
-
-/**
- * Base layer for calculating loss and doing BackPropagation.
- */
-class LossLayer : virtual public Layer {
- public:
-  Blob<float>* mutable_grad(const Layer* layer) override {
-    return nullptr;
-    // LOG(FATAL) << "Loss layer has no gradient blob";
-  }
-  const Blob<float>& grad(const Layer* from) override {
-    return grad_;
-    // LOG(FATAL) << "Loss layer has no gradient blob";
-  }
-};
-
-/**
- * Base layer for collecting features into a disk file, HTTP stream, etc.
- */
-class OutputLayer : virtual public Layer {
- public:
-  void ComputeGradient(int flag, const vector<Layer*>& srclayers) override {}
-  Blob<float>* mutable_grad(const Layer* layer) override {
-    return nullptr;
-    // LOG(FATAL) << "Output layer has no gradient blob";
-  }
-  const Blob<float>& grad(const Layer* from) override {
-    return grad_;
-    // LOG(FATAL) << "Output layer has no gradient blob";
-  }
-};
-
-
-}  // namespace singa
-#endif  // SINGA_NEURALNET_LAYER_H_
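
The naming helpers declared above (AddUnrollingPrefix, AddPartitionSuffix, AddPrefixSuffix) are plain string utilities; copied, slightly simplified, into a standalone sketch they produce names like "0#conv1@2":

    #include <iostream>
    #include <string>

    // Unrolled-layer prefix: "<unroll_idx>#<name>".
    std::string AddUnrollingPrefix(int unroll_idx, const std::string& name) {
      return std::to_string(unroll_idx) + "#" + name;
    }
    // Partition suffix: "<name>@<partition_idx>".
    std::string AddPartitionSuffix(int partition_idx, const std::string& name) {
      return name + "@" + std::to_string(partition_idx);
    }
    // Both: "<unroll_idx>#<name>@<partition_idx>".
    std::string AddPrefixSuffix(int unroll_idx, int partition_idx,
                                const std::string& name) {
      return std::to_string(unroll_idx) + "#" + name + "@" +
             std::to_string(partition_idx);
    }

    int main() {
      std::cout << AddUnrollingPrefix(0, "conv1") << "\n";  // 0#conv1
      std::cout << AddPartitionSuffix(2, "conv1") << "\n";  // conv1@2
      std::cout << AddPrefixSuffix(0, 2, "conv1") << "\n";  // 0#conv1@2
      return 0;
    }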

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dd1e4afa/include/singa/neuralnet/loss_layer.h
----------------------------------------------------------------------
diff --git a/include/singa/neuralnet/loss_layer.h b/include/singa/neuralnet/loss_layer.h
deleted file mode 100644
index 53ddc82..0000000
--- a/include/singa/neuralnet/loss_layer.h
+++ /dev/null
@@ -1,83 +0,0 @@
-/************************************************************
-*
-* Licensed to the Apache Software Foundation (ASF) under one
-* or more contributor license agreements.  See the NOTICE file
-* distributed with this work for additional information
-* regarding copyright ownership.  The ASF licenses this file
-* to you under the Apache License, Version 2.0 (the
-* "License"); you may not use this file except in compliance
-* with the License.  You may obtain a copy of the License at
-*
-*   http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing,
-* software distributed under the License is distributed on an
-* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-* KIND, either express or implied.  See the License for the
-* specific language governing permissions and limitations
-* under the License.
-*
-*************************************************************/
-
-#ifndef SINGA_NEURALNET_LOSS_LAYER_H_
-#define SINGA_NEURALNET_LOSS_LAYER_H_
-
-#include <vector>
-#include <string>
-#include "singa/neuralnet/layer.h"
-#include "singa/neuralnet/neuron_layer.h"
-
-namespace singa {
-using std::vector;
-/**
- * Squared Euclidean loss as @f$0.5 ||p - t||^2@f$, where p is the prediction
- * and t is the ground truth.
- */
-class EuclideanLossLayer : public LossLayer {
- public:
-  void Setup(const LayerProto& conf, const vector<Layer*>& srclayers) override;
-  void ComputeFeature(int flag, const vector<Layer*>& srclayers) override;
-  void ComputeGradient(int flag, const vector<Layer*>& srclayers) override;
-  const std::string ToString(bool debug, int flag) override;
-
- private:
-  int counter_ = 0;
-  float loss_ = 0.0f;
-};
-/**
- * Cross-entropy loss applied to the probabilities computed from Softmax.
- * @f$ L_i = -\log P_{t_i} @f$, where @f$ t_i \in [0, C) @f$ is the label for the
- * i-th object and C is the total number of classes.
- */
-class SoftmaxLossLayer : public LossLayer {
- public:
-  void Setup(const LayerProto& conf, const vector<Layer*>& srclayers) override;
-  void ComputeFeature(int flag, const vector<Layer*>& srclayers) override;
-  void ComputeGradient(int flag, const vector<Layer*>& srclayers) override;
-  const std::string ToString(bool debug, int flag) override;
-
- private:
-  int batchsize_, topk_, dim_, counter_ = 0;
-  float scale_;
-  float loss_ = 0.0f, accuracy_ = 0.0f;
-};
-
-#ifdef USE_CUDNN
-class CudnnSoftmaxLossLayer : public LossLayer{
- public:
-  void Setup(const LayerProto& conf, const vector<Layer*>& srclayers) override;
-  void ComputeFeature(int flag, const vector<Layer*>& srclayers) override;
-  void ComputeGradient(int flag, const vector<Layer*>& srclayers) override;
-  const std::string ToString(bool debug, int flag) override;
-
- private:
-  int batchsize_, dim_;
-  int counter_ = 0;
-  float loss_ = 0.0f;
-
-  CudnnSoftmaxLayer softmax_;
-};
-#endif
-}  // namespace singa
-
-#endif  // SINGA_NEURALNET_LOSS_LAYER_H_
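
The two loss formulas documented above, 0.5*||p - t||^2 for EuclideanLossLayer and L_i = -log P_{t_i} for SoftmaxLossLayer, can be checked numerically with a standalone sketch; this is an illustration, not the SINGA implementation:

    #include <algorithm>
    #include <cmath>
    #include <iostream>
    #include <vector>

    // Squared Euclidean loss for one instance: 0.5 * ||p - t||^2.
    float EuclideanLoss(const std::vector<float>& p, const std::vector<float>& t) {
      float sum = 0.0f;
      for (size_t i = 0; i < p.size(); ++i) {
        float d = p[i] - t[i];
        sum += d * d;
      }
      return 0.5f * sum;
    }

    // Softmax cross-entropy for one instance: L = -log(softmax(logits)[label]).
    float SoftmaxLoss(const std::vector<float>& logits, int label) {
      float max = logits[0];
      for (float v : logits) max = std::max(max, v);
      float denom = 0.0f;
      for (float v : logits) denom += std::exp(v - max);  // numerically stable
      return -((logits[label] - max) - std::log(denom));
    }

    int main() {
      std::cout << EuclideanLoss({1.0f, 2.0f}, {0.0f, 2.0f}) << "\n";  // 0.5
      std::cout << SoftmaxLoss({1.0f, 2.0f, 3.0f}, 2) << "\n";         // ~0.4076
      return 0;
    }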

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dd1e4afa/include/singa/neuralnet/neuralnet.h
----------------------------------------------------------------------
diff --git a/include/singa/neuralnet/neuralnet.h b/include/singa/neuralnet/neuralnet.h
deleted file mode 100644
index 33ad38c..0000000
--- a/include/singa/neuralnet/neuralnet.h
+++ /dev/null
@@ -1,173 +0,0 @@
-/************************************************************
-*
-* Licensed to the Apache Software Foundation (ASF) under one
-* or more contributor license agreements.  See the NOTICE file
-* distributed with this work for additional information
-* regarding copyright ownership.  The ASF licenses this file
-* to you under the Apache License, Version 2.0 (the
-* "License"); you may not use this file except in compliance
-* with the License.  You may obtain a copy of the License at
-*
-*   http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing,
-* software distributed under the License is distributed on an
-* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-* KIND, either express or implied.  See the License for the
-* specific language governing permissions and limitations
-* under the License.
-*
-*************************************************************/
-
-#ifndef SINGA_NEURALNET_NEURALNET_H_
-#define SINGA_NEURALNET_NEURALNET_H_
-
-#include <string>
-#include <vector>
-#include <unordered_map>
-
-#include "singa/neuralnet/layer.h"
-#include "singa/proto/job.pb.h"
-#include "singa/utils/factory.h"
-#include "singa/utils/graph.h"
-
-namespace singa {
-using std::unordered_map;
-using std::string;
-using std::vector;
-/**
- * The neural network is constructed from user configurations in NetProto.
- *
- * Some layers, e.g., SplitLayer and BridgeSrcLayer/BridgeDstLayer
- * will be added implicitly to partition the neural network.
- * TODO(wangwei) create wrappers for popular models, e.g., MLP, CNN.
- */
-class NeuralNet {
- public:
-  /**
-   * Create the neural network for training, test or validation.
-   *
-   * Parameters for test/validation net can share those from training after
-   * setup (done outside of this function).
-   *
-   * @param net_conf proto for the neural network
-   * @param phase test/training/validation
-   * @param npartitions num of partitions, do partitioning if num > 1
-   * @return pointer to a neural net
-   */
-  static NeuralNet* Create(const NetProto& net_conf, Phase phase,
-                           int npartitions);
-
-  static const NetProto Unrolling(const NetProto& net_conf);
-  /**
-   * construct the net structure from the protocol buffer.
-   * @param netproto neural net config
-   * @param npartitions num of partitions. 1 for no partitioning.
-   */
-  NeuralNet(NetProto net_conf, int num_partitions);
-  ~NeuralNet();
-  /**
-   * Load net params from checkpoint files.
-   * @param path checkpoint files
-   */
-  void Load(const vector<string>& path);
-  /**
-   * Load specified Param objects from checkpoint files.
-   *
-   * Param objects and blobs are matched based on name.
-   * The param from previous checkpoint files will be overwritten by
-   * the param with the same name in later checkpoint files.
-   *
-   * @param[in] path
-   * @param[in,out] params load Blobs with the same name as the Params in
-   * this dictionary. The loaded values are copied into the corresponding Param
-   * objects.
-   */
-  static void Load(const vector<string>& path,
-                   const unordered_map<string, Param*>& params);
-  /**
-   * To display the adjacency layers
-  std::string ToAdjacency();
-   */
-  /**
-   * Share memory of parameter values from other neuralnet
-   * @param[in] other the neural net from which to share the Params
-   * @param[in] cpu_only if true only share cpu memory; else, share both cpu
-   * and gpu memory.
-   */
-  void ShareParamsFrom(NeuralNet* other, bool cpu_only);
-  inline const std::vector<Layer*>& layers() const { return layers_; }
-  inline const std::vector<Param*>& params() const { return params_; }
-  inline Layer* name2layer(std::string name) const {
-    if (name2layer_.find(name) == name2layer_.end())
-      return nullptr;
-    else
-      return name2layer_.at(name);
-  }
-  inline const std::vector<Layer*>& srclayers(const Layer* layer) const {
-    CHECK(src_map_.find(layer) != src_map_.end())
-      << "layer (" << layer->name() << " ) has no source layers";
-    return src_map_.at(layer);
-  }
-  Layer* last_unroll_layer(const Layer* layer) const {
-    auto pos = layer->name().find("#");
-    if (pos == std::string::npos)
-      return nullptr;
-    string last_name = std::to_string(unroll_len_) + layer->name().substr(pos);
-    CHECK(name2layer_.find(last_name) != name2layer_.end())
-      << "layer name = " << last_name << " has no unroll layers";
-    return name2layer_.at(last_name);
-  }
-  inline Param* paramid2param(int id) const { return paramid2param_.at(id); }
-
-  /**
-   * Convert the neural net into a graph representation.
-   * Each layer is converted into a node.
-   * @param include_shape if true, label each node with shape info
-   */
-  const Graph ToGraph(bool include_shape) const;
-
- protected:
-  /**
-   * Create a neural net graph, one node for each layer.
-   *
-   * Partition the graph if npartitions > 1; each layer is sliced according to
-   * its own partition setting.
-   * @param netproto
-   * @param npartitions
-   * @return neural net graph
-   */
-  Graph* CreateGraph(const NetProto& netproto, int num_partitions);
-  /**
-   * Create neural net from graph, one layer per node.
-   */
-  void CreateNetFromGraph(Graph* graph);
-  /**
-   * prepare data structures, e.g., params_, layers_, etc.
-   */
-  void PrepareDataStructures();
-  void PrepareDataStructures(const NetProto& proto);
-  /**
-   * add split layers, due to connections to multiple dst-layers
-   */
-  NetProto AddModelSplitLayers(const NetProto& netproto);
-  /**
-   * add connection layers, due to partitioning of the whole neural net;
-   * this should be done after AddModelSplitLayers()
-   */
-  NetProto AddPartitionConnectionLayers(const NetProto& netproto,
-                                        int npartitions);
-
- protected:
-  int unroll_len_ = 1;
-  std::vector<Layer*> layers_;
-  std::vector<Param*> params_;
-
-  unordered_map<std::string, Layer*> name2layer_;
-  unordered_map<int, Param*> paramid2param_;
-  unordered_map<const Layer*, std::vector<Layer*>> src_map_;
-};
-
-}  // namespace singa
-
-#endif  // SINGA_NEURALNET_NEURALNET_H_
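
The bookkeeping documented above (name2layer_ mapping names to layers, src_map_ mapping each layer to its source layers) boils down to two hash maps; a hypothetical standalone sketch of the lookup pattern, with a simplified Layer stand-in rather than the SINGA class:

    #include <iostream>
    #include <string>
    #include <unordered_map>
    #include <vector>

    // Minimal stand-in for the SINGA Layer class, for illustration only.
    struct Layer { std::string name; };

    int main() {
      Layer data{"data"}, conv{"conv1"}, loss{"loss"};
      // name -> layer, as name2layer_ is documented to hold.
      std::unordered_map<std::string, Layer*> name2layer{
          {"data", &data}, {"conv1", &conv}, {"loss", &loss}};
      // layer -> source layers, as src_map_ is documented to hold.
      std::unordered_map<const Layer*, std::vector<Layer*>> src_map{
          {&conv, {&data}}, {&loss, {&conv}}};

      // name2layer(): return nullptr when the name is unknown.
      auto it = name2layer.find("conv1");
      Layer* layer = (it == name2layer.end()) ? nullptr : it->second;
      // srclayers(): look up the source layers of a layer.
      if (layer != nullptr) {
        for (Layer* src : src_map.at(layer))
          std::cout << layer->name << " <- " << src->name << "\n";  // conv1 <- data
      }
      return 0;
    }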

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dd1e4afa/include/singa/neuralnet/neuron_layer.h
----------------------------------------------------------------------
diff --git a/include/singa/neuralnet/neuron_layer.h b/include/singa/neuralnet/neuron_layer.h
deleted file mode 100644
index e6f0fd5..0000000
--- a/include/singa/neuralnet/neuron_layer.h
+++ /dev/null
@@ -1,560 +0,0 @@
-/************************************************************
-*
-* Licensed to the Apache Software Foundation (ASF) under one
-* or more contributor license agreements.  See the NOTICE file
-* distributed with this work for additional information
-* regarding copyright ownership.  The ASF licenses this file
-* to you under the Apache License, Version 2.0 (the
-* "License"); you may not use this file except in compliance
-* with the License.  You may obtain a copy of the License at
-*
-*   http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing,
-* software distributed under the License is distributed on an
-* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-* KIND, either express or implied.  See the License for the
-* specific language governing permissions and limitations
-* under the License.
-*
-*************************************************************/
-
-#ifndef SINGA_NEURALNET_NEURON_LAYER_H_
-#define SINGA_NEURALNET_NEURON_LAYER_H_
-
-#include <vector>
-#include <string>
-#include "singa/neuralnet/layer.h"
-#include "singa/proto/job.pb.h"
-#include "singa/utils/context.h"
-#include "singa/utils/singleton.h"
-
-#ifdef USE_CUDNN
-#include <cudnn.h>
-#endif
-
-namespace singa {
-
-/* Activation layer applies the following activations:
- * - "relu",    @f$ f(x) = max(0, x)@f$
- * - "sigmoid", @f$ f(x)=1/(1+exp(-x)) @f$
- * - "tanh",    @f$ f(x) = tanh(x) @f$
- * - "stanh",   scaled tanh @f$f(x)=1.7159047 * tanh(0.66666667 * x)@f$, valid
- *   only for CPU training.
- * It may share data and grad with its (single) source layer depending on
- * the share_srclayer_blob configuration field.
- */
-class ActivationLayer : public NeuronLayer {
- public:
-  void Setup(const LayerProto& conf, const vector<Layer*>& srclayers) override;
-  void ComputeFeature(int flag, const vector<Layer*>& srclayers) override;
-  void ComputeGradient(int flag, const vector<Layer*>& srclayers) override;
-
- protected:
-  bool share_with_srclayer = false;
-  std::string method_;
-};
-
-/**
- * Convolution layer.
- * Currently using Mshadow to do convolution operations. TODO(wangwei) remove
- * the dependency on Mshadow and use im2col from Caffe to implement the CPU
- * version. For the GPU version, there is the class CudnnConvLayer.
- */
-class ConvolutionLayer : public NeuronLayer {
- public:
-  ~ConvolutionLayer();
-
-  void Setup(const LayerProto& proto, const vector<Layer*>& srclayers) override;
-  void ComputeFeature(int flag, const vector<Layer*>& srclayers) override;
-  void ComputeGradient(int flag, const vector<Layer*>& srclayers) override;
-  const std::vector<Param*> GetParams() const override {
-    std::vector<Param*> params{weight_, bias_};
-    return params;
-  }
-  ConnectionType src_neuron_connection(int k) const  override {
-    // CHECK_LT(k, srclayers_.size());
-    return kOneToAll;
-  }
-
- protected:
-  int kernel_x_, pad_x_,  stride_x_;
-  int kernel_y_, pad_y_,  stride_y_;
-  int batchsize_,  channels_, height_, width_;
-  int col_height_, col_width_, conv_height_, conv_width_, num_filters_;
-  Param* weight_ = nullptr, *bias_ = nullptr;
-  Blob<float> col_data_, col_grad_;
-};
-
-/**
- * Implement convolution operations using im2col from Caffe.
- */
-class CConvolutionLayer : public ConvolutionLayer {
- public:
-  void ComputeFeature(int flag, const vector<Layer*>& srclayers) override;
-  void ComputeGradient(int flag, const vector<Layer*>& srclayers) override;
-};
-
-/**
- * Layer that drops out some neurons randomly according to a user defined drop
- * ratio (default is 0.5). It helps reduce overfitting.
- */
-class DropoutLayer : public NeuronLayer {
- public:
-  void Setup(const LayerProto& proto, const vector<Layer*>& srclayers) override;
-  void ComputeFeature(int flag, const vector<Layer*>& srclayers) override;
-  void ComputeGradient(int flag, const vector<Layer*>& srclayers) override;
- protected:
-  // drop probability
-  float pdrop_;
-  /* record which neuron is dropped, required for back propagating gradients,
-   * if mask[i]=0, then the i-th neuron is dropped.
-   */
-  Blob<float> mask_;
-};
-/**
- * This layer is a dummy and does no real work.
- * It is used for testing purposes only.
- *
- * Used as an input layer, it generates random data;
- * used as an output layer, it generates random grad;
- * used as a neuron layer, it replicates data and grad.
- */
-class DummyLayer: public NeuronLayer {
- public:
-  void Setup(const std::string str, const vector<Layer*>& srclayers);
-  void Setup(const LayerProto& proto, const vector<Layer*>& srclayers) override;
-  void ComputeFeature(int flag, const vector<Layer*>& srclayers) override;
-  void ComputeGradient(int flag, const vector<Layer*>& srclayers) override;
-  void Feed(int batchsize, vector<float>& data, vector<int>& aux_data);
-  Layer* ToLayer() { return this;}
-
- private:
-  bool input_ = false;  // use as input layer
-  bool output_ = false;  // use as output layer
-  int batchsize_ = 1;  // use for input layer
-};
-
-/**
- * Embedding layer that converts an array of index ID into a matrix.
- *
- * Each index ID corresponds to a word (or feature) vector in the vocabulary
- * matrix maintained by the embedding layer.
- * The index ID ranges within [0, |D|), where |D| is the size of the vocabulary,
- * i.e., the number of rows of the vocabulary matrix.
- * If the index is -1, it is a padding word; a feature vector with
- * all values 0 will be constructed and inserted into the feature Blob.
- * Users handle special words by themselves. For example, the index 0 could be
- * the starting word/symbol of a sentence, the index 1 could be the ending
- * word/symbol of a sentence.
- */
-class EmbeddingLayer : public NeuronLayer {
- public:
-  ~EmbeddingLayer() {
-    delete vocab_;
-  }
-  void Setup(const LayerProto& proto, const vector<Layer*>& srclayers) override;
-  void ComputeFeature(int flag, const vector<Layer*>& srclayers) override;
-  void ComputeGradient(int flag, const vector<Layer*>& srclayers) override;
-  const std::vector<Param*> GetParams() const override {
-    std::vector<Param*> params;
-    params.push_back(vocab_);
-    return params;
-  }
-
- private:
-  int vocab_size_, feature_dim_, batchsize_;
-  //!< the vocabulary matrix to be learned
-  Param *vocab_;
-};
-
-class GRULayer : public NeuronLayer {
- public:
-  ~GRULayer();
-  void Setup(const LayerProto& proto, const vector<Layer*>& srclayers) override;
-  void ComputeFeature(int flag, const vector<Layer*>& srclayers) override;
-  void ComputeGradient(int flag, const vector<Layer*>& srclayers) override;
-  ConnectionType dst_layer_connection() const override {
-    return kOneToMany;
-  }
-  Blob<float>* mutable_grad(const Layer* from) override {
-    if (typeid(*from) == typeid(GRULayer))
-      return gradvec_[1];
-    else
-      return gradvec_[0];
-  }
-  const Blob<float>& grad(const Layer* from) override {
-    if (typeid(*from) == typeid(GRULayer))
-      return *gradvec_[1];
-    else
-      return *gradvec_[0];
-  }
-  const std::vector<Param*> GetParams() const override {
-    std::vector<Param*> params{weight_z_hx_, weight_r_hx_, weight_c_hx_,
-      weight_z_hh_, weight_r_hh_, weight_c_hh_};
-
-    if (bias_z_ != nullptr && bias_r_ != nullptr && bias_c_ != nullptr) {
-      params.push_back(bias_z_);
-      params.push_back(bias_r_);
-      params.push_back(bias_c_);
-    }
-    return params;
-  }
-
- private:
-  int batchsize_;  // batch size
-  int vdim_, hdim_;  // dimensions
-  Blob<float> *update_gate_, *reset_gate_, *new_memory_;
-  Param *weight_z_hx_, *weight_z_hh_, *bias_z_;  // update gate
-  Param *weight_r_hx_, *weight_r_hh_, *bias_r_;  // reset gate
-  Param *weight_c_hx_, *weight_c_hh_, *bias_c_;  // new memory
-};
-
-/**
- * Layer that applies a linear transformation as
- * @f$ h = v*W+b @f$, where W and b are the weight matrix and bias vector.
- */
-class InnerProductLayer : public NeuronLayer {
- public:
-  ~InnerProductLayer();
-  void Setup(const LayerProto& proto, const vector<Layer*>& srclayers) override;
-  void ComputeFeature(int flag, const vector<Layer*>& srclayers) override;
-  void ComputeGradient(int flag, const vector<Layer*>& srclayers) override;
-  ConnectionType src_neuron_connection(int k) const override {
-    return kOneToAll;
-  }
-  const std::vector<Param*> GetParams() const override {
-    std::vector<Param*> params{weight_, bias_};
-    return params;
-  }
-
-  void SetParams(std::vector<Param*> params) {
-    weight_ = params.at(0);
-    bias_ = params.at(1);
-  }
-
- private:
-  int batchsize_;
-  int vdim_, hdim_;
-  bool transpose_;
-  Param *weight_, *bias_;
-};
-
-/**
- * Local Response Normalization edge
- *
- * @f$ b_i = a_i / x_i^{\beta} @f$
- * @f$ x_i = knorm + \alpha * \sum_{j=\max(0,i-n/2)}^{\min(N,i+n/2)} (a_j)^2 @f$
- * n is size of local response area.
- * @f$a_i@f$, the activation (after ReLU) of a neuron convolved with the i-th kernel.
- * @f$b_i@f$, the neuron after normalization, N is the total num of kernels
- */
-class LRNLayer : public NeuronLayer {
-  void Setup(const LayerProto& proto, const vector<Layer*>& srclayers) override;
-  void ComputeFeature(int flag, const vector<Layer*>& srclayers) override;
-  void ComputeGradient(int flag, const vector<Layer*>& srclayers) override;
-
- protected:
-  //!< shape of the feature blob of the src layer
-  int batchsize_, channels_, height_, width_;
-  //!< size local response (neighbor) area
-  int lsize_;
-  //!< hyper-parameter
-  float alpha_, beta_, knorm_;
-  Blob<float> norm_;
-};
-
-/**
- * Layer that applies the pooling operation.
- * TODO(wangwei) remove dependenices on mshadow
- */
-class PoolingLayer : public NeuronLayer {
- public:
-  void Setup(const LayerProto& proto, const vector<Layer*>& srclayers) override;
-  void ComputeFeature(int flag, const vector<Layer*>& srclayers) override;
-  void ComputeGradient(int flag, const vector<Layer*>& srclayers) override;
-
- protected:
-  int kernel_x_, pad_x_, stride_x_;
-  int kernel_y_, pad_y_, stride_y_;
-  int batchsize_, channels_, height_, width_, pooled_height_, pooled_width_;
-  PoolingProto_PoolMethod pool_;
-};
-/**
- * Use book-keeping for BP following Caffe's pooling implementation
- */
-class CPoolingLayer : public PoolingLayer {
- public:
-  void Setup(const LayerProto& proto, const vector<Layer*>& srclayers);
-  void ComputeFeature(int flag, const vector<Layer*>& srclayers) override;
-  void ComputeGradient(int flag, const vector<Layer*>& srclayers) override;
-
- private:
-  Blob<float> mask_;
-};
-
-/**
- * @deprecated {please use ActivationLayer}
- */
-class ReLULayer : public NeuronLayer {
- public:
-  void Setup(const LayerProto& proto, const vector<Layer*>& srclayers) override;
-  void ComputeFeature(int flag, const vector<Layer*>& srclayers) override;
-  void ComputeGradient(int flag, const vector<Layer*>& srclayers) override;
-};
-
-/**
- * Softmax layer applies softmax transformation to features from source layers.
- * The feature blob of this layer is of shape (batchsize,
- * num_softmax_per_instance, count_per_softmax), where num_softmax_per_instance
- * is controlled by users (default is 1),
- * @f$ count_per_softmax = count / batchsize / num_softmax_per_instance @f$.
- * The softmax is conducted over count_per_softmax elements each time.
-  */
-class SoftmaxLayer : public NeuronLayer {
- public:
-  void Setup(const LayerProto& proto, const vector<Layer*>& srclayers) override;
-  void ComputeFeature(int flag, const vector<Layer*>& srclayers) override;
-  void ComputeGradient(int flag, const vector<Layer*>& srclayers) override;
-  /**
-   * This layer is not recommended for partitioning because it requires the whole
-   * src layer for normalization.
-   */
-  ConnectionType src_neuron_connection(int k) const override {
-    // CHECK_LT(k, srclayers_.size());
-    return kOneToAll;
-  }
- protected:
-  int batchsize_, dim_;
-  //!< set by users (default is 1)
-  // int num_softmax_per_instance_;
-  //!< size of the softmax area/length
-  // int count_per_softmax_;
-};
-/**
- * @deprecated {please use ActivationLayer}
- *
- * This layer applies the Sigmoid function to neuron activations.
- * f(x)=1/(1+exp(-x))
- * f'(x)=f(x)*(1-f(x))
- */
-class SigmoidLayer: public Layer {
- public:
-  using Layer::ComputeFeature;
-  using Layer::ComputeGradient;
-
-  void Setup(const LayerProto& proto, const vector<Layer*>& srclayers) override;
-  void ComputeFeature(int flag, const vector<Layer*>& srclayers) override;
-  void ComputeGradient(int flag, const vector<Layer*>& srclayers) override;
-};
-
-/**
- * @deprecated {please use ActivationLayer}
- * This layer applies the scaled Tanh function to neuron activations.
- * f(x) = 1.7159047 * tanh(0.66666667 * x)
- */
-class STanhLayer : public NeuronLayer {
- public:
-  void Setup(const LayerProto& proto, const vector<Layer*>& srclayers) override;
-  void ComputeFeature(int flag, const vector<Layer*>& srclayers) override;
-  void ComputeGradient(int flag, const vector<Layer*>& srclayers) override;
-};
-
-
-class BMLayer : public NeuronLayer {
- public:
-  void Setup(const LayerProto& proto, const vector<Layer*>& srclayers) override;
-  void ComputeFeature(int flag, const vector<Layer*>& srclayers) override;
-  void ComputeGradient(int flag, const vector<Layer*>& srclayers) override;
- protected:
-  Param *bnScale_, *bnBias_;
-  Param *resultRunningMean_, *resultRunningInvVariance_;
-  int batchsize_,  channels_, height_, width_;
-};
-
-/*************** Layers implemented using cudnn v3 ***************/
-#ifdef USE_CUDNN
-#define CHECK_CUDNN(x) CHECK_EQ(x, CUDNN_STATUS_SUCCESS)
-
-class CudnnBase : virtual public NeuronLayer {
- public:
-  ~CudnnBase() {
-    if (src_desc_ != nullptr)
-      CHECK_CUDNN(cudnnDestroyTensorDescriptor(src_desc_));
-    if (my_desc_ != nullptr)
-      CHECK_CUDNN(cudnnDestroyTensorDescriptor(my_desc_));
-  }
-  void virtual InitCudnn() {
-    CHECK(!has_init_cudnn_);
-    CHECK_CUDNN(cudnnCreateTensorDescriptor(&src_desc_));
-    CHECK_CUDNN(cudnnCreateTensorDescriptor(&my_desc_));
-    handle_ = Singleton<Context>::Instance()->cudnn_handle();
-    has_init_cudnn_ = true;
-  }
- protected:
-  bool has_init_cudnn_ = false;
-  cudnnHandle_t handle_ = nullptr;
-  cudnnTensorDescriptor_t src_desc_ = nullptr, my_desc_ = nullptr;
-};
-
-/**
- * Activation layer implemented using cudnn v3.
- * Activation methods including
- * - SIGMOID
- * - TANH
- * - RELU
- */
-class CudnnActivationLayer : public ActivationLayer, public CudnnBase {
- public:
-  void InitCudnn() override;
-  void ComputeFeature(int flag, const vector<Layer*>& srclayers) override;
-  void ComputeGradient(int flag, const vector<Layer*>& srclayers) override;
-
- protected:
-  cudnnActivationMode_t mode_;
-};
-
-/**
- * Convolution layer implemented using cudnn (v3 backward functions).
- */
-class CudnnConvLayer : public ConvolutionLayer, public CudnnBase {
- public:
-  ~CudnnConvLayer();
-  void InitCudnn() override;
-  void ComputeFeature(int flag, const vector<Layer*>& srclayers) override;
-  void ComputeGradient(int flag, const vector<Layer*>& srclayers) override;
-
- protected:
-  cudnnTensorDescriptor_t bias_desc_;
-  cudnnFilterDescriptor_t filter_desc_;
-  cudnnConvolutionDescriptor_t conv_desc_;
-  cudnnConvolutionFwdAlgo_t fp_alg_;
-  cudnnConvolutionBwdFilterAlgo_t bp_filter_alg_;
-  cudnnConvolutionBwdDataAlgo_t bp_data_alg_;
-  size_t workspace_byte_limit_, workspace_count_;
-};
-
-class CudnnLRNLayer : public LRNLayer, public CudnnBase {
- public:
-  ~CudnnLRNLayer();
-  void InitCudnn() override;
-  void ComputeFeature(int flag, const vector<Layer*>& srclayers) override;
-  void ComputeGradient(int flag, const vector<Layer*>& srclayers) override;
-
- protected:
-  cudnnLRNMode_t mode_;
-  cudnnLRNDescriptor_t norm_desc_;
-};
-/**
- * Pooling layer implemented using cudnn.
- */
-class CudnnPoolLayer : public PoolingLayer, public CudnnBase {
- public:
-  ~CudnnPoolLayer();
-  void InitCudnn() override;
-  void ComputeFeature(int flag, const vector<Layer*>& srclayers) override;
-  void ComputeGradient(int flag, const vector<Layer*>& srclayers) override;
-
- protected:
-  cudnnPoolingDescriptor_t pool_desc_;
-};
-
-/**
- * Cudnn Softmax layer.
- */
-class CudnnSoftmaxLayer : public SoftmaxLayer, public CudnnBase {
- public:
-  void InitCudnn() override;
-  void ComputeFeature(int flag, const vector<Layer*>& srclayers) override;
-  void ComputeGradient(int flag, const vector<Layer*>& srclayers) override;
-};
-
-
-#if CUDNN_MAJOR == 4
-/**
- * Cudnn Batch Normalization layer -- supported by cudnn_v4
- */
-class CudnnBMLayer : public BMLayer, public CudnnBase {
- public:
-  ~CudnnBMLayer();
-  void InitCudnn() override;
-  void ComputeFeature(int flag, const vector<Layer*>& srclayers) override;
-  void ComputeGradient(int flag, const vector<Layer*>& srclayers) override;
-  const std::vector<Param*> GetParams() const override {
-    std::vector<Param*> params{bnScale_, bnBias_,
-        resultRunningMean_, resultRunningInvVariance_};
-    return params;
-  }
- protected:
-  cudnnBatchNormMode_t mode_;
-  cudnnTensorDescriptor_t bnScaleBiasMeanVar_desc_;
-  cudnnTensorDescriptor_t bnScaleBiasDiff_desc_;
-  Blob<float> resultSaveMean_;
-  Blob<float> resultSaveInvVariance_;
-};
-#endif
-#endif  // USE_CUDNN
-
-/******************** RBM layers *****************/
-/**
- * Base layer for RBM models.
- */
-class RBMLayer: virtual public Layer {
- public:
-  virtual ~RBMLayer() {}
-  void Setup(const LayerProto& proto, const vector<Layer*>& srclayers) override;
-  const std::vector<Param*> GetParams() const override {
-    std::vector<Param*> params{weight_, bias_};
-    return params;
-  }
-  virtual Blob<float>* Sample(int flat);
-
- protected:
-  //! if true, sample according to a Gaussian distribution
-  bool gaussian_;
-  //! dimension of the hidden layer
-  int hdim_;
-  //! dimension of the visible layer
-  int vdim_;
-  int batchsize_;
-  bool first_gibbs_;
-  Param* weight_, *bias_;
-  Blob<float> pos_data_;
-  Blob<float> neg_data_;
-  Blob<float> neg_sample_;
-  Blob<float> pos_sample_;
-};
-
-/**
- * RBM visible layer
- */
-class RBMVisLayer: public RBMLayer, public LossLayer {
- public:
-  ~RBMVisLayer();
-  void Setup(const LayerProto& proto, const vector<Layer*>& srclayers) override;
-  void ComputeFeature(int flag, const vector<Layer*>& srclayers) override;
-  void ComputeGradient(int flag, const vector<Layer*>& srclayers) override;
-  const std::string ToString(bool debug, int flag) override;
-
- private:
-  RBMLayer* hid_layer_;
-  Layer* input_layer_;
-  float error_ = 0.0f;
-  int counter_ = 0;
-};
-/**
- * RBM hidden layer
- */
-class RBMHidLayer: public RBMLayer {
- public:
-  ~RBMHidLayer();
-  void Setup(const LayerProto& proto, const vector<Layer*>& srclayers) override;
-  void ComputeFeature(int flag, const vector<Layer*>& srclayers) override;
-  void ComputeGradient(int flag, const vector<Layer*>& srclayers) override;
-
- private:
-  RBMLayer *vis_layer_;
-};
-
-}  // namespace singa
-#endif  // SINGA_NEURALNET_NEURON_LAYER_H_
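
The activations listed for ActivationLayer above (relu, sigmoid, tanh, and the scaled tanh with constants 1.7159047 and 0.66666667) are elementwise maps; a standalone sketch, not the SINGA implementation:

    #include <cmath>
    #include <iostream>

    // Elementwise activations as documented for ActivationLayer.
    float Relu(float x)    { return x > 0.0f ? x : 0.0f; }
    float Sigmoid(float x) { return 1.0f / (1.0f + std::exp(-x)); }
    float Tanh(float x)    { return std::tanh(x); }
    // Scaled tanh: f(x) = 1.7159047 * tanh(0.66666667 * x).
    float STanh(float x)   { return 1.7159047f * std::tanh(0.66666667f * x); }

    int main() {
      std::cout << Relu(-0.5f) << " " << Sigmoid(0.0f) << " "
                << Tanh(1.0f) << " " << STanh(1.0f) << "\n";
      // prints approximately: 0 0.5 0.761594 1.0001
      return 0;
    }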

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dd1e4afa/include/singa/neuralnet/output_layer.h
----------------------------------------------------------------------
diff --git a/include/singa/neuralnet/output_layer.h b/include/singa/neuralnet/output_layer.h
deleted file mode 100644
index 9071f33..0000000
--- a/include/singa/neuralnet/output_layer.h
+++ /dev/null
@@ -1,99 +0,0 @@
-/************************************************************
-*
-* Licensed to the Apache Software Foundation (ASF) under one
-* or more contributor license agreements.  See the NOTICE file
-* distributed with this work for additional information
-* regarding copyright ownership.  The ASF licenses this file
-* to you under the Apache License, Version 2.0 (the
-* "License"); you may not use this file except in compliance
-* with the License.  You may obtain a copy of the License at
-*
-*   http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing,
-* software distributed under the License is distributed on an
-* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-* KIND, either express or implied.  See the License for the
-* specific language governing permissions and limitations
-* under the License.
-*
-*************************************************************/
-
-#ifndef SINGA_NEURALNET_OUTPUT_LAYER_H_
-#define SINGA_NEURALNET_OUTPUT_LAYER_H_
-
-#include <vector>
-#include <string>
-#include "singa/neuralnet/layer.h"
-#include "singa/io/store.h"
-
-namespace singa {
-/**
- * ArgSort layer used to get topk prediction labels.
- *
- * It sorts the labels based on their scores (e.g., probability) from large to
- * small. The topk labels are kept in the data field. It should not be called
- * during training because this layer does not implement ComputeGradient()
- * function.
- */
-class ArgSortLayer : public OutputLayer {
- public:
-  void Setup(const LayerProto& proto, const vector<Layer*>& srclayers) override;
-  void ComputeFeature(int flag, const vector<Layer*>& srclayers) override;
-
- protected:
-  int batchsize_, dim_;
-  int topk_;
-};
-
-class AccuracyLayer : public ArgSortLayer {
- public:
-  void Setup(const LayerProto& proto, const vector<Layer*>& srclayers) override;
-  void ComputeFeature(int flag, const vector<Layer*>& srclayers) override;
-  const std::string ToString(bool debug, int flag) override;
-
- private:
-  int counter_ = 0;
-  float accuracy_ = 0.0f;
-};
-/**
- * Output data (and label) for its source layer.
- */
-class CSVOutputLayer : public OutputLayer {
- public:
-  ~CSVOutputLayer() { delete store_; }
-  void Setup(const LayerProto& proto, const vector<Layer*>& srclayers) override;
-  void ComputeFeature(int flag, const vector<Layer*>& srclayers) override;
-
- private:
-  int inst_ = 0;
-  io::Store* store_ = nullptr;
-};
-
-class RecordOutputLayer : public OutputLayer {
- public:
-  ~RecordOutputLayer() { delete store_; }
-  void Setup(const LayerProto& proto, const vector<Layer*>& srclayers) override;
-  void ComputeFeature(int flag, const vector<Layer*>& srclayers) override;
-
- private:
-  int inst_ = 0;  //!< instance No.
-  io::Store* store_ = nullptr;
-};
-
-/**
- * Output layer for the char-rnn model, which converts sample ids back to chars
- * and dumps them to stdout.
- */
-class CharRNNOutputLayer : public OutputLayer {
- public:
-  void Setup(const LayerProto& proto, const vector<Layer*>& srclayers) override;
-
-  void ComputeFeature(int flag, const vector<Layer*>& srclayers) override;
-
- private:
-  string vocab_;
-};
-
-}  // namespace singa
-#endif  // SINGA_NEURALNET_OUTPUT_LAYER_H_
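
The ArgSortLayer behavior described above (keep the topk labels ordered by score, largest first) amounts to an argsort over label indices; a standalone sketch, not the SINGA code:

    #include <algorithm>
    #include <iostream>
    #include <numeric>
    #include <vector>

    // Return the indices of the topk largest scores, ordered from large to
    // small, as the ArgSortLayer comment describes.
    std::vector<int> ArgSortTopK(const std::vector<float>& scores, int topk) {
      std::vector<int> idx(scores.size());
      std::iota(idx.begin(), idx.end(), 0);
      std::partial_sort(idx.begin(), idx.begin() + topk, idx.end(),
                        [&scores](int a, int b) { return scores[a] > scores[b]; });
      idx.resize(topk);
      return idx;
    }

    int main() {
      std::vector<float> probs{0.1f, 0.6f, 0.05f, 0.25f};
      for (int label : ArgSortTopK(probs, 2))
        std::cout << label << " ";  // prints: 1 3
      std::cout << "\n";
      return 0;
    }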

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dd1e4afa/include/singa/server.h
----------------------------------------------------------------------
diff --git a/include/singa/server.h b/include/singa/server.h
deleted file mode 100644
index d95862d..0000000
--- a/include/singa/server.h
+++ /dev/null
@@ -1,135 +0,0 @@
-/************************************************************
-*
-* Licensed to the Apache Software Foundation (ASF) under one
-* or more contributor license agreements.  See the NOTICE file
-* distributed with this work for additional information
-* regarding copyright ownership.  The ASF licenses this file
-* to you under the Apache License, Version 2.0 (the
-* "License"); you may not use this file except in compliance
-* with the License.  You may obtain a copy of the License at
-*
-*   http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing,
-* software distributed under the License is distributed on an
-* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-* KIND, either express or implied.  See the License for the
-* specific language governing permissions and limitations
-* under the License.
-*
-*************************************************************/
-
-#ifndef SINGA_SERVER_H_
-#define SINGA_SERVER_H_
-
-#include <unordered_map>
-#include <vector>
-#include "singa/comm/socket.h"
-#include "singa/proto/job.pb.h"
-#include "singa/utils/param.h"
-#include "singa/utils/updater.h"
-
-namespace singa {
-
- /* Responds to workers' get/put/update requests, and periodically syncs with
-  * other servers.
-  *
-  * Normally, the Server creates a response message for each request, which
-  * is sent back to the one who issued the request. However, if a request
-  * is not processed successfully, the original message is returned. The
-  * server does not know whether the returned message is a response or the
-  * original message; it just sends it to the router, which decides whether
-  * to re-send the request to the server or send it to the worker.
-  */
-class Server {
- public:
-  ~Server();
-  Server(int group_id, int server_id,
-      const JobProto& job_conf,
-      const std::vector<int>& slice2group,
-      const std::vector<int>& slice2server);
-  void Run();
-  inline int grp_id() const { return grp_id_; }
-  inline int id() const { return id_; }
-
- protected:
-  /**
-   * Process GET request.
-   *
-   * @return the original message or a response message that contains the values
-   * of the Param with the requested version.
-   */
-  Msg* HandleGet(Msg** msg);
-  /**
-   * Process Update request.
-   *
-   * It waits until the gradients from all workers of the same worker group
-   * have been received. After updating, it responds to each sender with the new
-   * Param values. It may generate a sync message to the server group that
-   * maintains the global version of the updated Param (slice).
-   *
-   * Note: there is no per-worker-group counter of the number of received
-   * update requests. Hence, the server may conduct the update when it
-   * receives x requests from group a and y requests from group b where
-   * x + y = group size. To avoid this problem, we can
-   * -# maintain a request list for each group for each Param at the server side
-   * -# not span a worker group across multiple nodes; then the updates from
-   * the same group would be locally aggregated on the worker node, and the
-   * server would conduct the update immediately after receiving the aggregated
-   * request.
-   * -# launch only one worker group.
-   *
-   * @return the original message or a response message
-   */
-  const std::vector<Msg*> HandleUpdate(Msg **msg);
-  /**
-   * Process PUT request.
-   *
-   * @return the original message or response message. If we don't want to
-   * acknowledge the put request, then return nullptr.
-   */
-  Msg* HandlePut(Msg **msg);
-  /**
-   * Handle sync request from other server groups.
-   *
-   * It adds updates of Param (slice) from other server groups directly to
-   * local Param (slice). Currently, each Param (slice) has a master group,
-   * i.e., slice2group_[sliceid], which would receive such requests from all
-   * other server groups for the Param object.
-   *
-   * @param msg request msg containing the parameter updates
-   * @return response msg that contains the fresh parameter values.
-   */
-  Msg* HandleSyncRequest(Msg** msg);
-  /**
-   * Handle sync response.
-   *
-   * The response msg includes the latest values of a Param object from the
-   * server group that maintains this Param object.
-   * The local Param values are replaced with the sum of the local updates
-   * made since the sync request was sent and the received Param values.
-   *
-   * @param msg the response message
-   */
-  void HandleSyncResponse(Msg** msg);
-
- protected:
-  int grp_id_ = -1;
-  int id_ = -1;
-  Updater* updater_ = nullptr;
-  //!< map from slice ID to slice and deleted in the destructor
-  std::unordered_map<int, ParamEntry*> shard_;
-  std::vector<int> slice2group_, slice2server_;
-  //!< num of updates from last sync with master server group for a param/slice
-  std::vector<int> n_updates_;
-  //!< num of sync requests that have not been responded
-  std::vector<int> n_pending_sync_;
-  std::vector<Blob<float>> last_sync_;
-  std::unordered_map<int, std::vector<Msg*>> buffer_requests_;
-
-  Dealer* dealer_;
-};
-
-}  // namespace singa
-
-#endif  // SINGA_SERVER_H_
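
As the HandleUpdate() documentation above notes, the server has to collect one
update request per worker in a group before applying the update, which is what
the buffer_requests_ map supports. A rough sketch of that per-slice buffering,
with a simplified message type standing in for SINGA's Msg (all names in this
snippet are assumptions for illustration, not the actual implementation):

    #include <unordered_map>
    #include <vector>

    struct UpdateMsg { int slice_id; /* gradient payload omitted */ };

    class SliceUpdateBuffer {
     public:
      explicit SliceUpdateBuffer(int group_size) : group_size_(group_size) {}

      // Buffer one request; return true once a full group's worth of requests
      // for this slice has arrived and the update can be conducted.
      bool Add(const UpdateMsg& msg) {
        auto& pending = buffer_[msg.slice_id];
        pending.push_back(msg);
        if (static_cast<int>(pending.size()) < group_size_) return false;
        // ... aggregate the buffered gradients and call the updater here ...
        pending.clear();
        return true;
      }

     private:
      int group_size_;
      std::unordered_map<int, std::vector<UpdateMsg>> buffer_;
    };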

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dd1e4afa/include/singa/singa.h
----------------------------------------------------------------------
diff --git a/include/singa/singa.h b/include/singa/singa.h
deleted file mode 100644
index 9bc5ba5..0000000
--- a/include/singa/singa.h
+++ /dev/null
@@ -1,37 +0,0 @@
-/************************************************************
-*
-* Licensed to the Apache Software Foundation (ASF) under one
-* or more contributor license agreements.  See the NOTICE file
-* distributed with this work for additional information
-* regarding copyright ownership.  The ASF licenses this file
-* to you under the Apache License, Version 2.0 (the
-* "License"); you may not use this file except in compliance
-* with the License.  You may obtain a copy of the License at
-*
-*   http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing,
-* software distributed under the License is distributed on an
-* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-* KIND, either express or implied.  See the License for the
-* specific language governing permissions and limitations
-* under the License.
-*
-*************************************************************/
-
-#ifndef SINGA_SINGA_H_
-#define SINGA_SINGA_H_
-
-#include "singa/comm/socket.h"
-#include "singa/io/store.h"
-#include "singa/neuralnet/neuralnet.h"
-#include "singa/neuralnet/layer.h"
-#include "singa/proto/job.pb.h"
-#include "singa/proto/singa.pb.h"
-#include "singa/utils/common.h"
-#include "singa/utils/param.h"
-#include "singa/utils/singleton.h"
-#include "singa/utils/factory.h"
-#include "singa/driver.h"
-
-#endif  // SINGA_SINGA_H_

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dd1e4afa/include/singa/stub.h
----------------------------------------------------------------------
diff --git a/include/singa/stub.h b/include/singa/stub.h
deleted file mode 100644
index 4802535..0000000
--- a/include/singa/stub.h
+++ /dev/null
@@ -1,108 +0,0 @@
-/************************************************************
-*
-* Licensed to the Apache Software Foundation (ASF) under one
-* or more contributor license agreements.  See the NOTICE file
-* distributed with this work for additional information
-* regarding copyright ownership.  The ASF licenses this file
-* to you under the Apache License, Version 2.0 (the
-* "License"); you may not use this file except in compliance
-* with the License.  You may obtain a copy of the License at
-*
-*   http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing,
-* software distributed under the License is distributed on an
-* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-* KIND, either express or implied.  See the License for the
-* specific language governing permissions and limitations
-* under the License.
-*
-*************************************************************/
-
-#ifndef SINGA_STUB_H_
-#define SINGA_STUB_H_
-
-#include <queue>
-#include <unordered_map>
-#include <vector>
-#include <string>
-#include "singa/comm/socket.h"
-#include "singa/neuralnet/neuralnet.h"
-#include "singa/proto/job.pb.h"
-#include "singa/proto/singa.pb.h"
-#include "singa/utils/factory.h"
-#include "singa/utils/param.h"
-#include "singa/utils/singleton.h"
-#include "singa/server.h"
-#include "singa/worker.h"
-
-namespace singa {
-
-class Stub {
- public:
-  ~Stub();
-  /**
-   * Find an endpoint to bind.
-   */
-  void Setup();
-  /**
-   * The Stub instance runs this function in the main thread to handle (e.g.,
-   * forward) messages from workers and servers.
-   *
-   * @param[in] slice2server the k-th value is the ID of the server that is in
-   * charge of updating the Param slice with ID k. Large Param objects are
-   * sliced into subsets for load-balance. Different subsets are updated by
-   * different servers.
-   */
-  void Run(const vector<int>& slice2server,
-      const std::vector<Worker*>& workers,
-      const std::vector<Server*>& servers);
-
-  void set_router(Router* router) {
-    router_ = router;
-  }
-
- protected:
-  /**
-   * Create a socket to send msg to the specified process
-   * @param dst_procs the dst process (logical) ID
-   * @return the newly created socket
-   */
-  Dealer* CreateInterProcsDealer(int dst_procs);
-  /**
-   * Generate a request message to Get the parameter object.
-   */
-  const std::vector<Msg*> HandleGetRequest(ParamEntry* entry, Msg** msg);
-  void HandleGetResponse(ParamEntry* entry, Msg** msg);
-  /**
-   * Generate a request message to Update the parameter object.
-   */
-  const std::vector<Msg*> HandleUpdateRequest(ParamEntry* entry, Msg** msg);
-  /**
-   * Handle response msg from servers for the update requests.
-   */
-  void HandleUpdateResponse(ParamEntry* entry, Msg** msg);
-  /**
-   * Generate a request message to Put the parameter object.
-   */
-  const std::vector<Msg*> HandlePutRequest(ParamEntry* entry, Msg** msg);
-  /**
-   * Called by HandlePut, HandleUpdate and HandleGet functions
-   * @param type message type
-   * @param version param version
-   * @param entry
-   * @param msg
-   * @param ret generated messages
-   */
-  void GenMsgs(int type, int version, ParamEntry* entry,
-    Msg* msg, std::vector<Msg*> *ret);
-
-
- protected:
-  Router *router_ = nullptr;
-  std::vector<int> slice2server_;
-};
-
-}  // namespace singa
-
-#endif  // SINGA_STUB_H_
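
The slice2server argument of Stub::Run() above maps each Param slice ID to the
server in charge of updating it; large Param objects are sliced so that
different servers share the update load. A small sketch of one way such a
mapping could be produced (simple round-robin assignment; purely illustrative,
not SINGA's actual slicing policy):

    #include <vector>

    // Assign num_slices Param slices to num_servers servers in round-robin
    // order; slice2server[k] is the ID of the server in charge of slice k.
    std::vector<int> BuildSlice2Server(int num_slices, int num_servers) {
      std::vector<int> slice2server(num_slices);
      for (int k = 0; k < num_slices; ++k)
        slice2server[k] = k % num_servers;
      return slice2server;
    }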

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dd1e4afa/include/singa/utils/blob.h
----------------------------------------------------------------------
diff --git a/include/singa/utils/blob.h b/include/singa/utils/blob.h
deleted file mode 100644
index 1a0a592..0000000
--- a/include/singa/utils/blob.h
+++ /dev/null
@@ -1,414 +0,0 @@
-/**************************************************************
-*
-* Licensed to the Apache Software Foundation (ASF) under one
-* or more contributor license agreements.  See the NOTICE file
-* distributed with this work for additional information
-* regarding copyright ownership.  The ASF licenses this file
-* to you under the Apache License, Version 2.0 (the
-* "License"); you may not use this file except in compliance
-* with the License.  You may obtain a copy of the License at
-*
-*   http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing,
-* software distributed under the License is distributed on an
-* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-* KIND, either express or implied.  See the License for the
-* specific language governing permissions and limitations
-* under the License.
-*
-*************************************************************/
-
-/**
- * The code is adapted from that of Caffe which is under BSD 2 Clause License.
- * COPYRIGHT
- * All contributions by the University of California:
- * Copyright (c) 2014, The Regents of the University of California (Regents)
- * All rights reserved.
- * All other contributions:
- * Copyright (c) 2014, the respective contributors
- * All rights reserved.
- */
-#ifndef SINGA_UTILS_BLOB_H_
-#define SINGA_UTILS_BLOB_H_
-
-#include <glog/logging.h>
-#include <memory>
-#include <vector>
-#include "singa/proto/common.pb.h"
-#include "mshadow/tensor.h"
-#include "mshadow/cxxnet_op.h"
-
-namespace singa {
-
-// TODO(wangwei) use cudaMallocHost depending on Context::device.
-inline void MallocHost(void** ptr, size_t size) {
-  *ptr = malloc(size);
-  // cudaMallocHost(ptr, size);
-}
-
-inline void FreeHost(void* ptr) {
-  free(ptr);
-  // cudaFreeHost(ptr);
-}
-
-/**
- * @brief Manages memory allocation and synchronization between the host (CPU)
- *        and device (GPU).
- *
- * TODO(dox): more thorough description.
- */
-class SyncedMemory {
- public:
-  enum SyncedHead { UNINITIALIZED,
-                    HEAD_AT_CPU,
-                    HEAD_AT_GPU,
-                    SYNCED };
-
-  SyncedMemory() {}
-  explicit SyncedMemory(size_t size) : size_(size) {}
-  ~SyncedMemory();
-
-  const void* cpu_data();
-  const void* gpu_data();
-  void* mutable_cpu_data();
-  void* mutable_gpu_data();
-  void set_cpu_data(void* data);
-  inline SyncedHead head() { return head_; }
-  inline size_t size() { return size_; }
-
- private:
-  void to_cpu();
-  void to_gpu();
-
-  void* cpu_ptr_ = nullptr;
-  void* gpu_ptr_ = nullptr;
-  size_t size_ = 0;
-  SyncedHead head_ = UNINITIALIZED;
-  bool own_cpu_data_ = false;
-};  // class SyncedMemory
-
-
-template <typename Dtype>
-class Blob {
- public:
-  Blob() {}
-  /**
-   * Blob constructor with given shape.
-   * @param shape specifies the size of each dimension, shape[0] is the highest
-   * dimension, i.e., stride[0] = shape[1] * shape[2] * ...
-   */
-  explicit Blob(const std::vector<int>& shape) { Reshape(shape); }
-  /**
-   * Blob constructor with given shape.
-   * @param[in] dim0 total num of elements.
-   */
-  explicit Blob(int dim0) { Reshape(dim0); }
-  /**
-   * Blob constructor with given shape.
-   * @param[in] dim0 size of the highest dimension
-   * @param[in] dim1 size of the second highest dimension
-   */
-  explicit Blob(int dim0, int dim1) { Reshape(dim0, dim1); }
-  /**
-   * Blob constructor with given shape.
-   * @param[in] dim0 size of the highest dimension
-   * @param[in] dim1
-   * @param[in] dim2
-   */
-  explicit Blob(int dim0, int dim1, int dim2) { Reshape(dim0, dim1, dim2); }
-  /**
-   * Blob constructor with given shape.
-   * @param[in] dim0 size of the highest dimension
-   * @param[in] dim1
-   * @param[in] dim2
-   * @param[in] dim3
-   */
-  explicit Blob(int dim0, int dim1, int dim2, int dim3) {
-    Reshape(dim0, dim1, dim2, dim3);
-  }
-  /**
-   * Change the shape of the blob, re-allocate memory if Blob size() changes.
-   *
-   * @param[in] shape specifies the size of each dimension, shape[0] is the
-   * highest dimension, i.e., stride[0] = shape[1] * shape[2] * ...
-   */
-  void Reshape(const std::vector<int>& shape);
-  /**
-   * Helper for Reshape(const std::vector<int>& shape) with shape.size() = 1.
-   *
-   * @see Reshape(const std::vector<int>&).
-   * @param[in] dim0 total num of elements.
-   */
-  void Reshape(int dim0) {
-    Reshape(std::vector<int>{dim0});
-  }
-  /**
-   * Helper for Reshape(const std::vector<int>& shape) with shape.size() = 2.
-   *
-   * @param dim0 the highest dimension size, i.e., dim0 = shape[0]. E.g., dim0
-   * could be the batchsize.
-   * @param[in] dim1, dim1 = shape[1], e.g., dim1 could be the length of the
-   * feature vector.
-   */
-  void Reshape(int dim0, int dim1) {
-    Reshape(std::vector<int>{dim0, dim1});
-  }
-  /**
-   * Helper for Reshape(const std::vector<int>& shape) with shape.size() = 3.
-   *
-   * @param[in] dim0, dim0 = shape[0]
-   * @param[in] dim1, dim1 = shape[1]
-   * @param[in] dim2, dim2 = shape[2]
-   */
-  void Reshape(int dim0, int dim1, int dim2) {
-    Reshape(std::vector<int>{dim0, dim1, dim2});
-  }
-  /**
-   * Helper for Reshape(const std::vector<int>& shape) with shape.size() = 4.
-   *
-   * @param[in] dim0, dim0 = shape[0]
-   * @param[in] dim1, dim1 = shape[1]
-   * @param[in] dim2, dim2 = shape[2]
-   * @param[in] dim3, dim3 = shape[3]
-   */
-  void Reshape(int dim0, int dim1, int dim2, int dim3) {
-    Reshape(std::vector<int>{dim0, dim1, dim2, dim3});
-  }
-  /**
-   * Reshape as the shape of *other* Blob.
-   * @param[in] other
-   */
-  void ReshapeLike(const Blob& other);
-  /**
-   * @brief Copy from a source Blob.
-   *
-   * @param source the Blob to copy from
-   * @param reshape if false, require this Blob to be pre-shaped to the shape
-   * of other (and die otherwise); if true, Reshape this Blob to other's
-   * shape if necessary
-   */
-  void CopyFrom(const Blob<Dtype>& source, bool reshape);
-  /**
-   * call CopyFrom(const Blob<Dtype>& source, bool reshape) with reshape = false
-   */
-  void CopyFrom(const Blob<Dtype>& source);
-
-  void FromProto(const singa::BlobProto& proto);
-  void ToProto(singa::BlobProto* proto) const;
-  /**
-   * Set each element to be v
-   */
-  void SetValue(Dtype v);
-  /**
-   * Compute the sum of absolute values (L1 norm) of the data.
-  Dtype AsumData() const;
-   */
-  /**
-   * Sum all elements
-  Dtype SumData() const;
-   */
-  /**
-   * Share data with the other Blob.
-   * Set the data_ shared_ptr to point to the SyncedMemory holding the data_
-   * of Blob other.
-   *
-   * It may deallocate the SyncedMemory holding this Blob's data_, as
-   * shared_ptr calls its destructor when reset with the "=" operator.
-   * @param other the Blob that owns the data
-   * @param cpu_only if true, only share the cpu data; if false, share the whole
-   * data_ field. For training with multi-gpu cards, cpu_only must be true,
-   * because gpu memory cannot be shared among different devices.
-   */
-  void ShareData(Blob* other, bool cpu_only = true);
-
-  /*
-  void Swap(Blob& other);
-  */
-  /**
-   * @return the shape vector.
-   */
-  inline const std::vector<int>& shape() const { return shape_; }
-  /**
-   * @return the size of the k-th dimension.
-   */
-  inline int shape(int k) const {
-    CHECK_LT(k, shape_.size());
-    return shape_.at(k);
-  }
-  inline int count() const {
-    return count_;
-  }
-  inline int version() const {
-    return version_;
-  }
-  inline void set_version(int v) {
-    version_ = v;
-  }
-  inline const Dtype* cpu_data() const {
-    CHECK(data_);
-    return static_cast<const Dtype*>(data_->cpu_data());
-  }
-  inline void set_cpu_data(Dtype* data) {
-    CHECK(data);
-    data_->set_cpu_data(data);
-  }
-  inline const Dtype* gpu_data() const {
-    CHECK(data_);
-    return static_cast<const Dtype*>(data_->gpu_data());
-  }
-  inline Dtype* mutable_cpu_data() {
-    CHECK(data_);
-    return static_cast<Dtype*>(data_->mutable_cpu_data());
-  }
-  inline Dtype* mutable_gpu_data() {
-    CHECK(data_);
-    return static_cast<Dtype*>(data_->mutable_gpu_data());
-  }
-  inline void set_transpose(bool val) {
-    transpose_ = val;
-  }
-  inline bool transpose() const {
-    return transpose_;
-  }
-  inline const Blob<Dtype> T() const {
-    Blob<Dtype> ret(*this);
-    ret.transpose_ = !transpose_;
-    return ret;
-  }
-  // check whether two blobs have exactly the same content
-  bool check_equal(Blob* other) const {
-    if (transpose() != other->transpose()) return false;
-    if (count() != other->count()) return false;
-    if (shape().size() != other->shape().size()) return false;
-    for (unsigned int i = 0; i < shape().size(); i++) {
-      if (shape(i) != other->shape(i)) return false;
-    }
-    const Dtype * a = cpu_data();
-    const Dtype * b = other->cpu_data();
-    for (int i = 0; i < count(); i++) {
-      if (a[i] != b[i]) return false;
-    }
-    return true;
-  }
-
- protected:
-  std::shared_ptr<SyncedMemory> data_ = nullptr;
-  std::vector<int> shape_;
-  int count_ = 0;
-  int capacity_ = 0;
-  int version_ = -1;
-  bool transpose_ = false;
-};  // class Blob
-
-/**
- * Reshape a Blob.
- * @return a new Blob with the given shape; it shares the internal data_ with
- * the original Blob, i.e., no memory copy or allocation.
- */
-template <typename Dtype>
-Blob<Dtype>* Reshape(const Blob<Dtype> & A, const std::vector<int>& shape) {
-  Blob<Dtype>* res = new Blob<Dtype>(A);
-  res->Reshape(shape);
-  return res;
-}
-
-/**
- * Helper of Reshape(const Blob<Dtype>&, const std::vector<int>&).
- */
-template <typename Dtype>
-Blob<Dtype>* Reshape(const Blob<Dtype> & A, int count) {
-  std::vector<int> tmpshape;
-  tmpshape.push_back(count);
-  return Reshape(A, tmpshape);
-}
-/**
- * Helper of Reshape(const Blob<Dtype>&, const std::vector<int>&).
- */
-template <typename Dtype>
-Blob<Dtype>* Reshape(const Blob<Dtype> & A, int dim0, int dim1) {
-  std::vector<int> tmpshape;
-  tmpshape.push_back(dim0);
-  tmpshape.push_back(dim1);
-  return Reshape(A, tmpshape);
-}
-/**
- * Helper of Reshape(const Blob<Dtype>&, const std::vector<int>&).
- */
-template <typename Dtype>
-Blob<Dtype>* Reshape(const Blob<Dtype> & A, int dim0, int dim1, int dim2) {
-  std::vector<int> tmpshape;
-  tmpshape.push_back(dim0);
-  tmpshape.push_back(dim1);
-  tmpshape.push_back(dim2);
-  return Reshape(A, tmpshape);
-}
-/**
- * Helper of Reshape(const Blob<Dtype>&, const std::vector<int>&).
- */
-template <typename Dtype>
-Blob<Dtype>* Reshape(const Blob<Dtype> & A, int dim0, int dim1, int dim2,
-    int dim3) {
-  std::vector<int> tmpshape;
-  tmpshape.push_back(dim0);
-  tmpshape.push_back(dim1);
-  tmpshape.push_back(dim2);
-  tmpshape.push_back(dim3);
-  return Reshape(A, tmpshape);
-}
-
-/**
- * @return a new Blob which share all internal members with the input Blob
- * except that the transpose_ field is set to the opposite value.
- */
-template <typename Dtype>
-Blob<Dtype>* Transpose(const Blob<Dtype> & A) {
-  Blob<Dtype>* res = new Blob<Dtype>(A);
-  bool origin = A.transpose();
-  res->set_transpose(!origin);
-  return res;
-}
-
-// TODO(wangwei) remove mshadow functions.
-using namespace mshadow;
-using mshadow::cpu;
-
-using mshadow::Shape;
-using mshadow::Shape1;
-using mshadow::Shape2;
-using mshadow::Shape3;
-using mshadow::Shape4;
-using mshadow::Tensor;
-
-using std::vector;
-
-inline Tensor<cpu, 4> Tensor4(Blob<float>* blob) {
-  const vector<int>& shape = blob->shape();
-  Tensor<cpu, 4> tensor(blob->mutable_cpu_data(),
-      Shape4(shape[0], shape[1], shape[2], shape[3]));
-  return tensor;
-}
-
-inline Tensor<cpu, 3> Tensor3(Blob<float>* blob) {
-  const vector<int>& shape = blob->shape();
-  Tensor<cpu, 3> tensor(blob->mutable_cpu_data(),
-      Shape3(shape[0], shape[1], blob->count() / shape[0] / shape[1]));
-  return tensor;
-}
-
-inline Tensor<cpu, 2> Tensor2(Blob<float>* blob) {
-  const vector<int>& shape = blob->shape();
-  Tensor<cpu, 2> tensor(blob->mutable_cpu_data(),
-      Shape2(shape[0], blob->count() / shape[0]));
-  return tensor;
-}
-
-inline Tensor<cpu, 1> Tensor1(Blob<float>* blob) {
-  Tensor<cpu, 1> tensor(blob->mutable_cpu_data(), Shape1(blob->count()));
-  return tensor;
-}
-
-
-}  // namespace singa
-
-#endif  // SINGA_UTILS_BLOB_H_
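
In the Blob API above, shape[0] is the highest dimension, Reshape() only
re-allocates when the element count grows past the current capacity, and the
free Reshape()/Transpose() helpers return new Blob objects that share the
underlying data_. A short usage sketch of that shape arithmetic, assuming the
header as it existed before this commit:

    #include "singa/utils/blob.h"

    void BlobShapeExample() {
      // A 4-D blob, e.g., (batchsize, channels, height, width).
      singa::Blob<float> b(8, 3, 32, 32);
      // count() == 8 * 3 * 32 * 32 == 24576 elements in total.
      // Reshaping to 2-D keeps count() unchanged, so no re-allocation happens.
      b.Reshape(8, 3 * 32 * 32);
      // Transpose() returns a new Blob sharing data_, with transpose_ flipped.
      singa::Blob<float>* bt = singa::Transpose(b);
      delete bt;  // the shared data_ stays alive through the shared_ptr in b
    }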

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dd1e4afa/include/singa/utils/cluster.h
----------------------------------------------------------------------
diff --git a/include/singa/utils/cluster.h b/include/singa/utils/cluster.h
deleted file mode 100644
index 9e36cf8..0000000
--- a/include/singa/utils/cluster.h
+++ /dev/null
@@ -1,161 +0,0 @@
-/************************************************************
-*
-* Licensed to the Apache Software Foundation (ASF) under one
-* or more contributor license agreements.  See the NOTICE file
-* distributed with this work for additional information
-* regarding copyright ownership.  The ASF licenses this file
-* to you under the Apache License, Version 2.0 (the
-* "License"); you may not use this file except in compliance
-* with the License.  You may obtain a copy of the License at
-*
-*   http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing,
-* software distributed under the License is distributed on an
-* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-* KIND, either express or implied.  See the License for the
-* specific language governing permissions and limitations
-* under the License.
-*
-*************************************************************/
-
-#ifndef SINGA_UTILS_CLUSTER_H_
-#define SINGA_UTILS_CLUSTER_H_
-
-#include <glog/logging.h>
-#include <string>
-#include <unordered_map>
-#include <memory>
-#include <vector>
-#include "singa/proto/job.pb.h"
-#include "singa/proto/singa.pb.h"
-#include "singa/utils/cluster_rt.h"
-#include "singa/utils/common.h"
-#include "singa/utils/singleton.h"
-
-namespace singa {
-
-/**
- * Cluster is a singleton object, which provides cluster configurations,
- * e.g., the topology of the cluster.
- * All IDs start from 0.
- */
-class Cluster {
- public:
-  // Cluster is a global singleton in a process
-  static Cluster* Setup(int job_id, const SingaProto& singaConf,
-                        const ClusterProto& clusterConf);
-  static Cluster* Get();
-
-  inline int nserver_groups() const { return cluster_.nserver_groups(); }
-  inline int nworker_groups() const { return cluster_.nworker_groups(); }
-  inline int nworkers_per_group() const { return cluster_.nworkers_per_group();}
-  inline int nservers_per_group() const { return cluster_.nservers_per_group();}
-  inline int nworkers_per_procs() const { return cluster_.nworkers_per_procs();}
-  inline int nservers_per_procs() const { return cluster_.nservers_per_procs();}
-  inline int nworker_groups_per_server_group() const {
-    if (nserver_groups() == 0 || nservers_per_group() == 0)
-      return 1;
-    else
-      return cluster_.nworker_groups() / cluster_.nserver_groups();
-  }
-  /**
-   * @return true if the calling procs has server threads, otherwise false
-   */
-  inline bool has_server() const {
-    if (server_worker_separate()) {
-      CHECK_LT(procs_id_, nprocs_);
-      return procs_id_ >= nworker_procs();
-    } else {
-      return procs_id_ < nserver_procs();
-    }
-  }
-  /**
-   * @return true if the calling procs has worker threads.
-   */
-  inline bool has_worker() const {
-    return procs_id_ < nworker_procs();
-  }
-  /**
-   * @return global procs id, which starts from 0.
-   */
-  inline int procs_id() const { return procs_id_; }
-  inline void set_procs_id(int procs_id) { procs_id_ = procs_id; }
-  inline bool server_worker_separate() const {
-    return cluster_.server_worker_separate();
-  }
-  inline int nworker_procs() const {
-    return nworker_groups() * nworkers_per_group() / nworkers_per_procs();
-  }
-  inline int nserver_procs() const {
-    return nserver_groups() * nservers_per_group() / nservers_per_procs();
-  }
-  inline int nprocs() const { return nprocs_; }
-  /**
-   * @return endpoint of the router of a procs with the specified id
-   */
-  inline std::string endpoint(int procs_id) const {
-    CHECK_LT(procs_id, nprocs());
-    CHECK_GE(procs_id, 0);
-    return cluster_rt_->GetProcHost(procs_id);
-  }
-  inline std::string workspace() const { return cluster_.workspace(); }
-  inline std::string vis_folder() const {
-    return cluster_.workspace() + "/visualization";
-  }
-  inline std::string checkpoint_folder() const {
-    return cluster_.workspace() + "/checkpoint";
-  }
-  /*
-  const int stub_timeout() const { return cluster_.stub_timeout(); }
-  const int worker_timeout() const { return cluster_.worker_timeout(); }
-  const int server_timeout() const { return cluster_.server_timeout(); }
-  */
-  inline bool share_memory() const { return cluster_.share_memory(); }
-  inline int sync_freq() const { return cluster_.sync_freq(); }
-  inline int poll_time() const { return cluster_.poll_time(); }
-  ClusterRuntime* runtime() const { return cluster_rt_; }
-
-  /**
-   * @return logical procs ID
-   */
-  inline int ProcsIDOf(int group_id, int id, int flag) {
-    return procs_ids_.at(Hash(group_id, id, flag));
-  }
-
-  /**
-   * @param pid, process ID
-   * @param group_size, num of executors in a group
-   * @param procs_size, num of executors in a procs
-   *
-   * @return a vector with 4 integers:
-   * [group start, group end), [start executor, end executor)
-   */
-  const std::vector<int> ExecutorRng(int pid, int group_size, int procs_size);
-  /**
-   * Register this process.
-   *
-   * @param pid physical process id obtained from the OS; all other procs IDs
-   * refer to logical process IDs.
-   * @param endpoint unique string for other procs to connect
-   */
-  void Register(int pid, const std::string& endpoint);
-
- private:
-  void Init(int job, const SingaProto& singaConf,
-          const ClusterProto& clusterConf);
-  void SetupFolders(const ClusterProto &cluster);
-  int Hash(int gid, int id, int flag);
-
-  int procs_id_ = -1;
-  int nprocs_ = 0;
-  // cluster config proto
-  ClusterProto cluster_;
-  SingaProto singa_;
-  ClusterRuntime* cluster_rt_ = nullptr;
-  std::unordered_map<int, int> procs_ids_;
-};
-
-}  // namespace singa
-
-#endif  // SINGA_UTILS_CLUSTER_H_
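
The process counts exposed by Cluster above follow directly from the group
configuration, e.g. nworker_procs() = nworker_groups() * nworkers_per_group()
/ nworkers_per_procs(), and likewise for servers. A small worked sketch of
that arithmetic with made-up numbers (plain variables instead of the
ClusterProto-backed accessors):

    #include <cstdio>

    // Illustrative only: mirrors the arithmetic of Cluster::nworker_procs()
    // and Cluster::nserver_procs() with hard-coded example values.
    int main() {
      int nworker_groups = 4, nworkers_per_group = 2, nworkers_per_procs = 2;
      int nserver_groups = 1, nservers_per_group = 2, nservers_per_procs = 1;

      int nworker_procs =
          nworker_groups * nworkers_per_group / nworkers_per_procs;  // 4
      int nserver_procs =
          nserver_groups * nservers_per_group / nservers_per_procs;  // 2
      std::printf("worker procs: %d, server procs: %d\n",
                  nworker_procs, nserver_procs);
      return 0;
    }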