You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@singa.apache.org by wa...@apache.org on 2016/06/03 07:48:18 UTC
[13/60] incubator-singa git commit: SINGA-163 - Reorganize the project folder layout

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dd1e4afa/include/mshadow/tensor_random.h
----------------------------------------------------------------------
diff --git a/include/mshadow/tensor_random.h b/include/mshadow/tensor_random.h
deleted file mode 100644
index ae2836a..0000000
--- a/include/mshadow/tensor_random.h
+++ /dev/null
@@ -1,369 +0,0 @@
-#ifndef MSHADOW_TENSOR_RANDOM_H
-#define MSHADOW_TENSOR_RANDOM_H
-/*!
- *  \file tensor_random.h
- *  \brief Random inline functions for tensor.
- *  \author Bing Xu, Tianqi Chen
- *   Based on curand|MKL|stdlib
- */
-#include <cstdlib>
-#include <random>
-#include <chrono>
-#include "tensor.h"
-#include "tensor_container.h"
-
-namespace mshadow {
-    /*!
-     * \brief random number generator, specialized per device
-     * \tparam Device the device of random number generator
-     *
-     * The primary template is empty; the usable interface is provided by the
-     * explicit specializations (Random<cpu>, and Random<gpu> when
-     * MSHADOW_USE_CUDA is enabled).
-     *
-     * Note: replaced rand (srand) with c++11's random functions.
-     */
-    template<typename Device>
-    class Random {};
-
-    /*! \brief CPU random number generator */
-    template<>
-    class Random<cpu> {
-    public:
-        /*!
-         * \brief constructor of random engine using default seed
-         *
-         * The seed is taken from the system clock. Initialization goes through
-         * Init() rather than Seed(): in an MKL build Seed() deletes the current
-         * VSL stream first, and no stream exists yet during construction.
-         */
-        Random<cpu> (){
-            // obtain a seed from the system clock (tick count truncated to unsigned)
-            const unsigned s = static_cast<unsigned>(
-                std::chrono::system_clock::now().time_since_epoch().count() );
-            this->Init( static_cast<int>( s ) );
-        }
-        /*!
-         * \brief constructor of random engine
-         * \param seed random number seed
-         */
-        Random<cpu>( int seed ){
-            this->Init( seed );
-        }
-        /*! \brief destructor, releases the MKL VSL stream when MKL is in use */
-        ~Random<cpu>() {
-            #if MSHADOW_USE_MKL
-            vslDeleteStream(&vStream_);
-            #endif
-        }
-        /*!
-         * \brief seed random number generator using this seed
-         * \param seed seed of prng
-         */
-        inline void Seed( int seed ){
-            #if MSHADOW_USE_MKL
-            // a VSL stream cannot be re-seeded in place: drop it and create a new one
-            int status = vslDeleteStream(&vStream_);
-            utils::Assert(status == VSL_STATUS_OK);
-            status = vslNewStream(&vStream_, VSL_BRNG_MT19937, seed);
-            utils::Assert(status == VSL_STATUS_OK);
-            #else
-            gen_.seed(seed);
-            #endif
-        }
-        /*!
-         * \brief in-place variant of SampleBinary: src is both the probability
-         *        matrix and the destination
-         * \param src probabilities on input, binary samples on output
-         * \tparam dim dimension of tensor
-         */
-        template<int dim>
-        inline void SampleBinary(Tensor<cpu, dim> &src) {
-          SampleBinary(src, src);
-        }
-
-        /*!
-         * \brief generate binary data according to a probability matrix:
-         *        each dst element is set to 0 when a uniform [0,1) draw is
-         *        greater than the matching src element, and to 1 otherwise
-         * \param dst destination, may be the same tensor as src
-         * \param src source probabilities; indexed with dst's shape, so it is
-         *        assumed to have the same shape as dst (TODO confirm with callers)
-         * \tparam dim dimension of tensor
-         */
-        template<int dim>
-        inline void SampleBinary(Tensor<cpu, dim> &dst, Tensor<cpu, dim> &src) {
-            real_t a=0.0f;
-            real_t b=1.0f;
-            Tensor<cpu, 2> dmat = dst.FlatTo2D();
-            Tensor<cpu, 2> smat = src.FlatTo2D();
-            std::uniform_real_distribution<real_t> distribution (a,b);
-            #if MSHADOW_USE_MKL
-            // uniform draws are staged in buffer_ (not written to dst) because
-            // dst may alias src, whose values are still needed for the comparison
-            buffer_.Resize( Shape1( dmat.shape[0] ) );
-            #endif
-            for ( index_t i = 0; i < dmat.shape[1]; ++i ) {
-                #if MSHADOW_USE_MKL
-                #if MSHADOW_SINGLE_PRECISION
-                int status = vsRngUniform( 0, vStream_, dmat.shape[0], buffer_.dptr, a, b );
-                #else
-                int status = vdRngUniform( 0, vStream_, dmat.shape[0], buffer_.dptr, a, b );
-                #endif
-                utils::Assert(status == VSL_STATUS_OK, "Failed to generate random number by MKL.\n" );
-                #endif
-                for ( index_t j = 0; j < dmat.shape[0]; ++j ) {
-                    #if MSHADOW_USE_MKL
-                    const real_t u = buffer_.dptr[j];
-                    #else
-                    const real_t u = distribution(gen_);
-                    #endif
-                    dmat[i][j] = u > smat[i][j] ? 0.0f: 1.0f;
-                }
-            }
-        }
-        /*!
-         * \brief generate data from uniform [a,b)
-         * \param dst destination
-         * \param a lower bound of uniform
-         * \param b upper bound of uniform
-         * \tparam dim dimension of tensor
-         */
-        template<int dim>
-        inline void SampleUniform( Tensor<cpu, dim> &dst, real_t a=0.0f, real_t b=1.0f ) {
-            Tensor<cpu, 2> mat = dst.FlatTo2D();
-            std::uniform_real_distribution<real_t> distribution (a,b);
-            for ( index_t i = 0; i < mat.shape[1]; ++i ) {
-                #if MSHADOW_USE_MKL
-                // MKL fills one whole row per call
-                #if MSHADOW_SINGLE_PRECISION
-                int status = vsRngUniform( 0, vStream_, mat.shape[0], mat[i].dptr, a, b );
-                #else
-                int status = vdRngUniform( 0, vStream_, mat.shape[0], mat[i].dptr, a, b );
-                #endif
-                utils::Assert(status == VSL_STATUS_OK, "Failed to generate random number by MKL.\n" );
-                #else
-                // c++11 <random> path: one draw per element
-                for ( index_t j = 0; j < mat.shape[0]; ++j ) {
-                    mat[i][j] = distribution(gen_);
-                }
-                #endif
-            }
-        }
-        /*!
-         * \brief generate data from a gaussian with mean mu and standard deviation sigma
-         *        when sigma <= 0 every element of dst is set to mu
-         * \param dst destination
-         * \param mu mean variable
-         * \param sigma standard deviation
-         * \tparam dim dimension of tensor
-         */
-        template<int dim>
-        inline void SampleGaussian( Tensor<cpu, dim> &dst, real_t mu = 0.0f, real_t sigma = 1.0f ) {
-            if( sigma <= 0.0f ) {
-                dst = mu; return;
-            }
-            Tensor<cpu, 2> mat = dst.FlatTo2D();
-            std::normal_distribution<real_t> distribution (mu, sigma);
-            for (index_t i = 0; i < mat.shape[1]; ++i) {
-                #if MSHADOW_USE_MKL
-                // MKL fills one whole row per call
-                #if MSHADOW_SINGLE_PRECISION
-                int status = vsRngGaussian( 0, vStream_, mat.shape[0], mat[i].dptr, mu, sigma );
-                #else
-                int status = vdRngGaussian( 0, vStream_, mat.shape[0], mat[i].dptr, mu, sigma );
-                #endif
-                utils::Assert(status == VSL_STATUS_OK, "Failed to generate random number by MKL.\n" );
-                #else
-                // c++11 <random> path: one draw per element
-                for (index_t j = 0; j < mat.shape[0]; ++j) {
-                  mat[i][j] = distribution(gen_);
-                }
-                #endif
-            }
-        }
-        /*!
-         * \brief return a temporary expression storing standard gaussian random variables
-         *        the temporary tensor is only valid before the next call of gaussian or uniform
-         *        can be used as part of expression
-         *  Caution: this means expression such as A = gaussian(s1) * gaussian(s2) will give invalid result,
-         *           since second call of gaussian(s2) makes gaussian(s1) invalid
-         *           A = gaussian(s1)*B+C; is correct; use one gaussian/uniform in each expression
-         * \param shape shape of the tensor
-         * \tparam dim dimension of tensor
-         */
-        template<int dim>
-        inline expr::ReshapeExp<Tensor<cpu,1>,dim,1> gaussian( Shape<dim> shape ){
-            buffer_.Resize( Shape1( shape.Size() ) );
-            this->SampleGaussian( buffer_, 0.0f, 1.0f );
-            return expr::reshape( buffer_, shape );
-        }
-        /*!
-         * \brief return a temporary expression storing standard uniform [0,1)
-         *        the temporary tensor is only valid before the next call of gaussian or uniform
-         *        can be used as part of expression
-         *  Caution: this means expression such as A = gaussian(s1) * gaussian(s2) will give invalid result,
-         *           since second call of gaussian(s2) makes gaussian(s1) invalid
-         *           A = gaussian(s1)*B+C; is correct; use one gaussian/uniform in each expression
-         * \param shape shape of the tensor
-         * \tparam dim dimension of tensor
-         */
-        template<int dim>
-        inline expr::ReshapeExp<Tensor<cpu,1>,dim,1> uniform( Shape<dim> shape ){
-            buffer_.Resize( Shape1( shape.Size() ) );
-            this->SampleUniform( buffer_, 0.0f, 1.0f );
-            return expr::reshape( buffer_, shape );
-        }
-    private:
-        /*!
-         * \brief shared constructor body: create and seed the engine, then size buffer_
-         * \param seed random number seed
-         */
-        inline void Init( int seed ){
-            #if MSHADOW_USE_MKL
-            int status = vslNewStream(&vStream_, VSL_BRNG_MT19937, seed);
-            utils::Assert( status == VSL_STATUS_OK, "MKL VSL Random engine failed to be initialized.\n" );
-            #else
-            gen_.seed(seed);
-            #endif
-            buffer_.Resize( Shape1( kRandBufferSize ) );
-        }
-        /*! \brief get next random number from rand, in [0,1); not called by the samplers in this class */
-        inline real_t RandNext( void ){
-            return static_cast<real_t>(rand()) / (static_cast<real_t>(RAND_MAX)+1.0f);
-        }
-        /*! \brief return a real number uniform in (0,1); used only by SampleNormal2D */
-        inline real_t RandNext2( void ){
-            return (static_cast<real_t>( rand() ) + 1.0 ) / (static_cast<real_t>(RAND_MAX) + 2.0);
-        }
-        /*!
-         * \brief sample iid xx,yy ~N(0,1)
-         *        draws points in the square (-1,1)x(-1,1) until one falls strictly
-         *        inside the unit circle (excluding the origin), then rescales it
-         * \param xx first  gaussian output
-         * \param yy second gaussian output
-         */
-        inline void SampleNormal2D( real_t &xx, real_t &yy ){
-            real_t x,y,s;
-            do{
-                x = 2.0f * RandNext2() - 1.0f;
-                y = 2.0f * RandNext2() - 1.0f;
-                s = x*x + y*y;
-            }while( s >= 1.0f || s == 0.0f );
-            real_t t = std::sqrt( -2.0f * std::log( s ) / s ) ;
-            xx = x * t; yy = y * t;
-        }
-    private:
-        #if MSHADOW_USE_MKL
-        /*! \brief stream used by MKL VSL */
-        VSLStreamStatePtr vStream_;
-        #endif
-        /*! \brief temporary space used to store random numbers */
-        TensorContainer<cpu,1> buffer_;
-
-        /*! \brief c++11 random generator, added for SINGA use */
-        std::mt19937 gen_;
-    }; // class Random<cpu>
-
-#if MSHADOW_USE_CUDA
-// __CUDACC__
-    /*! \brief GPU random number generator */
-    template<>
-    class Random<gpu> {
-    public:
-        /*!
-         * \brief constructor of random engine
-         * \param seed random number seed
-         */
-        Random<gpu>(int seed) {
-            curandStatus_t status;
-            status = curandCreateGenerator(&gen_, CURAND_RNG_PSEUDO_DEFAULT);
-            utils::Assert(status == CURAND_STATUS_SUCCESS, "Can not create CURAND Generator");
-            this->Seed( seed );
-            buffer_.Resize( Shape1(kRandBufferSize) );
-        }
-
-        /*! \brief destructor, destroys the CURAND generator */
-        ~Random<gpu>() {
-            curandStatus_t status;
-            status = curandDestroyGenerator(gen_);
-            utils::Assert(status == CURAND_STATUS_SUCCESS, "Destory CURAND Gen failed");
-        }
-        /*!
-         * \brief seed random number generator using this seed
-         * \param seed seed of prng
-         */
-        inline void Seed( int seed ){
-            curandStatus_t status;
-            status = curandSetPseudoRandomGeneratorSeed(gen_, seed);
-            utils::Assert(status == CURAND_STATUS_SUCCESS, "Set CURAND seed failed.");
-        }
-        /*!
-         * \brief generate data from uniform [a,b)
-         * \param dst destination
-         * \param a lower bound of uniform
-         * \param b upper bound of uniform
-         * \tparam dim dimension of tensor
-         */
-        template<int dim>
-        inline void SampleUniform(Tensor<gpu, dim> &dst, real_t a=0.0f, real_t b=1.0f) {
-            if( a == 0.0f && b == 1.0f ){
-                // standard range: no rescaling needed
-                dst = this->uniform( dst.shape );
-            }else{
-                dst = this->uniform( dst.shape ) *(b-a) + a;
-            }
-        }
-        /*!
-         * \brief generate data from a gaussian with mean mu and standard deviation sigma
-         * \param dst destination
-         * \param mu mean variable
-         * \param sigma standard deviation
-         * \tparam dim dimension of tensor
-         */
-        template<int dim>
-        inline void SampleGaussian(Tensor<gpu, dim> &dst, real_t mu = 0.0f, real_t sigma = 1.0f) {
-            dst = this->gaussian( dst.shape, mu, sigma );
-        }
-        /*!
-         * \brief return a temporary expression storing gaussian random variables
-         *        the temporary tensor is only valid before the next call of gaussian or uniform
-         *        can be used as part of expression
-         *  Caution: this means expression such as A = gaussian(s1) * gaussian(s2) will give invalid result,
-         *           since second call of gaussian(s2) makes gaussian(s1) invalid
-         *           A = gaussian(s1)*B+C; is correct; use one gaussian/uniform in each expression
-         * \param shape shape of the tensor
-         * \param mu mean
-         * \param sigma standard deviation
-         * \tparam dim dimension of tensor
-         */
-        template<int dim>
-        inline expr::ReshapeExp<Tensor<gpu,1>,dim,1> gaussian( Shape<dim> shape, real_t mu=0.0f, real_t sigma=1.0f){
-            // element count rounded up to the next even number
-            size_t aligned_sz = ((shape.Size() + 1UL)>>1)<<1;
-            // allocate the aligned size first, then shrink the logical shape back
-            // to the requested size (presumably the larger allocation is kept —
-            // see TensorContainer::Resize)
-            buffer_.Resize( Shape1( aligned_sz ) );
-            buffer_.Resize( Shape1( shape.Size() ) );
-            curandStatus_t status;
-            // both precisions generate aligned_sz values: the CURAND normal
-            // generators need an even count for pseudorandom generators
-            #if MSHADOW_SINGLE_PRECISION
-            status = curandGenerateNormal(gen_, buffer_.dptr, aligned_sz , mu, sigma);
-            #else
-            status = curandGenerateNormalDouble(gen_, buffer_.dptr, aligned_sz, mu, sigma);
-            #endif
-            utils::Assert(status == CURAND_STATUS_SUCCESS, "CURAND Gen Normal failed\n");
-            return expr::reshape( buffer_, shape );
-        }
-        /*!
-         * \brief return a temporary expression storing uniform random variables
-         *        generated by curandGenerateUniform / curandGenerateUniformDouble
-         *        the temporary tensor is only valid before the next call of gaussian or uniform
-         *        can be used as part of expression
-         *  Caution: this means expression such as A = gaussian(s1) * gaussian(s2) will give invalid result,
-         *           since second call of gaussian(s2) makes gaussian(s1) invalid
-         *           A = gaussian(s1)*B+C; is correct; use one gaussian/uniform in each expression
-         * \param shape shape of the tensor
-         * \tparam dim dimension of tensor
-         */
-        template<int dim>
-        inline expr::ReshapeExp<Tensor<gpu,1>,dim,1> uniform(Shape<dim> shape) {
-            buffer_.Resize( Shape1( shape.Size() ) );
-            curandStatus_t status;
-            #if MSHADOW_SINGLE_PRECISION
-            status = curandGenerateUniform(gen_, buffer_.dptr, buffer_.shape[0] );
-            #else
-            status = curandGenerateUniformDouble(gen_, buffer_.dptr, buffer_.shape[0] );
-            #endif
-            utils::Assert(status == CURAND_STATUS_SUCCESS, "CURAND Gen Uniform failed\n");
-            return expr::reshape( buffer_, shape );
-        }
-    private:
-        /*! \brief random number generator */
-        curandGenerator_t gen_;
-        /*! \brief temporary buffer backing the expressions returned by gaussian/uniform */
-        TensorContainer<gpu, 1> buffer_;
-    }; // class Random<gpu>
-    #endif
-
-}; // namespace mshadow
-
-#endif // MSHADOW_TENSOR_RANDOM_H

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dd1e4afa/include/mshadow/tensor_sse-inl.hpp
----------------------------------------------------------------------
diff --git a/include/mshadow/tensor_sse-inl.hpp b/include/mshadow/tensor_sse-inl.hpp
deleted file mode 100644
index b98383e..0000000
--- a/include/mshadow/tensor_sse-inl.hpp
+++ /dev/null
@@ -1,431 +0,0 @@
-#ifndef MSHADOW_TENSOR_SSE_INL_HPP
-#define MSHADOW_TENSOR_SSE_INL_HPP
-/*!
- * \file tensor_sse-inl.hpp
- * \brief support of sse2 optimization of some operations
- * \author Tianqi Chen
- */
-#ifdef __APPLE__
-#include <stdlib.h>
-#else
-#include <malloc.h>
-#endif
-
-#include "tensor_expr.h"
-#include "tensor.h"
-
-namespace mshadow {
-    /*! \brief namespace to support sse2 vectorization */
-    namespace sse2{
-        /*!
-         * \brief analogous to cudaMallocPitch: allocate space for num_line lines,
-         *        each padded to a multiple of 16
-         * \param pitch output parameter, lspace rounded up to the next multiple of 16
-         * \param lspace space required for each line
-         * \param num_line number of lines to be allocated
-         * \return pointer to a block of pitch * num_line; failure is reported through utils::Assert
-         */
-        inline void* AlignedMallocPitch( size_t &pitch, size_t lspace, size_t num_line ){
-            const size_t kPad = 15;
-            pitch = ( lspace + kPad ) & ~kPad;
-            const size_t total = pitch * num_line;
-            #if defined(_MSC_VER)
-            void *res = _aligned_malloc( total, 16 );
-            #elif defined(__APPLE__)
-            void *res = malloc( total );
-            #else
-            void *res = memalign( 16, total );
-            #endif
-            utils::Assert( res != NULL, "AlignedMallocPitch failed" );
-            return res;
-        }
-        /*!
-         * \brief free space obtained from AlignedMallocPitch
-         * \param ptr pointer to space to be freed
-         */
-        inline void AlignedFree( void *ptr ){
-            #ifndef _MSC_VER
-            free( ptr );
-            #else
-            // memory from _aligned_malloc must go back through _aligned_free
-            _aligned_free( ptr );
-            #endif
-        }
-        /*! \brief check if a size or address value is a multiple of 16 */
-        inline bool CheckAlign( size_t pitch ){
-            const size_t low_bits = pitch & static_cast<size_t>( 15 );
-            return low_bits == 0;
-        }
-        /*! \brief check if a pointer is aligned, i.e. its address is a multiple of 16 */
-        inline bool CheckAlign( void *ptr ){
-            const size_t address = reinterpret_cast<size_t>( ptr );
-            return CheckAlign( address );
-        }
-        /*!
-         * \brief get upper bound of aligned index of size:
-         *        size*fsize is rounded up to a multiple of 16, then divided by fsize
-         * \param size size of the array
-         * \param fsize size of float
-         */
-        inline index_t UpperAlign( index_t size, size_t fsize ){
-            const size_t kPad = 15;
-            const size_t padded = ( size * fsize + kPad ) & ~kPad;
-            return padded / fsize;
-        }
-        /*!
-         * \brief get lower bound of aligned index of size:
-         *        size*fsize is rounded down to a multiple of 16, then divided by fsize
-         * \param size size of the array
-         * \param fsize size of float
-         */
-        inline index_t LowerAlign( index_t size, size_t fsize ){
-            const size_t kPad = 15;
-            const size_t truncated = ( size * fsize ) & ~kPad;
-            return truncated / fsize;
-        }
-    }; // namespace sse2
-}; // namespace  mshadow
-
-#if MSHADOW_USE_SSE
-// sse types are not compatible with nvcc, only use them in cpu mode
-#include <emmintrin.h>
-
-namespace mshadow{
-    namespace sse2{
-        /*! 
-         * \brief float vector real type, used for vectorization 
-         *        the primary template is empty; the float and double
-         *        specializations carry the actual implementation
-         * \tparam FloatType double or float
-         */
-        template<typename FloatType> struct FVec{};
-        
-        /*! \brief vector real type for float, wrapping one __m128 value */
-        template<>
-        struct FVec<float> {
-        public:
-            /*! \brief underlying SSE type */
-            typedef __m128 DType;
-            /*! \brief number of float in vector */
-            static const index_t kSize = 4;
-            /*! \brief data content */
-            DType data_;
-        public:
-            /*! \brief default constructor, data_ is not initialized */
-            FVec( void ){}
-            /*! \brief wrap an existing SSE value */
-            FVec( DType data ):data_( data ){}
-            /*! \brief set every lane to the float s */
-            FVec( const float &s ):data_( _mm_set1_ps( s ) ){}
-            /*! \brief load from pointer src with _mm_load_ps */
-            FVec( const float *src ):data_( _mm_load_ps( src ) ){}
-        public:
-            /*! \brief store data into dst space with _mm_store_ps */
-            inline void Store( float *dst ) const{
-                _mm_store_ps( dst, data_ );
-            }
-            /*! \brief sum of all content */
-            inline float Sum( void ) const{
-                // fold the upper two lanes onto the lower two, then add lane 1 to lane 0
-                const DType upper = _mm_movehl_ps( data_, data_ );
-                const DType pairs = _mm_add_ps( data_, upper );
-                const DType lane1 = _mm_shuffle_ps( pairs, pairs, 1 );
-                const DType total = _mm_add_ss( pairs, lane1 );
-                #if defined(_MSC_VER) && ( _MSC_VER <= 1500 ) && defined(_WIN64)
-                return total.m128_f32[ 0 ];
-                #else
-                return _mm_cvtss_f32( total );
-                #endif
-            }
-        };
-
-        /*! \brief vector real type for double, wrapping one __m128d value */
-        template<>
-        struct FVec<double> {
-        public:
-            /*! \brief underlying SSE type */
-            typedef __m128d DType;
-            /*! \brief number of double in vector */
-            static const index_t kSize = 2;
-            /*! \brief data content */
-            DType data_;
-        public:
-            /*! \brief default constructor, data_ is not initialized */
-            FVec( void ){}
-            /*! \brief wrap an existing SSE value */
-            FVec( DType data ):data_( data ){}
-            /*! \brief set every lane to the double s */
-            FVec( const double &s ):data_( _mm_set1_pd( s ) ){}
-            /*! \brief load from pointer src with _mm_load_pd */
-            FVec( const double *src ):data_( _mm_load_pd( src ) ){}
-        public:
-            /*! \brief store data into dst space with _mm_store_pd */
-            inline void Store( double *dst ) const{
-                _mm_store_pd( dst, data_ );
-            }
-            /*! \brief sum of all content */
-            inline double Sum( void ) const{
-                // bring the high lane down and add it to the low lane
-                const DType high = _mm_unpackhi_pd( data_, data_ );
-                const DType total = _mm_add_sd( data_, high );
-                #if defined(_MSC_VER) && ( _MSC_VER <= 1500 ) && defined(_WIN64)
-                return total.m128d_f64[0];
-                #else
-                return _mm_cvtsd_f64( total );
-                #endif
-            }
-        };
-    };
-
-    namespace sse2{
-        /*!
-         * \brief sse2 operator type of certain operator
-         *        operators without a specialization below report kEnabled = false
-         * \tparam OP the scalar operator type
-         */
-        template<typename OP>
-        struct SSEOp{
-            static const bool kEnabled = false;
-        };
-        /*! \brief packed addition */
-        template<>
-        struct SSEOp<op::plus>{
-            static const bool kEnabled = true;
-            MSHADOW_CINLINE static FVec<float> Map( const FVec<float> &lhs, const FVec<float> &rhs ){
-                const __m128 res = _mm_add_ps( lhs.data_, rhs.data_ );
-                return FVec<float>( res );
-            }
-            MSHADOW_CINLINE static FVec<double> Map( const FVec<double> &lhs, const FVec<double> &rhs ){
-                const __m128d res = _mm_add_pd( lhs.data_, rhs.data_ );
-                return FVec<double>( res );
-            }
-        };
-        /*! \brief packed subtraction */
-        template<>
-        struct SSEOp<op::minus>{
-            static const bool kEnabled = true;
-            MSHADOW_CINLINE static FVec<float> Map( const FVec<float> &lhs, const FVec<float> &rhs ){
-                const __m128 res = _mm_sub_ps( lhs.data_, rhs.data_ );
-                return FVec<float>( res );
-            }
-            MSHADOW_CINLINE static FVec<double> Map( const FVec<double> &lhs, const FVec<double> &rhs ){
-                const __m128d res = _mm_sub_pd( lhs.data_, rhs.data_ );
-                return FVec<double>( res );
-            }
-        };
-        /*! \brief packed multiplication */
-        template<>
-        struct SSEOp<op::mul>{
-            static const bool kEnabled = true;
-            MSHADOW_CINLINE static FVec<float> Map( const FVec<float> &lhs, const FVec<float> &rhs ){
-                const __m128 res = _mm_mul_ps( lhs.data_, rhs.data_ );
-                return FVec<float>( res );
-            }
-            MSHADOW_CINLINE static FVec<double> Map( const FVec<double> &lhs, const FVec<double> &rhs ){
-                const __m128d res = _mm_mul_pd( lhs.data_, rhs.data_ );
-                return FVec<double>( res );
-            }
-        };
-        /*! \brief packed division */
-        template<>
-        struct SSEOp<op::div>{
-            static const bool kEnabled = true;
-            MSHADOW_CINLINE static FVec<float> Map( const FVec<float> &lhs, const FVec<float> &rhs ){
-                const __m128 res = _mm_div_ps( lhs.data_, rhs.data_ );
-                return FVec<float>( res );
-            }
-            MSHADOW_CINLINE static FVec<double> Map( const FVec<double> &lhs, const FVec<double> &rhs ){
-                const __m128d res = _mm_div_pd( lhs.data_, rhs.data_ );
-                return FVec<double>( res );
-            }
-        };
-
-        /*! \brief identity: returns its argument unchanged */
-        template<>
-        struct SSEOp<op::identity>{
-            static const bool kEnabled = true;
-            MSHADOW_CINLINE static FVec<float> Map( const FVec<float> &src ){
-                return FVec<float>( src.data_ );
-            }
-            MSHADOW_CINLINE static FVec<double> Map( const FVec<double> &src ){
-                return FVec<double>( src.data_ );
-            }
-        };
-    }; // namespace sse2
-    
-    namespace sse2{
-        /*!
-         * \brief saver to do storage: loads the current content of dst, combines it
-         *        with src through SSEOp<SV::OPType> and stores the result back
-         * \tparam SV saver type providing OPType
-         * \tparam TFloat float or double
-         */
-        template<typename SV, typename TFloat>
-        struct Saver{
-            MSHADOW_CINLINE static void Save( TFloat *dst, const FVec<TFloat> &src ){
-                const FVec<TFloat> current( dst );
-                const FVec<TFloat> updated = SSEOp<typename SV::OPType>::Map( current, src );
-                updated.Store( dst );
-            }
-        };
-        /*! \brief specialization for sv::saveto: src is stored to dst without reading dst */
-        template<typename TFloat>
-        struct Saver<sv::saveto,TFloat>{
-            MSHADOW_CINLINE static void Save( TFloat *dst, const FVec<TFloat> &src ){
-                src.Store( dst );
-            }
-        };
-    }; // namespace sse2
-}; // namespace mshadow
-
-namespace mshadow{
-    namespace expr{
-        // SSE counterpart of the expression plan: every specialization offers a packed
-        // evaluation (EvalSSE) next to the scalar one (Eval)
-        template<typename ExpType>
-        class SSEPlan {
-        public:
-            /*!
-             * \brief evaluate the expression at index [y][x] as one packed vector
-             *        (MapSSEPlan only calls this with x advancing in steps of
-             *        FVec<real_t>::kSize); to be implemented by each specialization
-             */
-            MSHADOW_CINLINE sse2::FVec<real_t> EvalSSE( index_t y, index_t x ) const;
-            MSHADOW_CINLINE real_t Eval( index_t y, index_t x ) const; // scalar evaluation at [y][x]
-        };
-
-        template <typename Device, int dim>
-        class SSEPlan< Tensor<Device,dim> >{ // plan reading directly from tensor memory
-        public:
-            SSEPlan( const Tensor<Device,dim> &t )
-                :dptr_(t.dptr),stride_(t.shape.stride_){}
-            MSHADOW_CINLINE sse2::FVec<real_t> EvalSSE( index_t y, index_t x ) const{
-                return sse2::FVec<real_t>( &dptr_[ y*stride_+x ] ); // packed load starting at element [y][x]
-            }
-            MSHADOW_CINLINE real_t Eval( index_t y, index_t x ) const{
-                return dptr_[ y * stride_ + x ];
-            }
-        private:
-            const real_t  *dptr_; // copied from t.dptr
-            index_t stride_; // copied from t.shape.stride_
-        };
-
-        template<>
-        class SSEPlan<ScalarExp>{ // plan for a scalar: the same value at every index
-        public:
-            SSEPlan( real_t scalar ):scalar_(scalar){}
-            MSHADOW_CINLINE sse2::FVec<real_t> EvalSSE( index_t y, index_t x ) const{
-                return sse2::FVec<real_t>( scalar_ ); // scalar replicated into a vector
-            }
-            MSHADOW_CINLINE real_t Eval( index_t y, index_t x ) const{
-                return scalar_;
-            }
-        private:
-            real_t scalar_;
-        };
-
-        template<typename OP, typename TA, typename TB,int etype>
-        class SSEPlan< BinaryMapExp<OP,TA,TB,etype> >{ // plan applying OP to two sub-plans
-        public:
-            SSEPlan( const SSEPlan<TA> &lhs, const SSEPlan<TB> &rhs )
-                :lhs_(lhs), rhs_(rhs){}
-            MSHADOW_CINLINE sse2::FVec<real_t> EvalSSE( index_t y, index_t x ) const{
-                return sse2::SSEOp<OP>::Map( lhs_.EvalSSE( y, x ), rhs_.EvalSSE( y, x ) ); // packed OP
-            }
-            MSHADOW_CINLINE real_t Eval( index_t y, index_t x ) const{
-                return OP::Map( lhs_.Eval( y, x ), rhs_.Eval( y, x ) ); // scalar OP
-            }
-        private:
-            SSEPlan<TA> lhs_;
-            SSEPlan<TB> rhs_;
-        };
-
-        template<typename OP, typename TA, int etype>
-        class SSEPlan< UnaryMapExp<OP,TA,etype> >{ // plan applying OP to one sub-plan
-        public:
-            SSEPlan( const SSEPlan<TA> &src ):src_(src){}
-            MSHADOW_CINLINE sse2::FVec<real_t> EvalSSE( index_t y, index_t x ) const{
-                return sse2::SSEOp<OP>::Map( src_.EvalSSE( y, x ) ); // packed OP
-            }
-            MSHADOW_CINLINE real_t Eval( index_t y, index_t x ) const{
-                return OP::Map( src_.Eval( y, x ) ); // scalar OP
-            }
-        private:
-            SSEPlan<TA> src_;
-        };
-
-        template<typename OP, typename TA, typename TB, int etype>
-        inline SSEPlan< BinaryMapExp<OP,TA,TB,etype> > MakeSSEPlan( const BinaryMapExp<OP,TA,TB,etype> &e ); // declaration only; defined at the end of this namespace
-
-        inline SSEPlan<ScalarExp> MakeSSEPlan( const ScalarExp &e ){
-            return SSEPlan<ScalarExp>( e.scalar_ );
-        }
-
-        template<typename T>
-        inline SSEPlan<T> MakeSSEPlan( const ContainerExp<T> &e ){
-            return SSEPlan<T>( e.self() );
-        }
-
-        template<typename T,int dim>
-        inline SSEPlan<T> MakeSSEPlan( const MakeTensorExp<T,cpu,dim> &e ){
-            return SSEPlan<T>( e.real_self() );
-        }
-
-        template<typename OP, typename TA, int etype>
-        inline SSEPlan< UnaryMapExp<OP,TA,etype> > MakeSSEPlan( const UnaryMapExp<OP,TA,etype> &e ){
-            return SSEPlan< UnaryMapExp<OP,TA,etype> >( MakeSSEPlan(e.src_) ); // build the operand's plan first
-        }
-
-        template<typename OP, typename TA, typename TB, int etype>
-        inline SSEPlan< BinaryMapExp<OP,TA,TB,etype> > MakeSSEPlan( const BinaryMapExp<OP,TA,TB,etype> &e ){
-                return SSEPlan< BinaryMapExp<OP,TA,TB,etype> >( MakeSSEPlan(e.lhs_), MakeSSEPlan(e.rhs_) ); // build both operand plans first
-        }
-    };
-
-    namespace expr{
-        /*!
-         * \brief static (compile-time) check of whether sse evaluation is enabled
-         *        if an expression E can not be evaluated using sse, then kPass = false
-         *        kPass is true for ScalarExp and for cpu tensors, and for unary/binary
-         *        map expressions whose operands pass and whose operator has
-         *        sse2::SSEOp<OP>::kEnabled set
-         * \tparam E expression
-         */
-        template<typename E>
-        struct SSECheck{
-            const static bool kPass = false;
-        };
-        template<>
-        struct SSECheck<ScalarExp>{
-            const static bool kPass = true;
-        };
-        template<int dim>
-        struct SSECheck<Tensor<cpu,dim> >{
-            const static bool kPass = true;
-        };
-        
-        template<typename OP, typename TA, int etype>
-        struct SSECheck<UnaryMapExp<OP,TA,etype> >{
-            const static bool kPass = SSECheck<TA>::kPass && sse2::SSEOp<OP>::kEnabled;
-        };
-        template<typename OP, typename TA, typename TB, int etype>
-        struct SSECheck< BinaryMapExp<OP,TA,TB,etype> >{
-            const static bool kPass = SSECheck<TA>::kPass && SSECheck<TB>::kPass && sse2::SSEOp<OP>::kEnabled;
-        }; 
-    }; // namespace expr
-    namespace expr{
-        /*!
-         * \brief runtime check of whether the data of an expression is aligned
-         *        and therefore allows sse operation; expression types without a
-         *        specialization below always fail the check
-         * \tparam dim dimension of the tensor
-         * \tparam E expression
-         */
-        template<int dim,typename E>
-        struct SSEAlignCheck{
-            inline static bool Check( const E &exp ){
-                return false;
-            }
-        };
-        /*! \brief a scalar has no memory layout, so it always passes */
-        template<int dim>
-        struct SSEAlignCheck< dim, ScalarExp >{
-            inline static bool Check( const ScalarExp &exp ){
-                return true;
-            }
-        };
-        /*! \brief a cpu tensor passes when both its data pointer and its row stride in bytes are aligned */
-        template<int dim>
-        struct SSEAlignCheck< dim,Tensor<cpu,dim> >{
-            inline static bool Check( const Tensor<cpu,dim> &t ){
-                if( !sse2::CheckAlign( t.dptr ) ) return false;
-                return sse2::CheckAlign( t.shape.stride_ * sizeof( real_t ) );
-            }
-        };
-        /*! \brief a unary map passes when its operand passes */
-        template<int dim, typename OP, typename TA, int etype>
-        struct SSEAlignCheck< dim, UnaryMapExp<OP,TA,etype> >{
-            inline static bool Check( const UnaryMapExp<OP,TA,etype> &t ){
-                return SSEAlignCheck<dim,TA>::Check( t.src_ );
-            }
-        };
-        /*! \brief a binary map passes when both operands pass */
-        template<int dim, typename OP, typename TA, typename TB, int etype>
-        struct SSEAlignCheck< dim, BinaryMapExp<OP,TA,TB,etype> >{
-            inline static bool Check( const BinaryMapExp<OP,TA,TB,etype> &t ){
-                if( !SSEAlignCheck<dim,TA>::Check( t.lhs_ ) ) return false;
-                return SSEAlignCheck<dim,TB>::Check( t.rhs_ );
-            }
-        };
-    }; // namespace expr
-
-    /*!
-     * \brief use SSEPlan to compute result
-     *        each row is processed in packed steps up to the aligned length given
-     *        by sse2::LowerAlign; the remaining elements are handled one by one
-     * \param _dst destination tensor, processed through its 2D flattened view
-     * \param plan plan of the expression to evaluate
-     * \tparam SV saver type
-     * \tparam E expression type
-     * \tparam dim dimension of the destination tensor
-     */
-    template<typename SV, typename E, int dim>
-    inline void MapSSEPlan(Tensor<cpu,dim> _dst, const expr::SSEPlan<E> &plan){
-        Tensor<cpu,2> flat = _dst.FlatTo2D();
-        const index_t ncol = flat.shape[0];
-        const index_t nrow = flat.shape[1];
-        const index_t nvec = sse2::LowerAlign( ncol, sizeof(real_t) );
-        for ( index_t y = 0; y < nrow; ++y ) {
-            // packed part of the row
-            index_t x = 0;
-            while( x < nvec ){
-                sse2::Saver<SV,real_t>::Save( &flat[y][x], plan.EvalSSE( y,x ) );
-                x += sse2::FVec<real_t>::kSize;
-            }
-            // scalar tail
-            for( index_t t = nvec; t < ncol; ++t ){
-                SV::Save( flat[y][t], plan.Eval(y,t) );
-            }
-        }
-    }
-}; // namespace mshadow
-#endif // MSHADOW_USE_SSE
-#endif // MSHADOW_TENSOR_SSE_INL_HPP

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dd1e4afa/include/singa/comm/msg.h
----------------------------------------------------------------------
diff --git a/include/singa/comm/msg.h b/include/singa/comm/msg.h
deleted file mode 100644
index 8e03cd5..0000000
--- a/include/singa/comm/msg.h
+++ /dev/null
@@ -1,243 +0,0 @@
-/************************************************************
-*
-* Licensed to the Apache Software Foundation (ASF) under one
-* or more contributor license agreements.  See the NOTICE file
-* distributed with this work for additional information
-* regarding copyright ownership.  The ASF licenses this file
-* to you under the Apache License, Version 2.0 (the
-* "License"); you may not use this file except in compliance
-* with the License.  You may obtain a copy of the License at
-*
-*   http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing,
-* software distributed under the License is distributed on an
-* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-* KIND, either express or implied.  See the License for the
-* specific language governing permissions and limitations
-* under the License.
-*
-*************************************************************/
-
-#ifndef SINGA_COMM_MSG_H_
-#define SINGA_COMM_MSG_H_
-
-#include <utility>
-
-// TODO(wangwei): make it a compiler argument
-// #define USE_ZMQ
-
-#include <vector>
-#ifdef USE_ZMQ
-#include <czmq.h>
-#endif
-
-namespace singa {
-/**
- * Pack a message address: (grp << 16) | (id_or_proc << 8) | type (fields are not range-checked).
- * @param grp worker/server group id
- * @param id_or_proc worker/server id or procs id; decoded by AddrID with an 8-bit mask
- * @param type msg type; decoded by AddrType with an 8-bit mask
- */
-inline int Addr(int grp, int id_or_proc, int type) {
-  return (grp << 16) | (id_or_proc << 8) | type;
-}
-
-/**
- * Extract the group id from addr, i.e., addr >> 16.
- *
- * @return group id
- */
-inline int AddrGrp(int addr) {
-  return addr >> 16;
-}
-
-/**
- * Extract the worker/server id from addr, i.e., bits 8-15.
- *
- * @return id
- */
-inline int AddrID(int addr) {
-  static const int mask = (1 << 8) - 1;  // 0xFF
-  return (addr >> 8) & mask;
-}
-
-/**
- * Extract the procs id from addr; same field as AddrID, which this forwards to.
- *
- * @return procs id
- */
-inline int AddrProc(int addr) {
-  return AddrID(addr);
-}
-
-/**
- * Extract the msg type from addr, i.e., the lowest 8 bits.
- * @return msg type
- */
-inline int AddrType(int addr) {
-  static const int mask = (1 << 8) -1;  // 0xFF
-  return addr & mask;
-}
-
-/**
- * Msg used to transfer Param info (gradient or value), feature blob, etc.
- * between workers, stubs and servers.
- *
- * Each msg has a source addr and dest addr identified by a unique integer.
- * It is also associated with a target field (value and version) for ease of
- * getting some meta info (e.g., parameter id) from the msg.
- *
- * Other data is added into the message as frames.
- */
-class Msg {
- public:
-  ~Msg();
-  Msg();
-  /**
-   * Construct the msg providing source and destination addr.
-   */
-  Msg(int src, int dst);
-  /**
-   * Copy constructor.
-   */
-  Msg(const Msg& msg);
-  /**
-   * Swap the src/dst addr
-   */
-  void SwapAddr();
-  /**
-   * Add a frame (a chunk of bytes) into the message
-   */
-  void AddFrame(const void* addr, int nBytes);
-  /**
-   * @return num of bytes of the current frame.
-   */
-  int FrameSize();
-  /**
-   * @return the pointer to the current frame data.
-   */
-  void* FrameData();
-  /**
-   * @return the data of the current frame as c string
-   */
-  char* FrameStr();
-  /**
-   * Move the cursor to the first frame.
-   */
-  void FirstFrame();
-  /**
-   * Move the cursor to the last frame.
-   */
-  void LastFrame();
-  /**
-   * Move the cursor to the next frame
-   * @return true if the next frame is not NULL; otherwise false
-   */
-  bool NextFrame();
-  /**
-   *  Add a 'format' frame to the msg (like CZMQ's zsock_send).
-   *
-   *  The format is a string that defines the type of each field.
-   *  The format can contain any of these characters, each corresponding to
-   *  one or two arguments:
-   *  i = int (signed)
-   *  1 = uint8_t
-   *  2 = uint16_t
-   *  4 = uint32_t
-   *  8 = uint64_t
-   *  p = void * (sends the pointer value, only meaningful over inproc)
-   *  s = char**
-   *
-   *  Returns size of the added content.
-   */
-  int AddFormatFrame(const char *format, ...);
-  /**
-   *  Parse the current frame added using AddFormatFrame(const char*, ...).
-   *
-   *  The format is a string that defines the type of each field.
-   *  The format can contain any of these characters, each corresponding to
-   *  one or two arguments:
-   *  i = int (signed)
-   *  1 = uint8_t
-   *  2 = uint16_t
-   *  4 = uint32_t
-   *  8 = uint64_t
-   *  p = void * (sends the pointer value, only meaningful over inproc)
-   *  s = char**
-   *
-   *  Returns size of the parsed content.
-   */
-  int ParseFormatFrame(const char* format, ...);
-
-#ifdef USE_ZMQ
-  void ParseFromZmsg(zmsg_t* msg);
-  zmsg_t* DumpToZmsg();
-#endif
-
-  /**
-   * @return msg size in terms of bytes, ignore meta info.
-   */
-  int size() const;
-  /**
-   * Set source addr.
-   * @param addr unique identify one worker/server/stub in the current job
-   */
-  inline void set_src(int addr) { src_ = addr; }
-  /**
-   * @return source addr.
-   */
-  inline int src() const { return src_; }
-  /**
-   * Set destination addr.
-   * @param addr unique identify one worker/server/stub in the current job
-   */
-  inline void set_dst(int addr) { dst_ = addr; }
-  /**
-   * @return dst addr.
-   */
-  inline int dst() const { return dst_; }
-  /**
-   * Set msg type, e.g., kPut, kGet, kUpdate, kRequest
-   */
-  inline void set_type(int type) { type_ = type; }
-  /**
-   * @return msg type.
-   */
-  inline int type() const { return type_; }
-  /**
-   * Set msg target.
-   *
-   * One msg has a target to identify some entity in worker/server/stub.
-   * The target is associated with a version, e.g., Param version.
-   */
-  inline void set_trgt(int val, int version) {
-    trgt_val_ = val;
-    trgt_version_ = version;
-  }
-  inline int trgt_val() const { return trgt_val_; }  // target value stored by set_trgt()
-  inline int trgt_version() const { return trgt_version_; }  // target version stored by set_trgt()
-
- protected:
-  int src_ = 0;  // source addr, see set_src()
-  int dst_ = 0;  // destination addr, see set_dst()
-  int type_ = 0;  // msg type, see set_type()
-  int trgt_val_ = 0;  // target value, see set_trgt()
-  int trgt_version_ = 0;  // target version, see set_trgt()
-#ifdef USE_ZMQ
-  zmsg_t* msg_ = nullptr;
-  zframe_t *frame_ = nullptr;
-#else
-  std::vector<std::pair<void*, int>> frames_;  // presumably (frame data, size in bytes) pairs; verify in msg.cc
-  unsigned idx_ = 0;  // presumably the cursor over frames_; verify in msg.cc
-#endif
-};
-
-inline void DeleteMsg(Msg** msg) {  // frees the Msg and clears the caller's pointer
-  delete *msg;
-  *msg = nullptr;  // so the caller cannot reuse the freed pointer by accident
-}
-
-}  // namespace singa
-
-#endif  // SINGA_COMM_MSG_H_

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dd1e4afa/include/singa/comm/socket.h
----------------------------------------------------------------------
diff --git a/include/singa/comm/socket.h b/include/singa/comm/socket.h
deleted file mode 100644
index 40d4cc3..0000000
--- a/include/singa/comm/socket.h
+++ /dev/null
@@ -1,123 +0,0 @@
-/************************************************************
-*
-* Licensed to the Apache Software Foundation (ASF) under one
-* or more contributor license agreements.  See the NOTICE file
-* distributed with this work for additional information
-* regarding copyright ownership.  The ASF licenses this file
-* to you under the Apache License, Version 2.0 (the
-* "License"); you may not use this file except in compliance
-* with the License.  You may obtain a copy of the License at
-*
-*   http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing,
-* software distributed under the License is distributed on an
-* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-* KIND, either express or implied.  See the License for the
-* specific language governing permissions and limitations
-* under the License.
-*
-*************************************************************/
-
-#ifndef SINGA_COMM_SOCKET_H_
-#define SINGA_COMM_SOCKET_H_
-
-#ifdef USE_ZMQ
-#include <czmq.h>
-#endif
-
-#include <map>
-#include <string>
-#include <vector>
-#include <unordered_map>
-#include "singa/utils/safe_queue.h"
-#include "singa/comm/msg.h"
-
-namespace singa {
-/**
- * Worker and Server use Dealer to communicate with Stub.
- * Stub uses Dealer to communicate with remote Stub.
- */
-class Dealer {
- public:
-  /**
-   * @param id used for identifying the msg queue of this dealer.
-   */
-   explicit Dealer(int id);
-  ~Dealer();
-  /**
-   * Setup the connection with the remote router.
-   *
-   * For local router, there is no need to connect it.
-   *
-   * @param endpoint Identifier of the remote router to connect. It follows
-   * ZeroMQ's format, i.e., IP:port, where IP is the connected process.
-   * @return 1 if the connection is set up successfully; 0 otherwise
-   */
-  int Connect(const std::string& endpoint);
-  /**
-   * Send a message to the local router (id=-1) or remote router. It is
-   * non-blocking. The message will be deallocated after sending, thus
-   * should not be used after calling Send();
-   */
-  int Send(Msg** msg);
-  /**
-   * Recv msg from local router.
-   *
-   * @param timeout stop waiting and return after timeout microseconds.
-   * @return a message pointer if success; nullptr if failure
-   */
-  Msg* Receive(int timeout = 0);
-
- protected:
-  std::string endpoint_;
-  int id_;
-#ifdef USE_ZMQ
-  zsock_t* dealer_ = nullptr;
-#endif
-};
-/**
- * In Singa, each process has one router, which is used by Stub; hence we fix
- * the router to use the msg queue indexed by -1.
- */
-class Router {
- public:
-  ~Router();
-  Router();
-  /**
-   * Bind the router to an endpoint for recv msg from remote dealer.
-   * If the router is used for intra-communication only, then no need to call
-   * Bind.
-   *
-   * @param endpoint identifier for the Dealer socket in other process
-   * to connect. It has the format IP:Port, where IP is the host machine.
-   * @return number of connected dealers.
-   */
-  int Bind(const std::string& endpoint);
-  /**
-   * Send msg to local dealers by pushing the msg into the msg queue indexed by
-   * dst of the msg.
-   */
-  int Send(Msg** msg);
-  /**
-   * Recv msg from local (msg queue) or remote dealer (via zmq).
-   */
-  Msg* Receive(int timeout = 0);
-
- protected:
-  std::string endpoint_;
-#ifdef USE_ZMQ
-  zsock_t* router_ = nullptr;
-  zpoller_t* poller_ = nullptr;
-#endif
-};
-
-/**
- * Used for intra-process communication.
- * Each dealer/router has a SafeQueue for receiving msgs.
- * The sender pushes msgs onto the receiver's queue.
- */
-extern std::unordered_map<int, SafeQueue<Msg*>> msgQueues;
-}  // namespace singa
-
-#endif  // SINGA_COMM_SOCKET_H_

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dd1e4afa/include/singa/core/device.h
----------------------------------------------------------------------
diff --git a/include/singa/core/device.h b/include/singa/core/device.h
new file mode 100644
index 0000000..ef7c208
--- /dev/null
+++ b/include/singa/core/device.h
@@ -0,0 +1,30 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+namespace singa {
+
+/// Allocate memory for Tensor objects and execute Tensor operations.
+class Device {  // placeholder: no members are declared yet
+
+
+
+
+};
+
+}  /* namespace singa */

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dd1e4afa/include/singa/core/memory.h
----------------------------------------------------------------------
diff --git a/include/singa/core/memory.h b/include/singa/core/memory.h
new file mode 100644
index 0000000..0a7aa82
--- /dev/null
+++ b/include/singa/core/memory.h
@@ -0,0 +1,30 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+namespace singa {
+
+/// Manage device memory pool including garbage collection, memory opt.
+class VirtualMemory {  // placeholder: no members are declared yet
+
+
+
+
+};
+
+}  /* namespace singa */

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dd1e4afa/include/singa/core/scheduler.h
----------------------------------------------------------------------
diff --git a/include/singa/core/scheduler.h b/include/singa/core/scheduler.h
new file mode 100644
index 0000000..6071b33
--- /dev/null
+++ b/include/singa/core/scheduler.h
@@ -0,0 +1,30 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+namespace singa {
+
+/// Scheduling Tensor operations with dependency detection.
+class Scheduler {  // placeholder: no members are declared yet
+
+
+
+
+};
+
+}  /* namespace singa */

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dd1e4afa/include/singa/core/tensor.h
----------------------------------------------------------------------
diff --git a/include/singa/core/tensor.h b/include/singa/core/tensor.h
new file mode 100644
index 0000000..795891d
--- /dev/null
+++ b/include/singa/core/tensor.h
@@ -0,0 +1,30 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+namespace singa {
+
+/// A multi-dimensional array resident on a device.
+class Tensor {  // placeholder: no members are declared yet
+
+
+
+
+};
+
+}  /* namespace singa */

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dd1e4afa/include/singa/driver.h
----------------------------------------------------------------------
diff --git a/include/singa/driver.h b/include/singa/driver.h
deleted file mode 100644
index 0105158..0000000
--- a/include/singa/driver.h
+++ /dev/null
@@ -1,264 +0,0 @@
-/************************************************************
-*
-* Licensed to the Apache Software Foundation (ASF) under one
-* or more contributor license agreements.  See the NOTICE file
-* distributed with this work for additional information
-* regarding copyright ownership.  The ASF licenses this file
-* to you under the Apache License, Version 2.0 (the
-* "License"); you may not use this file except in compliance
-* with the License.  You may obtain a copy of the License at
-*
-*   http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing,
-* software distributed under the License is distributed on an
-* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-* KIND, either express or implied.  See the License for the
-* specific language governing permissions and limitations
-* under the License.
-*
-*************************************************************/
-#ifndef SINGA_DRIVER_H_
-#define SINGA_DRIVER_H_
-
-#include <vector>
-#include <string>
-#include "singa/proto/job.pb.h"
-#include "singa/proto/singa.pb.h"
-#include "singa/utils/factory.h"
-#include "singa/utils/param.h"
-#include "singa/utils/singleton.h"
-#include "singa/utils/updater.h"
-#include "singa/neuralnet/layer.h"
-#include "singa/worker.h"
-#include "singa/server.h"
-
-namespace singa {
-using std::vector;
-class Driver {  // initializes SINGA, runs train/test jobs and registers user subclasses
- public:
-  /**
-   * Init SINGA
-   * - init glog
-   * - parse job id and job conf from cmd line
-   * - register built-in layer, worker, updater, param subclasses.
-   *
-   * May be used for MPI init if it is used for message passing.
-   */
-  void Init(int argc, char** argv);
-  /**
-   * Init SINGA LOG
-   * Used for python binding. Users can also directly call it as a C++ API.
-   * - init glog with given parameters
-   *
-   */
-  void InitLog(char *arg);
-  /**
-   * Update job configuration and call Train(const JobProto&) to start the
-   * training.
-   *
-   * It sets up the logging path and checkpoint files (if resume), and checks
-   * the existence of the workspace folder.
-   *
-   * @param[in] resume if true resume the training from the latest checkpoint
-   * files.
-   * @param[in] job_conf job configuration.
-   */
-  void Train(bool resume, const JobProto& job_conf);
-  /**
-   * Used for python binding. Users can also directly call it as a C++ API.
-   *
-   * It completes the functions as defined above but accepts serialized string
-   * parameters.
-   *
-   * @param[in] resume if true resume the training from the latest checkpoint
-   * files.
-   * @param[in] str serialized string recorded job configuration.
-   */
-  void Train(bool resume, const std::string str);
-  /**
-   * Create workers and servers to conduct the training.
-   *
-   * @param[in] job_conf job configuration with all necessary fields set (e.g.,
-   * by Train(bool, const JobProto&)).
-   */
-  void Train(const JobProto& job_conf);
-  /**
-   * Test the pre-trained model by loading parameters from checkpoint files.
-   *
-   * It can be used for both computing accuracy of test data, and extracting
-   * features (predicting label) of new data.
-   * @param[in] job_conf job configuration, which should include the checkpoint
-   * files and test settings (e.g., test steps). To extract features, the output
-   * layers should be added.
-   */
-  void Test(const JobProto& job_conf);
-  /**
-   * Used for python binding. Users can also directly call it as a C++ API.
-   *
-   * It completes the functions as defined above but accepts serialized string
-   * parameters.
-   *
-   * @param[in] str serialized string recorded job configuration.
-   */
-  void Test(const std::string str);
-  /**
-   * Setting the checkpoint field of the job configuration to resume training.
-   *
-   * The checkpoint folder will be searched to get the files for the latest
-   * checkpoint, which will be added into the checkpoint field. The workers
-   * would then load the values of params from the checkpoint files.
-   *
-   * @param job_conf job configuration
-   */
-  void SetupForResume(JobProto* job_conf);
-  /**
-   * Create server instances.
-   *
-   * @param[in] job_conf job configuration.
-   * @param[in] net training neural network.
-   * @return server instances
-   */
-  const vector<Server*> CreateServers(const JobProto& job_conf, NeuralNet* net);
-  /**
-   * Create worker instances.
-   * @param[in] job_conf job configuration.
-   * @param[in] net training neural network.
-   * @return worker instances
-   */
-  const vector<Worker*> CreateWorkers(const JobProto& job_conf, NeuralNet* net);
-
-
-  /*********** Subclasses registers *************************/
-  /**
-   * Register a Layer subclass.
-   *
-   * @param type layer type ID. If called to register built-in subclasses,
-   * it is from LayerType; if called to register user-defined
-   * subclass, it is a string;
-   * @return 1; the template definition in this header always returns 1.
-   */
-  template<typename Subclass, typename Type>
-  int RegisterLayer(const Type& type);
-  /**
-   * Register an Updater subclass.
-   *
-   * @param type ID of the subclass. If called to register built-in subclasses,
-   * it is from UpdaterType; if called to register user-defined
-   * subclass, it is a string;
-   * @return 1; the template definition in this header always returns 1.
-   */
-  template<typename Subclass, typename Type>
-  int RegisterUpdater(const Type& type);
-  /**
-   * Register a learning rate generator subclass.
-   *
-   * @param type ID of the subclass. If called to register built-in subclasses,
-   * it is from ChangeMethod; if called to register user-defined
-   * subclass, it is a string;
-   * @return 1; the template definition in this header always returns 1.
-   */
-  template<typename Subclass, typename Type>
-  int RegisterLRGenerator(const Type& type);
-  /**
-   * Register a Worker subclass.
-   *
-   * @param type ID of the subclass. If called to register built-in subclasses,
-   * it is from TrainOneBatchAlg; if called to register user-defined
-   * subclass, it is a string;
-   * @return 1; the template definition in this header always returns 1.
-   */
-  template<typename Subclass, typename Type>
-  int RegisterWorker(const Type& type);
-  /**
-   * Register a Param subclass.
-   * @param type ID of the subclass. If called to register built-in subclasses,
-   * it is from ParamType; if called to register user-defined
-   * subclass, it is a string;
-   *
-   * @return 1; the template definition in this header always returns 1.
-   */
-  template<typename Subclass, typename Type>
-  int RegisterParam(const Type& type);
-  /**
-   * Register ParamGenerator subclasses for initializing Param objects.
-   *
-   * @param type ID of the subclass. If called to register built-in subclasses,
-   * it is from InitMethod; if called to register user-defined
-   * subclass, it is a string;
-   * @return 1; the template definition in this header always returns 1.
-   */
-  template<typename Subclass, typename Type>
-  int RegisterParamGenerator(const Type& type);
-
-  /****************** Access function ********************/
-  /**
-   * @return job ID which is generated by zookeeper and passed in by the
-   * launching script.
-   */
-  inline int job_id() const { return job_id_; }
-  /**
-   * @return a copy of the job configuration (a JobProto, not a path). It
-   * should at least contain the cluster configuration.
-   */
-  inline JobProto job_conf() const { return job_conf_; }
-
- private:
-  int job_id_;  // NOTE(review): no default initializer here
-  std::string hostip_;
-  JobProto job_conf_;
-  SingaProto singa_conf_;
-};
-
-/************* Implementation of template functions*************************
-* The implementation must be put in driver.h instead of driver.cc.
-* Otherwise there would be linking errors caused by unknown registration
-* functions, because these functions cannot be generated merely based on
-* their declarations in driver.h.
-*/
-
-template<typename Subclass, typename Type>
-int Driver::RegisterLayer(const Type& type) {
-  auto factory = Singleton<Factory<singa::Layer>>::Instance();  // Factory<Layer> obtained through Singleton
-  factory->Register(type, CreateInstance(Subclass, Layer));
-  return 1;  // unconditional: the outcome of Register is not inspected
-}
-
-template<typename Subclass, typename Type>
-int Driver::RegisterParam(const Type& type) {
-  auto factory = Singleton<Factory<singa::Param>>::Instance();  // Factory<Param> obtained through Singleton
-  factory->Register(type, CreateInstance(Subclass, Param));
-  return 1;  // unconditional: the outcome of Register is not inspected
-}
-
-template<typename Subclass, typename Type>
-int Driver::RegisterParamGenerator(const Type& type) {
-  auto factory = Singleton<Factory<singa::ParamGenerator>>::Instance();  // Factory<ParamGenerator> obtained through Singleton
-  factory->Register(type, CreateInstance(Subclass, ParamGenerator));
-  return 1;  // unconditional: the outcome of Register is not inspected
-}
-
-template<typename Subclass, typename Type>
-int Driver::RegisterUpdater(const Type& type) {
-  auto factory = Singleton<Factory<singa::Updater>>::Instance();  // Factory<Updater> obtained through Singleton
-  factory->Register(type, CreateInstance(Subclass, Updater));
-  return 1;  // unconditional: the outcome of Register is not inspected
-}
-
-template<typename Subclass, typename Type>
-int Driver::RegisterLRGenerator(const Type& type) {
-  auto factory = Singleton<Factory<singa::LRGenerator>>::Instance();  // Factory<LRGenerator> obtained through Singleton
-  factory->Register(type, CreateInstance(Subclass, LRGenerator));
-  return 1;  // unconditional: the outcome of Register is not inspected
-}
-
-template<typename Subclass, typename Type>
-int Driver::RegisterWorker(const Type& type) {
-  auto factory = Singleton<Factory<singa::Worker>>::Instance();  // Factory<Worker> obtained through Singleton
-  factory->Register(type, CreateInstance(Subclass, Worker));
-  return 1;  // unconditional: the outcome of Register is not inspected
-}
-
-}  // namespace singa
-
-#endif  // SINGA_DRIVER_H_

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dd1e4afa/include/singa/io/hdfs_store.h
----------------------------------------------------------------------
diff --git a/include/singa/io/hdfs_store.h b/include/singa/io/hdfs_store.h
deleted file mode 100644
index 1fb9258..0000000
--- a/include/singa/io/hdfs_store.h
+++ /dev/null
@@ -1,58 +0,0 @@
-/************************************************************
-*
-* Licensed to the Apache Software Foundation (ASF) under one
-* or more contributor license agreements.  See the NOTICE file
-* distributed with this work for additional information
-* regarding copyright ownership.  The ASF licenses this file
-* to you under the Apache License, Version 2.0 (the
-* "License"); you may not use this file except in compliance
-* with the License.  You may obtain a copy of the License at
-*
-*   http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing,
-* software distributed under the License is distributed on an
-* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-* KIND, either express or implied.  See the License for the
-* specific language governing permissions and limitations
-* under the License.
-*
-*************************************************************/
-
-#ifndef SINGA_IO_HDFS_STORE_H_
-#define SINGA_IO_HDFS_STORE_H_
-
-#include <string>
-#include "singa/io/store.h"
-#include "singa/io/hdfsfile.h"
-
-namespace singa {
-namespace io {
-
-/**
- * HDFS implementation of the Store interface. The store manages key-value
- * records stored in HDFS files.
- *
- * The store consists of records of the following format:
- *      [<length><content>]
- */
-class HDFSStore : public Store {
- public:
-  ~HDFSStore() { Close();}  // the destructor closes the store
-  bool Open(const std::string& source, Mode mode) override;
-  void Close() override;
-  bool Read(std::string* key, std::string* value) override;
-  void SeekToFirst() override;
-  void Seek(int offset) override;
-  bool Write(const std::string& key, const std::string& value) override;
-  void Flush() override;
-
- private:
-  HDFSFile* file_ = nullptr;
-  Mode mode_;  // NOTE(review): no default initializer here
-};
-
-}  // namespace io
-}  // namespace singa
-
-#endif  // SINGA_IO_HDFS_STORE_H_

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dd1e4afa/include/singa/io/hdfsfile.h
----------------------------------------------------------------------
diff --git a/include/singa/io/hdfsfile.h b/include/singa/io/hdfsfile.h
deleted file mode 100644
index cd3ded3..0000000
--- a/include/singa/io/hdfsfile.h
+++ /dev/null
@@ -1,131 +0,0 @@
-/************************************************************
-*
-* Licensed to the Apache Software Foundation (ASF) under one
-* or more contributor license agreements.  See the NOTICE file
-* distributed with this work for additional information
-* regarding copyright ownership.  The ASF licenses this file
-* to you under the Apache License, Version 2.0 (the
-* "License"); you may not use this file except in compliance
-* with the License.  You may obtain a copy of the License at
-*
-*   http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing,
-* software distributed under the License is distributed on an
-* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-* KIND, either express or implied.  See the License for the
-* specific language governing permissions and limitations
-* under the License.
-*
-*************************************************************/
-
-#ifndef SINGA_IO_HDFSFILE_H_
-#define SINGA_IO_HDFSFILE_H_
-
-#include <fstream>
-#include <string>
-#include <unordered_set>
-
-
-#define USE_PROTOBUF 1
-
-#ifdef USE_PROTOBUF
-#include <google/protobuf/message.h>
-#endif
-
-#include <hdfs/hdfs.h>
-
-namespace singa {
-namespace io {
-
-/**
- * HDFSFile represents a specific partition of the HDFS file storing training/validation
- * or test data. HDFS library maintains its own buffer, so we don't need one.
- *
- * Each record is of the form: <length><content>
- */
-class HDFSFile {
- public:
-  enum Mode {
-    // read only mode used in training
-    kRead = 0,
-    // write mode used in creating HDFSFile (will overwrite previous one)
-    kCreate = 1,
-    // append mode, e.g. used when previous creating crashes
-    kAppend = 2
-  };
-
-  /**
-   * HDFSFile constructor.
-   *
-   * @param path path to file, of the form "hdfs://namenode/file_path"
-   * @param mode HDFSFile::kRead, HDFSFile::kCreate or HDFSFile::kAppend
-   */
-  HDFSFile(const std::string& path, Mode mode);
-  ~HDFSFile();
-
-#ifdef USE_PROTOBUF
-  /**
-   * Read the next tuple from the HDFSFile.
-   *
-   * @param val Record of type Message
-   * @return false if the read is unsuccessful, e.g., the tuple was not inserted
-   *         completely.
-   */
-  bool Next(google::protobuf::Message* val);
-  /**
-   * Append one record to the HDFSFile.
-   *
-   * @param tuple record of type Message to append
-   * @return false if unsuccessful, e.g., inserted before
-   */
-  bool Insert(const google::protobuf::Message& tuple);
-#endif
-
-  /**
-   * Read next record from the HDFSFile.
-   *
-   * @param val Record of type string
-   * @return false if unsuccessful, e.g. the tuple was not inserted completely.
-   */
-  bool Next(std::string* val);
-  /**
-   * Append record to the HDFSFile.
-   *
-   * @param tuple record of type string to append
-   * (NOTE(review): the old text described key/val params this overload does not take)
-   * @return false if unsuccessful, e.g., inserted before
-   */
-  bool Insert(const std::string& tuple);
-  /**
-   * Move the read pointer of the HDFSFile to the given offset (presumably
-   * offset 0 is the head of the file, for repeated reading; verify in the .cc).
-   */
-  void Seek(int offset);
-
-  /**
-   * Flush buffered data to disk.
-   * Used only for kCreate or kAppend.
-   */
-  void Flush();
-  /**
-   * @return path to HDFSFile file
-   */
-  inline std::string path() { return path_; }
-
- private:
-  std::string path_ = "";
-  Mode mode_;
-  // handle to HDFS
-  hdfsFS fs_;
-  // handle to the HDFS open file
-  hdfsFile file_;
-
-  // to avoid replicated records
-  std::unordered_set<std::string> keys_;
-};
-}  // namespace io
-
-}  // namespace singa
-
-#endif  // SINGA_IO_HDFSFILE_H_

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dd1e4afa/include/singa/io/imagefolder_store.h
----------------------------------------------------------------------
diff --git a/include/singa/io/imagefolder_store.h b/include/singa/io/imagefolder_store.h
deleted file mode 100644
index c05d92d..0000000
--- a/include/singa/io/imagefolder_store.h
+++ /dev/null
@@ -1,21 +0,0 @@
-/************************************************************
-*
-* Licensed to the Apache Software Foundation (ASF) under one
-* or more contributor license agreements.  See the NOTICE file
-* distributed with this work for additional information
-* regarding copyright ownership.  The ASF licenses this file
-* to you under the Apache License, Version 2.0 (the
-* "License"); you may not use this file except in compliance
-* with the License.  You may obtain a copy of the License at
-*
-*   http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing,
-* software distributed under the License is distributed on an
-* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-* KIND, either express or implied.  See the License for the
-* specific language governing permissions and limitations
-* under the License.
-*
-*************************************************************/
-// TODO(wangwei) store images in a disk folder

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dd1e4afa/include/singa/io/kvfile.h
----------------------------------------------------------------------
diff --git a/include/singa/io/kvfile.h b/include/singa/io/kvfile.h
deleted file mode 100644
index 6d9a709..0000000
--- a/include/singa/io/kvfile.h
+++ /dev/null
@@ -1,182 +0,0 @@
-/************************************************************
-*
-* Licensed to the Apache Software Foundation (ASF) under one
-* or more contributor license agreements.  See the NOTICE file
-* distributed with this work for additional information
-* regarding copyright ownership.  The ASF licenses this file
-* to you under the Apache License, Version 2.0 (the
-* "License"); you may not use this file except in compliance
-* with the License.  You may obtain a copy of the License at
-*
-*   http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing,
-* software distributed under the License is distributed on an
-* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-* KIND, either express or implied.  See the License for the
-* specific language governing permissions and limitations
-* under the License.
-*
-*************************************************************/
-
-#ifndef SINGA_IO_KVFILE_H_
-#define SINGA_IO_KVFILE_H_
-
-#include <fstream>
-#include <string>
-#include <unordered_set>
-
-#define USE_PROTOBUF 1
-
-#ifdef USE_PROTOBUF
-#include <google/protobuf/message.h>
-#endif
-
-namespace singa {
-namespace io {
-
-/**
- * KVFile stores training/validation/test tuples.
- * Every worker node should have a KVFile for training data (validation/test
- * KVFile is optional).
- * KVFile consists of a set of unordered tuples. Each tuple is
- * encoded as [key_len key val_len val] (key_len and val_len are of type
- * uint32 and give the sizes in bytes of key and value respectively).
- *
- * When KVFile is created, it will remove the last tuple if the value size
- * and key size do not match because the last write crashed.
- *
- * TODO(wangwei) split one KVFile into multiple KVFiles.
- *
- */
-class KVFile {
- public:
-  enum Mode {
-    // read only mode used in training
-    kRead = 0,
-    // write mode used in creating KVFile (will overwrite previous one)
-    kCreate = 1,
-    // append mode, e.g. used when previous creating crashes
-    kAppend = 2
-  };
-
-  /**
-   * KVFile constructor.
-   *
-   * @param path path to the disk KVFile, it can be
-   *  - a path to local disk file.
-   *  - a path to local directory. This is to be compatible with the older
-   *    version (DataShard). The KVFile is shard.dat under that directory
-   *  - a hdfs file starting with "hdfs://"
-   * @param mode KVFile open mode, KVFile::kRead, KVFile::kCreate or
-   * KVFile::kAppend
-   * @param bufsize Cache bufsize bytes data for every disk op (read or write),
-   * default is 10MB.
-   */
-  KVFile(const std::string& path, Mode mode, int bufsize = 10485760);
-  ~KVFile();
-
-#ifdef USE_PROTOBUF
-  /**
-   * read next tuple from the KVFile.
-   *
-   * @param key Tuple key
-   * @param val Record of type Message
-   * @return false if the read fails, e.g., the tuple was not inserted
-   *         completely.
-   */
-  bool Next(std::string* key, google::protobuf::Message* val);
-  /**
-   * Append one tuple to the KVFile.
-   *
-   * @param key e.g., image path
-   * @param tuple value of the tuple, a protobuf Message
-   * @return false if unsuccessful, e.g., inserted before
-   */
-  bool Insert(const std::string& key, const google::protobuf::Message& tuple);
-#endif
-  /**
-   * read next tuple from the KVFile.
-   *
-   * @param key Tuple key
-   * @param val Record of type string
-   * @return false if unsuccessful, e.g. the tuple was not inserted completely.
-   */
-  bool Next(std::string* key, std::string* val);
-  /**
-   * Append one tuple to the KVFile.
-   *
-   * @param key e.g., image path
-   * @param tuple value of the tuple, as a string
-   * @return false if unsuccessful, e.g., inserted before
-   */
-  bool Insert(const std::string& key, const std::string& tuple);
-  /**
-   * Move the read pointer to the head of the KVFile file.
-   * Used for repeated reading.
-   */
-  void SeekToFirst();
-  /**
-   * Flush buffered data to disk.
-   * Used only for kCreate or kAppend.
-   */
-  void Flush();
-  /**
-   * Iterate through all tuples to get the num of all tuples.
-   *
-   * @return num of tuples
-   */
-  int Count();
-  /**
-   * @return path to KVFile file
-   */
-  inline std::string path() { return path_; }
-
- protected:
-  /**
-   * Read the next key and prepare buffer for reading value.
-   *
-   * @param key
-   * @return length (i.e., bytes) of value field.
-   */
-  int Next(std::string* key);
-  /**
-   * Set up the disk pointer at the right position for appending, in case
-   * the previous write crashed.
-   *
-   * @param path KVFile path.
-   * @return offset (end pos) of the last success written record.
-   */
-  int PrepareForAppend(const std::string& path);
-  /**
-   * Read data from disk if the current data in the buffer is not a full field.
-   *
-   * @param size size of the next field.
-   */
-  bool PrepareNextField(int size);
-
- private:
-  std::string path_ = "";
-  Mode mode_;
-  //! either ifstream or ofstream
-  std::fstream fdat_;
-  //! to avoid replicated records
-  std::unordered_set<std::string> keys_;
-  //! internal buffer
-  char* buf_ = nullptr;
-  //! offset inside the buf_
-  int offset_ = 0;
-  //! allocated bytes for the buf_
-  int capacity_ = 0;
-  //! bytes in buf_, used in reading
-  int bufsize_ = 0;
-};
-}  // namespace io
-
-/**
- * @deprecated {DataShard is deprecated! Use KVFile}.
- */
-using DataShard = io::KVFile;
-}  // namespace singa
-
-#endif  // SINGA_IO_KVFILE_H_

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dd1e4afa/include/singa/io/kvfile_store.h
----------------------------------------------------------------------
diff --git a/include/singa/io/kvfile_store.h b/include/singa/io/kvfile_store.h
deleted file mode 100644
index 50b8f4f..0000000
--- a/include/singa/io/kvfile_store.h
+++ /dev/null
@@ -1,56 +0,0 @@
-/************************************************************
-*
-* Licensed to the Apache Software Foundation (ASF) under one
-* or more contributor license agreements.  See the NOTICE file
-* distributed with this work for additional information
-* regarding copyright ownership.  The ASF licenses this file
-* to you under the Apache License, Version 2.0 (the
-* "License"); you may not use this file except in compliance
-* with the License.  You may obtain a copy of the License at
-*
-*   http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing,
-* software distributed under the License is distributed on an
-* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-* KIND, either express or implied.  See the License for the
-* specific language governing permissions and limitations
-* under the License.
-*
-*************************************************************/
-
-#ifndef SINGA_IO_KVFILE_STORE_H_
-#define SINGA_IO_KVFILE_STORE_H_
-
-#include <string>
-#include "singa/io/store.h"
-#include "singa/io/kvfile.h"
-
-namespace singa {
-namespace io {
-
-/**
- * Use the KVFile as the data storage.
- *
- * KVFile is a binary file. Each tuple is stored as byte string.
- */
-class KVFileStore : public Store {
- public:
-  ~KVFileStore() { Close();}
-  bool Open(const std::string& source, Mode mode) override;
-  void Close() override;
-  bool Read(std::string* key, std::string* value) override;
-  void SeekToFirst() override;
-  void Seek(int offset) override;
-  bool Write(const std::string& key, const std::string& value) override;
-  void Flush() override;
-
- private:
-  KVFile* file_ = nullptr;
-  Mode mode_;
-};
-
-}  // namespace io
-}  // namespace singa
-
-#endif  // SINGA_IO_KVFILE_STORE_H_

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dd1e4afa/include/singa/io/store.h
----------------------------------------------------------------------
diff --git a/include/singa/io/store.h b/include/singa/io/store.h
deleted file mode 100644
index a63a981..0000000
--- a/include/singa/io/store.h
+++ /dev/null
@@ -1,111 +0,0 @@
-/************************************************************
-*
-* Licensed to the Apache Software Foundation (ASF) under one
-* or more contributor license agreements.  See the NOTICE file
-* distributed with this work for additional information
-* regarding copyright ownership.  The ASF licenses this file
-* to you under the Apache License, Version 2.0 (the
-* "License"); you may not use this file except in compliance
-* with the License.  You may obtain a copy of the License at
-*
-*   http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing,
-* software distributed under the License is distributed on an
-* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-* KIND, either express or implied.  See the License for the
-* specific language governing permissions and limitations
-* under the License.
-*
-*************************************************************/
-
-#ifndef SINGA_IO_STORE_H_
-#define SINGA_IO_STORE_H_
-
-#include <string>
-
-namespace singa {
-namespace io {
-
-using std::string;
-enum Mode { kCreate, kRead, kAppend };
-
-/**
- * General key-value store that provides functions for reading and writing
- * tuples.
- *
- * Subclasses implement the functions for a specific data storage, e.g., CSV
- * file, HDFS, image folder, singa::io::SFile, leveldb, lmdb, etc.
- */
-class Store {
- public:
-  Store() { }
-  /**
-   * In case that users forget to call Close() to release resources, e.g.,
-   * memory, you can release them here.
-   */
-  virtual ~Store() { }
-  /**
-   * @param[in] source path to the storage, could be a file path, folder path
-   * or hdfs path, or even a http url.
-   * @param[in] mode
-   * @return true if open successfully, otherwise false.
-   */
-  virtual bool Open(const std::string& source, Mode mode) = 0;
-  /**
-   * Release resources.
-   */
-  virtual void Close() = 0;
-  /**
-   * Read a tuple.
-   *
-   * @param[out] key
-   * @param[out] value
-   * @return true if read successfully, otherwise false.
-   */
-  virtual bool Read(std::string* key, std::string* value) = 0;
-  /**
-   * Seek the read pointer to the first tuple.
-   */
-  virtual void SeekToFirst() = 0;
-
-  /**
-   * Seek to an offset. This allows concurrent workers to start reading from
-   * different positions (HDFS). 
-   */
-  virtual void Seek(int offset) = 0; 
-  /**
-   * Write a tuple.
-   *
-   * @param[in] key
-   * @param[in] value
-   * @return true if success, otherwise false.
-   */
-  virtual bool Write(const std::string& key, const std::string& value) = 0;
-  /**
-   * Flush the write buffer, if there is one.
-   */
-  virtual void Flush() {}
-};
-
-/**
- * Create a Store object.
- *
- * @param[in] backend identifier for a specific backend. Two backends are
- * included currently, i.e., "kvfile" and "textfile".
- * @return a pointer to the newly created Store.
- */
-Store* CreateStore(const string& backend);
-/**
- * Create and open a Store object.
- *
- * @param[in] backend, @see CreateStore().
- * @param[in] path
- * @param[in] mode kRead or kCreate or kAppend
- */
-Store* OpenStore(const string& backend, const string& path, Mode mode);
-
-}  // namespace io
-}  // namespace singa
-
-#endif  // SINGA_IO_STORE_H_

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dd1e4afa/include/singa/io/textfile_store.h
----------------------------------------------------------------------
diff --git a/include/singa/io/textfile_store.h b/include/singa/io/textfile_store.h
deleted file mode 100644
index 83bcbfa..0000000
--- a/include/singa/io/textfile_store.h
+++ /dev/null
@@ -1,57 +0,0 @@
-/************************************************************
-*
-* Licensed to the Apache Software Foundation (ASF) under one
-* or more contributor license agreements.  See the NOTICE file
-* distributed with this work for additional information
-* regarding copyright ownership.  The ASF licenses this file
-* to you under the Apache License, Version 2.0 (the
-* "License"); you may not use this file except in compliance
-* with the License.  You may obtain a copy of the License at
-*
-*   http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing,
-* software distributed under the License is distributed on an
-* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-* KIND, either express or implied.  See the License for the
-* specific language governing permissions and limitations
-* under the License.
-*
-*************************************************************/
-
-#ifndef SINGA_IO_TEXTFILE_STORE_H_
-#define SINGA_IO_TEXTFILE_STORE_H_
-
-#include <fstream>
-#include <string>
-#include "singa/io/store.h"
-
-namespace singa {
-namespace io {
-/**
- * Use text file as the data storage, one line per tuple.
- *
- * It is used for storing CSV format data where the key is the line No. and
- * the value is the line.
- */
-class TextFileStore : public Store {
- public:
-  ~TextFileStore() { Close(); }
-  bool Open(const std::string& source, Mode mode) override;
-  void Close() override;
-  bool Read(std::string* key, std::string* value) override;
-  void SeekToFirst() override;
-  void Seek(int offset) override;
-  bool Write(const std::string& key, const std::string& value) override;
-  void Flush() override;
-
- private:
-  int lineNo_ = 0;
-  std::fstream* fs_ = nullptr;
-  Mode mode_;
-};
-
-}  // namespace io
-}  // namespace singa
-
-#endif  // SINGA_IO_TEXTFILE_STORE_H_

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dd1e4afa/include/singa/layer/conv.h
----------------------------------------------------------------------
diff --git a/include/singa/layer/conv.h b/include/singa/layer/conv.h
new file mode 100644
index 0000000..fd43018
--- /dev/null
+++ b/include/singa/layer/conv.h
@@ -0,0 +1,29 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+namespace singa {
+
+class ConvLayer {
+
+
+
+
+};
+
+}  /* singa */

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dd1e4afa/include/singa/model/rnn.h
----------------------------------------------------------------------
diff --git a/include/singa/model/rnn.h b/include/singa/model/rnn.h
new file mode 100644
index 0000000..7d2c20c
--- /dev/null
+++ b/include/singa/model/rnn.h
@@ -0,0 +1,29 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+namespace singa {
+
+class RNN {
+
+
+
+
+};
+
+}  /* singa */

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dd1e4afa/include/singa/neuralnet/connection_layer.h
----------------------------------------------------------------------
diff --git a/include/singa/neuralnet/connection_layer.h b/include/singa/neuralnet/connection_layer.h
deleted file mode 100644
index 481d991..0000000
--- a/include/singa/neuralnet/connection_layer.h
+++ /dev/null
@@ -1,187 +0,0 @@
-/************************************************************
-*
-* Licensed to the Apache Software Foundation (ASF) under one
-* or more contributor license agreements.  See the NOTICE file
-* distributed with this work for additional information
-* regarding copyright ownership.  The ASF licenses this file
-* to you under the Apache License, Version 2.0 (the
-* "License"); you may not use this file except in compliance
-* with the License.  You may obtain a copy of the License at
-*
-*   http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing,
-* software distributed under the License is distributed on an
-* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-* KIND, either express or implied.  See the License for the
-* specific language governing permissions and limitations
-* under the License.
-*
-*************************************************************/
-
-#ifndef SINGA_NEURALNET_CONNECTION_LAYER_H_
-#define SINGA_NEURALNET_CONNECTION_LAYER_H_
-
-#include <string>
-#include <unordered_map>
-#include <vector>
-#include "singa/comm/socket.h"
-#include "singa/neuralnet/layer.h"
-
-namespace singa {
-/**
- * Used inside SplitLayer and SliceLayer to locate the out-going connection
- * index given the Layer pointer.
- */
-class Layer2Index {
- public:
-  int Get(const Layer* layer) {
-    if (layer2idx_.find(layer) == layer2idx_.end()) {
-      int idx =  layer2idx_.size();
-      layer2idx_[layer] = idx;
-    }
-    return layer2idx_[layer];
-  }
-
- private:
-  std::unordered_map<const Layer*, int> layer2idx_;
-};
-
-
-class BridgeLayer : public ConnectionLayer {
- public:
-  void set_ready(bool a) { ready_ = a; }
-  bool ready() const { return ready_; }
-  // Bind the layer with dealer instance by worker at runtime
-  void MakePaired(Layer* pair, int grp_id, Dealer* dealer,
-                  std::unordered_map<std::string, Layer*>* name2bridge);
-  // Send blobs to other workers due to model partitions
-  void SendBlobs(bool handle_data);
-  // Receive blobs from other workers due to model partitions;
-  void ReceiveBlobs(bool handle_data);
-
- protected:
-  //! true if received grad from BridgeDstLayer
-  bool ready_ = false;
-  int group_id_ = 0;
-  Layer* pair_ = nullptr;
-  Dealer* dealer_ = nullptr;
-  std::unordered_map<std::string, Layer*>* name2bridge_ = nullptr;
-};
-
-/**
- * For sending data to layers on other threads, which may reside on other nodes
- * due to layer/data partition.
- */
-class BridgeSrcLayer : public BridgeLayer {
- public:
-  void Setup(const LayerProto& conf, const vector<Layer*>& srclayers) override;
-  void ComputeFeature(int flag, const vector<Layer*>& srclayers) override;
-  void ComputeGradient(int flag, const vector<Layer*>& srclayers) override;
-};
-
-/**
- * For receiving data from layers on other threads, which may reside on other
- * nodes due to layer/data partition.
- */
-class BridgeDstLayer : public BridgeLayer {
- public:
-  void Setup(const LayerProto& conf, const vector<Layer*>& srclayers) override;
-  void ComputeFeature(int flag, const vector<Layer*>& srclayers) override;
-  void ComputeGradient(int flag, const vector<Layer*>& srclayers) override;
-};
-/**
- * Connect multiple (src) layers with a single (dst) layer.
- *
- * It concatenates feature Blobs (i.e., matrices) of src layers on one dimension.
- * The concatenated feature Blob will be fed into the dst layer.
- */
-class ConcateLayer : public ConnectionLayer {
- public:
-  void Setup(const LayerProto& proto, const vector<Layer*>& srclayers) override;
-  void ComputeFeature(int flag, const vector<Layer*>& srclayers) override;
-  void ComputeGradient(int flag, const vector<Layer*>& srclayers) override;
-
- private:
-  int num_concates_ = 0;
-  int concate_dim_ = 0;
-};
-
-/**
- * Connect a single (src) layer with multiple (dst) layers.
- *
- * It slices the feature Blob (i.e., matrix) of the src layer on one dimension.
- * The sliced feature Blobs will be fed into dst layers.
- */
-class SliceLayer : public ConnectionLayer {
- public:
-  ~SliceLayer();
-  void Setup(const LayerProto& proto, const vector<Layer*>& srclayers) override;
-  void ComputeFeature(int flag, const vector<Layer*>& srclayers) override;
-  void ComputeGradient(int flag, const vector<Layer*>& srclayers) override;
-  const std::string ToString(bool debug, int flag) override;
-  const Blob<float>& data(const Layer* from) override;
-  const Blob<float>& grad(const Layer* from) override;
-  Blob<float>* mutable_data(const Layer* from) override;
-  Blob<float>* mutable_grad(const Layer* from) override;
-
- private:
-  int num_slices_ = 0;
-  int slice_dim_ = 0;
-  Layer2Index layer_idx_;
-};
-
-/**
- * Connect a single (src) layer with multiple dst layers.
- *
- * It replicates the feature Blob of the src layer.
- * Each replicated feature Blob will be fed into one dst layer.
- * It aggregates gradients set by all dst layers and sets the result to the src layer.
- */
-class SplitLayer : public ConnectionLayer {
- public:
-  ~SplitLayer();
-  void Setup(const LayerProto& proto, const vector<Layer*>& srclayers) override;
-  void ComputeFeature(int flag, const vector<Layer*>& srclayers) override;
-  void ComputeGradient(int flag, const vector<Layer*>& srclayers) override;
-  const std::string ToString(bool debug, int flag) override;
-  const Blob<float>& grad(const Layer* from) override;
-  Blob<float>* mutable_grad(const Layer* from) override;
-
- private:
-  int num_splits_ = 0;
-  Layer2Index layer_idx_;
-};
-
-/**
- * Dummy layer for RNN models, which provides input for other layers.
- *
- * Particularly, it is used in the test phase of RNN models to connect other
- * layers and avoid cycles in the neural net config.
- */
-class RNNDummyLayer : public ConnectionLayer {
- public:
-  void Setup(const LayerProto& proto, const vector<Layer*>& srclayers) override;
-  void ComputeFeature(int flag, const vector<Layer*>& srclayers) override;
-  void ComputeGradient(int flag, const vector<Layer*>& srclayers) {
-    LOG(FATAL) << "Not implemented";
-  }
-
-  const string srclayer(int step) const {
-    if (step > 0)
-      return dynamic_src_;
-    else
-      return "";
-  }
-
- private:
-  string dynamic_src_;
-  float low_, high_;
-  bool integer_;
-  Layer* srclayer_;
-};
-
-
-}  // namespace singa
-
-#endif  // SINGA_NEURALNET_CONNECTION_LAYER_H_