You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@singa.apache.org by wa...@apache.org on 2016/06/03 07:48:20 UTC
[15/60] incubator-singa git commit: SINGA-163 - Reorganize the
project folder layout
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dd1e4afa/include/gtest/gtest_main.cc
----------------------------------------------------------------------
diff --git a/include/gtest/gtest_main.cc b/include/gtest/gtest_main.cc
deleted file mode 100644
index f302822..0000000
--- a/include/gtest/gtest_main.cc
+++ /dev/null
@@ -1,38 +0,0 @@
-// Copyright 2006, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-// * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-#include <stdio.h>
-
-#include "gtest/gtest.h"
-
-GTEST_API_ int main(int argc, char **argv) {
-  // Make it visible in the output that the stock gtest entry point is running.
-  printf("Running main() from gtest_main.cc\n");
-  // Google Test receives the command line before any test is run.
-  testing::InitGoogleTest(&argc, argv);
-  // The value of RUN_ALL_TESTS() becomes the exit status of the process.
-  const int exit_code = RUN_ALL_TESTS();
-  return exit_code;
-}
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dd1e4afa/include/mshadow/cuda/cuda_reduce.cuh
----------------------------------------------------------------------
diff --git a/include/mshadow/cuda/cuda_reduce.cuh b/include/mshadow/cuda/cuda_reduce.cuh
deleted file mode 100644
index b7808a6..0000000
--- a/include/mshadow/cuda/cuda_reduce.cuh
+++ /dev/null
@@ -1,117 +0,0 @@
-#ifndef MSHADOW_CUDA_REDUCE_CUH
-#define MSHADOW_CUDA_REDUCE_CUH
-/*!
- * \file cuda_reduce.cuh
- * \brief helper functions to do reduction
- * \author Tianqi Chen
- */
-namespace mshadow{
- namespace cuda{
- /*
- * \brief reduce over the dimension x
- * \tparam Reducer reducer
- * \tparam x_bits dimension = 1<<x_bits
- */
- template<typename Reducer,int x_bits>
- inline __device__ void Reduce1D( volatile real_t buf[1<<x_bits] );
- /*
- * \brief reduce over the dimension x
- * \tparam Reducer reducer
- * \tparam xmax_bits maximum size of buffer
- * \param xsize size of x dimension, not sure if aligned
- */
- template<typename Reducer, int xmax_bits>
- inline __device__ void Reduce1DNotAlign( volatile real_t buf[1<<xmax_bits], int xsize );
- };
-};
-
-// ===============================================x===
-// implementations afterwards,
-// no need to read if only use the functions
-// --------------------------------------------------
-// Synchronisation placed between the last, intra-warp steps of ReduceX.
-//  * device emulation: threads do not run in lock-step, use a block barrier
-//  * CUDA 9 and newer: threads of a warp are no longer guaranteed to run in
-//    lock-step (independent thread scheduling), so __syncwarp() is needed to
-//    order the shared memory accesses of consecutive reduction steps
-//  * older toolchains: keep the historical implicit warp-synchronous behaviour
-#if defined(__DEVICE_EMULATION__)
-#define __MSHADOW_EMUSYNC__ __syncthreads()
-#elif defined(__CUDACC_VER_MAJOR__) && (__CUDACC_VER_MAJOR__ >= 9)
-#define __MSHADOW_EMUSYNC__ __syncwarp()
-#else
-#define __MSHADOW_EMUSYNC__
-#endif
-
-namespace mshadow{
- namespace cuda{
- /*!
-  * \brief tree reduction of buf[0], ..., buf[(1<<x_bits)-1]; the result ends up in buf[0]
-  *  every thread of the block has to call this function, because it contains block barriers
-  * \tparam Reducer reducer, Reducer::Reduce( dst, src ) folds src into dst
-  * \tparam x_bits log2 of the number of elements to reduce, values above 10 are treated as 10
-  * \param buf buffer to reduce in place
-  * \param tid index of the calling thread inside the buffer
-  */
- template<typename Reducer, int x_bits>
- inline __device__ void ReduceX( volatile real_t buf[], int tid ){
-   // level k folds buf[tid + 2^(k-1)] into buf[tid] for every tid < 2^(k-1);
-   // x_bits is a template constant, so each level test is known at compile time
-   #pragma unroll
-   for( int level = 10; level >= 1; --level ){
-     if( x_bits >= level ){
-       const int offset = 1 << (level - 1);
-       if( tid < offset ) Reducer::Reduce( buf[tid] , buf[tid + offset] );
-       if( level >= 6 ){
-         // offsets of 32 and more: block-wide barrier
-         __syncthreads();
-       }else{
-         // in warp optimization: offsets of 16 and less
-         __MSHADOW_EMUSYNC__;
-       }
-     }
-   }
- }
-
- template<typename Reducer,int x_bits>
- inline __device__ void Reduce1D( volatile real_t buf[1<<x_bits] ){
- ReduceX<Reducer,x_bits>( buf, threadIdx.x );
- }
-
- // reduce with a upper bound
- // __RD_NON_ALIGN(els,x_bits) is one rung of the if / else-if ladder inside
- // Reduce1DNotAlign: when 2^x_bits <= x_size it first folds the tail
- // buf[2^x_bits .. x_size) into the front of the buffer and then runs the
- // aligned reduction ReduceX over the first 2^x_bits elements.
- // The rung contains __syncthreads(), so x_size must be the same for all threads of the block.
- #define __RD_NON_ALIGN(els,x_bits) \
-   els \
-   if( xmax_bits >= x_bits && x_size >= (1 << x_bits) ){ \
-     if( tid < (1 << x_bits) && tid + (1<<x_bits) < x_size ){ \
-       Reducer::Reduce( buf[tid] , buf[tid + (1<<x_bits)] ); \
-     } \
-     __syncthreads(); \
-     ReduceX<Reducer, x_bits>( buf, tid ); \
-   }
-
- /*!
-  * \brief reduce buf[0], ..., buf[x_size-1] into buf[0], x_size need not be a power of two
-  *  the ladder starts at 9 bits so that buffers of up to 1024 elements
-  *  (xmax_bits == 10) are reduced completely; starting at 8 bits dropped
-  *  every element at index 512 and above
-  * \tparam Reducer reducer
-  * \tparam xmax_bits maximum size of buffer is 1<<xmax_bits
-  * \param buf buffer to reduce in place
-  * \param x_size number of valid elements, must be identical for all threads of the block
-  */
- template<typename Reducer, int xmax_bits>
- inline __device__ void Reduce1DNotAlign( volatile real_t buf[], int x_size ){
-   int tid = threadIdx.x;
-   __RD_NON_ALIGN(, 9)
-   __RD_NON_ALIGN(else, 8)
-   __RD_NON_ALIGN(else, 7)
-   __RD_NON_ALIGN(else, 6)
-   __RD_NON_ALIGN(else, 5)
-   __RD_NON_ALIGN(else, 4)
-   __RD_NON_ALIGN(else, 3)
-   __RD_NON_ALIGN(else, 2)
-   __RD_NON_ALIGN(else, 1)
- }
- };
-};
-
-#endif // MSHADOW_CUDA_REDUCE_CUH
-
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dd1e4afa/include/mshadow/cuda/tensor_gpu-inl.cuh
----------------------------------------------------------------------
diff --git a/include/mshadow/cuda/tensor_gpu-inl.cuh b/include/mshadow/cuda/tensor_gpu-inl.cuh
deleted file mode 100644
index 61e477c..0000000
--- a/include/mshadow/cuda/tensor_gpu-inl.cuh
+++ /dev/null
@@ -1,231 +0,0 @@
-#ifndef MSHADOW_TENSOR_GPU_INL_CUH
-#define MSHADOW_TENSOR_GPU_INL_CUH
-/*!
- * \file tensor_gpu-inl.cuh
- * \brief implementation of GPU code using CUDA
- * \author Bing Xu, Tianqi Chen
- */
-#include "../tensor.h"
-#include "cuda_reduce.cuh"
-
-namespace mshadow{
- namespace cuda{
- #ifndef __CUDA_ARCH__
- #warning "__CUDA_ARCH__ is not defined, I will assume compiling with CUDA verion greater than 2.0"
- #endif
- /* load unit for memory access */
- #if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 200
- const int kMemUnitBits = 5;
- const int kMaxThreadsPerBlock = 1024;
- #else
- const int kMemUnitBits = 4;
- const int kMaxThreadsPerBlock = 512;
- #endif
- /*! \brief number of units that can do synchronized update: 1 << kMemUnitBits, i.e. a full warp (32) when kMemUnitBits is 5 and a half warp (16) when it is 4 */
- const int kMemUnit = 1 << kMemUnitBits;
- /*! \brief mask that could be helpful sometime */
- const int kMemUnitMask = kMemUnit - 1;
- /*! \brief suggested thread number(logscale) for mapping kernel */
- const int kBaseThreadBits = 8;
- /*! \brief suggested thread number for mapping kernel */
- const int kBaseThreadNum = 1 << kBaseThreadBits;
- /*! \brief maximum value of grid */
- const int kMaxGridNum = 65535;
- /*! \brief suggested grid number for mapping kernel */
- const int kBaseGridNum = 1024;
-
- /*!
-  * \brief get align stride for given size in x dimension
-  * \param xsize number of elements in the x dimension
-  * \param xstride stride of the x dimension
-  * \return xsize rounded up to a multiple of kMemUnit when xstride is a multiple
-  *         of kMemUnit, xsize itself otherwise
-  */
- inline index_t GetAlignStride( index_t xsize, index_t xstride ){
-   const bool stride_aligned = (xstride & (kMemUnit-1)) == 0;
-   if( !stride_aligned ){
-     // the space is not aligned in the first place, so there is no need for an aligned thread allocation
-     return xsize;
-   }
-   const index_t num_units = (xsize + kMemUnit - 1) >> kMemUnitBits;
-   return num_units << kMemUnitBits;
- }
- /*!
-  * \brief check a launch configuration before a kernel is started
-  *  when the block holds more than kMaxThreadsPerBlock threads, or the grid is
-  *  larger than 65535 in x or y, the block dimensions are printed to stderr and
-  *  utils::Error is called
-  * \param dimGrid grid dimension of the launch
-  * \param dimBlock block dimension of the launch
-  * \param estr text printed in front of the block dimensions
-  */
- inline void CheckLaunchParam( dim3 dimGrid, dim3 dimBlock, const char *estr = "" ){
-   const unsigned threads_per_block = dimBlock.x*dimBlock.y*dimBlock.z;
-   const bool block_ok = !( threads_per_block > (unsigned)kMaxThreadsPerBlock );
-   const bool grid_ok = !( dimGrid.x > 65535 || dimGrid.y > 65535 );
-   if( block_ok && grid_ok ) return;
-   fprintf( stderr, "%s[%u,%u,%u]:", estr, dimBlock.x, dimBlock.y, dimBlock.z );
-   utils::Error( "too large launch parameter\n");
- }
- };
-
- namespace cuda {
- /*!
-  * \brief evaluate the expression for the single element owned by the calling thread
-  * \tparam block_dim_bits log2 of the number of threads per block
-  * \param dst destination tensor
-  * \param xstride number of thread slots reserved for one row of dst
-  * \param exp expression plan, exp.Eval(y,x) gives the value for dst[y][x]
-  * \param block_idx index of the (logical) block this thread works for
-  */
- template<typename Saver, typename Plan, int block_dim_bits>
- __device__ void MapPlanProc( Tensor<gpu,2> dst, const index_t xstride, const Plan exp, int block_idx ){
-   // flat slot index of this thread, rows are xstride slots apart
-   const index_t slot = (block_idx << block_dim_bits) + threadIdx.x;
-   const int row = slot / xstride;
-   const int col = slot % xstride;
-   // slots beyond the last row, or in the padding after the last column, have nothing to do
-   if( row >= dst.shape[1] || col >= dst.shape[0] ) return;
-   Saver::Save( dst[row][col], exp.Eval( row, col ) );
- }
- template<typename Saver, typename Plan, int block_dim_bits>
- __global__ void MapPlanKernel( Tensor<gpu,2> dst, const index_t xstride, const Plan exp ){
- MapPlanProc<Saver, Plan,block_dim_bits>( dst, xstride, exp, blockIdx.x );
- }
- /*!
-  * \brief kernel for destinations that need more blocks than one grid provides:
-  *  each physical block plays the role of the logical blocks
-  *  blockIdx.x, blockIdx.x + grid_size, ... for repeat rounds
-  * \tparam grid_size number of blocks in the launched grid
-  * \param repeat number of rounds
-  */
- template<typename Saver, typename Plan, int block_dim_bits, int grid_size>
- __global__ void MapPlanLargeKernel( Tensor<gpu,2> dst, const index_t xstride, const Plan exp, int repeat ){
-   int logical_block = blockIdx.x;
-   for( int round = 0; round < repeat; ++round, logical_block += grid_size ){
-     MapPlanProc<Saver, Plan,block_dim_bits>( dst, xstride, exp, logical_block );
-   }
- }
-
- /*!
-  * \brief evaluate plan for every element of dst on the GPU and store the values through Saver
-  *  one thread is used per element slot, blocks hold kBaseThreadNum threads
-  * \tparam Saver storage method
-  * \tparam E expression type
-  * \param dst destination tensor
-  * \param plan expression plan to evaluate
-  */
- template<typename Saver, typename E>
- inline void MapPlan( Tensor<gpu,2> dst, const expr::Plan<E> &plan ){
-   const index_t xstride = GetAlignStride( dst.shape[0], dst.shape.stride_ );
-   const int num_block = ( dst.shape[1]*xstride + kBaseThreadNum-1) / kBaseThreadNum;
-   // an empty destination needs no work, and a grid of zero blocks is not a valid launch configuration
-   if( num_block == 0 ) return;
-   dim3 dimBlock(kBaseThreadNum, 1, 1);
-
-   if (num_block < kMaxGridNum) {
-     // one block per kBaseThreadNum slots fits into a single grid
-     dim3 dimGrid(num_block, 1, 1);
-     MapPlanKernel<Saver, expr::Plan<E>, kBaseThreadBits>
-       <<<dimGrid,dimBlock>>>(dst, xstride, plan);
-   } else {
-     // too many blocks for one grid: launch kBaseGridNum blocks and let each of them work for repeat rounds
-     int repeat = (num_block + kBaseGridNum-1) / kBaseGridNum;
-     dim3 dimGrid( kBaseGridNum, 1 , 1 );
-     MapPlanLargeKernel<Saver,expr::Plan<E>, kBaseThreadBits, kBaseGridNum>
-       <<<dimGrid,dimBlock>>>(dst, xstride, plan, repeat );
-   }
- }
- }; // namespace cuda
-
- namespace cuda{
- /*!
-  * \brief reduce a 2D expression over y and keep the lowest dimension x:
-  *  for every x < eshape[0], the reduction of plan.Eval(y,x) over y < eshape[1],
-  *  multiplied by scale, is handed to Saver::Save( dst[x], ... )
-  *  launched by MapReduceKeepLowest with a block of (1<<warp_bits) x (1<<warp_bits)
-  *  threads, one block per (1<<warp_bits) consecutive values of x
-  * \tparam Saver storage method
-  * \tparam Reducer reducer, provides Reduce( dst, src ) and the start value kInitV
-  * \tparam warp_bits log2 of the block edge length
-  * \param dst destination
-  * \param plan expression plan
-  * \param scale factor applied to the reduced value before it is saved
-  * \param eshape shape of the expression, eshape[0] is x and eshape[1] is y
-  */
- template<typename Saver,typename Reducer, int warp_bits, typename Plan>
- __global__ void MapRedKeepLowestKernel( Tensor<gpu,1> dst, Plan plan, real_t scale, Shape<2> eshape ){
-   const unsigned warp_size = 1 << warp_bits;
-   const unsigned x = (blockIdx.x<<warp_bits) + threadIdx.x;
-   // to avoid bank conflict
-   __shared__ real_t s_res[ warp_size ][ warp_size + 1 ];
-
-   // note: reverse store [y][x], so that we can reduce over threadIdx.x, use warp optimization
-   if( threadIdx.y < eshape[1] && x < eshape[0] ){
-     s_res[ threadIdx.x ][ threadIdx.y ] = plan.Eval( threadIdx.y, x );
-   }else{
-     // slots without an element start from the reducer's initial value, so the
-     // reduction below never reads uninitialized shared memory and an empty
-     // y dimension (eshape[1] == 0) yields kInitV instead of garbage
-     s_res[ threadIdx.x ][ threadIdx.y ] = Reducer::kInitV;
-   }
-   // each thread folds the remaining rows of its column, warp_size rows apart
-   for( unsigned y = warp_size; y < eshape[1]; y += warp_size ){
-     if( threadIdx.y + y < eshape[1] && x < eshape[0] ){
-       Reducer::Reduce( s_res[ threadIdx.x ][ threadIdx.y ], plan.Eval( threadIdx.y + y, x ) );
-     }
-   }
-   __syncthreads();
-   // eshape[1] is the same for all threads, so the whole block takes the same branch
-   if( eshape[1] >= warp_size ){
-     Reduce1D<Reducer,warp_bits>( s_res[ threadIdx.y ] );
-   }else{
-     Reduce1DNotAlign<Reducer,warp_bits>( s_res[ threadIdx.y ], eshape[1] );
-   }
-   __syncthreads();
-
-   // after the reduction, entry 0 of each row of s_res holds the result of one x
-   if( threadIdx.y == 0 && x < eshape[0] ){
-     Saver::Save( dst[x], s_res[ threadIdx.x ][ 0 ] * scale );
-   }
- }
-
- /*!
-  * \brief launch MapRedKeepLowestKernel with blocks of kMemUnit x kMemUnit threads,
-  *  one block per kMemUnit elements of the lowest dimension
-  * \param dst destination
-  * \param plan expression plan
-  * \param scale factor applied to the reduced value before it is saved
-  * \param eshape shape of the expression
-  */
- template<typename Saver, typename Reducer, typename E>
- inline void MapReduceKeepLowest( Tensor<gpu,1> dst, const expr::Plan<E> &plan, real_t scale, Shape<2> eshape ){
-   // nothing to write for an empty lowest dimension, and a grid of zero blocks is not a valid launch configuration
-   if( eshape[0] == 0 ) return;
-   dim3 dimBlock( kMemUnit, kMemUnit );
-   dim3 dimGrid ( (eshape[0]+kMemUnit-1) >> kMemUnitBits );
-   CheckLaunchParam( dimGrid, dimBlock, "MapRedKeepLowestKernel" );
-   MapRedKeepLowestKernel<Saver,Reducer,kMemUnitBits><<<dimGrid,dimBlock>>>( dst, plan, scale, eshape );
- }
- }; // namespace cuda
-
- namespace cuda{
- /*!
-  * \brief reduce a 4D expression over dimensions 0, 1 and 3, keeping dimension 2
-  *  block c reduces every element whose index in dimension 2 equals c and
-  *  hands the result, multiplied by scale, to Saver::Save( dst[c], ... )
-  * \tparam block_dim_bits log2 of the number of threads per block
-  * \param dst destination
-  * \param plan expression plan, addressed as a 2D matrix through plan.Eval( row, x )
-  * \param scale factor applied to the reduced value before it is saved
-  * \param pshape 4D shape of the expression
-  */
- template<typename Saver,typename Reducer, int block_dim_bits, typename Plan>
- __global__ void MapReduceKeepDim2Kernel( Tensor<gpu,1> dst, Plan plan, real_t scale, Shape<4> pshape ){
-   const int block_size = 1 << block_dim_bits;
-   __shared__ real_t s_rec[ block_size ];
-   const int c = blockIdx.x;
-   const index_t tot = pshape[0]*pshape[1]*pshape[3];
-
-   // every thread accumulates the elements threadIdx.x, threadIdx.x + block_size, ...
-   real_t acc = Reducer::kInitV;
-   for( index_t flat = threadIdx.x; flat < tot; flat += block_size ){
-     // split the flat index into (n, y, x), x being the fastest dimension
-     const index_t x = flat % pshape[0];
-     const index_t rest = flat / pshape[0];
-     const index_t y = rest % pshape[1];
-     const index_t n = rest / pshape[1];
-     Reducer::Reduce( acc, plan.Eval( (n*pshape[2] + c) * pshape[1] + y, x ) );
-   }
-   // combine the per-thread partial results of the block
-   s_rec[ threadIdx.x ] = acc;
-   __syncthreads();
-   Reduce1D<Reducer,block_dim_bits>( s_rec );
-   if( threadIdx.x == 0 ){
-     Saver::Save( dst[c], s_rec[0]*scale );
-   }
- }
-
- /*!
-  * \brief launch MapReduceKeepDim2Kernel, one block of kBaseThreadNum threads per element of dst
-  * \param dst destination
-  * \param plan expression plan
-  * \param scale factor applied to the reduced value before it is saved
-  * \param pshape 4D shape of the expression
-  */
- template<typename Saver, typename Reducer, typename Plan>
- inline void MapReduceKeepDim2( Tensor<gpu,1> dst, const Plan &plan, real_t scale, Shape<4> pshape ){
-   // nothing to write for an empty destination, and a grid of zero blocks is not a valid launch configuration
-   if( dst.shape[0] == 0 ) return;
-   dim3 dimBlock( kBaseThreadNum );
-   dim3 dimGrid ( dst.shape[0] );
-   CheckLaunchParam( dimGrid, dimBlock, "MapReduceKeepDim2" );
-   MapReduceKeepDim2Kernel<Saver,Reducer,kBaseThreadBits>
-     <<<dimGrid,dimBlock>>>( dst, plan, scale, pshape );
- }
- };
-
- namespace cuda{
- template<int x_bits> // x_bits: log2 of the size of the shared buffer, which is indexed by threadIdx.x
- __global__ void SoftmaxKernel( Tensor<gpu,2> dst, Tensor<gpu,2> src ){ // block y computes dst[y][j] = exp(src[y][j] - max_j src[y][j]) / (sum of these exponentials over j)
- const unsigned x_size = 1 << x_bits;
- const int y = blockIdx.x; // row handled by this block
- __shared__ real_t s_rec[ x_size ]; // one slot per thread, reused for the max and for the sum
-
- // step 1: get max of row y; every thread first takes the maximum over the columns threadIdx.x, threadIdx.x + x_size, ...
- if( threadIdx.x < dst.shape[ 0 ] ){
- s_rec[ threadIdx.x ] = src[ y ][ threadIdx.x ] ;
- }
- for( unsigned x = x_size; x < dst.shape[0]; x += x_size ){
- if( x + threadIdx.x < dst.shape[0] ){
- real_t a = src[ y ][ x + threadIdx.x ];
- s_rec[ threadIdx.x ] = max( a, s_rec[ threadIdx.x] );
- }
- }
- __syncthreads();
- if( threadIdx.x >= dst.shape[0] ){ // threads without a column of their own
- s_rec[ threadIdx.x ] = s_rec[0]; // copy a real value so that the reduction below only sees entries of the row
- }
- __syncthreads();
- Reduce1D<red::maximum,x_bits>( s_rec );
- __syncthreads();
- real_t smax = s_rec[0]; // maximum of the row
- __syncthreads(); // every thread has read smax before s_rec is cleared
- s_rec[ threadIdx.x ] = 0.0f;
- __syncthreads();
-
- // calculate normalizer, with writeback: each thread sums the exponentials of its own columns
- for( unsigned x = 0; x < dst.shape[0]; x += x_size ){
- if( x + threadIdx.x < dst.shape[0] ){
- real_t p = expf( src[ y ][ x + threadIdx.x ] - smax );
- s_rec[ threadIdx.x ] += p;
- // write back first, will fetch later: dst holds the unnormalized value until the last loop
- dst[ y ][ x + threadIdx.x ] = p;
- }
- }
- // calculate normalizer: add up the partial sums of all threads
- __syncthreads();
- Reduce1D<red::sum,x_bits>( s_rec );
- __syncthreads();
- real_t ssum = s_rec[0]; // sum of the exponentials of the row
-
- for( unsigned x = 0; x < dst.shape[0]; x += x_size ){
- if( x + threadIdx.x < dst.shape[0] ){
- dst[ y ][ x + threadIdx.x ] /= ssum; // each thread normalizes the entries it wrote itself
- }
- }
- }
-
- /*!
-  * \brief GPU softmax over each row of src, launches one block of kBaseThreadNum threads per row
-  * \param dst destination, must have the same shape as src
-  * \param src input energy
-  */
- inline void Softmax( Tensor<gpu,2> &dst, const Tensor<gpu,2> &src ){
-   utils::Assert( dst.shape == src.shape, "Softmax: shape mismatch" );
-   // no rows or no columns: nothing to normalize, and a grid of zero blocks is not a valid launch configuration
-   if( dst.shape[1] == 0 || dst.shape[0] == 0 ) return;
-   dim3 dimBlock( kBaseThreadNum );
-   dim3 dimGrid ( dst.shape[1] );
-   CheckLaunchParam( dimGrid, dimBlock, "Softmax" );
-   SoftmaxKernel<kBaseThreadBits><<<dimGrid,dimBlock>>>( dst, src );
- }
- }; // namespace cuda
-}; // namespace mshadow
-#endif // MSHADOW_TENSOR_GPU_INL_CUH
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dd1e4afa/include/mshadow/cxxnet_op.h
----------------------------------------------------------------------
diff --git a/include/mshadow/cxxnet_op.h b/include/mshadow/cxxnet_op.h
deleted file mode 100644
index 1422070..0000000
--- a/include/mshadow/cxxnet_op.h
+++ /dev/null
@@ -1,127 +0,0 @@
-#ifndef CXXNET_OP_H
-#define CXXNET_OP_H
-#pragma once
-/*!
- * \file cxxnet_op.h
- * \brief extra mshadow operation for cxxnet
- * \author Bing Xu
- */
-#include "mshadow/tensor.h"
-
-namespace mshadow {
- /*! \brief operations for algorithm */
- namespace op {
- struct sigmoid {
- MSHADOW_XINLINE static real_t Map(real_t a) {
- return 1.0f / (1.0f + expf(-a));
- }
- };
- struct sigmoid_grad {
- MSHADOW_XINLINE static real_t Map(real_t a) {
- return a * ( 1.0f - a );
- }
- };
-
- /*! \brief Rectified Linear Operation */
- struct relu {
- MSHADOW_XINLINE static real_t Map(real_t a) {
- using namespace std;
- return max( a, 0.0f );
- }
- };
- struct relu_grad {
- MSHADOW_XINLINE static real_t Map(real_t a) {
- return a > 0.0f ? 1.0f : 0.0f;
- }
- };
-
- struct tanh {
- MSHADOW_XINLINE static real_t Map(real_t a) {
- return tanhf( a );
- }
- };
- struct tanh_grad {
- MSHADOW_XINLINE static real_t Map(real_t a) {
- return 1.0f - a * a;
- }
- };
- /*! \brief softplus, log(1 + exp(a)), evaluated in the same overflow-safe form as bnll */
- struct softplus {
-   MSHADOW_XINLINE static real_t Map(real_t a) {
-     // for a > 0, log(1 + exp(a)) = a + log(1 + exp(-a)); this keeps the
-     // argument of expf non-positive, so large inputs no longer overflow to inf
-     return a > 0.0f ? a + logf(1.0f + expf(-a)) : logf(1.0f + expf(a));
-   }
- };
- struct softplus_grad {
- MSHADOW_XINLINE static real_t Map(real_t a) {
- return 1.0f / (1.0f + expf(-a));
- }
- };
- struct bnll {
- MSHADOW_XINLINE static real_t Map(real_t a) {
- return a > 0.0f ? a + logf(1.0f + expf(-a)) : logf(1.0f + expf(a));
- }
- };
- struct bnll_grad {
- MSHADOW_XINLINE static real_t Map(real_t a) {
- real_t expval = a > 50.0f ? 50.0f : a; // kBNLL_THRESHOLD = 50.0f
- expval = expf(-expval);
- return 1.0f / (1.0f + expval);
- }
- };
-
- struct square {
- MSHADOW_XINLINE static real_t Map(real_t a) {
- return a * a;
- }
- };
- /*! \brief scaled tanh, 1.7159047 * tanh( 0.66666667 * a ), the scale factors are hard coded */
- struct stanh {
-   MSHADOW_XINLINE static real_t Map(real_t a) {
-     // float literals keep the computation in single precision, like the other operations in this file
-     return 1.7159047f * tanhf( 0.66666667f * a );
-   }
- };
- /*!
-  * \brief back prop for scaled tanh, expressed in terms of the output a of stanh:
-  *  0.66666667 * 1.7159047 - 0.66666667 / 1.7159047 * a * a
-  */
- struct stanh_grad {
-   MSHADOW_XINLINE static real_t Map(real_t a) {
-     // float literals keep the computation in single precision, like the other operations in this file
-     return 0.66666667f*1.7159047f - 0.66666667f/1.7159047f*a*a;
-   }
- };
-
- struct abs{
- MSHADOW_XINLINE static real_t Map(real_t a) {
- return a < 0 ? -a : a;
- }
- };
-
- }; //namespace op
-
-}; //namespace mshadow
-
-namespace mshadow {
- namespace op {
- /*! \brief used for generate Bernoulli mask */
- struct threshold {
- MSHADOW_XINLINE static real_t Map(real_t a, real_t b) {
- return a < b ? 1.0f : 0.0f;
- }
- };
-
- /*! \brief used for generate element of power */
- struct power {
- MSHADOW_XINLINE static real_t Map(real_t a, real_t b) {
- return powf( a, b );
- }
- };
- /*! \brief square root of the sum of the two operands */
- struct sqrtop {
-   MSHADOW_XINLINE static real_t Map(real_t a, real_t b) {
-     // sqrtf, like expf / logf / powf elsewhere in this file, never falls back to the double overload
-     return sqrtf(a+b);
-   }
- };
- struct max {
- MSHADOW_XINLINE static real_t Map(real_t a, real_t b) {
- return a > b ? a : b;
- }
- };
- }; // namespace op
-}; // namespace mshadow
-
-#endif // CXXNET_OP_H
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dd1e4afa/include/mshadow/tensor.h
----------------------------------------------------------------------
diff --git a/include/mshadow/tensor.h b/include/mshadow/tensor.h
deleted file mode 100644
index 42d13d3..0000000
--- a/include/mshadow/tensor.h
+++ /dev/null
@@ -1,472 +0,0 @@
-#ifndef MSHADOW_TENSOR_H
-#define MSHADOW_TENSOR_H
-/*!
- * \file tensor.h
- * \brief header file of tensor data structure and functions
- * convention: this lib requires explicit memory allocation and de-allocation
- * all the data structure Tensor<cpu,1>, Tensor<gpu,1> are like handles(pointers),
- * no memory allocation is happening during calculation
- * \author Bing Xu, Tianqi Chen
- */
-#include "tensor_base.h"
-#include "tensor_expr.h"
-
-namespace mshadow {
- /*!
- * \brief shape of a tensor
- * IMPORTANT NOTE: this shape is different from numpy.shape
- * shape[0] gives the lowest dimension, shape[dimension-1] gives the highest dimension
- * shape[k] corresponds to k-th dimension of tensor
- * \tparam dimension dimension of tensor
- */
- template<int dimension>
- struct Shape {
- public:
- /*! \brief maximum dimension of tensor */
- const static int kMaxShape = dimension;
- /*! \brief maximum dimension minus 1 */
- const static int kSubShape = dimension - 1;
- public:
- /*! \brief default constructor, do nothing */
- MSHADOW_XINLINE Shape(void) {}
- /*! \brief constuctor */
- MSHADOW_XINLINE Shape( const Shape<dimension> &s ){
- #pragma unroll
- for( int i = 0; i < kMaxShape; ++i ){
- this->shape_[i] = s[i];
- }
- this->stride_ = s.stride_;
- }
- /*!
- * \brief get corresponding index
- * \param idx dimension index
- * \return the corresponding dimension size
- */
- MSHADOW_XINLINE index_t& operator[](index_t idx) {
- return shape_[ idx ];
- }
- /*!
- * \brief get corresponding index
- * \param idx dimension index
- * \return the corresponding dimension size
- */
- MSHADOW_XINLINE const index_t& operator[](index_t idx) const {
- return shape_[ idx ];
- }
- /*! \return whether two shape equals */
- MSHADOW_XINLINE bool operator==(const Shape<kMaxShape> &s) const {
- #pragma unroll
- for ( int i = 0; i < kMaxShape; ++i ) {
- if (s.shape_[i] != this->shape_[i]) return false;
- }
- return true;
- }
- /*!
- * flatten the higher dimension to second dimension, return a 2D shape
- * \return the flat 2d shape
- */
- MSHADOW_XINLINE Shape<2> FlatTo2D(void) const {
- Shape<2> s;
- s.stride_ = this->stride_;
- s.shape_[ 0 ] = this->shape_[ 0 ];
- index_t ymax = 1;
-
- #pragma unroll
- for (int i = 1; i < kMaxShape; ++i) {
- ymax *= this->shape_[ i ];
- }
- s.shape_[1] = ymax;
- return s;
- }
- /*! \return number of valid elements */
- MSHADOW_XINLINE size_t Size(void) const{
- size_t memsz = this->shape_[ 0 ];
- #pragma unroll
- for (int i = 1; i < kMaxShape; ++i) {
- memsz *= this->shape_[ i ];
- }
- return memsz;
- }
- /*! \return memory size, including the aligned x dimension */
- MSHADOW_XINLINE size_t MSize(void) const {
- size_t memsz = this->stride_;
- #pragma unroll
- for (int i = 1; i < kMaxShape; ++i) {
- memsz *= this->shape_[ i ];
- }
- return memsz;
- }
- /*!
- * \return product shape in [dimstart,dimend)
- * \param dimstart start dimension
- * \param dimend end dimension
- */
- MSHADOW_XINLINE index_t ProdShape( int dimstart, int dimend ) const{
- index_t num = 1;
- #pragma unroll
- for (int i = dimstart; i < dimend; ++i) {
- num *= this->shape_[ i ];
- }
- return num;
- }
- /*!
- * \brief get subshape
- * \return subshape
- */
- MSHADOW_XINLINE Shape<kSubShape> SubShape(void) const {
- Shape<kSubShape> s;
- s.stride_ = this->stride_;
- // for cuda
- #pragma unroll
- for (int i = 0; i < kSubShape; ++i) {
- s.shape_[ i ] = this->shape_[ i ];
- }
- return s;
- }
-
- public:
- /*! \brief storing the dimension information */
- index_t shape_[ kMaxShape ];
- /*!
- * \brief storing the stride information in x dimension
- * this is used to deal with pitch allocation in gpu or sse(align x dimension to 64bit) for efficiency
- */
- index_t stride_;
- };
- // useful construction functions to generate shape
- /*!
-  * \brief build a one dimension shape whose stride equals s0
-  * \param s0 size of dimension 0
-  * \return the constructed shape
-  */
- MSHADOW_XINLINE Shape<1> Shape1( index_t s0 ){
-   Shape<1> result;
-   result.shape_[0] = s0;
-   result.stride_ = s0;
-   return result;
- }
- /*!
-  * \brief build a two dimension shape whose stride equals s0
-  * \param s1 size of dimension 1
-  * \param s0 size of dimension 0
-  * \return the constructed shape
-  */
- MSHADOW_XINLINE Shape<2> Shape2( index_t s1, index_t s0 ){
-   Shape<2> result;
-   result.shape_[0] = s0;
-   result.shape_[1] = s1;
-   result.stride_ = s0;
-   return result;
- }
- /*!
-  * \brief build a three dimension shape whose stride equals s0
-  * \param s2 size of dimension 2
-  * \param s1 size of dimension 1
-  * \param s0 size of dimension 0
-  * \return the constructed shape
-  */
- MSHADOW_XINLINE Shape<3> Shape3( index_t s2, index_t s1, index_t s0 ){
-   Shape<3> result;
-   result.shape_[0] = s0;
-   result.shape_[1] = s1;
-   result.shape_[2] = s2;
-   result.stride_ = s0;
-   return result;
- }
- /*!
-  * \brief build a four dimension shape whose stride equals s0
-  * \param s3 size of dimension 3
-  * \param s2 size of dimension 2
-  * \param s1 size of dimension 1
-  * \param s0 size of dimension 0
-  * \return the constructed shape
-  */
- MSHADOW_XINLINE Shape<4> Shape4( index_t s3, index_t s2, index_t s1, index_t s0 ){
-   Shape<4> result;
-   result.shape_[0] = s0;
-   result.shape_[1] = s1;
-   result.shape_[2] = s2;
-   result.shape_[3] = s3;
-   result.stride_ = s0;
-   return result;
- }
-}; // namespace mshadow
-
-namespace mshadow {
- /*! \brief device name CPU */
- struct cpu {
- /*! \brief whether this device is CPU or not */
- const static bool kDevCPU = true;
- /*! \brief device flag number, identifies this device */
- const static int kDevMask = 1<<0;
- };
- /*! \brief device name GPU */
- struct gpu {
- /*! \brief whether this device is CPU or not */
- const static bool kDevCPU = false;
- /*! \brief device flag number, identifies this device */
- const static int kDevMask = 1<<1;
- };
-
- // more compact template
- /*!
- * \brief general tensor
- * \tparam Device which device the tensor is on
- * \tparam dimension dimension of the tensor
- */
- template<typename Device, int dimension>
- struct Tensor: public expr::ContainerExp< Tensor<Device,dimension> >{
- public:
- /*! \brief whether current type lies in cpu */
- const static bool kDevCPU = Device::kDevCPU;
- /*! \brief dimension of subtype */
- const static int kSubdim = dimension - 1;
-
- public:
- /*! \brief pointer to the data */
- real_t *dptr;
- /*! \brief shape of the tensor */
- Shape<dimension> shape;
- public:
- /*! \brief default constructor */
- MSHADOW_XINLINE Tensor(void) {}
- /*! \brief constructor from shape */
- MSHADOW_XINLINE Tensor(const Shape<dimension> &shape): shape(shape) {}
- /*! \brief constructor from data pointer and shape */
- MSHADOW_XINLINE Tensor(real_t *dptr, const Shape<dimension> &shape): dptr((real_t*)dptr), shape(shape) {}
- /*!
- * \brief flatten the tensor to 2 dimension, collapse the higher dimensions together
- * \return tensor after flatten
- */
- MSHADOW_XINLINE Tensor<Device, 2> FlatTo2D(void) const {
- return Tensor<Device, 2>(reinterpret_cast<real_t*> \
- (dptr), shape.FlatTo2D());
- }
- /*!
- * \brief get a element of dimension - 1
- * \param idx index
- * \return the result tensor
- */
- MSHADOW_XINLINE Tensor<Device, kSubdim> operator[](index_t idx) const {
- Shape<kSubdim> s = shape.SubShape();
- return Tensor<Device, kSubdim>(reinterpret_cast<real_t*> \
- (dptr) + s.MSize() * idx, s);
- }
- /*!
- * \brief slice the tensor in highest dimension [begin,end)
- * \param begin begin position of slice
- * \param end end position of slice
- * \return tensor after slice
- */
- MSHADOW_XINLINE Tensor<Device, dimension> Slice(index_t begin, index_t end) const {
- Shape<dimension> s = this->shape;
- s[ dimension - 1 ] = end - begin;
- return Tensor<Device, dimension>(reinterpret_cast<real_t*>\
- (dptr) + s.SubShape().MSize() * begin, s);
- }
- public:
- /*!\brief functions to fit expression template */
- inline Tensor<Device,dimension>& operator=( real_t s ){
- return this->__assign( s );
- }
- /*!\brief functions to fit expression template */
- template<typename E>
- inline Tensor<Device,dimension>& operator=( const expr::Exp<E,expr::type::kMapper> &exp ){
- return this->__assign( exp );
- }
- /*!\brief functions to fit expression template */
- template<typename E>
- inline Tensor<Device,dimension>& operator=( const expr::Exp<E,expr::type::kComplex> &exp ){
- return this->__assign( exp );
- }
- };
-
- /*
- * specialization of Tensor for one dimension; it is needed because operator[] is
- * implemented differently here: it returns a reference to an element instead of a sub-tensor
- */
- template<typename Device>
- struct Tensor<Device,1>: public expr::ContainerExp< Tensor<Device,1> >{
- public:
- real_t *dptr;
- Shape<1> shape;
- public:
- MSHADOW_XINLINE Tensor(void) {}
- MSHADOW_XINLINE Tensor(const Shape<1> &shape): shape(shape) {}
- MSHADOW_XINLINE Tensor(real_t *dptr, Shape<1> shape) :dptr(dptr), shape(shape) {}
-
- MSHADOW_XINLINE Tensor<Device, 2> FlatTo2D(void) const {
- return Tensor<Device, 2>(reinterpret_cast<real_t*> \
- (dptr), shape.FlatTo2D());
- }
- MSHADOW_XINLINE Tensor<Device, 1> Slice(index_t begin, index_t end) const {
- Shape<1> s;
- s[0] = s.stride_ = end - begin;
- return Tensor<Device, 1>(reinterpret_cast<real_t*> \
- (dptr) + begin, s);
- }
- MSHADOW_XINLINE real_t &operator[](index_t idx) { return dptr[ idx ]; }
- MSHADOW_XINLINE const real_t &operator[](index_t idx)const { return dptr[ idx ]; }
- public:
- // functions to fit expression template
- inline Tensor<Device,1>& operator=( double s ){
- return this->__assign( s );
- }
- template<typename E>
- inline Tensor<Device,1>& operator=( const expr::Exp<E,expr::type::kMapper> &exp ){
- return this->__assign( exp );
- }
- template<typename E>
- inline Tensor<Device,1>& operator=( const expr::Exp<E,expr::type::kComplex> &exp ){
- return this->__assign( exp );
- }
- };
-}; // namespace mshadow
-
-// add unroll loops for the shape
-namespace mshadow {
- // function declarations
- /*!
- * \brief initialize tensor engine, used to call initialization functions of dependent libs
- * this function should be called before all GPU tensor operations,
- * for using tensors in CPU, this call is actually not needed
- * \param device_id GPU device id to be chosen
- */
- inline void InitTensorEngine( int device_id=0 );
- /*!
- * \brief Shutdown tensor engine,
- * this function should be called after all GPU tensor operations,
- * for using tensors in CPU, this call is actually not needed
- */
- inline void ShutdownTensorEngine( void );
-
- /*!
- * \brief CPU/GPU: allocate space for a Tensor, according to the shape in the obj
- * this function is responsible to set the stride_ in each obj.shape
- * \tparam dim specify the dim of tensor
- * \param obj the tensor object, with shape specified
- * \param pad whether padding dimension 0, to make last dimension aligned,
- * padding may help improve efficiency of matrix multiplications
- * if true, will allocate space with stride_ that may not equals shape[0]
- * if false, will allocate continuous space
- */
- template<int dim>
- inline void AllocSpace(Tensor<cpu,dim> &obj, bool pad = MSHADOW_ALLOC_PAD);
- /*! \brief refer to comment of cpu ver \sa AllocSpace */
- template<int dim>
- inline void AllocSpace(Tensor<gpu,dim> &obj, bool pad = MSHADOW_ALLOC_PAD);
-
- /*!
- * \brief CPU/GPU: free the space of tensor, will set obj.dptr to NULL
- * \tparam dim specify the dim of tensor
- * \param obj the tensor object
- */
- template<int dim>
- inline void FreeSpace(Tensor<cpu,dim> &obj);
- /*! \brief refer to comment of cpu ver \sa FreeSpace */
- template<int dim>
- inline void FreeSpace(Tensor<gpu,dim> &obj);
-
- /*!
- * \brief CPU/GPU: short cut to allocate and initialize a Tensor
- * \tparam Device device of tensor
- * \tparam dim dimension of tensor
- * \param shape: shape of tensor
- * \param initv: initialization value
- * \param pad : padding option
- * \sa AllocSpace
- */
- template<typename Device, int dim>
- inline Tensor<Device,dim> NewTensor(const Shape<dim> &shape, real_t initv, bool pad = MSHADOW_ALLOC_PAD);
-
- /*!
- * \brief copy data from one tensor to another, with same shape
- * \tparam dim specify the dim of tensor
- * \param dst target tensor
- * \param src source tensor
- */
- template<int dim>
- inline void Copy(Tensor<cpu,dim> dst, const Tensor<cpu,dim> &src );
- /*! \brief refer to comment of cpu ver \sa Copy */
- template<int dim>
- inline void Copy(Tensor<cpu,dim> dst, const Tensor<gpu,dim> &src );
- /*! \brief refer to comment of cpu ver \sa Copy */
- template<int dim>
- inline void Copy(Tensor<gpu,dim> dst, const Tensor<cpu,dim> &src );
- /*! \brief refer to comment of cpu ver \sa Copy */
- template<int dim>
- inline void Copy(Tensor<gpu,dim> dst, const Tensor<gpu,dim> &src );
-
-
- /*!
- * \brief CPU/GPU: normalize softmax: dst[i][j] = exp( energy[i][j] ) /( sum_j exp( energy[i][j] ) )
- * \param dst destination
- * \param energy input energy
- */
- inline void Softmax( Tensor<cpu,2> dst, const Tensor<cpu,2> &energy );
- /*! \brief refer to comment of cpu ver \sa Softmax */
- inline void Softmax( Tensor<gpu,2> dst, const Tensor<gpu,2> &energy );
-
-}; // namespace mshadow
-
-
-namespace mshadow{
- // function declarations to support expression, no need to understand them
- // these functions do not need to be directly used
-
- /*!
- * \brief CPU/GPU: map a expression to a tensor, this function calls MapPlan
- * \tparam Saver specify storage method
- * \tparam dim dim of the tensor, during usage, there is no need to specify this parameter
- * \tparam E specifies the expression type, not need to specify this parameter during usage
- * \tparam etype expression type
- * \param dst destination
- * \param exp expression
- * \sa namespace mshadow::sv, mshadow::op, mshadow::expr
- */
- template<typename Saver, int dim, typename E, int etype>
- inline void MapExp(Tensor<cpu,dim> dst, const expr::Exp<E,etype> &exp );
- /*! \brief refer to comment of cpu ver \sa MapExp */
- template<typename Saver, int dim, typename E, int etype>
- inline void MapExp(Tensor<gpu,dim> dst, const expr::Exp<E,etype> &exp );
-
- /*!
- * \brief CPU/GPU: map a expression, do reduction to 1D Tensor in lowest dimension (dimension 0)
- * \tparam Saver specify storage method
- * \tparam Reducer specify a reducer method
- * \tparam E specifies the expression type, not need to specify this parameter during usage
- * \tparam etype expression type
- * \param dst destination
- * \param exp expression
- * \param scale scale the result before save
- * \sa namespace mshadow::sv, mshadow::op, mshadow::red, mshadow::expr
- */
- template<typename Saver, typename Reducer, typename E, int etype>
- inline void MapReduceKeepLowest( Tensor<cpu,1> dst, const expr::Exp<E,etype> &exp, real_t scale = 1.0f );
- /*! \brief refer to comment of cpu ver \sa MapReduceKeepLowest */
- template<typename Saver, typename Reducer, typename E, int etype>
- inline void MapReduceKeepLowest( Tensor<gpu,1> dst, const expr::Exp<E,etype> &exp, real_t scale = 1.0f );
-
-
- /*!
- * \brief CPU/GPU: map a expression, do reduction to 1D Tensor in third dimension (dimension 2)
- * \tparam Saver specify storage method
- * \tparam Reducer specify a reducer method
- * \tparam E specifies the expression type, not need to specify this parameter during usage
- * \tparam dimkeep the target dimension to be kept, should be larger than 0, for 0, use MapReduceKeepLowest
- * \tparam etype expression type
- * \param dst destination
- * \param exp expression
- * \param scale scale the result before save
- * \sa namespace mshadow::sv, mshadow::op, mshadow::red, mshadow::expr
- */
- template<typename Saver, typename Reducer, int dimkeep, typename E, int etype>
- inline void MapReduceKeepHighDim( Tensor<cpu,1> dst, const expr::Exp<E,etype> &exp, real_t scale = 1.0f );
- /*! \brief refer to comment of cpu ver \sa MapReduceKeepHighDim */
- template<typename Saver, typename Reducer, int dimkeep, typename E, int etype>
- inline void MapReduceKeepHighDim( Tensor<gpu,1> dst, const expr::Exp<E,etype> &exp, real_t scale = 1.0f );
-
-};// namespace mshadow
-
-// execution implementation of expression evaluations
-#include "tensor_expr_engine-inl.hpp"
-// cpu implementation of functions
-#include "tensor_cpu-inl.hpp"
-// gpu implementation of functions
-#include "tensor_gpu-inl.hpp"
-// extension of expressions
-#include "tensor_expr_ext.h"
-// io
-#include "tensor_io.h"
-// container
-#include "tensor_container.h"
-// random number generator
-#include "tensor_random.h"
-#endif // MSHADOW_TENSOR_H
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dd1e4afa/include/mshadow/tensor_base.h
----------------------------------------------------------------------
diff --git a/include/mshadow/tensor_base.h b/include/mshadow/tensor_base.h
deleted file mode 100644
index b251cba..0000000
--- a/include/mshadow/tensor_base.h
+++ /dev/null
@@ -1,298 +0,0 @@
-#ifndef MSHADOW_TENSOR_BASE_H
-#define MSHADOW_TENSOR_BASE_H
-/*!
- * \file tensor_base.h
- * \brief definitions of base types, macros functions
- *
- * \author Bing Xu, Tianqi Chen
- */
-#include <cmath>
-#include <cstdio>
-#include <cfloat>
-#include <climits>
-#include <algorithm>
-// macro definitions
-
-/*!\brief if this macro is defined to be 1, mshadow should compile without any other libs */
-#ifndef MSHADOW_STAND_ALONE
- #define MSHADOW_STAND_ALONE 0
-#endif
-
-/*! \brief whether do padding during allocation */
-#ifndef MSHADOW_ALLOC_PAD
- #define MSHADOW_ALLOC_PAD true
-#endif
-
-/*!
- * \brief x dimension of data must be bigger than pad_size * ratio to be allocated padded memory, otherwise use tight allocation
- * for example, if pad_ratio=2, GPU memory alignment size is 32, then we will only allocate padded memory if x dimension > 64
- * set it to 0 then we will always allocate padded memory
- */
-#ifndef MSHADOW_MIN_PAD_RATIO
- #define MSHADOW_MIN_PAD_RATIO 2
-#endif
-
-#if MSHADOW_STAND_ALONE
- #define MSHADOW_USE_CBLAS 0
- #define MSHADOW_USE_MKL 0
- #define MSHADOW_USE_CUDA 0
-#endif
-
-/*! \brief use CBLAS for BLAS */
-#ifndef MSHADOW_USE_CBLAS
- #define MSHADOW_USE_CBLAS 0
-#endif
-/*! \brief use MKL for BLAS */
-#ifndef MSHADOW_USE_MKL
- #define MSHADOW_USE_MKL 1
-#endif
-/*! \brief use CUDA support, must ensure that the cuda include path is correct, or directly compile using nvcc */
-#ifndef MSHADOW_USE_CUDA
- #define MSHADOW_USE_CUDA 1
-#endif
-/*! \brief use single precision float */
-#ifndef MSHADOW_SINGLE_PRECISION
- #define MSHADOW_SINGLE_PRECISION 1
-#endif
-/*! \brief whether use SSE */
-#ifndef MSHADOW_USE_SSE
- #define MSHADOW_USE_SSE 1
-#endif
-/*! \brief whether use NVML to get dynamic info */
-#ifndef MSHADOW_USE_NVML
- #define MSHADOW_USE_NVML 0
-#endif
-// SSE conflicts with cudacc
-#ifdef __CUDACC__
- #undef MSHADOW_USE_SSE
- #define MSHADOW_USE_SSE 0
-#endif
-
-#if MSHADOW_USE_CBLAS
-extern "C"{
- #include <cblas.h>
-}
-#elif MSHADOW_USE_MKL
- #include <mkl.h>
- #include <mkl_cblas.h>
- #include <mkl_vsl.h>
- #include <mkl_vsl_functions.h>
-#endif
-
-#if MSHADOW_USE_CUDA
- #include <cublas.h>
- #include <curand.h>
-#endif
-
-#if MSHADOW_USE_NVML
- #include <nvml.h>
-#endif
-// --------------------------------
-// MSHADOW_XINLINE is used for inlining template code for both CUDA and CPU code.
-#ifdef MSHADOW_XINLINE
- #error "MSHADOW_XINLINE must not be defined"
-#endif
-#ifdef __CUDACC__
- #define MSHADOW_XINLINE inline __attribute__((always_inline)) __device__ __host__
-#else
- #define MSHADOW_XINLINE inline __attribute__((always_inline))
-#endif
-/*! \brief cpu force inline */
-#define MSHADOW_CINLINE inline __attribute__((always_inline))
-
-#if defined(__GXX_EXPERIMENTAL_CXX0X) || defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103L
- #define MSHADOW_CONSTEXPR constexpr
-#else
- #define MSHADOW_CONSTEXPR const
-#endif
-
-/*! \brief namespace for mshadow */
-namespace mshadow {
- /*! \brief buffer size for each random number generator */
- const unsigned kRandBufferSize = 1000000;
- /*! \brief pi */
- const float kPi = 3.1415926f;
-
-#if MSHADOW_SINGLE_PRECISION
- /*! \brief type that will be used for content */
- typedef float real_t;
-#else
- typedef double real_t;
-#endif
- /*! \brief type that will be used for index */
- typedef unsigned index_t;
-}; // namespace mshadow
-
-namespace mshadow {
- /*! \brief namespace for operators */
- namespace op {
- // binary operator
- /*! \brief mul operator */
- struct mul{
- /*! \brief map a, b to result using defined operation */
- MSHADOW_XINLINE static real_t Map(real_t a, real_t b) {
- return a * b;
- }
- };
- /*! \brief plus operator */
- struct plus {
- /*! \brief map a, b to result using defined operation */
- MSHADOW_XINLINE static real_t Map(real_t a, real_t b) {
- return a + b;
- }
- };
- /*! \brief minus operator */
- struct minus {
- /*! \brief map a, b to result using defined operation */
- MSHADOW_XINLINE static real_t Map(real_t a, real_t b) {
- return a - b;
- }
- };
- /*! \brief divide operator */
- struct div {
- /*! \brief map a, b to result using defined operation */
- MSHADOW_XINLINE static real_t Map(real_t a, real_t b) {
- return a / b;
- }
- };
- /*! \brief get rhs */
- struct right {
- /*! \brief map a, b to result using defined operation */
- MSHADOW_XINLINE static real_t Map(real_t a, real_t b) {
- return b;
- }
- };
- }; // namespace op
-
- /*! \brief namespace for savers */
- namespace sv {
- /*! \brief save to saver: = */
- struct saveto {
- /*! \brief save b to a using save method */
- MSHADOW_XINLINE static void Save(real_t& a, real_t b) {
- a = b;
- }
- /*! \brief helper constant to use BLAS, alpha */
- MSHADOW_CONSTEXPR static real_t kAlphaBLAS = 1.0f;
- /*! \brief helper constant to use BLAS, beta */
- MSHADOW_CONSTEXPR static real_t kBetaBLAS = 0.0f;
- /*! \brief corresponding binary operator type */
- typedef op::right OPType;
- };
- /*! \brief save to saver: += */
- struct plusto {
- /*! \brief save b to a using save method */
- MSHADOW_XINLINE static void Save(real_t& a, real_t b) {
- a += b;
- }
- /*! \brief helper constant to use BLAS, alpha */
- MSHADOW_CONSTEXPR static real_t kAlphaBLAS = 1.0f;
- /*! \brief helper constant to use BLAS, beta */
- MSHADOW_CONSTEXPR static real_t kBetaBLAS = 1.0f;
- /*! \brief corresponding binary operator type */
- typedef op::plus OPType;
- };
- /*! \brief minus to saver: -= */
- struct minusto {
- /*! \brief save b to a using save method */
- MSHADOW_XINLINE static void Save(real_t& a, real_t b) {
- a -= b;
- }
- /*! \brief helper constant to use BLAS, alpha */
- MSHADOW_CONSTEXPR static real_t kAlphaBLAS = -1.0f;
- /*! \brief helper constant to use BLAS, beta */
- MSHADOW_CONSTEXPR static real_t kBetaBLAS = 1.0f;
- /*! \brief corresponding binary operator type */
- typedef op::minus OPType;
- };
- /*! \brief multiply to saver: *= */
- struct multo {
- /*! \brief save b to a using save method */
- MSHADOW_XINLINE static void Save(real_t& a, real_t b) {
- a *= b;
- }
- /*! \brief corresponding binary operator type */
- typedef op::mul OPType;
- };
- /*! \brief divide to saver: /= */
- struct divto {
- /*! \brief save b to a using save method */
- MSHADOW_XINLINE static void Save(real_t& a, real_t b) {
- a /= b;
- }
- /*! \brief corresponding binary operator type */
- typedef op::div OPType;
- };
- }; // namespace sv
-
-
- namespace op {
- // unary operator/ function: example
- // these operators can be defined by user, in the same style as binary and unary operator
- // to use, simply write F<op::identity>( src )
- /*! \brief identity function that maps a real number to it self */
- struct identity{
- /*! \brief map a to result using defined operation */
- MSHADOW_XINLINE static real_t Map(real_t a) {
- return a;
- }
- };
- }; // namespace op
-
- /*! \brief namespace for potential reducer operations */
- namespace red {
- /*! \brief sum reducer */
- struct sum {
- /*! \brief do reduction into dst: accumulate src onto dst */
- MSHADOW_XINLINE static void Reduce( volatile real_t& dst, volatile real_t src ) {
- dst += src;
- }
- /*! \brief calculate gradient of redres with respect to redsrc, redres: reduced result, redsrc: one of the reduced elements; always 1 for a sum */
- MSHADOW_XINLINE static real_t PartialGrad( real_t redres, real_t redsrc ) {
- return 1.0f;
- }
- /*! \brief the initial value of the reducer */
- MSHADOW_CONSTEXPR static real_t kInitV = 0.0f;
- };
- /*! \brief maximum reducer */
- struct maximum {
- /*! \brief do reduction into dst: keep the larger of dst and src */
- MSHADOW_XINLINE static void Reduce( volatile real_t& dst, volatile real_t src ) {
- using namespace std;
- dst = max( dst, src );
- }
- /*! \brief calculate gradient of redres with respect to redsrc, redres: reduced result, redsrc: one of the reduced elements; 1 when redsrc equals redres, 0 otherwise */
- MSHADOW_XINLINE static real_t PartialGrad( real_t redres, real_t redsrc ) {
- return redres == redsrc ? 1.0f: 0.0f;
- }
- /*! \brief the initial value of the reducer: the most negative finite value of real_t */
-#if MSHADOW_SINGLE_PRECISION
- MSHADOW_CONSTEXPR static real_t kInitV = -FLT_MAX;
-#else
- MSHADOW_CONSTEXPR static real_t kInitV = -DBL_MAX;
-#endif
- };
- }; // namespace red
-
- /*! \brief namespace for helper utils of the project */
- namespace utils{
- /*! \brief print the error message to stderr, then terminate the process via exit( -1 ) */
- inline void Error( const char *msg ){
- fprintf( stderr, "Error:%s\n",msg );
- exit( -1 );
- }
- /*! \brief assert an expression is true; reports "AssertError" through Error otherwise */
- inline void Assert( bool exp ){
- if( !exp ) Error( "AssertError" );
- }
- /*! \brief assert an expression is true; reports msg through Error otherwise */
- inline void Assert( bool exp, const char *msg ){
- if( !exp ) Error( msg );
- }
- /*! \brief print a warning message to stderr and continue */
- inline void Warning( const char *msg ){
- fprintf( stderr, "warning:%s\n",msg );
- }
- }; // namespace utils
-}; // namespace mshadow
-#endif // MSHADOW_TENSOR_BASE_H
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dd1e4afa/include/mshadow/tensor_container.h
----------------------------------------------------------------------
diff --git a/include/mshadow/tensor_container.h b/include/mshadow/tensor_container.h
deleted file mode 100644
index f0699e7..0000000
--- a/include/mshadow/tensor_container.h
+++ /dev/null
@@ -1,152 +0,0 @@
-#ifndef MSHADOW_TENSOR_CONTAINER_H
-#define MSHADOW_TENSOR_CONTAINER_H
-/*!
- * \file tensor_container.h
- * \brief tensor container that does memory allocation and resize like STL
- * \author Tianqi Chen
- */
-#include "tensor.h"
-#include "tensor_io.h"
-
-namespace mshadow{
- /*!
- * \brief tensor container that does memory allocation and resize like STL,
- * use it to save the lines of FreeSpace in class.
- * Do not abuse it, efficiency can come from pre-allocation and no re-allocation
- *
- * \tparam Device which device the tensor is on
- * \tparam dimension dimension of the tensor
- */
- template<typename Device, int dimension>
- class TensorContainer: public Tensor<Device,dimension>{
- public:
- /*!
- * \brief constructor
- * \param pad whether use padding alignment in space allocation
- */
- TensorContainer( bool pad = MSHADOW_ALLOC_PAD ){
- this->pad_ = pad;
- this->dptr = data_.dptr = NULL;
- this->shape[0] = 0;
- this->shape.stride_ = 0;
- this->data_.shape.stride_ = 0;
- this->data_.shape[1] = 0;
- }
- /*!
- * \brief constructor
- * \param shape initial shape
- */
- TensorContainer( const Shape<dimension> &shape ){
- this->pad_ = MSHADOW_ALLOC_PAD;
- data_.dptr = NULL;
- this->AllocByShape( shape );
- }
- /*!
- * \brief constructor
- * \param shape initial shape
- * \param initv initial value
- */
- TensorContainer( const Shape<dimension> &shape, real_t initv ){
- this->pad_ = MSHADOW_ALLOC_PAD;
- data_.dptr = NULL;
- this->AllocByShape( shape );
- (*this) = initv;
- }
- ~TensorContainer( void ){
- this->FreeSpace();
- }
- /*!
- * \brief resize the container to given shape, content is NOT preserved
- * \param shape target shape
- */
- inline void Resize( const Shape<dimension> &shape ){
- Shape<2> s2 = shape.FlatTo2D();
- if( s2.shape_[0] > data_.shape.stride_ || s2.shape_[1] > data_.shape[1] ){
- this->AllocByShape( shape );
- }else{
- this->shape = shape;
- if( this->pad_ ){
- this->shape.stride_ = data_.shape.stride_;
- }else{
- this->shape.stride_ = this->shape[ 0 ];
- }
- }
- }
- /*!
- * \brief resize the container to given shape, and initialize, content is NOT preserved
- * \param shape target shape
- * \param initv initialization value
- */
- inline void Resize( const Shape<dimension> &shape, real_t initv ){
- this->Resize( shape );
- (*this) = initv;
- }
- /*! \brief set whether padding is allowed in tensor */
- inline void set_pad( bool pad ){
- this->pad_ = pad;
- }
- /*!
- * \brief save by binary format
- * \param fo output binary stream
- * \tparam TStream type of stream, need to support Read, Write, one example is utils::IStream.
- */
- template<typename TStream>
- inline void SaveBinary( TStream &fo ) const{
- mshadow::SaveBinary( fo, *this );
- }
- /*!
- * \brief load by binary format, a temp Tensor<cpu,dim> storage will be allocated
- * \param fi input binary stream
- * \tparam TStream type of stream, need to support Read, Write, one example is utils::IStream.
- */
- template<typename TStream>
- inline void LoadBinary( TStream &fi ) {
- Tensor<cpu,dimension> tmp;
- mshadow::LoadBinary( fi, tmp, false );
- this->Resize( tmp.shape );
- Copy( *this, tmp );
- mshadow::FreeSpace( tmp );
- }
- public:
- // functions to fit exp template
- inline Tensor<Device,dimension>& operator=( real_t s ){
- return this->__assign( s );
- }
- template<typename E>
- inline Tensor<Device,dimension>& operator=( const expr::Exp<E,expr::type::kMapper> &exp ){
- return this->__assign( exp );
- }
- template<typename E>
- inline Tensor<Device,dimension>& operator=( const expr::Exp<E,expr::type::kComplex> &exp ){
- return this->__assign( exp );
- }
- private:
- /*! \brief whether we do padding in the space */
- bool pad_;
- /*! \brief the shape of data_ is actually current data space */
- Tensor<Device, 2> data_;
- private:
- inline void FreeSpace (void){
- if( data_.dptr != NULL ){
- mshadow::FreeSpace( data_ );
- data_.dptr = this->dptr = NULL;
- }
- }
- inline void AllocByShape (const Shape<dimension>& shape){
- if( data_.dptr != NULL ){
- this->FreeSpace();
- }
- data_.shape = shape.FlatTo2D();
- mshadow::AllocSpace( data_, pad_ );
- this->dptr = data_.dptr;
- this->shape = shape;
- if( this->pad_ ){
- this->shape.stride_ = data_.shape.stride_;
- }else{
- this->shape.stride_ = shape[0];
- }
- }
- };
-};// namespace mshadow
-
-#endif
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dd1e4afa/include/mshadow/tensor_cpu-inl.hpp
----------------------------------------------------------------------
diff --git a/include/mshadow/tensor_cpu-inl.hpp b/include/mshadow/tensor_cpu-inl.hpp
deleted file mode 100644
index 0fa3cfa..0000000
--- a/include/mshadow/tensor_cpu-inl.hpp
+++ /dev/null
@@ -1,168 +0,0 @@
-#ifndef MSHADOW_TENSOR_CPU_INL_HPP
-#define MSHADOW_TENSOR_CPU_INL_HPP
-/*!
- * \file tensor_cpu-inl.hpp
- * \brief implementation of CPU host code
- * \author Bing Xu, Tianqi Chen
- */
-#include <cstring>
-#include "tensor_base.h"
-#include "tensor_sse-inl.hpp"
-
-namespace mshadow {
- template<int dim>
- inline void AllocSpace(Tensor<cpu,dim> &obj, bool pad ){
- size_t pitch;
- if( pad ){
- obj.dptr = (real_t*)sse2::AlignedMallocPitch
- ( pitch, obj.shape[0] * sizeof(real_t), obj.FlatTo2D().shape[1] );
- obj.shape.stride_ = static_cast<index_t>( pitch / sizeof(real_t) );
- }else{
- obj.shape.stride_ = obj.shape[0];
- obj.dptr = (real_t*)sse2::AlignedMallocPitch
- ( pitch, obj.shape.Size() * sizeof(real_t), 1 );
- }
- }
-
- template<typename Device, int dim>
- inline Tensor<Device,dim> NewTensor(const Shape<dim> &shape, real_t initv, bool pad ){
- Tensor<Device, dim> obj( shape );
- AllocSpace( obj, pad );
- MapExp<sv::saveto>( obj, expr::ScalarExp( initv ) );
- return obj;
- }
-
- template<int dim>
- inline void FreeSpace(Tensor<cpu,dim> &obj){
- sse2::AlignedFree( obj.dptr );
- obj.dptr = NULL;
- }
-
- template<int dim>
- inline void Copy(Tensor<cpu,dim> _dst, const Tensor<cpu,dim> &_src ){
- utils::Assert( _dst.shape == _src.shape, "Copy:shape mismatch" );
- Tensor<cpu,2> dst = _dst.FlatTo2D();
- Tensor<cpu,2> src = _src.FlatTo2D();
- for (index_t y = 0; y < dst.shape[1]; ++y ) {
- memcpy( dst[y].dptr, src[y].dptr, sizeof(real_t) * dst.shape[0] );
- }
- }
-
- template<typename Saver, typename E, int dim>
- inline void MapPlan(Tensor<cpu,dim> _dst, const expr::Plan<E> &plan){
- Tensor<cpu,2> dst = _dst.FlatTo2D();
- for (index_t y = 0; y < dst.shape[1]; ++y ) {
- for (index_t x = 0; x < dst.shape[0]; ++x ) {
- // trust your compiler! -_- they will optimize it
- Saver::Save(dst[y][x], plan.Eval( y, x ) );
- }
- }
- }
-
- // code to handle SSE optimization
- template<bool pass_check,typename Saver, int dim, typename E, int etype>
- struct MapExpCPUEngine;
- template<typename SV, int dim, typename E, int etype>
- struct MapExpCPUEngine<false,SV,dim,E,etype>{
- inline static void Map(Tensor<cpu,dim> dst, const expr::Exp<E,etype> &exp ){
- MapPlan<SV>( dst, MakePlan( exp.self() ) );
- }
- };
-
- #if MSHADOW_USE_SSE
- template<typename SV, int dim, typename E, int etype>
- struct MapExpCPUEngine<true,SV,dim,E,etype>{
- inline static void Map(Tensor<cpu,dim> dst, const expr::Exp<E,etype> &exp ){
- using namespace expr;
- if( SSEAlignCheck<dim,E>::Check( exp.self() ) && SSEAlignCheck< dim,Tensor<cpu,dim> >::Check(dst) ){
- MapSSEPlan<SV>( dst, MakeSSEPlan( exp.self() ) );
- }else{
- MapPlan<SV>( dst, MakePlan( exp.self() ) );
- }
- }
- };
- #endif
-
- template<typename Saver, int dim, typename E, int etype>
- inline void MapExp(Tensor<cpu,dim> dst, const expr::Exp<E,etype> &exp ){
- using namespace expr;
- TypeCheckPass< TypeCheck<cpu,dim,E>::kMapPass >::Error_All_Tensor_in_Exp_Must_Have_Same_Type();
- Shape<dim> eshape = ShapeCheck<dim,E>::Check( exp.self() );
- utils::Assert( eshape[0] == 0 || eshape == dst.shape, "Assignment: Shape of Tensors in expression is not consistent with target" );
- #if MSHADOW_USE_SSE
- MapExpCPUEngine< SSECheck<E>::kPass,Saver,dim,E,etype >::Map( dst, exp );
- #else
- MapExpCPUEngine< false,Saver,dim,E,etype >::Map( dst, exp );
- #endif
- }
-
- template<typename Saver, typename Reducer, typename E, int etype>
- inline void MapReduceKeepLowest( Tensor<cpu,1> dst, const expr::Exp<E,etype> &exp, real_t scale ){
- using namespace expr;
- TypeCheckPass< TypeCheck<cpu,1,E>::kRedPass >::Error_TypeCheck_Not_Pass_For_Reduce_Exp();
- Shape<2> eshape = ShapeCheck< ExpInfo<E>::kDim, E >::Check( exp.self() ).FlatTo2D();
-
- utils::Assert( eshape[0] == dst.shape[0], "reduction dimension do not match" );
- utils::Assert( eshape[1] != 0, "can not reduce over empty tensor" );
- // execution
- expr::Plan<E> plan = MakePlan( exp.self() );
- for( index_t x = 0; x < eshape[0]; ++x ){
- real_t res = plan.Eval( 0, x );
- for( index_t y = 1; y < eshape[1]; ++y ){
- Reducer::Reduce( res, plan.Eval( y, x ) );
- }
- Saver::Save( dst[x], res*scale );
- }
- }
-
- template<typename Saver, typename Reducer, int dimkeep, typename E, int etype>
- inline void MapReduceKeepHighDim( Tensor<cpu,1> dst, const expr::Exp<E,etype> &exp, real_t scale ){
- using namespace expr;
- TypeCheckPass< TypeCheck<cpu,dimkeep,E>::kRedPass >::Error_TypeCheck_Not_Pass_For_Reduce_Exp();
- typedef Shape< ExpInfo<E>::kDim > EShape;
- EShape eshape = ShapeCheck< ExpInfo<E>::kDim, E >::Check( exp.self() );
- utils::Assert( eshape[dimkeep] == dst.shape[0], "reduction dimension do not match" );
- // use equivalent form
- Shape<4> pshape = Shape4( eshape.ProdShape(dimkeep+1,EShape::kMaxShape), eshape[dimkeep],
- eshape.ProdShape(1,dimkeep), eshape[0] );
-
- // execution
- expr::Plan<E> plan = MakePlan( exp.self() );
-
- for( index_t c = 0; c < pshape[2]; ++c ){
- real_t res = Reducer::kInitV;
- for( index_t n = 0; n < pshape[3]; ++n ){
- real_t tres = Reducer::kInitV;
- for( index_t y = 0; y < pshape[1]; ++y ){
- for( index_t x = 0; x < pshape[0]; ++x ){
- Reducer::Reduce( tres, plan.Eval( (n*pshape[2] + c) * pshape[1] + y, x ) );
- }
- }
- Reducer::Reduce( res, tres );
- }
- Saver::Save( dst[c], res*scale );
- }
- }
-
- /*! \brief numerically stable softmax: dst[x] = exp(energy[x] - max(energy)) / sum; dst must be non-empty and energy at least as long as dst */
- inline void Softmax( Tensor<cpu,1> dst, const Tensor<cpu,1>& energy ){
- real_t mmax = energy[0];
- // integral index: a real_t counter is converted on every subscript and, in single precision, stops advancing at 2^24
- for( index_t x = 1; x < dst.shape[0]; ++x )
- if( mmax < energy[x] ) mmax = energy[x];
- real_t sum = 0.0f;
- for( index_t x = 0; x < dst.shape[0]; ++x ){
- dst[x] = std::exp( energy[x] - mmax );
- sum += dst[x];
- }
- for( index_t x = 0; x < dst.shape[0]; ++x ) dst[x] /= sum;
- }
- inline void Softmax( Tensor<cpu,2> dst, const Tensor<cpu,2>& energy ){
- utils::Assert( dst.shape == energy.shape, "Softmax: shape mismatch" );
- for( index_t y = 0; y < dst.shape[1]; ++y ){
- Softmax( dst[y], energy[y] );
- }
- }
-}; // namespace mshadow
-
-#endif // MSHADOW_TENSOR_CPU_INL_HPP
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dd1e4afa/include/mshadow/tensor_expr.h
----------------------------------------------------------------------
diff --git a/include/mshadow/tensor_expr.h b/include/mshadow/tensor_expr.h
deleted file mode 100644
index ac8fde7..0000000
--- a/include/mshadow/tensor_expr.h
+++ /dev/null
@@ -1,367 +0,0 @@
-#ifndef MSHADOW_TENSOR_EXPR_H
-#define MSHADOW_TENSOR_EXPR_H
-/*!
- * \file tensor_expr.h
- * \brief definitions of abstract expressions and expressions template
- * \author Tianqi Chen, Bing Xu
- */
-#include "tensor_base.h"
-
-namespace mshadow{
- /*!
- * \brief namespace for abstract expressions and expressions template,
- * have no dependency on tensor.h,
- * These data structures take no part in computations,
- * they are only used to define operations and represent expressions in a symbolic way
- */
- namespace expr{
-
- /*! \brief type of expressions; the values are combined with bitwise OR when map expressions are built, so kComplex (3) absorbs kMapper (1) */
- namespace type{
- /*! \brief this expression directly corresponds to a data class */
- const int kContainer = 0;
- /*! \brief this only contains element-wise vector operations */
- const int kMapper = 1;
- /*! \brief other cases, e.g. dot product */
- const int kComplex = 3;
- };
-
- /*!
- * \brief expression engine that actually interprets these expressions
- * this is a function template that needed to be implemented for specific expressions
- */
- template<typename Saver,typename Container>
- struct ExpEngine{
- template<typename EType>
- inline static void Eval( Container& dst, const EType &exp );
- };
-
- template<typename Container>
- class ContainerExp;
- class ScalarExp;
-
- /*!
- * \brief base class for expression
- * \tparam SubType inheriting class must put its own type into this parameter
- * \tparam exp_type expression type, see namespace type
- */
- template<typename SubType, int exp_type>
- struct Exp{
- public:
- /*! \return subtype instance of current class */
- inline const SubType& self( void ) const{
- return *static_cast<const SubType*>(this);
- }
- /*! \return reference of subtype instance of current class */
- inline SubType& refself( void ){
- return *static_cast<SubType*>(this);
- }
- };
-
- /*! \brief scalar expression */
- struct ScalarExp: public Exp<ScalarExp, type::kMapper>{
- /*! \brief scalar value */
- real_t scalar_;
- /*! \brief constructor */
- ScalarExp( real_t scalar ):scalar_(scalar){}
- };
-
- /*! \brief represent a transpose expression of a container */
- template<typename EType>
- struct TransposeExp: public Exp< TransposeExp<EType>, type::kComplex >{
- public:
- /*! \brief expression to be transposed */
- const EType &exp;
- /*! \brief constructor */
- TransposeExp( const EType &e ):exp(e){}
- /*! \brief transpose expression */
- inline const EType & T( void ) const{
- return exp;
- }
- };
-
- /*!
- * \brief base class of all variables, that can be assigned to values
- * \tparam Container the actual class of the data container, e.g. CTensor1D
- */
- template<typename Container>
- class ContainerExp: public Exp< Container, type::kContainer >{
- public:
- /*!
- *\brief transpose of a matrix
- *\return transpose of current expression
- */
- inline const TransposeExp<Container> T( void ) const{
- return TransposeExp<Container>( this->self() );
- }
- public:
- /*! \brief operator overload */
- inline Container &operator+=( real_t s ){
- ExpEngine<sv::plusto,Container>::Eval( this->refself(), ScalarExp(s) );
- return this->refself();
- }
- /*! \brief operator overload */
- inline Container &operator-=( real_t s ){
- ExpEngine<sv::minusto,Container>::Eval( this->refself(), ScalarExp(s) );
- return this->refself();
- }
- /*! \brief operator overload */
- inline Container &operator*=( real_t s ){
- ExpEngine<sv::multo,Container>::Eval( this->refself(), ScalarExp(s) );
- return this->refself();
- }
- /*! \brief operator overload */
- inline Container &operator/=( real_t s ){
- ExpEngine<sv::divto,Container>::Eval( this->refself(), ScalarExp(s) );
- return this->refself();
- }
- /*! \brief operator overload */
- inline Container &__assign( real_t s ){
- ExpEngine<sv::saveto,Container>::Eval( this->refself(), ScalarExp(s) );
- return this->refself();
- }
- public:
- /*! \brief implementation of operator=, note that we can not define container = container */
- template<typename E>
- inline Container &__assign( const Exp<E,type::kMapper> &exp ){
- ExpEngine<sv::saveto,Container>::Eval( this->refself(), exp.self() );
- return this->refself();
- }
- /*! \brief implementation of operator=, note that we can not define container = container */
- template<typename E>
- inline Container &__assign( const Exp<E,type::kComplex> &exp ){
- ExpEngine<sv::saveto,Container>::Eval( this->refself(), exp.self() );
- return this->refself();
- }
- /*! \brief implementation of operator+= */
- template<typename E,int etype>
- inline Container &operator+=( const Exp<E,etype> &exp ){
- ExpEngine<sv::plusto,Container>::Eval( this->refself(), exp.self() );
- return this->refself();
- }
- /*! \brief implementation of operator-= */
- template<typename E,int etype>
- inline Container &operator-=( const Exp<E,etype> &exp ){
- ExpEngine<sv::minusto,Container>::Eval( this->refself(), exp.self() );
- return this->refself();
- }
- /*! \brief implementation of operator*= */
- template<typename E,int etype>
- inline Container &operator*=( const Exp<E,etype> &exp ){
- ExpEngine<sv::multo,Container>::Eval( this->refself(), exp.self() );
- return this->refself();
- }
- /*! \brief implementation of operator/= */
- template<typename E,int etype>
- inline Container &operator/=( const Exp<E,etype> &exp ){
- ExpEngine<sv::divto,Container>::Eval( this->refself(), exp.self() );
- return this->refself();
- }
- };
- }; // namespace expr
-
- namespace expr{
- /*!
- * \brief matrix multiplication expression dot( lhs[.T], rhs[.T] )
- * \tparam TA type of lhs
- * \tparam TB type of rhs
- * \tparam ltrans whether lhs is transposed
- * \tparam rtrans whether rhs is transposed
- */
- template<typename TA,typename TB,bool ltrans,bool rtrans>
- struct DotExp: public Exp< DotExp<TA,TB,ltrans,rtrans>, type::kComplex >{
- /*! \brief left operand */
- const TA& lhs_;
- /*! \brief right operand */
- const TB& rhs_;
- /*! \brief scale over result */
- real_t scale_;
- /*! \brief constructor */
- DotExp( const TA &lhs, const TB &rhs, real_t scale )
- :lhs_(lhs),rhs_(rhs),scale_(scale){}
- };
-
- /*! \brief dot operator def */
- template<typename TA, typename TB>
- inline DotExp<TA,TB,false,false> dot( const ContainerExp<TA> &lhs, const ContainerExp<TB> &rhs ){
- return DotExp<TA,TB,false,false>( lhs.self(), rhs.self(), 1.0f );
- }
- /*! \brief dot operator def */
- template<typename TA, typename TB>
- inline DotExp<TA,TB,true,false> dot( const TransposeExp<TA> &lhs, const ContainerExp<TB> &rhs ){
- return DotExp<TA,TB,true,false>( lhs.exp, rhs.self(), 1.0f );
- }
- /*! \brief dot operator def */
- template<typename TA, typename TB>
- inline DotExp<TA,TB,false,true> dot( const ContainerExp<TA> &lhs, const TransposeExp<TB> &rhs ){
- return DotExp<TA,TB,false,true>( lhs.self(), rhs.exp, 1.0f );
- }
- /*! \brief dot operator def */
- template<typename TA, typename TB>
- inline DotExp<TA,TB,true,true> dot( const TransposeExp<TA> &lhs, const TransposeExp<TB> &rhs ){
- return DotExp<TA,TB,true,true>( lhs.exp, rhs.exp, 1.0f );
- }
- /*! \brief scale of dot operation */
- template<typename TA, typename TB, bool ltrans, bool rtrans >
- inline DotExp<TA,TB,ltrans,rtrans> operator*( const DotExp<TA,TB,ltrans,rtrans> &lhs, real_t rhs ){
- return DotExp<TA,TB,ltrans,rtrans>( lhs.lhs_, lhs.rhs_, lhs.scale_ * rhs );
- }
- /*! \brief scale of dot operation */
- template<typename TA, typename TB, bool ltrans, bool rtrans >
- inline DotExp<TA,TB,ltrans,rtrans> operator*( real_t lhs, const DotExp<TA,TB,ltrans,rtrans> &rhs ){
- return DotExp<TA,TB,ltrans,rtrans>( rhs.lhs_, rhs.rhs_, rhs.scale_ * lhs );
- }
- }; // namespace expr
-
- namespace expr{
- /*!
- * \brief binary map expression lhs [op] rhs
- * \tparam OP operator
- * \tparam TA type of lhs
- * \tparam TB type of rhs
- * \tparam etype expression type, sa namespace::type
- */
- template<typename OP, typename TA, typename TB, int etype >
- struct BinaryMapExp: public Exp< BinaryMapExp<OP,TA,TB,etype>, etype >{
- /*! \brief left operand */
- const TA& lhs_;
- /*! \brief right operand */
- const TB& rhs_;
- /*! \brief constructor */
- BinaryMapExp( const TA &lhs, const TB &rhs )
- :lhs_(lhs), rhs_(rhs){}
- };
-
- /*! \brief make expression */
- template<typename OP,typename TA, typename TB, int ta, int tb>
- inline BinaryMapExp<OP,TA,TB, (ta|tb|type::kMapper) > MakeExp( const Exp<TA,ta> &lhs, const Exp<TB,tb> &rhs ){
- return BinaryMapExp<OP,TA,TB, (ta|tb|type::kMapper) >( lhs.self(), rhs.self() );
- }
-
- /*!
- * \brief short hand for MakeExp, usage F<op>(lhs, rhs). create a binary operation expression
- * \param lhs left operand
- * \param rhs right operand
- * \tparam OP binary operator
- * \tparam TA lhs expression
- * \tparam ta lhs expression type
- * \tparam TB rhs expression
- * \tparam tb rhs expression type
- * \sa mshadow::op
- */
- template<typename OP,typename TA, typename TB, int ta, int tb>
- inline BinaryMapExp<OP,TA,TB, (ta|tb|type::kMapper) > F( const Exp<TA,ta> &lhs, const Exp<TB,tb> &rhs ){
- return MakeExp<OP>( lhs, rhs );
- }
- /*! \brief operator overload for const */
- template<typename OP,typename TA, int ta>
- inline BinaryMapExp<OP,TA,ScalarExp, (ta|type::kMapper) > F( const Exp<TA,ta> &lhs, const ScalarExp &rhs ){
- return MakeExp<OP>( lhs, rhs );
- }
- /*! \brief operator overload for const */
- template<typename OP,typename TB, int tb>
- inline BinaryMapExp<OP,ScalarExp,TB, (tb|type::kMapper) > F( const ScalarExp &lhs, const Exp<TB,tb>& rhs ){
- return MakeExp<OP>( lhs, rhs );
- }
-
- // operator rules
- /*! \brief operator overload */
- template<typename TA, typename TB, int ta, int tb>
- inline BinaryMapExp<op::plus,TA,TB, (ta|tb|type::kMapper) > operator+( const Exp<TA,ta> &lhs, const Exp<TB,tb> &rhs ){
- return MakeExp<op::plus>( lhs, rhs );
- }
- /*! \brief operator overload */
- template<typename TA, typename TB, int ta, int tb>
- inline BinaryMapExp<op::minus,TA,TB, (ta|tb|type::kMapper) > operator-( const Exp<TA,ta> &lhs, const Exp<TB,tb> &rhs ){
- return MakeExp<op::minus>( lhs, rhs );
- }
- /*! \brief operator overload */
- template<typename TA, typename TB, int ta, int tb>
- inline BinaryMapExp<op::mul,TA,TB, (ta|tb|type::kMapper) > operator*( const Exp<TA,ta> &lhs, const Exp<TB,tb> &rhs ){
- return MakeExp<op::mul>( lhs, rhs );
- }
- /*! \brief operator overload */
- template<typename TA, typename TB, int ta, int tb>
- inline BinaryMapExp<op::div,TA,TB, (ta|tb|type::kMapper) > operator/( const Exp<TA,ta> &lhs, const Exp<TB,tb> &rhs ){
- return MakeExp<op::div>( lhs, rhs );
- }
- // constant operators
- /*! \brief operator overload */
- template<typename TA, int ta>
- inline BinaryMapExp<op::plus, TA, ScalarExp, (ta|type::kMapper) > operator+( const Exp<TA,ta>& lhs, const ScalarExp& rhs ){
- return MakeExp<op::plus>( lhs, rhs );
- }
- /*! \brief operator overload */
- template<typename TA, int ta>
- inline BinaryMapExp<op::minus, TA, ScalarExp, (ta|type::kMapper) > operator-( const Exp<TA,ta>& lhs, const ScalarExp& rhs ){
- return MakeExp<op::minus>( lhs, rhs );
- }
- /*! \brief operator overload */
- template<typename TA, int ta>
- inline BinaryMapExp<op::mul, TA, ScalarExp, (ta|type::kMapper) > operator*( const Exp<TA,ta>& lhs, const ScalarExp& rhs ){
- return MakeExp<op::mul>( lhs, rhs );
- }
- /*! \brief operator overload */
- template<typename TA, int ta>
- inline BinaryMapExp<op::div, TA, ScalarExp, (ta|type::kMapper) > operator/( const Exp<TA,ta>& lhs, const ScalarExp& rhs ){
- return MakeExp<op::div>( lhs, rhs );
- }
- // constant operators 2
- /*! \brief operator overload */
- template<typename TB, int tb>
- inline BinaryMapExp<op::plus, ScalarExp, TB, (tb|type::kMapper) > operator+( const ScalarExp& lhs, const Exp<TB,tb>& rhs ){
- return MakeExp<op::plus>( lhs, rhs );
- }
- /*! \brief operator overload */
- template<typename TB, int tb>
- inline BinaryMapExp<op::minus, ScalarExp, TB, (tb|type::kMapper) > operator-( const ScalarExp& lhs, const Exp<TB,tb>& rhs ){
- return MakeExp<op::minus>( lhs, rhs );
- }
- /*! \brief operator overload */
- template<typename TB, int tb>
- inline BinaryMapExp<op::mul, ScalarExp, TB, (tb|type::kMapper) > operator*( const ScalarExp& lhs, const Exp<TB,tb>& rhs ){
- return MakeExp<op::mul>( lhs, rhs );
- }
- /*! \brief operator overload */
- template<typename TB, int tb>
- inline BinaryMapExp<op::div, ScalarExp, TB, (tb|type::kMapper) > operator/( const ScalarExp& lhs, const Exp<TB,tb>& rhs ){
- return MakeExp<op::div>( lhs, rhs );
- }
- };
-
- namespace expr{
- /*!
- * \brief unary map expression op(src)
- * \tparam OP operator
- * \tparam TA type of src
- * \tparam etype expression type, sa namespace::type
- */
- template<typename OP, typename TA, int etype >
- struct UnaryMapExp: public Exp< UnaryMapExp<OP,TA,etype>, etype >{
- /*! \brief source expression */
- const TA& src_;
- /*! \brief constructor */
- UnaryMapExp( const TA &src ):src_(src){}
- };
-
- /*! \brief make expression */
- template<typename OP,typename TA, int ta>
- inline UnaryMapExp<OP,TA,(ta|type::kMapper) > MakeExp( const Exp<TA,ta> &src ){
- return UnaryMapExp<OP,TA, (ta|type::kMapper) >( src.self() );
- }
-
- /*!
- * \brief short hand for MakeExp, usage F<op>(src), create a unary operation expression
- * \param src source expression
- * \tparam OP operator
- * \tparam TA source expression
- * \tparam ta source expression type
- * \sa mshadow::op
- */
- template<typename OP,typename TA, int ta>
- inline UnaryMapExp<OP,TA,(ta|type::kMapper) > F( const Exp<TA,ta> &src ){
- return MakeExp<OP>(src);
- }
- };
-};
-#endif