You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@singa.apache.org by zh...@apache.org on 2016/06/13 13:20:29 UTC
[36/50] [abbrv] incubator-singa git commit: SINGA-182 Clean math
function APIs and implementations
SINGA-182 Clean math function APIs and implementations
Clean tensor.h/.cc and tensor_math.h, tensor_math_cpp.h:
re-order the functions by (type, name), where type is a) element-wise
function b) matrix function c) random function d) blas function
Implement GEMV using cblas and cublas.
Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/564c88ad
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/564c88ad
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/564c88ad
Branch: refs/heads/master
Commit: 564c88ad95e976e6067198c832f4fcd9a8878cd7
Parents: 07c49da
Author: wangwei <wa...@gmail.com>
Authored: Fri Jun 10 23:12:09 2016 +0800
Committer: Wei Wang <wa...@comp.nus.edu.sg>
Committed: Sun Jun 12 12:15:11 2016 +0800
----------------------------------------------------------------------
include/singa/core/tensor.h | 396 +++++++++---------
src/core/tensor/tensor.cc | 688 ++++++++++++++++----------------
src/core/tensor/tensor_math.h | 336 ++++++++--------
src/core/tensor/tensor_math_cpp.h | 640 +++++++++++++++--------------
src/core/tensor/tensor_math_cuda.h | 158 ++++----
test/singa/test_tensor_math.cc | 15 +-
6 files changed, 1131 insertions(+), 1102 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/564c88ad/include/singa/core/tensor.h
----------------------------------------------------------------------
diff --git a/include/singa/core/tensor.h b/include/singa/core/tensor.h
index bb8d7f8..82bbe81 100644
--- a/include/singa/core/tensor.h
+++ b/include/singa/core/tensor.h
@@ -32,17 +32,6 @@ using std::tuple;
namespace singa {
typedef vector<size_t> Shape;
-typedef Shape::iterator ShapeIter;
-inline size_t Product(const Shape &shape, int start = 0, size_t len = 0) {
- if (len == 0)
- len = shape.size();
- CHECK_LE(len, shape.size());
- size_t v = 1;
- for (unsigned int i = start; i < len; i++)
- v *= shape[i];
- return v;
-}
-
/// hardcode the width of types defined in DataType
const size_t kDataWidth[] = {sizeof(float), sizeof(float) / 2, sizeof(int),
sizeof(char), sizeof(double)};
@@ -65,10 +54,10 @@ class Tensor {
public:
~Tensor();
Tensor();
- explicit Tensor(Shape &&shape, DataType dtype = kFloat32);
- explicit Tensor(const Shape &shape, DataType dtype = kFloat32);
- Tensor(Shape &&shape, Device *dev, DataType dtype = kFloat32);
- Tensor(const Shape &shape, Device *dev, DataType dtype = kFloat32);
+ explicit Tensor(Shape &&shape, const DataType dtype = kFloat32);
+ explicit Tensor(const Shape &shape, const DataType dtype = kFloat32);
+ Tensor(Shape &&shape, Device *dev, const DataType dtype = kFloat32);
+ Tensor(const Shape &shape, Device *dev, const DataType dtype = kFloat32);
/// Copy Tensor to share the internal data. No deep copy.
Tensor(const Tensor &from);
@@ -82,10 +71,10 @@ class Tensor {
Device *device() const { return device_; }
- /// Return immutable Tensor values with given type.
- template <typename DType>
- DType data() const {
- return static_cast<DType>(blob()->data());
+ /// return immutable Tensor values with given type.
+ template <typename SType>
+ SType data() const {
+ return static_cast<SType>(blob()->data());
}
/// data type, including kFloat16, kFloat32, kInt
@@ -93,7 +82,7 @@ class Tensor {
const Shape &shape() const { return shape_; }
- const size_t shape(size_t idx) const {
+ const size_t shape(const size_t idx) const {
CHECK_LT(idx, shape_.size());
return shape_.at(idx);
}
@@ -102,13 +91,13 @@ class Tensor {
bool transpose() const { return transpose_; }
- /// Return number of total elements
+ /// return number of total elements
size_t Size() const {
CHECK_EQ(blob_->size() % SizeOf(data_type_), 0u);
return blob_->size() / SizeOf(data_type_);
}
- /// Return memory size (i.e., Bytes)
+ /// return memory size (i.e., Bytes)
size_t MemSize() const { return blob_->size(); }
/// Reset the tensor shape, it may reallocate blob, if MemSize() changes.
@@ -121,7 +110,7 @@ class Tensor {
void ResetLike(const Tensor &t);
/// Reset the data type, it would reallocate blob if type changes.
- void AsType(DataType type);
+ void AsType(const DataType type);
/// Reset the device.
/// If the target device is a diff device, then do deep data copy.
@@ -135,14 +124,14 @@ class Tensor {
void SetValue(const SType x);
/// For init the tensor values, copy 'num' elements.
- template <typename DType>
- void CopyDataFromHostPtr(const DType *src, size_t num);
+ template <typename SType>
+ void CopyDataFromHostPtr(const SType *src, const size_t num);
/// Copy data from another Tensor which may be on a diff device.
/// Meta data would not be copied!
void CopyData(const Tensor &other);
- /// Return an exactly the same Tensor with data been deep copied.
+ /// return an exactly the same Tensor with data been deep copied.
Tensor Clone() const;
// Tensor operations
@@ -152,42 +141,37 @@ class Tensor {
Tensor T() const;
/// Copy the meta info with data blob shared.
- Tensor &operator=(const Tensor &t);
+ Tensor &operator=(const Tensor &in);
/// Copy the meta info with data blob shared.
- Tensor &operator=(Tensor &&t);
+ Tensor &operator=(Tensor &&in);
- Tensor &operator+=(const Tensor &t);
- // void operator+=(Tensor&& t);
- Tensor &operator-=(const Tensor &t);
- // void operator-=(Tensor&& t);
- Tensor &operator*=(const Tensor &t);
- // void operator*=(Tensor&& t);
- Tensor &operator/=(const Tensor &t);
- // void operator/=(Tensor&& t);
+ Tensor &operator+=(const Tensor &in);
+ // void operator+=(Tensor&& in);
+ Tensor &operator-=(const Tensor &in);
+ // void operator-=(Tensor&& in);
+ Tensor &operator*=(const Tensor &in);
+ // void operator*=(Tensor&& in);
+ Tensor &operator/=(const Tensor &in);
+ // void operator/=(Tensor&& in);
// Scalar operations.
- /// T is a scalar type
- template <typename DType>
- Tensor &operator+=(DType x);
-
- /// T is a scalar type
- template <typename DType>
- Tensor &operator-=(const DType x);
+ /// SType is a scalar type
+ template <typename SType>
+ Tensor &operator+=(const SType x);
- /// T is a scalar type
- template <typename DType>
- Tensor &operator*=(const DType x);
+ /// SType is a scalar type
+ template <typename SType>
+ Tensor &operator-=(const SType x);
- /// T is a scalar type
- template <typename DType>
- Tensor &operator/=(const DType x);
+ /// SType is a scalar type
+ template <typename SType>
+ Tensor &operator*=(const SType x);
- /// save Tensor into a proto msg
- // void ToProto(TensorProto* t);
- /// load Tensor from proto msg
- // void FromProto(const TensorProto& t);
+ /// SType is a scalar type
+ template <typename SType>
+ Tensor &operator/=(const SType x);
protected:
bool transpose_ = false;
@@ -196,14 +180,29 @@ class Tensor {
/// Note: blob_ is allocated in lazy manner to avoid frequent malloc/free.
/// If you want to get an allocated Blob, use blob() instead of blob_.
Blob *blob_ = nullptr;
- Shape shape_;
+ Shape shape_ = {};
};
+typedef Shape::iterator ShapeIter;
+inline size_t Product(const Shape &shape, int start = 0, size_t len = 0) {
+ if (len == 0) len = shape.size();
+ CHECK_LE(len, shape.size());
+ size_t v = 1;
+ for (unsigned int i = start; i < len; i++) v *= shape[i];
+ return v;
+}
+
inline void CheckDataTypeAndLang(const Tensor &in1, const Tensor &in2) {
CHECK_EQ(in1.data_type(), in2.data_type());
CHECK_EQ(in1.device()->lang(), in2.device()->lang());
}
+template <typename FromType, typename ToType>
+ToType TypeCast(const FromType &x) {
+ // TODO(wangwei) cast fp16; prevent some casts, e.g., float to char
+ return static_cast<ToType>(x);
+}
+
Tensor Reshape(const Tensor &in, const Shape &s);
Tensor Reshape(const Tensor &in, Shape &&s);
@@ -212,192 +211,171 @@ Tensor Reshape(const Tensor &in, Shape &&s);
/// Copy 'num' elements of src to dst.
/// The first 'src_offset' ('dst_offset') elements will be skipped.
-void CopyDataToFrom(Tensor *dst, const Tensor &src, size_t num,
- size_t src_offset = 0, size_t dst_offset = 0);
-
-// ==================Simple Linear Algebra Operations=========================
-Tensor Abs(const Tensor &t);
-Tensor Exp(const Tensor &t);
-Tensor Log(const Tensor &t);
-Tensor ReLU(const Tensor &t);
-Tensor Sigmoid(const Tensor &t);
-Tensor Sign(const Tensor &t);
-Tensor Sqrt(const Tensor &t);
-Tensor Square(const Tensor &t);
-Tensor Tanh(const Tensor &t);
+void CopyDataToFrom(Tensor *dst, const Tensor &src, const size_t num,
+ const size_t src_offset = 0, const size_t dst_offset = 0);
+
+// =============Element-wise operations====================================
+Tensor Abs(const Tensor &in);
+Tensor Exp(const Tensor &in);
+Tensor Log(const Tensor &in);
+Tensor ReLU(const Tensor &in);
+Tensor Sigmoid(const Tensor &in);
+Tensor Sign(const Tensor &in);
+Tensor Sqrt(const Tensor &in);
+Tensor Square(const Tensor &in);
+Tensor Tanh(const Tensor &in);
+
+/// Element-wise operation, out[i]=in[i]^x
+template <typename SType>
+Tensor Pow(const Tensor &in, const SType x);
+/// Element-wise operation, out[i]=in[i]^x
+template <typename SType>
+void Pow(const Tensor &in, const SType x, Tensor *out);
+/// Element-wise operation, out[i]=base[i]^exp[i]
+Tensor Pow(const Tensor &base, const Tensor &exp);
+/// Element-wise operation, out[i]=base[i]^exp[i]
+void Pow(const Tensor &base, const Tensor &exp, Tensor *out);
+/// Element-wise operation, out[i]= (in[i] < x) ? 1.f : 0.f
template <typename SType>
-SType Sum(const Tensor &t);
-/// Sum elements in the Tensor, currently only support vector and matrix.
-/// if 'axis' is 0, sum all rows into a single row
-/// if 'axis' is 1, sum all columns into a single column
-/// TODO(wangwei) support arbitrary Tensor like numpy.sum
-Tensor Sum(const Tensor &t, int axis);
+Tensor operator<(const Tensor &in, const SType x);
+template <typename SType>
+void LT(const Tensor &in, const SType x, Tensor *out);
-/// Average elements in the Tensor, currently only support vector and matrix.
-/// if 'axis' is 0, average all rows into a single row
-/// if 'axis' is 1, average all columns into a single column
-/// TODO(wangwei) support arbitrary Tensor like numpy.average
-Tensor Average(const Tensor &t, int axis);
-/// Regarding the internal data as 2d, with shape_[0]*...*shape_[axis-1] rows,
-/// and shape_[axis]*...*shape_[nDim()] columns.
-/// and do softmax along each row.
-Tensor SoftMax(const Tensor &t, int axis = 0);
-void SoftMax(const Tensor &t, int axis, Tensor *ret);
+/// Element-wise operation, out[i]= (in[i] <= x) ? 1.f : 0.f
+template <typename SType>
+Tensor operator<=(const Tensor &in, const SType x);
+template <typename SType>
+void LE(const Tensor &in, const SType x, Tensor *out);
-/// Regarding the internal data as 2d, with shape_[0]*...*shape_[axis] rows,
-/// and shape_[axis+1]*...*shape_[nDim()] columns.
-/// and do softmax along each row.
-// Tensor Softmax(const Tensor& t, int axis = -1);
-// void Softmax(const Tensor& t, Tensor* ret, int axis = -1);
-
-/// Element-wise operation, ret[i]= (t[i] < x) ? 1.f : 0.f
-template <typename DType>
-Tensor operator<(const Tensor &t, const DType x);
-template <typename DType>
-void LT(const Tensor &t, DType x, Tensor *ret);
-
-/// Element-wise operation, ret[i]= (t[i] <= x) ? 1.f : 0.f
-template <typename DType>
-Tensor operator<=(const Tensor &t, const DType x);
-template <typename DType>
-void LE(const Tensor &t, DType x, Tensor *ret);
-
-/// Element-wise operation, ret[i]= (t[i] > x) ? 1.f : 0.f
-template <typename DType>
-Tensor operator>(const Tensor &t, const DType x);
-template <typename DType>
-void GT(const Tensor &t, DType x, Tensor *ret);
-
-/// Element-wise operation, ret[i]= (t[i] >= x) ? 1.f : 0.f
-template <typename DType>
-Tensor operator>=(const Tensor &t, const DType x);
-template <typename DType>
-void GE(const Tensor &t, DType x, Tensor *ret);
-
-/// Element-wise opeartion, ret[i]=t[i]^x
-template <typename DType>
-Tensor Pow(const Tensor &t, DType x);
-/// Element-wise opeartion, ret[i]=t[i]^x
-template <typename DType>
-void Pow(const Tensor &t, DType x, Tensor *ret);
-/// Element-wise opeartion, ret[i]=baes[i]^exp[i]
-Tensor Pow(const Tensor &base, Tensor exp);
-/// Element-wise opeartion, ret[i]=baes[i]^exp[i]
-void Pow(const Tensor &base, const Tensor &exp, Tensor *ret);
+/// Element-wise operation, out[i]= (in[i] > x) ? 1.f : 0.f
+template <typename SType>
+Tensor operator>(const Tensor &in, const SType x);
+template <typename SType>
+void GT(const Tensor &in, const SType x, Tensor *out);
+
+/// Element-wise operation, out[i]= (in[i] >= x) ? 1.f : 0.f
+template <typename SType>
+Tensor operator>=(const Tensor &in, const SType x);
+template <typename SType>
+void GE(const Tensor &in, const SType x, Tensor *out);
Tensor operator+(const Tensor &lhs, const Tensor &rhs);
-void Add(const Tensor &lhs, const Tensor &rhs, Tensor *ret);
+void Add(const Tensor &lhs, const Tensor &rhs, Tensor *out);
Tensor operator-(const Tensor &lhs, const Tensor &rhs);
-void Sub(const Tensor &lhs, const Tensor &rhs, Tensor *ret);
+void Sub(const Tensor &lhs, const Tensor &rhs, Tensor *out);
Tensor operator*(const Tensor &lhs, const Tensor &rhs);
-void EltwiseMult(const Tensor &lhs, const Tensor &rhs, Tensor *ret);
+void EltwiseMult(const Tensor &lhs, const Tensor &rhs, Tensor *out);
Tensor operator/(const Tensor &lhs, const Tensor &rhs);
-void Div(const Tensor &lhs, const Tensor &rhs, Tensor *ret);
+void Div(const Tensor &lhs, const Tensor &rhs, Tensor *out);
-template <typename DType>
-Tensor operator+(const Tensor &t, DType x);
-template <typename DType>
-void Add(const Tensor &t, DType x, Tensor *ret);
-
-template <typename DType>
-Tensor operator-(const Tensor &t, DType x);
-template <typename DType>
-void Sub(const Tensor &t, DType x, Tensor *ret);
-
-template <typename DType>
-Tensor operator*(const Tensor &t, DType x);
-template <typename DType>
-void EltwiseMult(const Tensor &t, DType x, Tensor *ret);
-
-template <typename DType>
-Tensor operator/(const Tensor &t, DType x);
-template <typename DType>
-void Div(const Tensor &t, DType x, Tensor *ret);
+template <typename SType>
+Tensor operator+(const Tensor &in, const SType x);
+template <typename SType>
+void Add(const Tensor &in, const SType x, Tensor *out);
-// ================Blas operations============================================
-// We fix the scalar argument type to be float.
+template <typename SType>
+Tensor operator-(const Tensor &in, const SType x);
+template <typename SType>
+void Sub(const Tensor &in, const SType x, Tensor *out);
-// ===== Level 1
-// TODO(wangwei) make amax/amin/asum a member function of tensor
-// void Amax(Tensor, Context* ctx); Get the index of the max value in a vector
-// void Asum(Tensor Context* ctx);
+template <typename SType>
+Tensor operator*(const Tensor &in, const SType x);
+template <typename SType>
+void EltwiseMult(const Tensor &in, const SType x, Tensor *out);
-// template <typename DType>
-// void Axpy(DType x, const Blob& t, Blob* ret, Context* ctx);
+/// For each element e of Tensor 'in', compute e / x
+template <typename SType>
+Tensor operator/(const Tensor &in, const SType x);
+/// For each element e of Tensor 'in', compute e / x into out
+template <typename SType>
+void Div(const Tensor &in, const SType x, Tensor *out);
-/// Do matrix vector multipication or matrix matrix multiplication depdending
-/// on the Tensor shape. result = A * B
-Tensor Mult(const Tensor &A, const Tensor &B);
-/// Do matrix vector multipication or matrix matrix multiplication depdending
-/// on the Tensor shape. C = A * B
-void Mult(const Tensor &A, const Tensor &B, Tensor *C);
+/// For each element e of Tensor 'in', compute x/e
+template <typename SType>
+Tensor Div(const SType x, const Tensor &in);
+/// For each element e of Tensor 'in', compute x/e into 'out'
+template <typename SType>
+void Div(const SType x, const Tensor &in, Tensor *out);
-/// Do matrix vector multipication or matrix matrix multiplication depdending
-/// on the Tensor shape. ret = alpha lhs * rhs + beta * ret
-void Mult(const float alpha, const Tensor &lhs, const Tensor &rhs,
- const float beta, Tensor *C);
+template <typename SType>
+SType Sum(const Tensor &in);
-// ================Random operations==========================================
-/// For each element x set x = 1 if random() < p; otherwise x = 1.
-void Bernoulli(float p, Tensor *t);
-/// Fill in Tensor 't' following uniform distribution.
-void Uniform(float low, float high, Tensor *t);
-/// Fill in Tensor 't' following Gaussian distribution.
-void Gaussian(float mean, float std, Tensor *t);
+// ============Matrix (row/column) operations==================================
+/// Average elements in the Tensor, currently only support vector and matrix.
+/// if 'axis' is 0, average all rows into a single row
+/// if 'axis' is 1, average all columns into a single column
+/// TODO(wangwei) support arbitrary Tensor like numpy.average
+Tensor Average(const Tensor &in, const int axis);
+/// Sum elements in the Tensor, currently only support vector and matrix.
+/// if 'axis' is 0, sum all rows into a single row
+/// if 'axis' is 1, sum all columns into a single column
+/// TODO(wangwei) support arbitrary Tensor like numpy.sum
+Tensor Sum(const Tensor &in, const int axis);
+/// Regarding the internal data as 2d, with shape_[0]*...*shape_[axis-1] rows,
+/// and shape_[axis]*...*shape_[nDim()] columns.
+/// and do softmax along each row.
+Tensor SoftMax(const Tensor &in, const int axis = 0);
+void SoftMax(const Tensor &in, const int axis, Tensor *out);
-// follow the consistency guide
-// https://issues.apache.org/jira/browse/SINGA-182
-// ============Matrix vector operations=======================================
/// Add column 'v' with each column of matrix M
void AddColumn(const Tensor &v, Tensor *M);
-void AddColumn(const float alpha, const float beta, const Tensor &v,
+/// For each column 'c' of matrix out, do c=alpha*v + beta*c
+template <typename SType>
+void AddColumn(const SType alpha, const SType beta, const Tensor &v,
Tensor *out);
-/// Sub column 'v' by each column of matrix M
-void SubColumn(const Tensor &v, Tensor *M);
-/// Multiply column 'v' and each column of matrix M; write results into 'out'
-void MultColumn(const Tensor &v, Tensor *M);
-/// Divide column 'v' by each column of matrix M; write results into 'out'
-void DivColumn(const Tensor &v, Tensor *M);
-
/// Add row 'v' with each row of matrix M; write results into 'out'
void AddRow(const Tensor &v, Tensor *out);
-void AddRow(const float alpha, const float beta, const Tensor &v, Tensor *M);
-/// Sub row 'v' by each row of matrix M; write results into 'out'
-void SubRow(const Tensor &v, Tensor *M);
-/// Multiply row 'v' with each row of matrix M; write results into 'out'
-void MultRow(const Tensor &v, Tensor *M);
+/// For each row 'r' of matrix M, do r=alpha*v + beta*r
+template <typename SType>
+void AddRow(const SType alpha, const SType beta, const Tensor &v, Tensor *M);
+/// Divide column 'v' by each column of matrix M; write results into 'out'
+void DivColumn(const Tensor &v, Tensor *M);
/// Divide row 'v' by each row of matrix M; write results into 'out'
void DivRow(const Tensor &v, Tensor *M);
-
-/// Sum all rows of matrix M into a single row as 'out'
-void SumRows(const Tensor &M, Tensor *out);
+/// Multiply column 'v' and each column of matrix M; write results into 'out'
+void MultColumn(const Tensor &v, Tensor *M);
+/// Multiply row 'v' with each row of matrix M; write results into 'out'
+void MultRow(const Tensor &v, Tensor *M);
+/// Sub column 'v' by each column of matrix M
+void SubColumn(const Tensor &v, Tensor *M);
+/// Sub row 'v' by each row of matrix M; write results into 'out'
+void SubRow(const Tensor &v, Tensor *M);
/// Sum all columns of matrix M into a single column as 'out'
void SumColumns(const Tensor &M, Tensor *out);
+/// Sum all rows of matrix M into a single row as 'out'
+void SumRows(const Tensor &M, Tensor *out);
-/// For each element x of Tensor 'in', compute alpha/x
+// ================Random operations==========================================
+/// For each element x set x = 1 if random() < p; otherwise x = 0.
template <typename SType>
-Tensor Div(const SType alpha, const Tensor &in);
+void Bernoulli(const SType p, Tensor *out);
+/// Fill in Tensor 'out' following Gaussian distribution.
+template <typename SType>
+void Gaussian(const SType mean, const SType std, Tensor *out);
+/// Fill in Tensor 'out' following uniform distribution.
+template <typename SType>
+void Uniform(const SType low, const SType high, Tensor *out);
-/// For each element x of Tensor 'in', compute alpha/x into 'out'
+// ================Blas operations============================================
+// TODO(wangwei) make amax/amin/asum a member function of tensor
+
+/// out = alpha*in + out
template <typename SType>
-void Div(const SType alpha, const Tensor &in, Tensor *out);
-
-/*
-/// Multiply each column of the lhs matrix with the rhs column
-Tensor MultColumn(const Tensor &lhs, const Tensor &rhs);
-void MultColumn(const Tensor &lhs, const Tensor &rhs, Tensor *ret);
-/// Multiply each row of the lhs matrix with the rhs row
-Tensor MultRow(const Tensor &lhs, const Tensor &rhs);
-void MultRow(const Tensor &lhs, const Tensor &rhs, Tensor *ret);
-/// Div each row of the lhs matrix with the rhs column
-Tensor DivColumn(const Tensor &lhs, const Tensor &rhs);
-void DivColumn(const Tensor &lhs, const Tensor &rhs, Tensor *ret);
-/// Divide each row of the lhs matrix by the rhs row
-Tensor DivRow(const Tensor &lhs, const Tensor &rhs);
-void DivRow(const Tensor &lhs, const Tensor &rhs, Tensor *ret);
-*/
+void Axpy(SType alpha, const Tensor &in, Tensor *out);
+
+/// Do matrix vector multiplication or matrix matrix multiplication depending
+/// on the Tensor shape. result = A * B
+Tensor Mult(const Tensor &A, const Tensor &B);
+/// Do matrix vector multiplication or matrix matrix multiplication depending
+/// on the Tensor shape. C = A * B
+void Mult(const Tensor &A, const Tensor &B, Tensor *C);
+/// Do matrix vector multiplication or matrix matrix multiplication depending
+/// on the Tensor shape. C = alpha * A * B + beta * C
+template <typename SType>
+void Mult(const SType alpha, const Tensor &A, const Tensor &B, const SType beta,
+ Tensor *C);
} // namespace singa
#endif // SINGA_CORE_TENSOR_H_
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/564c88ad/src/core/tensor/tensor.cc
----------------------------------------------------------------------
diff --git a/src/core/tensor/tensor.cc b/src/core/tensor/tensor.cc
index 5ae375c..f4e9da2 100644
--- a/src/core/tensor/tensor.cc
+++ b/src/core/tensor/tensor.cc
@@ -26,61 +26,61 @@ namespace singa {
Tensor::~Tensor() {
// LOG(ERROR) << "~";
- if (blob_ != nullptr && blob_->DecRefCount() == 0)
- device_->FreeBlob(blob_);
+ if (blob_ != nullptr && blob_->DecRefCount() == 0) device_->FreeBlob(blob_);
blob_ = nullptr;
}
Tensor::Tensor() { device_ = &defaultDevice; }
-Tensor::Tensor(const Shape &shape, DataType dtype)
+Tensor::Tensor(const Shape &shape, const DataType dtype)
: data_type_(dtype), device_(&defaultDevice), shape_(shape) {
device_ = &defaultDevice;
blob_ = device_->NewBlob(Product(shape_) * SizeOf(data_type_));
}
-Tensor::Tensor(Shape &&shape, DataType dtype)
+Tensor::Tensor(Shape &&shape, const DataType dtype)
: data_type_(dtype), device_(&defaultDevice), shape_(shape) {
device_ = &defaultDevice;
blob_ = device_->NewBlob(Product(shape_) * SizeOf(data_type_));
}
-Tensor::Tensor(const Shape &shape, Device *device, DataType dtype)
+Tensor::Tensor(const Shape &shape, Device *device, const DataType dtype)
: data_type_(dtype), device_(device), shape_(shape) {
blob_ = device_->NewBlob(Product(shape_) * SizeOf(data_type_));
}
-Tensor::Tensor(Shape &&shape, Device *device, DataType dtype)
+Tensor::Tensor(Shape &&shape, Device *device, const DataType dtype)
: data_type_(dtype), device_(device), shape_(shape) {
blob_ = device_->NewBlob(Product(shape_) * SizeOf(data_type_));
}
-Tensor::Tensor(const Tensor &t)
- : transpose_(t.transpose_), data_type_(t.data_type_), device_(t.device_),
- blob_(t.blob()), shape_(t.shape_) {
+Tensor::Tensor(const Tensor &in)
+ : transpose_(in.transpose_),
+ data_type_(in.data_type_),
+ device_(in.device_),
+ blob_(in.blob()),
+ shape_(in.shape_) {
blob_->IncRefCount();
- // LOG(ERROR) << "const&";
}
-Tensor::Tensor(Tensor &&t)
- : transpose_(t.transpose_), data_type_(t.data_type_), device_(t.device_),
- shape_(std::move(t.shape_)) {
- blob_ = t.blob_;
- t.blob_ = nullptr;
- // LOG(ERROR) << "&&";
+Tensor::Tensor(Tensor &&in)
+ : transpose_(in.transpose_),
+ data_type_(in.data_type_),
+ device_(in.device_),
+ shape_(std::move(in.shape_)) {
+ blob_ = in.blob_;
+ in.blob_ = nullptr;
}
-void Tensor::ResetLike(const Tensor &t) {
- if (blob_ == nullptr || device_ != t.device_ || MemSize() != t.MemSize()) {
- if (blob_ != nullptr && blob_->DecRefCount() == 0)
- device_->FreeBlob(blob_);
- shape_ = t.shape_;
- device_ = t.device_;
- data_type_ = t.data_type_;
- blob_ = device_->NewBlob(t.MemSize());
+void Tensor::ResetLike(const Tensor &in) {
+ if (blob_ == nullptr || device_ != in.device_ || MemSize() != in.MemSize()) {
+ if (blob_ != nullptr && blob_->DecRefCount() == 0) device_->FreeBlob(blob_);
+ shape_ = in.shape_;
+ device_ = in.device_;
+ data_type_ = in.data_type_;
+ blob_ = device_->NewBlob(in.MemSize());
}
}
void Tensor::Reshape(const Shape &shape) {
if (Product(shape_) != Product(shape)) {
- if (blob_ != nullptr && blob_->DecRefCount() == 0)
- device_->FreeBlob(blob_);
+ if (blob_ != nullptr && blob_->DecRefCount() == 0) device_->FreeBlob(blob_);
blob_ = device_->NewBlob(Product(shape) * SizeOf(data_type_));
}
shape_ = shape;
@@ -88,17 +88,15 @@ void Tensor::Reshape(const Shape &shape) {
void Tensor::Reshape(Shape &&shape) {
if (Product(shape_) != Product(shape)) {
- if (blob_ != nullptr && blob_->DecRefCount() == 0)
- device_->FreeBlob(blob_);
+ if (blob_ != nullptr && blob_->DecRefCount() == 0) device_->FreeBlob(blob_);
blob_ = device_->NewBlob(Product(shape) * SizeOf(data_type_));
}
shape_ = std::move(shape);
}
-void Tensor::AsType(DataType type) {
+void Tensor::AsType(const DataType type) {
if (data_type_ != type) {
- if (blob_ != nullptr && blob_->DecRefCount() == 0)
- device_->FreeBlob(blob_);
+ if (blob_ != nullptr && blob_->DecRefCount() == 0) device_->FreeBlob(blob_);
blob_ = device_->NewBlob(Product(shape_) * SizeOf(type));
data_type_ = type;
}
@@ -109,8 +107,7 @@ void Tensor::ToDevice(Device *dst) {
if (device_ != dst) {
Tensor tmp(shape_, dst, data_type_);
tmp.CopyData(*this);
- if (blob_ != nullptr && blob_->DecRefCount() == 0)
- device_->FreeBlob(blob_);
+ if (blob_ != nullptr && blob_->DecRefCount() == 0) device_->FreeBlob(blob_);
blob_ = tmp.blob_;
tmp.blob_ = nullptr;
device_ = dst;
@@ -120,7 +117,7 @@ void Tensor::ToDevice(Device *dst) {
void Tensor::ToHost() { ToDevice(device_->host()); }
template <typename DType>
-void Tensor::CopyDataFromHostPtr(const DType *src, size_t num) {
+void Tensor::CopyDataFromHostPtr(const DType *src, const size_t num) {
CHECK_EQ(sizeof(DType), SizeOf(data_type_))
<< "data_type is " << DataType_Name(data_type_)
<< " user given type is of size " << sizeof(DType);
@@ -130,8 +127,8 @@ void Tensor::CopyDataFromHostPtr(const DType *src, size_t num) {
LOG(WARNING) << "Copy data from null host ptr";
}
}
-template void Tensor::CopyDataFromHostPtr(const float *src, size_t num);
-template void Tensor::CopyDataFromHostPtr(const int *src, size_t num);
+template void Tensor::CopyDataFromHostPtr(const float *src, const size_t num);
+template void Tensor::CopyDataFromHostPtr(const int *src, const size_t num);
void Tensor::CopyData(const Tensor &src) {
CHECK_EQ(Size(), src.Size());
@@ -162,29 +159,27 @@ Tensor Tensor::T() const {
return t;
}
-Tensor &Tensor::operator=(const Tensor &t) {
+Tensor &Tensor::operator=(const Tensor &in) {
// LOG(ERROR) << "= const &";
- if (blob_ != nullptr && blob_->DecRefCount() == 0)
- device_->FreeBlob(blob_);
- transpose_ = t.transpose_;
- data_type_ = t.data_type_;
- shape_ = t.shape_;
- device_ = t.device_;
- blob_ = t.blob();
+ if (blob_ != nullptr && blob_->DecRefCount() == 0) device_->FreeBlob(blob_);
+ transpose_ = in.transpose_;
+ data_type_ = in.data_type_;
+ shape_ = in.shape_;
+ device_ = in.device_;
+ blob_ = in.blob();
blob_->IncRefCount();
return *this;
}
-Tensor &Tensor::operator=(Tensor &&t) {
+Tensor &Tensor::operator=(Tensor &&in) {
// LOG(ERROR) << "= &&";
- if (blob_ != nullptr && blob_->DecRefCount() == 0)
- device_->FreeBlob(blob_);
- transpose_ = t.transpose_;
- data_type_ = t.data_type_;
- shape_ = std::move(t.shape_);
- device_ = t.device_;
- blob_ = t.blob_;
- t.blob_ = nullptr;
+ if (blob_ != nullptr && blob_->DecRefCount() == 0) device_->FreeBlob(blob_);
+ transpose_ = in.transpose_;
+ data_type_ = in.data_type_;
+ shape_ = std::move(in.shape_);
+ device_ = in.device_;
+ blob_ = in.blob_;
+ in.blob_ = nullptr;
return *this;
}
@@ -200,10 +195,10 @@ Tensor Reshape(const Tensor &in, Shape &&s) {
return out;
}
-#define GenUnaryTensorArgMemberFn(op, fn) \
- Tensor &Tensor::op(const Tensor &t) { \
- fn(*this, t, this); \
- return *this; \
+#define GenUnaryTensorArgMemberFn(op, fn) \
+ Tensor &Tensor::op(const Tensor &in) { \
+ fn(*this, in, this); \
+ return *this; \
}
GenUnaryTensorArgMemberFn(operator+=, Add);
@@ -211,12 +206,13 @@ GenUnaryTensorArgMemberFn(operator-=, Sub);
GenUnaryTensorArgMemberFn(operator*=, EltwiseMult);
GenUnaryTensorArgMemberFn(operator/=, Div);
-#define GenUnaryScalarArgMemberFn(op, fn) \
- template <typename DType> Tensor &Tensor::op(DType x) { \
- fn(*this, x, this); \
- return *this; \
- } \
- template Tensor &Tensor::op<float>(float x)
+#define GenUnaryScalarArgMemberFn(op, fn) \
+ template <typename DType> \
+ Tensor &Tensor::op(const DType x) { \
+ fn(*this, x, this); \
+ return *this; \
+ } \
+ template Tensor &Tensor::op<float>(const float x)
GenUnaryScalarArgMemberFn(operator-=, Sub);
GenUnaryScalarArgMemberFn(operator+=, Add);
@@ -224,103 +220,105 @@ GenUnaryScalarArgMemberFn(operator*=, EltwiseMult);
GenUnaryScalarArgMemberFn(operator/=, Div);
// ====================Tensor Operations=======================================
-void CopyDataToFrom(Tensor *dst, const Tensor &src, size_t num,
- size_t dst_offset, size_t src_offset) {
+void CopyDataToFrom(Tensor *dst, const Tensor &src, const size_t num,
+ const size_t dst_offset, const size_t src_offset) {
auto width = SizeOf(src.data_type());
CHECK_EQ(width, SizeOf(dst->data_type()));
size_t nBytes = num * width;
- dst_offset *= width;
- src_offset *= width;
- CHECK_GE(src.MemSize(), src_offset + nBytes);
- CHECK_GE(dst->MemSize(), dst_offset + nBytes);
+ auto d_offset = dst_offset * width;
+ auto s_offset = src_offset * width;
+ CHECK_GE(src.MemSize(), s_offset + nBytes);
+ CHECK_GE(dst->MemSize(), d_offset + nBytes);
Device *src_dev = src.device(), *dst_dev = dst->device();
Blob *from = src.blob(), *to = dst->blob();
if (dst_dev->lang() != src_dev->lang()) {
// let the none cpp device conduct copy op
if (dst_dev->lang() == kCpp) {
- src_dev->CopyDataToFrom(to, from, nBytes, kDeviceToHost, dst_offset,
- src_offset);
+ src_dev->CopyDataToFrom(to, from, nBytes, kDeviceToHost, d_offset,
+ s_offset);
} else if (src_dev->lang() == kCpp) {
- dst_dev->CopyDataToFrom(to, from, nBytes, kHostToDevice, dst_offset,
- src_offset);
+ dst_dev->CopyDataToFrom(to, from, nBytes, kHostToDevice, d_offset,
+ s_offset);
} else {
LOG(FATAL) << "Not support mem copy betwee Cuda and OpenCL device";
}
} else {
auto direct = src_dev->lang() == kCpp ? kHostToHost : kDeviceToDevice;
- src_dev->CopyDataToFrom(to, from, nBytes, direct, dst_offset, src_offset);
+ src_dev->CopyDataToFrom(to, from, nBytes, direct, d_offset, s_offset);
}
}
//============================================================================
/// typedef DType accroding to type value.
/// DType would be used in the code block __VA_ARGS__.
-#define TYPE_SWITCH(type, DType, ...) \
- do { \
- switch (type) { \
- case kFloat32: { \
- typedef float DType; \
- { __VA_ARGS__ } \
- break; \
- } \
- case kInt: { \
- typedef int DType; \
- { __VA_ARGS__ } \
- break; \
- } \
- case kChar: { \
- typedef char DType; \
- { __VA_ARGS__ } \
- break; \
- } \
- default: \
- LOG(FATAL) << "Unknow data type = " << DataType_Name(type); \
- } \
+#define TYPE_SWITCH(type, DType, ...) \
+ do { \
+ switch (type) { \
+ case kFloat32: { \
+ typedef float DType; \
+ { __VA_ARGS__ } \
+ break; \
+ } \
+ case kInt: { \
+ typedef int DType; \
+ { __VA_ARGS__ } \
+ break; \
+ } \
+ case kChar: { \
+ typedef char DType; \
+ { __VA_ARGS__ } \
+ break; \
+ } \
+ default: \
+ LOG(FATAL) << "Unknow data type = " << DataType_Name(type); \
+ } \
} while (0)
/// typedef DType and Lang according to data type and device programming
/// language respectively.
/// type is from DataType, and lang is from LangType.
/// DType and Lang would be used in __VA_ARGS__.
-#define TYPE_LANG_SWITCH(dtype, DType, ltype, Lang, ...) \
- do { \
- const int _SwitchShift = 3; \
- int _SwitchHash = ((dtype) << _SwitchShift) + (ltype); \
- switch (_SwitchHash) { \
- case ((kFloat32 << _SwitchShift) + kCuda): { \
- typedef float DType; \
- typedef lang::Cuda Lang; \
- { __VA_ARGS__ } \
- break; \
- } \
- case ((kFloat32 << _SwitchShift) + kCpp): { \
- typedef float DType; \
- typedef lang::Cpp Lang; \
- { __VA_ARGS__ } \
- break; \
- } \
- case ((kFloat32 << _SwitchShift) + kOpencl): { \
- typedef float DType; \
- typedef lang::Opencl Lang; \
- { __VA_ARGS__ } \
- break; \
- } \
- default: \
- LOG(FATAL) << "Unknown combination of data type " \
- << DataType_Name(dtype) << " and language " \
- << LangType_Name(ltype); \
- } \
+#define TYPE_LANG_SWITCH(dtype, DType, ltype, Lang, ...) \
+ do { \
+ const int _SwitchShift = 3; \
+ int _SwitchHash = ((dtype) << _SwitchShift) + (ltype); \
+ switch (_SwitchHash) { \
+ case ((kFloat32 << _SwitchShift) + kCuda): { \
+ typedef float DType; \
+ typedef lang::Cuda Lang; \
+ { __VA_ARGS__ } \
+ break; \
+ } \
+ case ((kFloat32 << _SwitchShift) + kCpp): { \
+ typedef float DType; \
+ typedef lang::Cpp Lang; \
+ { __VA_ARGS__ } \
+ break; \
+ } \
+ case ((kFloat32 << _SwitchShift) + kOpencl): { \
+ typedef float DType; \
+ typedef lang::Opencl Lang; \
+ { __VA_ARGS__ } \
+ break; \
+ } \
+ default: \
+ LOG(FATAL) << "Unknown combination of data type " \
+ << DataType_Name(dtype) << " and language " \
+ << LangType_Name(ltype); \
+ } \
} while (0)
-template <typename SType> void Tensor::SetValue(const SType x) {
+// =============Element-wise operations====================================
+template <typename SType>
+void Tensor::SetValue(const SType x) {
CHECK_EQ(sizeof(SType), SizeOf(data_type_));
auto size = Size();
auto ptr = blob_;
TYPE_LANG_SWITCH(data_type_, DType, device_->lang(), Lang, {
// cast x to DType
- device_->Exec(
- [size, x, ptr](Context *ctx) { Set<DType, Lang>(size, x, ptr, ctx); },
- {}, {ptr});
+ device_->Exec([size, x, ptr](Context *ctx) {
+ Set<DType, Lang>(size, x, ptr, ctx);
+ }, {}, {ptr});
});
}
template void Tensor::SetValue<float>(const float x);
@@ -328,21 +326,19 @@ template void Tensor::SetValue<float>(const float x);
#define EltwiseUnaryTensorFn(fn, t, ret) \
do { \
TYPE_LANG_SWITCH(t.data_type(), DType, t.device()->lang(), Lang, { \
- ret->device()->Exec( \
- [t, ret](Context* ctx) { \
- fn<DType, Lang>(t.Size(), t.blob(), ret->blob(), ctx); \
- }, \
- {t.blob()}, {ret->blob()}); \
+ ret->device()->Exec([t, ret](Context * ctx) { \
+ fn<DType, Lang>(t.Size(), t.blob(), ret->blob(), ctx); \
+ }, {t.blob()}, {ret->blob()}); \
}); \
} while (0)
-#define GenUnaryTensorFn(fn) \
- Tensor fn(const Tensor &t) { \
- Tensor ret(t.shape(), t.device(), t.data_type()); \
- auto *retptr = &ret; \
- EltwiseUnaryTensorFn(fn, t, retptr); \
- return ret; \
- } \
+#define GenUnaryTensorFn(fn) \
+ Tensor fn(const Tensor &in) { \
+ Tensor ret(in.shape(), in.device(), in.data_type()); \
+ auto *retptr = &ret; \
+ EltwiseUnaryTensorFn(fn, in, retptr); \
+ return ret; \
+ } \
void fn(const Tensor &in, Tensor *out) { EltwiseUnaryTensorFn(fn, in, out); }
GenUnaryTensorFn(Abs);
@@ -355,33 +351,89 @@ GenUnaryTensorFn(Sqrt);
GenUnaryTensorFn(Square);
GenUnaryTensorFn(Tanh);
-// TODO(wangwei) conside async exec
-template <> float Sum<float>(const Tensor &t) {
- float s = 0.0f;
- TYPE_LANG_SWITCH(t.data_type(), DType, t.device()->lang(), Lang, {
- t.device()->Exec(
- [t, &s](Context *ctx) {
- Sum<DType, Lang>(t.Size(), t.blob(), &s, ctx);
- },
- {t.blob()}, {});
- });
- return s;
-}
+#define EltwiseBinaryTensorFn(fn, lhs, rhs, ret) \
+ do { \
+ TYPE_LANG_SWITCH(lhs.data_type(), DType, lhs.device()->lang(), Lang, { \
+ CHECK_EQ(sizeof(DType), SizeOf(rhs.data_type())); \
+ ret->device()->Exec([lhs, rhs, ret](Context * ctx) { \
+ fn<DType, Lang>(lhs.Size(), lhs.blob(), rhs.blob(), ret->blob(), ctx); \
+ }, {lhs.blob(), rhs.blob()}, {ret->blob()}); \
+ }); \
+ } while (0)
-Tensor Sum(const Tensor &M, int axis) {
- if (axis == 0) {
- Tensor out(Shape{M.shape(1)}, M.device(), M.data_type());
- SumRows(M, &out);
- return out;
- } else {
- CHECK_EQ(axis, 1) << "Not support Sum over axis = " << axis;
- Tensor out(Shape{M.shape(0)}, M.device(), M.data_type());
- SumColumns(M, &out);
- return out;
+#define GenBinaryTensorFn(op, fn) \
+ Tensor op(const Tensor &lhs, const Tensor &rhs) { \
+ Tensor ret(lhs.shape(), lhs.device(), lhs.data_type()); \
+ fn(lhs, rhs, &ret); \
+ return ret; \
+ } \
+ void fn(const Tensor &lhs, const Tensor &rhs, Tensor *ret) { \
+ EltwiseBinaryTensorFn(fn, lhs, rhs, ret); \
}
+
+GenBinaryTensorFn(operator+, Add);
+GenBinaryTensorFn(operator-, Sub);
+GenBinaryTensorFn(operator*, EltwiseMult);
+GenBinaryTensorFn(operator/, Div);
+GenBinaryTensorFn(Pow, Pow);
+
+#define EltwiseTensorScalarFn(fn, t, x, ret) \
+ do { \
+ TYPE_LANG_SWITCH(t.data_type(), DType, t.device()->lang(), Lang, { \
+ static_assert(std::is_same<SType, DType>::value, \
+ "The Scalar type must match the Tensor data type"); \
+ ret->device()->Exec([t, x, ret](Context * ctx) { \
+ fn<DType, Lang>(t.Size(), t.blob(), x, ret->blob(), ctx); \
+ }, {t.blob()}, {ret->blob()}); \
+ }); \
+ } while (0)
+
+#define GenTensorScalarFn(op, fn) \
+ template <typename SType> \
+ Tensor op(const Tensor &in, const SType x) { \
+ Tensor ret(in.shape(), in.device(), in.data_type()); \
+ fn(in, x, &ret); \
+ return ret; \
+ } \
+ template <typename SType> \
+ void fn(const Tensor &in, const SType x, Tensor *ret) { \
+ EltwiseTensorScalarFn(fn, in, x, ret); \
+ } \
+ template Tensor op<float>(const Tensor &in, const float x); \
+ template void fn<float>(const Tensor &in, const float x, Tensor *ret)
+
+GenTensorScalarFn(operator+, Add);
+GenTensorScalarFn(operator-, Sub);
+GenTensorScalarFn(operator*, EltwiseMult);
+GenTensorScalarFn(operator/, Div);
+GenTensorScalarFn(Pow, Pow);
+GenTensorScalarFn(operator<, LT);
+GenTensorScalarFn(operator<=, LE);
+GenTensorScalarFn(operator>, GT);
+GenTensorScalarFn(operator>=, GE);
+template <typename SType>
+Tensor Div(const SType alpha, const Tensor &in) {
+ Tensor out(in.shape(), in.device(), in.data_type());
+ Div(alpha, in, &out);
+ return out;
}
+template Tensor Div<float>(const float, const Tensor &);
-Tensor Average(const Tensor &t, int axis) {
+template <typename SType>
+void Div(const SType alpha, const Tensor &in, Tensor *out) {
+ CheckDataTypeAndLang(in, *out);
+ CHECK(in.shape() == out->shape());
+ TYPE_LANG_SWITCH(in.data_type(), DType, in.device()->lang(), Lang, {
+ // TODO(wangwei) type cast SType to DType;
+ in.device()->Exec([alpha, in, out](Context *ctx) {
+ Div<DType, Lang>(in.Size(), alpha, in.blob(), out->blob(), ctx);
+ }, {in.blob()}, {out->blob()});
+ });
+}
+template void Div<float>(const float, const Tensor &, Tensor *);
+
+// =============Matrix operations============================================
+Tensor Average(const Tensor &M, int axis) {
// operator/ only has implementation for float scalar type, hence it is
// necessary to cast the denominator to a float.
// TODO(wangwei) implement function for cast scalar type involved in Tensor
@@ -396,10 +448,34 @@ Tensor Average(const Tensor &t, int axis) {
// ....
// }
if (axis == 0) {
- return Sum(t, 0) / (1.0f * t.shape().at(0));
+ return Sum(M, 0) / (1.0f * M.shape(0));
} else {
CHECK_EQ(axis, 1);
- return Sum(t, 1) / (1.0f * t.shape().at(1));
+ return Sum(M, 1) / (1.0f * M.shape(1));
+ }
+}
+// TODO(wangwei) consider async exec
+template <>
+float Sum<float>(const Tensor &in) {
+ float s = 0.0f;
+ TYPE_LANG_SWITCH(in.data_type(), DType, in.device()->lang(), Lang, {
+ in.device()->Exec([in, &s](Context *ctx) {
+ Sum<DType, Lang>(in.Size(), in.blob(), &s, ctx);
+ }, {in.blob()}, {});
+ });
+ return s;
+}
+
+Tensor Sum(const Tensor &M, int axis) {
+ if (axis == 0) {
+ Tensor out(Shape{M.shape(1)}, M.device(), M.data_type());
+ SumRows(M, &out);
+ return out;
+ } else {
+ CHECK_EQ(axis, 1) << "Not support Sum over axis = " << axis;
+ Tensor out(Shape{M.shape(0)}, M.device(), M.data_type());
+ SumColumns(M, &out);
+ return out;
}
}
@@ -424,141 +500,10 @@ void SoftMax(const Tensor &in, int axis, Tensor *out) {
DivColumn(sum, out);
}
-#define EltwiseBinaryTensorFn(fn, lhs, rhs, ret) \
- do { \
- TYPE_LANG_SWITCH(lhs.data_type(), DType, lhs.device()->lang(), Lang, { \
- CHECK_EQ(sizeof(DType), SizeOf(rhs.data_type())); \
- ret->device()->Exec( \
- [lhs, rhs, ret](Context *ctx) { \
- fn<DType, Lang>(lhs.Size(), lhs.blob(), rhs.blob(), ret->blob(), \
- ctx); \
- }, \
- {lhs.blob(), rhs.blob()}, {ret->blob()}); \
- }); \
- } while (0)
-
-#define GenBinaryTensorFn(op, fn) \
- Tensor op(const Tensor &lhs, const Tensor &rhs) { \
- Tensor ret(lhs.shape(), lhs.device(), lhs.data_type()); \
- fn(lhs, rhs, &ret); \
- return ret; \
- } \
- void fn(const Tensor &lhs, const Tensor &rhs, Tensor *ret) { \
- EltwiseBinaryTensorFn(fn, lhs, rhs, ret); \
- }
-
-GenBinaryTensorFn(operator+, Add);
-GenBinaryTensorFn(operator-, Sub);
-GenBinaryTensorFn(operator*, EltwiseMult);
-GenBinaryTensorFn(operator/, Div);
-GenBinaryTensorFn(Pow, Pow);
-
-#define EltwiseTensorScalarFn(fn, t, x, ret) \
- do { \
- TYPE_LANG_SWITCH(t.data_type(), DType, t.device()->lang(), Lang, { \
- static_assert(std::is_same<SType, DType>::value, \
- "The Scalar type must match the Tensor data type"); \
- ret->device()->Exec( \
- [t, x, ret](Context *ctx) { \
- fn<DType, Lang>(t.Size(), t.blob(), x, ret->blob(), ctx); \
- }, \
- {t.blob()}, {ret->blob()}); \
- }); \
- } while (0)
-
-#define GenTensorScalarFn(op, fn) \
- template <typename SType> Tensor op(const Tensor &t, SType x) { \
- Tensor ret(t.shape(), t.device(), t.data_type()); \
- fn(t, x, &ret); \
- return ret; \
- } \
- template <typename SType> void fn(const Tensor &t, SType x, Tensor *ret) { \
- EltwiseTensorScalarFn(fn, t, x, ret); \
- } \
- template Tensor op<float>(const Tensor &t, float x); \
- template void fn<float>(const Tensor &t, const float x, Tensor *ret)
-
-GenTensorScalarFn(operator+, Add);
-GenTensorScalarFn(operator-, Sub);
-GenTensorScalarFn(operator*, EltwiseMult);
-GenTensorScalarFn(operator/, Div);
-GenTensorScalarFn(Pow, Pow);
-GenTensorScalarFn(operator<, LT);
-GenTensorScalarFn(operator<=, LE);
-GenTensorScalarFn(operator>, GT);
-GenTensorScalarFn(operator>=, GE);
-
-// ================Blas operations============================================
-Tensor Mult(const Tensor &lhs, const Tensor &rhs) {
- Tensor ret(Shape{lhs.shape(0), rhs.shape(1)}, lhs.device(), lhs.data_type());
- Mult(lhs, rhs, &ret);
- return ret;
-}
-
-void Mult(const Tensor &lhs, const Tensor &rhs, Tensor *ret) {
- Mult(1.0f, lhs, rhs, 0.0f, ret);
-}
-
-void Mult(const float alpha, const Tensor &A, const Tensor &B, const float beta,
- Tensor *C) {
- CHECK_EQ(A.shape().size(), 2u);
- if (B.nDim() == 1u) {
- TYPE_LANG_SWITCH(A.data_type(), DType, A.device()->lang(), Lang, {
- C->device()->Exec(
- [alpha, A, beta, B, C](Context *ctx) {
- GEMV<DType, Lang>(A.transpose(), A.shape(0), A.shape(1), alpha,
- A.blob(), B.blob(), beta, C->blob(), ctx);
- },
- {A.blob(), B.blob()}, {C->blob()});
- });
- } else {
- CHECK(!C->transpose());
- TYPE_LANG_SWITCH(A.data_type(), DType, A.device()->lang(), Lang, {
- C->device()->Exec(
- [alpha, A, beta, B, C](Context *ctx) {
- GEMM<DType, Lang>(A.transpose(), B.transpose(), A.shape(0),
- B.shape(1), A.shape(1), alpha, A.blob(), B.blob(),
- beta, C->blob(), ctx);
- },
- {A.blob(), B.blob()}, {C->blob()});
- });
- }
-}
-
-void Bernoulli(float p, Tensor *t) {
- TYPE_LANG_SWITCH(t->data_type(), DType, t->device()->lang(), Lang, {
- t->device()->Exec(
- [p, t](Context *ctx) {
- Bernoulli<DType, Lang>(t->Size(), p, t->blob(), ctx);
- },
- {}, {t->blob()}, true);
- });
-}
-
-void Uniform(float low, float high, Tensor *t) {
- TYPE_LANG_SWITCH(t->data_type(), DType, t->device()->lang(), Lang, {
- t->device()->Exec(
- [low, high, t](Context *ctx) {
- Uniform<DType, Lang>(t->Size(), low, high, t->blob(), ctx);
- },
- {}, {t->blob()}, true);
- });
-}
-
-void Gaussian(float mean, float std, Tensor *t) {
- TYPE_LANG_SWITCH(t->data_type(), DType, t->device()->lang(), Lang, {
- t->device()->Exec(
- [mean, std, t](Context *ctx) {
- Gaussian<DType, Lang>(t->Size(), mean, std, t->blob(), ctx);
- },
- {}, {t->blob()}, true);
- });
-}
-
-// ======follow the consistency guide
void AddColumn(const Tensor &v, Tensor *M) { AddColumn(1, 1, v, M); }
/// Add column 'v' onto each column of matrix M;
-void AddColumn(const float alpha, const float beta, const Tensor &v,
+template <typename SType>
+void AddColumn(const SType alpha, const SType beta, const Tensor &v,
Tensor *M) {
if (M->transpose()) {
Tensor X = M->T();
@@ -570,15 +515,19 @@ void AddColumn(const float alpha, const float beta, const Tensor &v,
CHECK_EQ(nb_row, v.Size());
Tensor one(Shape{1, nb_col}, M->device(), M->data_type());
- one.SetValue(1.0f); // TODO(wangwei) cast type
+ one.SetValue(1.0f); // TODO(wangwei) cast type
Tensor vmat = Reshape(v, Shape{nb_row, 1});
Mult(alpha, vmat, one, beta, M);
}
}
+template  // explicit instantiation for float, not a specialization declaration
+void AddColumn(const float alpha, const float beta, const Tensor &v, Tensor *M);
+
void AddRow(const Tensor &v, Tensor *M) { AddRow(1, 1, v, M); }
/// Sub column 'v' by each column of matrix M; write results into 'out'
-void AddRow(const float alpha, const float beta, const Tensor &v, Tensor *M) {
+template <typename SType>
+void AddRow(const SType alpha, const SType beta, const Tensor &v, Tensor *M) {
if (M->transpose()) {
Tensor X = M->T();
AddColumn(v, &X);
@@ -594,29 +543,8 @@ void AddRow(const float alpha, const float beta, const Tensor &v, Tensor *M) {
Mult(alpha, one, vmat, beta, M);
}
}
-
-template <typename SType> Tensor Div(const SType alpha, const Tensor &in) {
- Tensor out(in.shape(), in.device(), in.data_type());
- Div(alpha, in, &out);
- return out;
-}
-
-template Tensor Div<float>(const float, const Tensor &);
-
-template <typename SType>
-void Div(const SType alpha, const Tensor &in, Tensor *out) {
- CheckDataTypeAndLang(in, *out);
- CHECK(in.shape() == out->shape());
- TYPE_LANG_SWITCH(in.data_type(), DType, in.device()->lang(), Lang, {
- // TODO(wangwei) type cast SType to DType;
- in.device()->Exec(
- [alpha, in, out](Context *ctx) {
- Div<DType, Lang>(in.Size(), alpha, in.blob(), out->blob(), ctx);
- },
- {in.blob()}, {out->blob()});
- });
-}
-template void Div<float>(const float, const Tensor &, Tensor *);
+template  // explicit instantiation for float, not a specialization declaration
+void AddRow(const float alpha, const float beta, const Tensor &v, Tensor *M);
/// Divide column 'v' by each column of matrix M; write results into 'out'
void DivColumn(const Tensor &v, Tensor *M) {
@@ -640,12 +568,10 @@ void MultColumn(const Tensor &v, Tensor *M) {
CHECK_EQ(v.Size(), M->shape(0));
CheckDataTypeAndLang(*M, v);
TYPE_LANG_SWITCH(v.data_type(), DType, v.device()->lang(), Lang, {
- v.device()->Exec(
- [M, v](Context *ctx) {
- DGMM<DType, Lang>(false, M->shape(0), M->shape(1), M->blob(),
- v.blob(), M->blob(), ctx);
- },
- {M->blob(), v.blob()}, {M->blob()});
+ v.device()->Exec([M, v](Context *ctx) {
+ DGMM<DType, Lang>(false, M->shape(0), M->shape(1), M->blob(), v.blob(),
+ M->blob(), ctx);
+ }, {M->blob(), v.blob()}, {M->blob()});
});
}
@@ -657,12 +583,10 @@ void MultRow(const Tensor &v, Tensor *M) {
CHECK_EQ(v.Size(), M->shape(1));
CheckDataTypeAndLang(*M, v);
TYPE_LANG_SWITCH(v.data_type(), DType, v.device()->lang(), Lang, {
- v.device()->Exec(
- [M, v](Context *ctx) {
- DGMM<DType, Lang>(true, M->shape(0), M->shape(1), M->blob(), v.blob(),
- M->blob(), ctx);
- },
- {M->blob(), v.blob()}, {M->blob()});
+ v.device()->Exec([M, v](Context *ctx) {
+ DGMM<DType, Lang>(true, M->shape(0), M->shape(1), M->blob(), v.blob(),
+ M->blob(), ctx);
+ }, {M->blob(), v.blob()}, {M->blob()});
});
}
@@ -680,8 +604,8 @@ void SumColumns(const Tensor &M, Tensor *v) {
size_t nb_row = M.shape().at(0), nb_col = M.shape().at(1);
CHECK_EQ(nb_row, v->Size());
- Tensor one(Shape{nb_col, 1}, M.device(), M.data_type());
- one.SetValue(1.0f); // TODO(wangwei) cast type
+ Tensor one(Shape{nb_col}, M.device(), M.data_type());
+ one.SetValue(1.0f); // TODO(wangwei) cast type
Mult(M, one, v);
}
}
@@ -695,10 +619,98 @@ void SumRows(const Tensor &M, Tensor *v) {
size_t nb_row = M.shape(0), nb_col = M.shape(1);
CHECK_EQ(nb_col, v->Size());
- Tensor one(Shape{nb_row, 1}, M.device(), M.data_type());
- one.SetValue(1.0f); // TODO(wangwei) cast type
+ Tensor one(Shape{nb_row}, M.device(), M.data_type());
+ one.SetValue(1.0f); // TODO(wangwei) cast type
Tensor X = M.T();
Mult(X, one, v);
}
}
+// ====================Random operations=====================================
+template <typename SType>
+void Bernoulli(const SType p, Tensor *out) {
+ TYPE_LANG_SWITCH(out->data_type(), DType, out->device()->lang(), Lang, {
+ auto prob = TypeCast<SType, DType>(p);
+ out->device()->Exec([prob, out](Context *ctx) {
+ Bernoulli<DType, Lang>(out->Size(), prob, out->blob(), ctx);
+ }, {}, {out->blob()}, true);
+ });
+}
+template void Bernoulli<float>(const float p, Tensor *out);
+
+template <typename SType>
+void Uniform(const SType low, const SType high, Tensor *out) {
+ TYPE_LANG_SWITCH(out->data_type(), DType, out->device()->lang(), Lang, {
+ auto l = TypeCast<SType, DType>(low);
+ auto h = TypeCast<SType, DType>(high);
+ out->device()->Exec([l, h, out](Context *ctx) {
+ Uniform<DType, Lang>(out->Size(), l, h, out->blob(), ctx);
+ }, {}, {out->blob()}, true);
+ });
+}
+template void Uniform<float>(const float low, const float high, Tensor *out);
+
+template <typename SType>
+void Gaussian(const SType mean, const SType std, Tensor *out) {
+ TYPE_LANG_SWITCH(out->data_type(), DType, out->device()->lang(), Lang, {
+ auto m = TypeCast<SType, DType>(mean);
+ auto s = TypeCast<SType, DType>(std);
+ out->device()->Exec([m, s, out](Context *ctx) {
+ Gaussian<DType, Lang>(out->Size(), m, s, out->blob(), ctx);
+ }, {}, {out->blob()}, true);
+ });
+}
+template void Gaussian<float>(const float mean, const float std, Tensor *out);
+
+// ================Blas operations============================================
+template <typename SType>
+void Axpy(const SType alpha, const Tensor &in, Tensor *out) {
+ TYPE_LANG_SWITCH(in.data_type(), DType, in.device()->lang(), Lang, {
+ auto a = TypeCast<SType, DType>(alpha);
+ out->device()->Exec([a, in, out](Context *ctx) {
+ Axpy<DType, Lang>(in.Size(), a, in.blob(), out->blob(), ctx);
+ }, {in.blob(), out->blob()}, {out->blob()});
+ });
+}
+template  // explicit instantiation for float, not a specialization declaration
+void Axpy(const float alpha, const Tensor &in, Tensor *out);
+
+Tensor Mult(const Tensor &A, const Tensor &B) {
+ Shape s;
+ s.push_back(A.shape(0));
+ if (B.nDim() == 2) s.push_back(B.shape(1));
+ Tensor out(s, A.device(), A.data_type());
+ Mult(A, B, &out);
+ return out;
+}
+
+void Mult(const Tensor &A, const Tensor &B, Tensor *out) {
+ Mult(1.0f, A, B, 0.0f, out);
+}
+
+template <typename SType>
+void Mult(const SType alpha, const Tensor &A, const Tensor &B, const SType beta,
+ Tensor *C) {
+ CHECK_EQ(A.shape().size(), 2u);
+ if (B.nDim() == 1u) {
+ TYPE_LANG_SWITCH(A.data_type(), DType, A.device()->lang(), Lang, {
+ auto a = TypeCast<SType, DType>(alpha);
+ auto b = TypeCast<SType, DType>(beta);
+ C->device()->Exec([a, A, b, B, C](Context *ctx) {
+ GEMV<DType, Lang>(A.transpose(), A.shape(0), A.shape(1), a, A.blob(),
+ B.blob(), b, C->blob(), ctx);
+ }, {A.blob(), B.blob()}, {C->blob()});
+ });
+ } else {
+ CHECK(!C->transpose());
+ TYPE_LANG_SWITCH(A.data_type(), DType, A.device()->lang(), Lang, {
+ auto a = TypeCast<SType, DType>(alpha);
+ auto b = TypeCast<SType, DType>(beta);
+ C->device()->Exec([a, A, b, B, C](Context *ctx) {
+ GEMM<DType, Lang>(A.transpose(), B.transpose(), A.shape(0), B.shape(1),
+ A.shape(1), a, A.blob(), B.blob(), b, C->blob(), ctx);
+ }, {A.blob(), B.blob()}, {C->blob()});
+ });
+ }
+}
+
} // namespace singa
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/564c88ad/src/core/tensor/tensor_math.h
----------------------------------------------------------------------
diff --git a/src/core/tensor/tensor_math.h b/src/core/tensor/tensor_math.h
index 1bf6fc7..b5d0ba9 100644
--- a/src/core/tensor/tensor_math.h
+++ b/src/core/tensor/tensor_math.h
@@ -29,12 +29,14 @@ namespace singa {
/// device programming language, e.g., Langice::kCpp, Langice::kCuda
///
/// TODO(wangwei) Clean the functions to make the function APIs consistent:
-/// 1. All function names should be like XxxYyy or XY, i.e., capitablize the first
+/// 1. All function names should be like XxxYyy or XY, i.e., capitalize the
+/// first
/// letter.
/// 2. Order functions based on function name in alphabetical order.
-/// 3. Function arguments order is [const basic type] [const Blob] [mutable Blob].
+/// 3. Function arguments order is [const basic type] [const Blob] [mutable
+/// Blob].
/// 4. Function argument names, use 'num' for total number of elements in
-/// elementwise operations; use 'in1' 'in2' for input blobs; use 'out' for
+/// elementwise operations; use 'in1' 'in2' for input blobs; use 'out' for
/// output blob or value. With exceptions for some functions, e.g.,
/// Scale(const float alpha, const Blob* in, Blob* out);
/// For such cases, use x, v, alpha, etc for scalar types.
@@ -46,262 +48,283 @@ namespace singa {
/// 7. Use size_t for the number of elements, rows or columns.
/// 8. Use the same name for the Tensor and Blob level math functions.
-
-// ================Linear algebra functions====================================
-/// ret[i] = |input[i]|
+// =============Element-wise operations====================================
+/// out[i] = |in[i]|
template <typename DType, typename Lang>
void Abs(const size_t num, const Blob *in, Blob *out, Context *ctx) {
LOG(FATAL) << "Abs Not Implemented";
}
+/// out = in + x
template <typename DType, typename Lang>
-void Set(const size_t num, const DType x, Blob *out, Context *ctx) {
- LOG(FATAL) << "Set Not Implemented";
+void Add(const size_t num, const Blob *in, const DType x, Blob *out,
+ Context *ctx) {
+ LOG(FATAL) << "Add Not Implemented";
}
-/// sum all elements of input into ret
+/// out = in1 + in2
template <typename DType, typename Lang>
-void Sum(const size_t num, const Blob *in, DType *out, Context *ctx) {
- LOG(FATAL) << "Sum Not Implemented";
+void Add(const size_t num, const Blob *in1, const Blob *in2, Blob *out,
+ Context *ctx) {
+ LOG(FATAL) << "Add-Pair Not Implemented";
}
-
-/// ret[i] = sign(input[i])
+/// Element-wise operation, clamp every element into [low, high]
+/// if x>high, then x=high; if x<low, then x=low.
template <typename DType, typename Lang>
-void Sign(const size_t num, const Blob *in, Blob *out, Context *ctx) {
- LOG(FATAL) << "Sign Not Implemented";
+void Clamp(const size_t num, const DType low, const DType high, const Blob *in,
+ Blob *out, Context *ctx) {
+ LOG(FATAL) << "Clamp Not Implemented";
}
-/// Base is e, Neper number. ret[i]=exp(input[i])
+/// out = x / in
template <typename DType, typename Lang>
-void Exp(const size_t num, const Blob *in, Blob *out, Context *ctx) {
- LOG(FATAL) << "Exp Not Implemented";
+void Div(const size_t num, const DType x, const Blob *in, Blob *out,
+ Context *ctx) {
+ LOG(FATAL) << "Div Not Implemented";
}
-/// Natual logarithm, the base is e, Neper number ret[i]=log(input[i]).
-template <typename DType, typename Lang>
-void Log(const size_t num, const Blob *in, Blob *out, Context *ctx) {
- LOG(FATAL) << "Log Not Implemented";
-}
-/// Element-wise operation, ret[i]=sqrt([input[i])
template <typename DType, typename Lang>
-void Sqrt(const size_t num, const Blob *in, Blob *out, Context *ctx) {
- LOG(FATAL) << "Sqrt Not Implemented";
+void Div(const size_t num, const Blob *in, const DType x, Blob *out,
+ Context *ctx) {
+ CHECK_NE(x, 0.f);
+ EltwiseMult<DType, Lang>(num, in, DType(1) / x, out, ctx);
}
-/// Element-wise operation, ret[i]=square([input[i])
+/// out = in1 / in2
template <typename DType, typename Lang>
-void Square(const size_t num, const Blob *in, Blob *out, Context *ctx) {
- LOG(FATAL) << "Square Not Implemented";
+void Div(const size_t num, const Blob *in1, const Blob *in2, Blob *out,
+ Context *ctx) {
+ LOG(FATAL) << "Div-Pair Not Implemented";
}
-/// Element-wise operation, ret[i]=tanh([input[i])
+/// out = in * x
template <typename DType, typename Lang>
-void Tanh(const size_t num, const Blob *in, Blob *out, Context *ctx) {
- LOG(FATAL) << "Tanh Not Implemented";
+void EltwiseMult(const size_t num, const Blob *in, const DType x, Blob *out,
+ Context *ctx) {
+ LOG(FATAL) << "EltwiseMult Not Implemented";
}
-/// Element-wise operation, ret[i]=max(0, input[i])
+
+/// out = in1 * in2
template <typename DType, typename Lang>
-void ReLU(const size_t num, const Blob *in, Blob *out, Context *ctx) {
- LOG(FATAL) << "ReLU Not Implemented";
+void EltwiseMult(const size_t num, const Blob *in1, const Blob *in2, Blob *out,
+ Context *ctx) {
+ LOG(FATAL) << "EltwiseMult-Pair Not Implemented";
}
-/// Element-wise operation, ret[i]=sigmoid([input[i])
+
+/// Base is e, Neper number. out[i]=exp(in[i])
template <typename DType, typename Lang>
-void Sigmoid(const size_t num, const Blob *in, Blob *out, Context *ctx) {
- LOG(FATAL) << "Sigmoid Not Implemented";
+void Exp(const size_t num, const Blob *in, Blob *out, Context *ctx) {
+ LOG(FATAL) << "Exp Not Implemented";
}
-// Do softmax for each row invidually
+/// out[i]=(in[i]<=x)?1.f:0.f
template <typename DType, typename Lang>
-void Softmax(const size_t nrow, const size_t ncol, const Blob *in,
- Blob *out, Context *ctx) {
- LOG(FATAL) << "Softmax Not Implemented";
+void LE(const size_t num, const Blob *in, const DType x, Blob *out,
+ Context *ctx) {
+ LOG(FATAL) << "LE Not Implemented";
}
-
-// TODO(wangwei) unify SumRow and SumCol.
-/// Sum the rows of the input matrix into a vector
+/// Natural logarithm, the base is e, Neper number out[i]=log(in[i]).
template <typename DType, typename Lang>
-void SumRows(const size_t nrow, const size_t ncol, const Blob *in,
- Blob *out, Context *ctx) {
- LOG(FATAL) << "SumRows Not Implemented";
+void Log(const size_t num, const Blob *in, Blob *out, Context *ctx) {
+ LOG(FATAL) << "Log Not Implemented";
}
-
-/// Sum the columns of the input matrix into a vector
+/// out[i]=(in[i]<x)?1.f:0.f
template <typename DType, typename Lang>
-void SumColumns(const size_t nrow, const size_t ncol, const Blob *in,
- Blob *out, Context *ctx) {
- LOG(FATAL) << "SumColumns Not Implemented";
+void LT(const size_t num, const Blob *in, const DType x, Blob *out,
+ Context *ctx) {
+ LOG(FATAL) << "LT Not Implemented";
}
-
-// TODO(wangwei) unify AddRow and AddCol.
-/// Add the vector v to every row of A as the row of out
+/// out[i]=(in[i]>=x)?1.f:0.f
template <typename DType, typename Lang>
-void AddRow(const size_t nrow, const size_t ncol, const Blob *A, const Blob *v,
- Blob *out, Context *ctx) {
- LOG(FATAL) << "AddRow Not Implemented";
+void GE(const size_t num, const Blob *in, const DType x, Blob *out,
+ Context *ctx) {
+ LOG(FATAL) << "GE Not Implemented";
}
-
-/// Add the vector v to every column of A as the column of out
+/// out[i]=(in[i]>x)?1.f:0.f
template <typename DType, typename Lang>
-void AddCol(const size_t nrow, const size_t ncol, const Blob *A, const Blob *v,
- Blob *out, Context *ctx) {
- LOG(FATAL) << "AddCol Not Implemented";
+void GT(const size_t num, const Blob *in, const DType x, Blob *out,
+ Context *ctx) {
+ LOG(FATAL) << "GT Not Implemented";
}
-
-/// Element-wise operation, do v^x for every v from the input tensor
+/// Element-wise operation, do v^x for every v from the input tensor
template <typename DType, typename Lang>
-void Pow(const size_t num, const Blob *in, const DType x, Blob *out, Context *ctx) {
+void Pow(const size_t num, const Blob *in, const DType x, Blob *out,
+ Context *ctx) {
LOG(FATAL) << "Pow Not Implemented";
}
/// Element-wise operation, do v^x for every v from the lhs and every x from rhs
template <typename DType, typename Lang>
-void Pow(const size_t num, const Blob *in1, const Blob *in2,
- Blob *out, Context *ctx) {
+void Pow(const size_t num, const Blob *in1, const Blob *in2, Blob *out,
+ Context *ctx) {
LOG(FATAL) << "Pow-Pair Not Implemented";
}
-/// Element-wise operation, clamp every element into [low, high]
-/// if x>high, then x=high; if x<low, then x=low.
+/// Element-wise operation, out[i]=max(0, in[i])
template <typename DType, typename Lang>
-void Clamp(const size_t num, const DType low, const DType high, const Blob *in, Blob *out, Context *ctx) {
- LOG(FATAL) << "Clamp Not Implemented";
+void ReLU(const size_t num, const Blob *in, Blob *out, Context *ctx) {
+ LOG(FATAL) << "ReLU Not Implemented";
}
-/// ret = input + x
template <typename DType, typename Lang>
-void Add(const size_t num, const Blob *in, const DType x,
- Blob *out, Context *ctx) {
- LOG(FATAL) << "Add Not Implemented";
+void Set(const size_t num, const DType x, Blob *out, Context *ctx) {
+ LOG(FATAL) << "Set Not Implemented";
}
-
-/// ret = lhs + rhs
+/// Element-wise operation, out[i]=sigmoid(in[i])
template <typename DType, typename Lang>
-void Add(const size_t num, const Blob *in1, const Blob *in2,
- Blob *out, Context *ctx) {
- LOG(FATAL) << "Add-Pair Not Implemented";
+void Sigmoid(const size_t num, const Blob *in, Blob *out, Context *ctx) {
+ LOG(FATAL) << "Sigmoid Not Implemented";
}
-/// ret = input - x
+/// out[i] = sign(in[i])
template <typename DType, typename Lang>
-void Sub(const size_t num, const Blob *in, const DType x, Blob *out, Context *ctx) {
- Add<DType, Lang>(num, in, -x, out, ctx);
+void Sign(const size_t num, const Blob *in, Blob *out, Context *ctx) {
+ LOG(FATAL) << "Sign Not Implemented";
}
-
-/// ret = lhs - rhs
+/// Element-wise operation, out[i]=sqrt(in[i])
template <typename DType, typename Lang>
-void Sub(const size_t num, const Blob *in1, const Blob *in2,
- Blob *out, Context *ctx) {
- LOG(FATAL) << "Sub-Pair Not Implemented";
+void Sqrt(const size_t num, const Blob *in, Blob *out, Context *ctx) {
+ LOG(FATAL) << "Sqrt Not Implemented";
}
-/// ret = input * x
+/// Element-wise operation, out[i]=square(in[i])
template <typename DType, typename Lang>
-void EltwiseMult(const size_t num, const Blob *in, const DType x, Blob *out,
- Context *ctx) {
- LOG(FATAL) << "EltwiseMult Not Implemented";
+void Square(const size_t num, const Blob *in, Blob *out, Context *ctx) {
+ LOG(FATAL) << "Square Not Implemented";
}
-/// ret = lhs * rhs
+/// out = in - x
template <typename DType, typename Lang>
-void EltwiseMult(const size_t num, const Blob *in1, const Blob *in2,
- Blob *out, Context *ctx) {
- LOG(FATAL) << "EltwiseMult-Pair Not Implemented";
+void Sub(const size_t num, const Blob *in, const DType x, Blob *out,
+ Context *ctx) {
+ Add<DType, Lang>(num, in, -x, out, ctx);
}
-/// ret = input / x
+/// out = in1 - in2
template <typename DType, typename Lang>
-void Div(const size_t num, const DType x, const Blob *in,
- Blob *out, Context *ctx) {
- LOG(FATAL) << "Div Not Implemented";
+void Sub(const size_t num, const Blob *in1, const Blob *in2, Blob *out,
+ Context *ctx) {
+ LOG(FATAL) << "Sub-Pair Not Implemented";
}
-
+/// sum all elements of in into out
template <typename DType, typename Lang>
-void Div(const size_t num, const Blob *in, const DType x, Blob *out, Context *ctx) {
- CHECK_NE(x,0.f);
- EltwiseMult<DType, Lang>(num, in, DType(1) / x, out, ctx);
+void Sum(const size_t num, const Blob *in, DType *out, Context *ctx) {
+ LOG(FATAL) << "Sum Not Implemented";
}
-/// ret = lhs / rhs
+/// Element-wise operation, out[i]=tanh(in[i])
template <typename DType, typename Lang>
-void Div(const size_t num, const Blob *in1, const Blob *in2,
- Blob *out, Context *ctx) {
- LOG(FATAL) << "Div-Pair Not Implemented";
+void Tanh(const size_t num, const Blob *in, Blob *out, Context *ctx) {
+ LOG(FATAL) << "Tanh Not Implemented";
}
+// =========== Matrix operations ===========================================
+/// Add the vector v to every column of A as the column of out
+template <typename DType, typename Lang>
+void AddCol(const size_t nrow, const size_t ncol, const Blob *A, const Blob *v,
+ Blob *out, Context *ctx) {
+ LOG(FATAL) << "AddCol Not Implemented";
+}
+// TODO(wangwei) unify AddRow and AddCol.
+/// Add the vector v to every row of A as the row of out
+template <typename DType, typename Lang>
+void AddRow(const size_t nrow, const size_t ncol, const Blob *A, const Blob *v,
+ Blob *out, Context *ctx) {
+ LOG(FATAL) << "AddRow Not Implemented";
+}
/// outer-product.
-/// lhs and rhs are vectors of len m and n. ret is matrix of shape m * n
+/// in1 and in2 are vectors of len m and n. out is matrix of shape m * n
template <typename DType, typename Lang>
-void Outer(const size_t m, const size_t n, const Blob *in1, const Blob *in2,
- Blob *out, Context *ctx) {
+void Outer(const size_t m, const size_t n, const Blob *in1, const Blob *in2,
+ Blob *out, Context *ctx) {
LOG(FATAL) << "Outer Not Implemented";
}
-
-/// ret[i]=(input[i]<x)?1.f:0.f
+// Do softmax for each row individually
template <typename DType, typename Lang>
-void LT(const size_t num, const Blob *in, const DType x, Blob *out, Context *ctx) {
- LOG(FATAL) << "LT Not Implemented";
+void Softmax(const size_t nrow, const size_t ncol, const Blob *in, Blob *out,
+ Context *ctx) {
+ LOG(FATAL) << "Softmax Not Implemented";
}
-/// ret[i]=(input[i]<=x)?1.f:0.f
+/// Sum the columns of the in matrix into a vector
template <typename DType, typename Lang>
-void LE(const size_t num, const Blob *in, const DType x, Blob *out, Context *ctx) {
- LOG(FATAL) << "LE Not Implemented";
+void SumColumns(const size_t nrow, const size_t ncol, const Blob *in, Blob *out,
+ Context *ctx) {
+ LOG(FATAL) << "SumColumns Not Implemented";
}
-/// ret[i]=(input[i]>x)?1.f:0.f
+// TODO(wangwei) unify SumRow and SumCol.
+/// Sum the rows of the in matrix into a vector
template <typename DType, typename Lang>
-void GT(const size_t num, const Blob *in, const DType x, Blob *out, Context *ctx) {
- LOG(FATAL) << "GT Not Implemented";
+void SumRows(const size_t nrow, const size_t ncol, const Blob *in, Blob *out,
+ Context *ctx) {
+ LOG(FATAL) << "SumRows Not Implemented";
+}
+
+// ================Random functions===========================================
+/// Each element of out would be 1 with prob p and 0 with 1-p. 0<= p <= 1
+// Get the random generator from 'ctx'
+// If DType is not float, then convert the threshold to DType
+template <typename DType, typename Lang>
+void Bernoulli(const size_t num, const float p, Blob *out, Context *ctx) {
+ LOG(FATAL) << "Bernoulli Not Implemented";
}
-/// ret[i]=(input[i]>=x)?1.f:0.f
+// The random generator should be extracted from ctx.
+// If DType is not float, then convert the mean and std to DType
template <typename DType, typename Lang>
-void GE(const size_t num, const Blob *in, const DType x, Blob *out, Context *ctx) {
- LOG(FATAL) << "GE Not Implemented";
+void Gaussian(const size_t num, const float mean, const float std, Blob *out,
+ Context *ctx) {
+ LOG(FATAL) << "Gaussian Not Implemented";
+}
+// The random generator should be extracted from ctx.
+// If DType is not float, then convert the low and high to DType
+template <typename DType, typename Lang>
+void Uniform(const size_t num, const float low, const float high, Blob *out,
+ Context *ctx) {
+ LOG(FATAL) << "Uniform Not Implemented";
}
// ===== BLAS functions, ref to http://docs.nvidia.com/cuda/cublas
-// ===== Level 1
-/// return the index of the element with the max value.
+/// out = the index of the element with the max value.
template <typename DType, typename Lang>
void Amax(const size_t num, const Blob *in, size_t *out, Context *ctx) {
LOG(FATAL) << "Amax Not Implemented";
}
-/// return the index of the element with the min value.
+/// out = the index of the element with the min value.
template <typename DType, typename Lang>
void Amin(const size_t num, const Blob *in, size_t *out, Context *ctx) {
LOG(FATAL) << "Amin Not Implemented";
}
-/// ret = sum |x| for all x in input
+/// out = sum |x| for all x in 'in'
template <typename DType, typename Lang>
void Asum(const size_t num, const Blob *in, DType *out, Context *ctx) {
LOG(FATAL) << "Asum Not Implemented";
}
-/// ret = alpha * input + ret
+/// out = alpha * in + out
template <typename DType, typename Lang>
-void Axpy(const size_t num, const DType alpha, const Blob *in,
- Blob *out, Context *ctx) {
+void Axpy(const size_t num, const DType alpha, const Blob *in, Blob *out,
+ Context *ctx) {
LOG(FATAL) << "Axpy Not Implemented";
}
-/// ret *= x
+/// out *= x
template <typename DType, typename Lang>
void Scale(const size_t num, const DType x, Blob *out, Context *ctx) {
LOG(FATAL) << "Scale Not Implemented";
}
template <typename DType, typename Lang>
-void Dot(const size_t num, const Blob *in1, const Blob *in2,
- DType *out, Context *ctx) {
+void Dot(const size_t num, const Blob *in1, const Blob *in2, DType *out,
+ Context *ctx) {
LOG(FATAL) << "Dot Not Implemented";
}
-// ===== Level 2
-/// ret = alpha * op(A) * v + beta * ret.
-/// op(A) = A if trans = false; A^T otherwise; rows(op(A)) = m, cols(op(A)) = n.
+/// out = alpha * A * v + beta * out.
+/// trans indicates if the internal data layout is the transpose of A
template <typename DType, typename Lang>
-void GEMV(bool trans, const size_t m, const size_t n, const DType alpha,
- const Blob *A, const Blob *v,
- const DType beta, Blob *out, Context *ctx) {
+void GEMV(bool trans, const size_t m, const size_t n, const DType alpha,
+ const Blob *A, const Blob *v, const DType beta, Blob *out,
+ Context *ctx) {
LOG(FATAL) << "GEMV Not Implemented";
}
@@ -323,34 +346,5 @@ void GEMM(const bool transA, const bool transB, const size_t nrowA,
LOG(FATAL) << "GEMM Not Implemented";
}
-
-// ===== Level 3
-
-// ================Random functions===========================================
-/// Each element of ret would be 1 with prob p and 0 with 1-p. 0<= p <= 1
-// Get the random generator from 'ctx'
-// If DType is not float, then convert the threshold to DType
-template <typename DType, typename Lang>
-void Bernoulli(const size_t num, const float p, Blob *out, Context *ctx) {
- LOG(FATAL) << "Bernoulli Not Implemented";
-}
-// The random generator should be extracted from ctx.
-// If DType is not float, then convert the low and high to DType
-template <typename DType, typename Lang>
-void Uniform(const size_t num, const float low, const float high,
- Blob *out, Context *ctx) {
- LOG(FATAL) << "Uniform Not Implemented";
-}
-// The random generator should be extracted from ctx.
-// If DType is not float, then convert the mean and std to DType
-template <typename DType, typename Lang>
-void Gaussian(const size_t num, const float mean, const float std,
- Blob *out, Context *ctx) {
- LOG(FATAL) << "Gaussian Not Implemented";
-}
-
-
-
-
} // namespace singa
#endif // SINGA_CORE_MATH_H_