You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@singa.apache.org by zh...@apache.org on 2016/06/13 13:20:10 UTC
[17/50] [abbrv] incubator-singa git commit: SINGA-182 Clean math
function APIs and implementations
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/870d1a97/src/core/tensor/tensor_math_cpp.h
----------------------------------------------------------------------
diff --git a/src/core/tensor/tensor_math_cpp.h b/src/core/tensor/tensor_math_cpp.h
index 5ce33ad..97da896 100644
--- a/src/core/tensor/tensor_math_cpp.h
+++ b/src/core/tensor/tensor_math_cpp.h
@@ -29,34 +29,34 @@
/// For Blob argument xxx, name its pointer as xxxPtr.
namespace singa {
template <>
-void Square<float, lang::Cpp>(int count, const Blob* input,
- Blob* ret, Context* ctx) {
- float* dptr = static_cast<float*>(ret->mutable_data());
- const float* in = static_cast<const float*>(input->data());
+void Square<float, lang::Cpp>(int count, const Blob *input, Blob *ret,
+ Context *ctx) {
+ float *dptr = static_cast<float *>(ret->mutable_data());
+ const float *in = static_cast<const float *>(input->data());
for (int i = 0; i < count; i++) {
dptr[i] = in[i] * in[i];
}
}
template <>
-void Add<float, lang::Cpp>(int count, const Blob* lhs, const Blob* rhs,
- Blob* ret, Context* ctx) {
+void Add<float, lang::Cpp>(int count, const Blob *lhs, const Blob *rhs,
+ Blob *ret, Context *ctx) {
// CHECK_EQ(ctx->stream, nullptr);
- float* dptr = static_cast<float*>(ret->mutable_data());
- const float* lptr = static_cast<const float*>(lhs->data());
- const float* rptr = static_cast<const float*>(rhs->data());
+ float *dptr = static_cast<float *>(ret->mutable_data());
+ const float *lptr = static_cast<const float *>(lhs->data());
+ const float *rptr = static_cast<const float *>(rhs->data());
for (int i = 0; i < count; i++) {
dptr[i] = lptr[i] + rptr[i];
}
}
template <>
-void Sub<float, lang::Cpp>(int count, const Blob* lhs, const Blob* rhs,
- Blob* ret, Context* ctx) {
+void Sub<float, lang::Cpp>(int count, const Blob *lhs, const Blob *rhs,
+ Blob *ret, Context *ctx) {
// CHECK_EQ(ctx->stream, nullptr);
- float* dptr = static_cast<float*>(ret->mutable_data());
- const float* lptr = static_cast<const float*>(lhs->data());
- const float* rptr = static_cast<const float*>(rhs->data());
+ float *dptr = static_cast<float *>(ret->mutable_data());
+ const float *lptr = static_cast<const float *>(lhs->data());
+ const float *rptr = static_cast<const float *>(rhs->data());
for (int i = 0; i < count; i++) {
dptr[i] = lptr[i] - rptr[i];
}
@@ -64,10 +64,10 @@ void Sub<float, lang::Cpp>(int count, const Blob* lhs, const Blob* rhs,
// sum all elements of input into ret
// TODO(wangwei) optimize using omp
template <>
-void Sum<float, lang::Cpp>(int count, const Blob* input, float* ret,
- Context* ctx) {
+void Sum<float, lang::Cpp>(int count, const Blob *input, float *ret,
+ Context *ctx) {
float s = 0.f;
- const float* in = static_cast<const float*>(input->data());
+ const float *in = static_cast<const float *>(input->data());
for (int i = 0; i < count; i++) {
s += in[i];
}
@@ -76,10 +76,10 @@ void Sum<float, lang::Cpp>(int count, const Blob* input, float* ret,
// TODO(wangwei) optimize using omp
template <>
-void SumRows<float, lang::Cpp>(int nrow, int ncol, const Blob* input, Blob* ret,
- Context* ctx) {
- float* dptr = static_cast<float*>(ret->mutable_data());
- const float* in = static_cast<const float*>(input->data());
+void SumRows<float, lang::Cpp>(int nrow, int ncol, const Blob *input, Blob *ret,
+ Context *ctx) {
+ float *dptr = static_cast<float *>(ret->mutable_data());
+ const float *in = static_cast<const float *>(input->data());
memset(dptr, 0, ncol * sizeof(float));
for (int r = 0; r < nrow; r++) {
for (int c = 0; c < ncol; c++) {
@@ -91,10 +91,10 @@ void SumRows<float, lang::Cpp>(int nrow, int ncol, const Blob* input, Blob* ret,
// Sum the rows of the input matrix into a vector
// TODO(wangwei) optimize using omp
template <>
-void SumColumns<float, lang::Cpp>(int nrow, int ncol, const Blob* input, Blob* ret,
- Context* ctx) {
- float* dptr = static_cast<float*>(ret->mutable_data());
- const float* in = static_cast<const float*>(input->data());
+void SumColumns<float, lang::Cpp>(int nrow, int ncol, const Blob *input,
+ Blob *ret, Context *ctx) {
+ float *dptr = static_cast<float *>(ret->mutable_data());
+ const float *in = static_cast<const float *>(input->data());
memset(dptr, 0, ncol * sizeof(float));
for (int r = 0; r < nrow; r++) {
for (int c = 0; c < ncol; c++) {
@@ -104,64 +104,127 @@ void SumColumns<float, lang::Cpp>(int nrow, int ncol, const Blob* input, Blob* r
}
template <>
-void EltwiseMult<float, lang::Cpp>(int count, const Blob* input, float x,
- Blob* ret, Context* ctx) {
- float* dptr = static_cast<float*>(ret->mutable_data());
- const float* lptr = static_cast<const float*>(input->data());
+void EltwiseMult<float, lang::Cpp>(int count, const Blob *input, float x,
+ Blob *ret, Context *ctx) {
+ float *dptr = static_cast<float *>(ret->mutable_data());
+ const float *lptr = static_cast<const float *>(input->data());
for (int i = 0; i < count; i++) {
dptr[i] = lptr[i] * x;
}
}
template <>
-void EltwiseMult<float, lang::Cpp>(int count, const Blob* lhs, const Blob* rhs,
- Blob* ret, Context* ctx) {
- float* dptr = static_cast<float*>(ret->mutable_data());
- const float* lptr = static_cast<const float*>(lhs->data());
- const float* rptr = static_cast<const float*>(rhs->data());
+void EltwiseMult<float, lang::Cpp>(int count, const Blob *lhs, const Blob *rhs,
+ Blob *ret, Context *ctx) {
+ float *dptr = static_cast<float *>(ret->mutable_data());
+ const float *lptr = static_cast<const float *>(lhs->data());
+ const float *rptr = static_cast<const float *>(rhs->data());
for (int i = 0; i < count; i++) {
dptr[i] = lptr[i] * rptr[i];
}
}
template <>
-void Bernoulli<float, lang::Cpp>(int count, float p, Blob* ret, Context* ctx) {
+void Bernoulli<float, lang::Cpp>(int count, float p, Blob *ret, Context *ctx) {
std::bernoulli_distribution distribution(p);
- float* ptr = static_cast<float*>(ret->mutable_data());
+ float *ptr = static_cast<float *>(ret->mutable_data());
for (int i = 0; i < count; i++) {
ptr[i] = distribution(ctx->random_generator) ? 1.0f : 0.0f;
}
}
template <>
-void Uniform<float, lang::Cpp>(int count, float low, float high, Blob* ret,
- Context* ctx) {
+void Uniform<float, lang::Cpp>(int count, float low, float high, Blob *ret,
+ Context *ctx) {
std::uniform_real_distribution<float> distribution(low, high);
- float* ptr = static_cast<float*>(ret->mutable_data());
+ float *ptr = static_cast<float *>(ret->mutable_data());
for (int i = 0; i < count; i++) {
ptr[i] = static_cast<float>(distribution(ctx->random_generator));
}
}
template <>
-void Gaussian<float, lang::Cpp>(int count, float mean, float std, Blob* ret,
- Context* ctx) {
+void Gaussian<float, lang::Cpp>(int count, float mean, float std, Blob *ret,
+ Context *ctx) {
std::normal_distribution<float> distribution(mean, std);
- float* ptr = static_cast<float*>(ret->mutable_data());
+ float *ptr = static_cast<float *>(ret->mutable_data());
for (int i = 0; i < count; i++) {
ptr[i] = static_cast<float>(distribution(ctx->random_generator));
}
}
+// follow the consistency guide of math API
+template <>
+void Div<float, lang::Cpp>(const size_t num, const float alpha, const Blob *in,
+ Blob *out, Context *ctx) {
+ float *outPtr = static_cast<float *>(out->mutable_data());
+ const float *inPtr = static_cast<const float *>(in->data());
+ for (size_t i = 0; i < num; i++) {
+ outPtr[i] = alpha / inPtr[i];
+ }
+}
+
+template <>
+void DGMM<float, lang::Cpp>(const bool side_right, const size_t nrow,
+ const size_t ncol, const Blob *M, const Blob *v,
+ Blob *out, Context *ctx) {
+ const float *MPtr = static_cast<const float *>(M->data());
+ const float *vPtr = static_cast<const float *>(v->data());
+ float *outPtr = static_cast<float *>(out->mutable_data());
+ if (side_right) {
+ for (size_t r = 0; r < nrow; r++) {
+ size_t offset = r * ncol;
+ for (size_t c = 0; c < ncol; c++) {
+ outPtr[offset + c] = MPtr[offset + c] * vPtr[c];
+ }
+ }
+ } else {
+ for (size_t r = 0; r < nrow; r++) {
+ size_t offset = r * ncol;
+ for (size_t c = 0; c < ncol; c++) {
+ outPtr[offset + c] = MPtr[offset + c] * vPtr[r];
+ }
+ }
+ }
+}
+
+template <>
+void Set<float, lang::Cpp>(const size_t num, const float x, Blob *out,
+ Context *ctx) {
+ float *outPtr = static_cast<float *>(out->mutable_data());
+ for (size_t i = 0; i < num; i++)
+ outPtr[i] = x;
+}
#ifdef USE_CBLAS
template <>
-void Dot<float, lang::Cpp>(int count, const Blob* lhs, const Blob* rhs,
- float* ret, Context* ctx) {
- float dptr = ret->mutable_data(), lptr = lhs->data(), rptr = rhs->data();
- *ret = cblas_sdot(count, lptr, 1, rptr, 1);
+void Dot<float, lang::Cpp>(const size_t num, const Blob *in1, const Blob *in2,
+ float *out, Context *ctx) {
+ const float *in1Ptr = static_cast<const float *>(in1->data());
+ const float *in2Ptr = static_cast<const float *>(in2->data());
+ *out = cblas_sdot(num, in1Ptr, 1, in2Ptr, 1);
}
-#endif
+template <>
+void GEMM<float, lang::Cpp>(const bool transA, const bool transB,
+ const size_t nrowA, const size_t ncolB,
+ const size_t ncolA, const float alpha,
+ const Blob *A, const Blob *B, const float beta,
+ Blob *C, Context *ctx) {
+ auto transa = transA ? CblasTrans : CblasNoTrans;
+ auto transb = transB ? CblasTrans : CblasNoTrans;
+ auto lda = transA ? nrowA : ncolA;
+ auto ldb = transB ? ncolA : ncolB;
+ auto ldc = ncolB;
+ const float *APtr = static_cast<const float *>(A->data());
+ const float *BPtr = static_cast<const float *>(B->data());
+ float *CPtr = static_cast<float *>(C->mutable_data());
+ cblas_sgemm(CblasRowMajor, transa, transb, nrowA, ncolB, ncolA, alpha, APtr,
+ lda, BPtr, ldb, beta, CPtr, ldc);
}
+#endif // USE_CBLAS
+
+
+} // namespace singa
+
#endif // SINGA_CORE_TENSOR_TENSOR_MATH_CPP_H_
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/870d1a97/src/core/tensor/tensor_math_cuda.h
----------------------------------------------------------------------
diff --git a/src/core/tensor/tensor_math_cuda.h b/src/core/tensor/tensor_math_cuda.h
index f26b5a3..26299ba 100644
--- a/src/core/tensor/tensor_math_cuda.h
+++ b/src/core/tensor/tensor_math_cuda.h
@@ -22,76 +22,129 @@
#ifdef USE_CUDA
#include "./tensor_math.h"
#include "./math_kernel.h"
+#include "singa/utils/cuda_utils.h"
#include "singa/core/common.h"
namespace singa {
// TODO(wangwei) Clean implementations following comments in tensor_math_cpp.h.
// TODO(wangwei) optimize using stream
-template<>
-void Add<float, lang::Cuda>(int count, const Blob* lhs, const Blob* rhs,
- Blob* ret, Context* ctx) {
- const float* a = static_cast<const float*> (lhs->data());
- const float* b = static_cast<const float*> (rhs->data());
- float* c = static_cast<float*> (ret->mutable_data());
+template <>
+void Add<float, lang::Cuda>(int count, const Blob *lhs, const Blob *rhs,
+ Blob *ret, Context *ctx) {
+ const float *a = static_cast<const float *>(lhs->data());
+ const float *b = static_cast<const float *>(rhs->data());
+ float *c = static_cast<float *>(ret->mutable_data());
cuda::add(count, a, b, c);
}
// TODO(wangwei) optimize using stream
-template<>
-void Sub<float, lang::Cuda>(int count, const Blob* lhs, const Blob* rhs,
- Blob* ret, Context* ctx) {
- const float* a = static_cast<const float*> (lhs->data());
- const float* b = static_cast<const float*> (rhs->data());
- float* c = static_cast<float*> (ret->mutable_data());
+template <>
+void Sub<float, lang::Cuda>(int count, const Blob *lhs, const Blob *rhs,
+ Blob *ret, Context *ctx) {
+ const float *a = static_cast<const float *>(lhs->data());
+ const float *b = static_cast<const float *>(rhs->data());
+ float *c = static_cast<float *>(ret->mutable_data());
cuda::sub(count, a, b, c);
}
template <>
-void EltwiseMult<float, lang::Cuda>(int count, const Blob* input, float x,
- Blob* ret, Context* ctx)
-{
- float* dptr = static_cast<float*>(ret->mutable_data());
- const float* lptr = static_cast<const float*>(input->data());
+void EltwiseMult<float, lang::Cuda>(int count, const Blob *input, float x,
+ Blob *ret, Context *ctx) {
+ float *dptr = static_cast<float *>(ret->mutable_data());
+ const float *lptr = static_cast<const float *>(input->data());
cuda::mult(count, lptr, x, dptr);
}
// TODO(wangwei) optimize using stream
template <>
-void Square<float, lang::Cuda>(int count, const Blob* input, Blob* ret,
- Context* ctx) {
- const float* in = static_cast<const float*>(input->data());
- float* out = static_cast<float*>(ret->mutable_data());
+void Square<float, lang::Cuda>(int count, const Blob *input, Blob *ret,
+ Context *ctx) {
+ const float *in = static_cast<const float *>(input->data());
+ float *out = static_cast<float *>(ret->mutable_data());
cuda::square(count, in, out);
}
+
// sum all elements of input into ret
// TODO(wangwei) optimize using stream
template <>
-void Sum<float, lang::Cuda>(int count, const Blob* input, float* ret,
- Context* ctx) {
- const float* in = static_cast<const float*>(input->data());
+void Sum<float, lang::Cuda>(int count, const Blob *input, float *ret,
+ Context *ctx) {
+ const float *in = static_cast<const float *>(input->data());
cuda::sum(count, in, ret);
}
// TODO(wangwei) optimize using stream
template <>
-void SumRows<float, lang::Cuda>(int nrow, int ncol, const Blob* input,
- Blob* ret, Context* ctx) {
- float* dptr = static_cast<float*>(ret->mutable_data());
- const float* in = static_cast<const float*>(input->data());
+void SumRows<float, lang::Cuda>(int nrow, int ncol, const Blob *input,
+ Blob *ret, Context *ctx) {
+ float *dptr = static_cast<float *>(ret->mutable_data());
+ const float *in = static_cast<const float *>(input->data());
cuda::sum_row(nrow, ncol, ncol, in, dptr);
}
// Sum the rows of the input matrix into a vector
// TODO(wangwei) optimize using stream
template <>
-void SumColumns<float, lang::Cuda>(int nrow, int ncol, const Blob* input,
- Blob* ret, Context* ctx) {
- float* dptr = static_cast<float*>(ret->mutable_data());
- const float* in = static_cast<const float*>(input->data());
+void SumColumns<float, lang::Cuda>(int nrow, int ncol, const Blob *input,
+ Blob *ret, Context *ctx) {
+ float *dptr = static_cast<float *>(ret->mutable_data());
+ const float *in = static_cast<const float *>(input->data());
cuda::sum_col(nrow, ncol, ncol, in, dptr);
}
+
+// follow the consistency guide of math API
+template <>
+void Div<float, lang::Cuda>(const size_t num, const float alpha, const Blob *in,
+ Blob *out, Context *ctx) {
+ float *outPtr = static_cast<float *>(out->mutable_data());
+ const float *inPtr = static_cast<const float *>(in->data());
+ cuda::Div(num, alpha, inPtr, outPtr, ctx->stream);
}
+template <>
+void Set<float, lang::Cuda>(const size_t num, const float x, Blob *out,
+ Context *ctx) {
+ float *outPtr = static_cast<float *>(out->mutable_data());
+ cuda::Set(num, x, outPtr, ctx->stream);
+}
+// NOTE: cublas uses column major order.
+// http://peterwittek.com/cublas-matrix-c-style.html
+template <>
+void DGMM<float, lang::Cuda>(const bool side_right, const size_t nrow,
+ const size_t ncol, const Blob *M, const Blob *v,
+ Blob *out, Context *ctx) {
+ auto handle = ctx->cublas_handle; // TODO(wangwei) set cudastream
+ const float *MPtr = static_cast<const float *>(M->data());
+ const float *vPtr = static_cast<const float *>(v->data());
+ float *outPtr = static_cast<float *>(out->mutable_data());
+ if (side_right) {
+ CUBLAS_CHECK(cublasSdgmm(handle, CUBLAS_SIDE_LEFT, ncol, nrow, MPtr, ncol,
+ vPtr, 1, outPtr, ncol));
+ } else {
+ CUBLAS_CHECK(cublasSdgmm(handle, CUBLAS_SIDE_RIGHT, ncol, nrow, MPtr, ncol,
+ vPtr, 1, outPtr, ncol));
+ }
+}
+// http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-gemm
+template <>
+void GEMM<float, lang::Cuda>(const bool transA, const bool transB,
+ const size_t nrowA, const size_t ncolB,
+ const size_t ncolA, const float alpha,
+ const Blob *A, const Blob *B, const float beta,
+ Blob *C, Context *ctx) {
+ auto transa = transA ? CUBLAS_OP_T : CUBLAS_OP_N;
+ auto transb = transB ? CUBLAS_OP_T : CUBLAS_OP_N;
+ int lda = transA ? nrowA : ncolA;
+ int ldb = transB ? ncolA : ncolB;
+ int ldc = ncolB;
+ const float *APtr = static_cast<const float *>(A->data());
+ const float *BPtr = static_cast<const float *>(B->data());
+ float *CPtr = static_cast<float *>(C->mutable_data());
+ auto handle = ctx->cublas_handle; // TODO(wangwei) set cudastream
+ CUBLAS_CHECK(cublasSgemm(handle, transb, transa, ncolB, nrowA, ncolA, &alpha,
+ BPtr, ldb, APtr, lda, &beta, CPtr, ldc));
+}
+} // namespace singa
#endif // USE_CUDA
#endif // SINGA_CORE_TENSOR_TENSOR_MATH_CUDA_H_
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/870d1a97/test/singa/test_cpp_math.cc
----------------------------------------------------------------------
diff --git a/test/singa/test_cpp_math.cc b/test/singa/test_cpp_math.cc
deleted file mode 100644
index 78c713f..0000000
--- a/test/singa/test_cpp_math.cc
+++ /dev/null
@@ -1,25 +0,0 @@
-/************************************************************
-*
-* Licensed to the Apache Software Foundation (ASF) under one
-* or more contributor license agreements. See the NOTICE file
-* distributed with this work for additional information
-* regarding copyright ownership. The ASF licenses this file
-* to you under the Apache License, Version 2.0 (the
-* "License"); you may not use this file except in compliance
-* with the License. You may obtain a copy of the License at
-*
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing,
-* software distributed under the License is distributed on an
-* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-* KIND, either express or implied. See the License for the
-* specific language governing permissions and limitations
-* under the License.
-*
-*************************************************************/
-
-#include "gtest/gtest.h"
-#include "../src/core/tensor/tensor_math_cpp.h"
-
-
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/870d1a97/test/singa/test_mse.cc
----------------------------------------------------------------------
diff --git a/test/singa/test_mse.cc b/test/singa/test_mse.cc
index 3ee6bf8..67f583c 100644
--- a/test/singa/test_mse.cc
+++ b/test/singa/test_mse.cc
@@ -23,7 +23,7 @@
#include "singa/core/tensor.h"
#include "singa/core/device.h"
#include "../src/model/loss/mse.h"
-
+#include "singa_config.h"
using singa::Tensor;
class TestMSE : public ::testing::Test {
protected:
@@ -39,6 +39,7 @@ class TestMSE : public ::testing::Test {
singa::Tensor p, t;
};
+#ifdef USE_CBLAS
TEST_F(TestMSE, CppForward) {
singa::MSE mse;
const Tensor& loss = mse.Forward(p, t);
@@ -54,6 +55,17 @@ TEST_F(TestMSE, CppForward) {
}
}
+TEST_F(TestMSE, CppBackward) {
+ singa::MSE mse;
+ mse.Forward(p, t);
+ const Tensor& grad = mse.Backward();
+
+ auto gdat = grad.data<const float*>();
+
+ for (size_t i = 0; i < grad.Size(); i++)
+ EXPECT_FLOAT_EQ(gdat[i], (1.0f / p.shape().at(0)) * (pdat[i] - tdat[i]));
+}
+#endif
TEST_F(TestMSE, CudaForward) {
singa::MSE mse;
singa::CudaGPU dev;
@@ -73,18 +85,6 @@ TEST_F(TestMSE, CudaForward) {
EXPECT_FLOAT_EQ(ldat[i], 0.5 * l);
}
}
-
-TEST_F(TestMSE, CppBackward) {
- singa::MSE mse;
- mse.Forward(p, t);
- const Tensor& grad = mse.Backward();
-
- auto gdat = grad.data<const float*>();
-
- for (size_t i = 0; i < grad.Size(); i++)
- EXPECT_FLOAT_EQ(gdat[i], (1.0f / p.shape().at(0)) * (pdat[i] - tdat[i]));
-}
-
TEST_F(TestMSE, CudaBackward) {
singa::MSE mse;
singa::CudaGPU dev;
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/870d1a97/test/singa/test_tensor.cc
----------------------------------------------------------------------
diff --git a/test/singa/test_tensor.cc b/test/singa/test_tensor.cc
index f9acdb0..bd039ad 100644
--- a/test/singa/test_tensor.cc
+++ b/test/singa/test_tensor.cc
@@ -111,5 +111,3 @@ TEST(TensorClass, T) {
EXPECT_EQ(t.shape()[1], o.shape()[0]);
}
-
-
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/870d1a97/test/singa/test_tensor_math.cc
----------------------------------------------------------------------
diff --git a/test/singa/test_tensor_math.cc b/test/singa/test_tensor_math.cc
index fb7e3e8..8368c55 100644
--- a/test/singa/test_tensor_math.cc
+++ b/test/singa/test_tensor_math.cc
@@ -5,10 +5,8 @@ using singa::Shape;
using singa::Device;
class TestTensorMath : public ::testing::Test {
- protected:
+protected:
virtual void SetUp() {
- const float dat1[] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f};
- const float dat2[] = {1.1f, 2.1f, 3.1f, 4.1f, 5.1f, 6.1f};
a.Reshape(singa::Shape{6});
b.Reshape(singa::Shape{6});
c.Reshape(singa::Shape{6, 1});
@@ -18,12 +16,14 @@ class TestTensorMath : public ::testing::Test {
b.CopyDataFromHostPtr<float>(dat2, 6);
}
Tensor a, b, c, d;
+ const float dat1[6] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f};
+ const float dat2[6] = {1.1f, 2.1f, 3.1f, 4.1f, 5.1f, 6.1f};
};
TEST_F(TestTensorMath, MemberAddTensor) {
Tensor aa = a.Clone();
aa += a;
- const float* dptr = aa.data<const float*>();
+ const float *dptr = aa.data<const float *>();
EXPECT_FLOAT_EQ(2.0f, dptr[0]);
EXPECT_FLOAT_EQ(4.0f, dptr[1]);
EXPECT_FLOAT_EQ(6.0f, dptr[2]);
@@ -31,40 +31,467 @@ TEST_F(TestTensorMath, MemberAddTensor) {
// check p is initialized to 0
Tensor p(Shape{6});
p += aa;
- const float* dptr1 = p.data<const float*>();
+ const float *dptr1 = p.data<const float *>();
EXPECT_FLOAT_EQ(2.0f, dptr1[0]);
EXPECT_FLOAT_EQ(4.0f, dptr1[1]);
EXPECT_FLOAT_EQ(6.0f, dptr1[2]);
a += b;
- const float* dptr2 = a.data<const float*>();
+ const float *dptr2 = a.data<const float *>();
EXPECT_FLOAT_EQ(2.1f, dptr2[0]);
EXPECT_FLOAT_EQ(4.1f, dptr2[1]);
EXPECT_FLOAT_EQ(6.1f, dptr2[2]);
EXPECT_FLOAT_EQ(12.1f, dptr2[5]);
}
-
TEST_F(TestTensorMath, AddTensors) {
Tensor ret(a.shape(), a.device(), a.data_type());
Add(a, b, &ret);
- const float* dptr = ret.data<const float*>();
+ const float *dptr = ret.data<const float *>();
EXPECT_FLOAT_EQ(2.1f, dptr[0]);
EXPECT_FLOAT_EQ(4.1f, dptr[1]);
EXPECT_FLOAT_EQ(6.1f, dptr[2]);
EXPECT_FLOAT_EQ(12.1f, dptr[5]);
const Tensor d = a + b;
- const float* dptr2 = d.data<const float*>();
+ const float *dptr2 = d.data<const float *>();
EXPECT_FLOAT_EQ(2.1f, dptr2[0]);
EXPECT_FLOAT_EQ(4.1f, dptr2[1]);
EXPECT_FLOAT_EQ(6.1f, dptr2[2]);
EXPECT_FLOAT_EQ(12.1f, dptr2[5]);
Add(a, b, &a);
- const float* dptr1 = a.data<const float*>();
+ const float *dptr1 = a.data<const float *>();
EXPECT_FLOAT_EQ(2.1f, dptr1[0]);
EXPECT_FLOAT_EQ(4.1f, dptr1[1]);
EXPECT_FLOAT_EQ(6.1f, dptr1[2]);
EXPECT_FLOAT_EQ(12.1f, dptr1[5]);
}
+
+TEST_F(TestTensorMath, SetValue) {
+ Tensor t(Shape{4});
+ t.SetValue(0.3f);
+ const float *ptr = t.data<const float *>();
+ for (int i = 0; i < 4; i++)
+ EXPECT_FLOAT_EQ(ptr[i], 0.3f);
+}
+
+TEST_F(TestTensorMath, Reshape) {
+ Tensor t(Shape{4});
+ t.SetValue(0.3f);
+ Tensor p = Reshape(t, Shape{4, 1});
+ const float *ptr = t.data<const float *>();
+ EXPECT_EQ(p.shape(0), 4u);
+ EXPECT_EQ(p.shape(1), 1u);
+ for (int i = 0; i < 4; i++)
+ EXPECT_FLOAT_EQ(ptr[i], 0.3f);
+}
+#ifdef USE_CBLAS
+TEST_F(TestTensorMath, MultCpp) {
+ const float x[4] = {1.0f, 2.0f, 3.0f, 4.0f};
+ Tensor t(Shape{2, 2});
+ t.CopyDataFromHostPtr(x, 4);
+ d.CopyDataFromHostPtr(dat1, 6);
+ Tensor C = Mult(d, t);
+ const float *xptr = C.data<const float *>();
+ for (int i = 0; i < 3; i++) {
+ for (int j = 0; j < 2; j++) {
+ float tmp = 0;
+ for (int k = 0; k < 2; k++) {
+ tmp += dat1[i * 2 + k] * x[k * 2 + j];
+ }
+ EXPECT_FLOAT_EQ(xptr[i * 2 + j], tmp);
+ }
+ }
+ const float y[8] = {1.0f, 2.0f, 3.0f, 4.0f, 1.1f, 2.1f, 3.1f, 4.1f};
+ Tensor s(Shape{4, 2});
+ s.CopyDataFromHostPtr(y, 8);
+ const float *sPtr = s.data<const float *>();
+ for (int i = 0; i < 8; i++)
+ EXPECT_FLOAT_EQ(sPtr[i], y[i]);
+ Tensor D = Mult(d, s.T());
+ const float *DPtr = D.data<const float *>();
+ for (int i = 0; i < 3; i++) {
+ for (int j = 0; j < 4; j++) {
+ float tmp = 0;
+ for (int k = 0; k < 2; k++) {
+ tmp += dat1[i * 2 + k] * y[j * 2 + k];
+ }
+ EXPECT_FLOAT_EQ(DPtr[i * 4 + j], tmp);
+ }
+ }
+ Tensor p(Shape{4, 1});
+ p.CopyDataFromHostPtr(x, 4);
+ Tensor q(Shape{1, 4});
+ q.SetValue(1.0f);
+ Tensor o(Shape{4, 4});
+
+ Mult(p, q, &o);
+ const float *oPtr = o.data<const float *>();
+ for (int i = 0; i < 4; i++) {
+ for (int j = 0; j < 4; j++) {
+ EXPECT_FLOAT_EQ(oPtr[i * 4 + j], x[i]);
+ }
+ }
+}
+
+TEST_F(TestTensorMath, AddColumnCpp) {
+ const float x[3] = {1.0f, 2.0f, 3.0f};
+ Tensor t(Shape{3});
+ t.CopyDataFromHostPtr(x, 3);
+ d.CopyDataFromHostPtr(dat1, 6);
+ AddColumn(t, &d);
+ const float *xptr = d.data<const float *>();
+ for (int i = 0; i < 3; i++) {
+ for (int j = 0; j < 2; j++) {
+ EXPECT_FLOAT_EQ(xptr[i * 2 + j], dat1[i * 2 + j] + x[i]);
+ }
+ }
+}
+TEST_F(TestTensorMath, SubColumnCpp) {
+ const float x[3] = {1.0f, 2.0f, 3.0f};
+ Tensor t(Shape{3});
+ t.CopyDataFromHostPtr(x, 3);
+ d.CopyDataFromHostPtr(dat1, 6);
+ SubColumn(t, &d);
+ const float *xptr = d.data<const float *>();
+ for (int i = 0; i < 3; i++) {
+ for (int j = 0; j < 2; j++) {
+ EXPECT_FLOAT_EQ(xptr[i * 2 + j], dat1[i * 2 + j] - x[i]);
+ }
+ }
+}
+
+
+TEST_F(TestTensorMath, DivColumnCpp) {
+ const float x[3] = {1.0f, 2.0f, 3.0f};
+ Tensor t(Shape{3});
+ t.CopyDataFromHostPtr(x, 3);
+ d.CopyDataFromHostPtr(dat1, 6);
+ DivColumn(t, &d);
+ const float *xptr = d.data<const float *>();
+ for (int i = 0; i < 3; i++) {
+ for (int j = 0; j < 2; j++) {
+ EXPECT_FLOAT_EQ(xptr[i * 2 + j], dat1[i * 2 + j] / x[i]);
+ }
+ }
+}
+
+
+TEST_F(TestTensorMath, AddRowCpp) {
+ const float x[2] = {1.1f, 2.1f};
+ Tensor t(Shape{2});
+ t.CopyDataFromHostPtr(x, 2);
+ d.CopyDataFromHostPtr(dat1, 6);
+ AddRow(t, &d);
+ const float *xptr = d.data<const float *>();
+ for (int i = 0; i < 3; i++) {
+ for (int j = 0; j < 2; j++) {
+ EXPECT_FLOAT_EQ(xptr[i * 2 + j], dat1[i * 2 + j] + x[j]);
+ }
+ }
+}
+
+
+TEST_F(TestTensorMath, SubRowCpp) {
+ const float x[2] = {1.1f, 2.1f};
+ Tensor t(Shape{2});
+ t.CopyDataFromHostPtr(x, 2);
+ d.CopyDataFromHostPtr(dat1, 6);
+ SubRow(t, &d);
+ const float *xptr = d.data<const float *>();
+ for (int i = 0; i < 3; i++) {
+ for (int j = 0; j < 2; j++) {
+ EXPECT_FLOAT_EQ(xptr[i * 2 + j], dat1[i * 2 + j] - x[j]);
+ }
+ }
+}
+
+
+TEST_F(TestTensorMath, MultRowCpp) {
+ const float x[2] = {1.1f, 2.1f};
+ Tensor t(Shape{2});
+ t.CopyDataFromHostPtr(x, 2);
+ d.CopyDataFromHostPtr(dat1, 6);
+ MultRow(t, &d);
+ const float *xptr = d.data<const float *>();
+ for (int i = 0; i < 3; i++) {
+ for (int j = 0; j < 2; j++) {
+ EXPECT_FLOAT_EQ(xptr[i * 2 + j], dat1[i * 2 + j] * x[j]);
+ }
+ }
+}
+
+
+TEST_F(TestTensorMath, SumRowsCpp) {
+ Tensor t(Shape{2});
+ d.CopyDataFromHostPtr(dat1, 6);
+ SumRows(d, &t);
+ const float *tptr = t.data<const float *>();
+ for (int i = 0; i < 2; i++) {
+ float tmp = 0;
+ for (int j = 0; j < 3; j++) {
+ tmp += dat1[j * 2 + i];
+ }
+ EXPECT_FLOAT_EQ(tptr[i], tmp);
+ }
+}
+
+
+TEST_F(TestTensorMath, SumColumnsCpp) {
+ Tensor t(Shape{3});
+ d.CopyDataFromHostPtr(dat1, 6);
+ SumColumns(d, &t);
+ const float *tptr = t.data<const float *>();
+ for (int i = 0; i < 3; i++) {
+ float tmp = 0;
+ for (int j = 0; j < 2; j++) {
+ tmp += dat1[i * 2 + j];
+ }
+ EXPECT_FLOAT_EQ(tptr[i], tmp);
+ }
+}
+#endif
+TEST_F(TestTensorMath, MultCuda) {
+ const float x[4] = {1.0f, 2.0f, 3.0f, 4.0f};
+ singa::CudaGPU dev;
+ Tensor t(Shape{2, 2}, &dev);
+ t.CopyDataFromHostPtr(x, 4);
+ d.ToDevice(&dev);
+ d.CopyDataFromHostPtr(dat1, 6);
+ Tensor C = Mult(d, t);
+ C.ToHost();
+ const float *xptr = C.data<const float *>();
+ for (int i = 0; i < 3; i++) {
+ for (int j = 0; j < 2; j++) {
+ float tmp = 0;
+ for (int k = 0; k < 2; k++) {
+ tmp += dat1[i * 2 + k] * x[k * 2 + j];
+ }
+ EXPECT_FLOAT_EQ(xptr[i * 2 + j], tmp);
+ }
+ }
+
+ const float y[8] = {1.0f, 2.0f, 3.0f, 4.0f, 1.1f, 2.1f, 3.1f, 4.1f};
+ Tensor s(Shape{4, 2}, &dev);
+ s.CopyDataFromHostPtr(y, 8);
+ Tensor D = Mult(d, s.T());
+ D.ToHost();
+ const float *DPtr = D.data<const float *>();
+ for (int i = 0; i < 3; i++) {
+ for (int j = 0; j < 4; j++) {
+ float tmp = 0;
+ for (int k = 0; k < 2; k++) {
+ tmp += dat1[i * 2 + k] * y[j * 2 + k];
+ }
+ EXPECT_FLOAT_EQ(DPtr[i * 4 + j], tmp);
+ }
+ }
+ Tensor p(Shape{4, 1}, &dev);
+ p.CopyDataFromHostPtr(x, 4);
+ Tensor q(Shape{1, 4}, &dev);
+ q.SetValue(1.0f);
+ Tensor o(Shape{4, 4}, &dev);
+
+ Mult(p, q, &o);
+ o.ToHost();
+ const float *oPtr = o.data<const float *>();
+ for (int i = 0; i < 4; i++) {
+ for (int j = 0; j < 4; j++) {
+ EXPECT_FLOAT_EQ(oPtr[i * 4 + j], x[i]);
+ }
+ }
+}
+
+TEST_F(TestTensorMath, AddColumnCuda) {
+ const float x[3] = {1.0f, 2.0f, 3.0f};
+ singa::CudaGPU dev;
+ Tensor t(Shape{3}, &dev);
+ t.CopyDataFromHostPtr(x, 3);
+ d.CopyDataFromHostPtr(dat1, 6);
+ d.ToDevice(&dev);
+ AddColumn(t, &d);
+ d.ToHost();
+ const float *xptr = d.data<const float *>();
+ for (int i = 0; i < 3; i++) {
+ for (int j = 0; j < 2; j++) {
+ EXPECT_FLOAT_EQ(xptr[i * 2 + j], dat1[i * 2 + j] + x[i]);
+ }
+ }
+}
+
+
+TEST_F(TestTensorMath, SubColumnCuda) {
+ const float x[3] = {1.0f, 2.0f, 3.0f};
+ singa::CudaGPU dev;
+ Tensor t(Shape{3}, &dev);
+ t.CopyDataFromHostPtr(x, 3);
+ d.CopyDataFromHostPtr(dat1, 6);
+ d.ToDevice(&dev);
+ SubColumn(t, &d);
+ d.ToHost();
+ const float *xptr = d.data<const float *>();
+ for (int i = 0; i < 3; i++) {
+ for (int j = 0; j < 2; j++) {
+ EXPECT_FLOAT_EQ(xptr[i * 2 + j], dat1[i * 2 + j] - x[i]);
+ }
+ }
+}
+
+TEST_F(TestTensorMath, MultColumnCpp) {
+ const float x[3] = {1.0f, 2.0f, 3.0f};
+ Tensor t(Shape{3});
+ t.CopyDataFromHostPtr(x, 3);
+ d.CopyDataFromHostPtr(dat1, 6);
+ MultColumn(t, &d);
+ const float *xptr = d.data<const float *>();
+ for (int i = 0; i < 3; i++) {
+ for (int j = 0; j < 2; j++) {
+ EXPECT_FLOAT_EQ(xptr[i * 2 + j], dat1[i * 2 + j] * x[i]);
+ }
+ }
+}
+
+TEST_F(TestTensorMath, MultColumnCuda) {
+ const float x[3] = {1.0f, 2.0f, 3.0f};
+ singa::CudaGPU dev;
+ Tensor t(Shape{3}, &dev);
+ t.CopyDataFromHostPtr(x, 3);
+ d.CopyDataFromHostPtr(dat1, 6);
+ d.ToDevice(&dev);
+ MultColumn(t, &d);
+ d.ToHost();
+ const float *xptr = d.data<const float *>();
+ for (int i = 0; i < 3; i++) {
+ for (int j = 0; j < 2; j++) {
+ EXPECT_FLOAT_EQ(xptr[i * 2 + j], dat1[i * 2 + j] * x[i]);
+ }
+ }
+}
+TEST_F(TestTensorMath, DivColumnCuda) {
+ const float x[3] = {1.0f, 2.0f, 3.0f};
+ singa::CudaGPU dev;
+ Tensor t(Shape{3}, &dev);
+ t.CopyDataFromHostPtr(x, 3);
+ d.CopyDataFromHostPtr(dat1, 6);
+ d.ToDevice(&dev);
+ DivColumn(t, &d);
+ d.ToHost();
+ const float *xptr = d.data<const float *>();
+ for (int i = 0; i < 3; i++) {
+ for (int j = 0; j < 2; j++) {
+ EXPECT_FLOAT_EQ(xptr[i * 2 + j], dat1[i * 2 + j] / x[i]);
+ }
+ }
+}
+TEST_F(TestTensorMath, AddRowCuda) {
+ const float x[2] = {1.1f, 2.1f};
+ singa::CudaGPU dev;
+ Tensor t(Shape{2}, &dev);
+ t.CopyDataFromHostPtr(x, 2);
+ d.CopyDataFromHostPtr(dat1, 6);
+ d.ToDevice(&dev);
+ AddRow(t, &d);
+ d.ToHost();
+ const float *xptr = d.data<const float *>();
+ for (int i = 0; i < 3; i++) {
+ for (int j = 0; j < 2; j++) {
+ EXPECT_FLOAT_EQ(xptr[i * 2 + j], dat1[i * 2 + j] + x[j]);
+ }
+ }
+}
+TEST_F(TestTensorMath, SubRowCuda) {
+ const float x[2] = {1.1f, 2.1f};
+ singa::CudaGPU dev;
+ Tensor t(Shape{2}, &dev);
+ t.CopyDataFromHostPtr(x, 2);
+ d.CopyDataFromHostPtr(dat1, 6);
+ d.ToDevice(&dev);
+ SubRow(t, &d);
+ d.ToHost();
+ const float *xptr = d.data<const float *>();
+ for (int i = 0; i < 3; i++) {
+ for (int j = 0; j < 2; j++) {
+ EXPECT_FLOAT_EQ(xptr[i * 2 + j], dat1[i * 2 + j] - x[j]);
+ }
+ }
+}
+TEST_F(TestTensorMath, MultRowCuda) {
+ const float x[2] = {1.1f, 2.1f};
+ singa::CudaGPU dev;
+ Tensor t(Shape{2}, &dev);
+ t.CopyDataFromHostPtr(x, 2);
+ d.CopyDataFromHostPtr(dat1, 6);
+ d.ToDevice(&dev);
+ MultRow(t, &d);
+ d.ToHost();
+ const float *xptr = d.data<const float *>();
+ for (int i = 0; i < 3; i++) {
+ for (int j = 0; j < 2; j++) {
+ EXPECT_FLOAT_EQ(xptr[i * 2 + j], dat1[i * 2 + j] * x[j]);
+ }
+ }
+}
+
+TEST_F(TestTensorMath, DivRowCpp) {
+ const float x[2] = {1.1f, 2.1f};
+ Tensor t(Shape{2});
+ t.CopyDataFromHostPtr(x, 2);
+ d.CopyDataFromHostPtr(dat1, 6);
+ DivRow(t, &d);
+ const float *xptr = d.data<const float *>();
+ for (int i = 0; i < 3; i++) {
+ for (int j = 0; j < 2; j++) {
+ EXPECT_FLOAT_EQ(xptr[i * 2 + j], dat1[i * 2 + j] / x[j]);
+ }
+ }
+}
+
+TEST_F(TestTensorMath, DivRowCuda) {
+ const float x[2] = {1.1f, 2.1f};
+ singa::CudaGPU dev;
+ Tensor t(Shape{2}, &dev);
+ t.CopyDataFromHostPtr(x, 2);
+ d.CopyDataFromHostPtr(dat1, 6);
+ d.ToDevice(&dev);
+ DivRow(t, &d);
+ d.ToHost();
+ const float *xptr = d.data<const float *>();
+ for (int i = 0; i < 3; i++) {
+ for (int j = 0; j < 2; j++) {
+ EXPECT_FLOAT_EQ(xptr[i * 2 + j], dat1[i * 2 + j] / x[j]);
+ }
+ }
+}
+TEST_F(TestTensorMath, SumRowsCuda) {
+ singa::CudaGPU dev;
+ Tensor t(Shape{2}, &dev);
+ d.CopyDataFromHostPtr(dat1, 6);
+ d.ToDevice(&dev);
+ SumRows(d, &t);
+ t.ToHost();
+ const float *tptr = t.data<const float *>();
+ for (int i = 0; i < 2; i++) {
+ float tmp = 0;
+ for (int j = 0; j < 3; j++) {
+ tmp += dat1[j * 2 + i];
+ }
+ EXPECT_FLOAT_EQ(tptr[i], tmp);
+ }
+}
+TEST_F(TestTensorMath, SumColumnCuda) {
+ singa::CudaGPU dev;
+ Tensor t(Shape{3}, &dev);
+ d.CopyDataFromHostPtr(dat1, 6);
+ d.ToDevice(&dev);
+ SumColumns(d, &t);
+ t.ToHost();
+ const float *tptr = t.data<const float *>();
+ for (int i = 0; i < 3; i++) {
+ float tmp = 0;
+ for (int j = 0; j < 2; j++) {
+ tmp += dat1[i * 2 + j];
+ }
+ EXPECT_FLOAT_EQ(tptr[i], tmp);
+ }
+}