You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@singa.apache.org by wa...@apache.org on 2016/04/07 15:12:17 UTC
[1/4] incubator-singa git commit: SINGA-80 New Blob Level and Address
Level Math Operation Interface
Repository: incubator-singa
Updated Branches:
refs/heads/master 369d87960 -> 8329aa0c3
SINGA-80 New Blob Level and Address Level Math Operation Interface
-------
Add gtest cases for the blob-level functions
and fix bugs in math_blob.h
Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/247002d3
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/247002d3
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/247002d3
Branch: refs/heads/master
Commit: 247002d3027b5ac45dda735f62cc2f8df7c2dcac
Parents: 0233049
Author: jinyangturbo <pk...@gmail.com>
Authored: Wed Mar 16 00:16:01 2016 -0700
Committer: Wei Wang <wa...@comp.nus.edu.sg>
Committed: Sat Apr 2 21:33:27 2016 +0800
----------------------------------------------------------------------
include/singa/utils/blob.h | 15 +
include/singa/utils/math_blob.h | 70 +++--
src/test/test_math.cc | 587 +++++++++++++++++++++++++++++++++++
3 files changed, 649 insertions(+), 23 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/247002d3/include/singa/utils/blob.h
----------------------------------------------------------------------
diff --git a/include/singa/utils/blob.h b/include/singa/utils/blob.h
index 3351cff..9defeac 100644
--- a/include/singa/utils/blob.h
+++ b/include/singa/utils/blob.h
@@ -276,6 +276,21 @@ class Blob {
ret.transpose_ = !transpose_;
return ret;
}
+ // Check whether two blobs have exactly the same content.
+ bool check_equal(Blob* other) const {
+ if (transpose() != other->transpose()) return false;
+ if (count() != other->count()) return false;
+ if (shape().size() != other->shape().size()) return false;
+ for (int i = 0; i < shape().size(); i++) {
+ if (shape(i) != other->shape(i)) return false;
+ }
+ const Dtype * a = cpu_data();
+ const Dtype * b = other->cpu_data();
+ for (int i = 0; i < count(); i++) {
+ if (a[i] != b[i]) return false;
+ }
+ return true;
+ }
protected:
std::shared_ptr<SyncedMemory> data_ = nullptr;
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/247002d3/include/singa/utils/math_blob.h
----------------------------------------------------------------------
diff --git a/include/singa/utils/math_blob.h b/include/singa/utils/math_blob.h
index 55ba44b..35985f1 100644
--- a/include/singa/utils/math_blob.h
+++ b/include/singa/utils/math_blob.h
@@ -32,14 +32,16 @@
#include "singa/utils/context.h"
namespace singa {
+
+#define NO_GPU LOG(FATAL) << "Not compiled with GPU";
/**
* \file math_blob.h has not been thoroughly tested.
* Only GEMM(), MMDot(), MVSumRow() and MVAddRow() are used now.
*/
/************* BLAS level 1 *****************/
/**
- * Scale each element of A with alpha, and put the result into B.
- * Bi = alpha*Ai
+ * Scale each element of A with alpha, and put the result into A.
+ * Ai = alpha*Ai
* Use blas scale internally.
*/
template<typename Dtype>
@@ -52,6 +54,8 @@ void Scale(Dtype alpha, Blob<Dtype> * B) {
#ifdef USE_GPU
gpu_scale(context->cublas_handle(device), B->count(), alpha,
B->mutable_gpu_data());
+#else
+ NO_GPU;
#endif
}
}
@@ -70,7 +74,9 @@ void AXPY(Dtype alpha, const Blob<Dtype> & A, Blob<Dtype> * B) {
#ifdef USE_GPU
gpu_axpy(context->cublas_handle(device), A.count(), alpha, A.gpu_data(),
B->mutable_gpu_data());
-#endif // USE_GPU
+#else
+ NO_GPU;
+#endif
}
}
@@ -111,6 +117,8 @@ void GEMV(Dtype alpha, Dtype beta, const Blob<Dtype>& A,
#ifdef USE_GPU
gpu_gemv(context->cublas_handle(device), A.gpu_data(), B.gpu_data(), m, n,
alpha, beta, TranA, C->mutable_gpu_data());
+#else
+ NO_GPU;
#endif // USE_GPU
}
}
@@ -176,6 +184,8 @@ void GEMM(Dtype alpha, Dtype beta, const Blob<Dtype>& A, const Blob<Dtype>& B,
#ifdef USE_GPU
gpu_gemm(context->cublas_handle(device), A.gpu_data(), B.gpu_data(),
m, n, k, alpha, beta, TranA, TranB, C->mutable_gpu_data());
+#else
+ NO_GPU;
#endif // USE_GPU
}
}
@@ -216,9 +226,10 @@ Dtype VVDot(const Blob<Dtype> & A, const Blob<Dtype> & B) {
res = cpu_dot(A.cpu_data(), B.cpu_data(), n);
} else {
#ifdef USE_GPU
- // gpu part
res = gpu_dot(context->cublas_handle(device), A.gpu_data(), B.gpu_data(),
n);
+#else
+ NO_GPU;
#endif // USE_GPU
}
return res;
@@ -242,12 +253,14 @@ void OuterProduct(const Blob<Dtype>& A, const Blob<Dtype>& B, Blob<Dtype> * C) {
auto context = Singleton<Context>::Instance();
int device = context->device_id(std::this_thread::get_id());
if (device < 0) {
- cpu_gemm(A.cpu_data(), B.cpu_data(), m, n, 1, 1, 0, false, false,
- C->mutable_cpu_data());
+ cpu_gemm(A.cpu_data(), B.cpu_data(), m, n, 1, Dtype(1), Dtype(0), false,
+ false, C->mutable_cpu_data());
} else {
#ifdef USE_GPU
gpu_gemm(context->cublas_handle(device), A.gpu_data(), B.gpu_data(),
m, n, 1, 1, 0, false, false, C->mutable_gpu_data());
+#else
+ NO_GPU;
#endif // USE_GPU
}
}
@@ -268,7 +281,7 @@ void Map(const Blob<Dtype> & A, Blob<Dtype> * B) {
#ifdef USE_GPU
gpu_e_f<Op>(A.count(), A.gpu_data(), B->mutable_gpu_data());
#else
- LOG(ERROR) << "Not implemented";
+ NO_GPU;
#endif // USE_GPU
}
}
@@ -310,7 +323,7 @@ void Map(Dtype alpha, const Blob<Dtype>& A, Blob<Dtype>* B) {
#ifdef USE_GPU
gpu_e_f<Op>(A.count(), A.gpu_data(), alpha, B->mutable_gpu_data());
#else
- LOG(FATAL) << "Not implemented";
+ NO_GPU;
#endif // USE_GPU
}
}
@@ -328,9 +341,8 @@ void Map(Dtype alpha, const Blob<Dtype>& A, const Blob<Dtype>& B,
cpu_e_f<Op>(A.count(), alpha, A.cpu_data(), B->cpu_data(),
C->mutable_cpu_data());
} else {
-#ifdef USE_GPU
- LOG(ERROR) << "Not implemented";
-#endif // USE_GPU
+ // TODO(wangwei) implement gpu version.
+ NO_GPU;
}
}
@@ -353,7 +365,7 @@ void Copy(const Blob<Dtype>& A, Blob<Dtype>* B) {
CUDA_CHECK(cudaMemcpy(static_cast<Dtype*>(B->mutable_gpu_data()),
A.gpu_data(), sizeof(Dtype) * A.count(), cudaMemcpyDefault));
#else
- LOG(FATAL) << "Not implemented";
+ NO_GPU;
#endif
}
}
@@ -365,7 +377,7 @@ void Copy(const Blob<Dtype>& A, Blob<Dtype>* B) {
*/
template<typename Dtype>
void Add(Dtype alpha, const Blob<Dtype> & A, Blob<Dtype> * B) {
- Map<singa::op::Add<Dtype>>(alpha, A, B);
+ Map<singa::op::Add<Dtype>, Dtype>(alpha, A, B);
}
/**
@@ -385,7 +397,7 @@ void Add(const Blob<Dtype> & A, const Blob<Dtype> & B,
*/
template<typename Dtype>
void Sub(Dtype alpha, const Blob<Dtype> & A, Blob<Dtype>* B) {
- Map<singa::op::Sub<Dtype>>(alpha, A, B);
+ Map<singa::op::Sub<Dtype>, Dtype>(alpha, A, B);
}
/**
@@ -406,7 +418,7 @@ void Sub(const Blob<Dtype> & A, const Blob<Dtype> & B,
template<typename Dtype>
void Mult(const Blob<Dtype> & A, const Blob<Dtype> & B,
Blob<Dtype> * C) {
- Map<singa::op::Mult<Dtype>>(A, B, C);
+ Map<singa::op::Mult<Dtype>, Dtype>(A, B, C);
// TODO(wangwei) use MKL's vector func
}
@@ -417,7 +429,7 @@ void Mult(const Blob<Dtype> & A, const Blob<Dtype> & B,
template<typename Dtype>
void Div(const Blob<Dtype> & A, const Blob<Dtype> & B,
Blob<Dtype> * C) {
- Map<singa::op::Div<Dtype>>(A, B, C);
+ Map<singa::op::Div<Dtype>, Dtype>(A, B, C);
// TODO(wangwei) use MKL's vector func
}
/**
@@ -481,6 +493,8 @@ void MVAddCol(Dtype alpha, Dtype beta, const Blob<Dtype> & A, Blob<Dtype> * B) {
#ifdef USE_GPU
singa_gpu_add_vec_row(A.gpu_data(), B->gpu_data(), B->mutable_gpu_data(),
m, n, n);
+#else
+ NO_GPU;
#endif // USE_GPU
}
}
@@ -520,6 +534,8 @@ void MVAddRow(Dtype alpha, Dtype beta, const Blob<Dtype> & A, Blob<Dtype> * B) {
#ifdef USE_GPU
singa_gpu_add_vec_row(A.gpu_data(), B->gpu_data(), B->mutable_gpu_data(),
m, n, n);
+#else
+ NO_GPU;
#endif // USE_GPU
}
}
@@ -574,7 +590,8 @@ void MVSumCol(Dtype alpha, Dtype beta, const Blob<Dtype> & A, Blob<Dtype> * B) {
} else {
#ifdef USE_GPU
singa_gpu_sum_col(A.gpu_data(), B->mutable_gpu_data(), m, n, n);
- // gpu part (TODO check transpose case)
+#else
+ NO_GPU;
#endif // USE_GPU
}
}
@@ -599,7 +616,8 @@ void MVSumRow(Dtype alpha, Dtype beta, const Blob<Dtype> & A, Blob<Dtype> * B) {
} else {
#ifdef USE_GPU
singa_gpu_sum_row(A.gpu_data(), B->mutable_gpu_data(), m, n, n);
- // gpu part (TODO check transpose case)
+#else
+ NO_GPU;
#endif // USE_GPU
}
}
@@ -619,8 +637,9 @@ void Reduce2D(const Blob<Dtype> & A, Blob<Dtype> * B) {
cpu_reduce_f<Op>(A.cpu_data(), m, n, B->mutable_cpu_data());
} else {
#ifdef USE_GPU
- // gpu part
gpu_reduce_f<Op>(A.gpu_data(), m, n, B->mutable_gpu_data());
+#else
+ NO_GPU;
#endif // USE_GPU
}
}
@@ -640,6 +659,8 @@ void Expand2D(const Blob<Dtype> & A, Blob<Dtype> * B) {
} else {
#ifdef USE_GPU
gpu_expand_f<Op>(A.gpu_data(), m, n, B->mutable_gpu_data());
+#else
+ NO_GPU;
#endif // USE_GPU
}
}
@@ -659,6 +680,8 @@ Dtype Asum(const Blob<Dtype>& A) {
#ifdef USE_GPU
ret = gpu_asum(context->cublas_handle(device), A.count(), A.gpu_data(), 1)
/ A.count();
+#else
+ NO_GPU;
#endif
}
return ret;
@@ -679,7 +702,7 @@ void SampleUniform(Dtype low, Dtype high, Blob<Dtype>* A) {
gpu_sample_uniform(context->curand_generator(thread), A->count(), low, high,
A->mutable_gpu_data());
#else
- LOG(FATAL) << "Not implemented";
+ NO_GPU;
#endif
}
}
@@ -696,6 +719,8 @@ void SampleGaussian(Dtype mean, Dtype std, Blob<Dtype>* A) {
#ifdef USE_GPU
gpu_sample_gaussian(context->curand_generator(thread), A->count(),
mean, std, A->mutable_gpu_data());
+#else
+ NO_GPU;
#endif
}
}
@@ -712,8 +737,7 @@ void Softmax(int nb_rows, const Blob<Dtype>& A, Blob<Dtype>* B) {
cpu_softmax(nb_rows, A.count() / nb_rows, A.cpu_data(),
B->mutable_cpu_data());
} else {
-#ifdef USE_GPU
-#endif // USE_GPU
+ NO_GPU;
}
}
@@ -727,7 +751,7 @@ void Zero(Blob<Dtype>* B) {
#ifdef USE_GPU
cudaMemset(B->mutable_gpu_data(), 0, B->count() * sizeof(float));
#else
- LOG(FATAL) << "Not implemented";
+ NO_GPU;
#endif // USE_GPU
}
}
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/247002d3/src/test/test_math.cc
----------------------------------------------------------------------
diff --git a/src/test/test_math.cc b/src/test/test_math.cc
index 2627b2e..6bb6001 100644
--- a/src/test/test_math.cc
+++ b/src/test/test_math.cc
@@ -20,6 +20,8 @@
*************************************************************/
#include <thread>
#include "gtest/gtest.h"
+#include "singa/utils/blob.h"
+#include "singa/utils/math_blob.h"
#include "singa/utils/math_addr.h"
#include "singa/utils/math_kernel.h"
#include "singa/utils/singa_op.h"
@@ -34,6 +36,591 @@
using namespace singa;
using namespace std;
+TEST(MathBlobTest, TestScale) {
+ Blob<float> *A = new Blob<float>(10);
+ Blob<float> *B = new Blob<float>(10);
+ A->SetValue(2);
+ B->SetValue(6);
+ Scale<float>(3.0, A);
+ ASSERT_EQ(A->check_equal(B), true);
+}
+
+TEST(MathBlobTest, TestAXPY) {
+ Blob<float> * A = new Blob<float>(10);
+ Blob<float> * B = new Blob<float>(10);
+ Blob<float> * C = new Blob<float>(10);
+ Blob<float> * D = new Blob<float>(10);
+ A->SetValue(2);
+ B->SetValue(3);
+ C->SetValue(7);
+ D->SetValue(2);
+ AXPY<float>(2.0, *A, B);
+ ASSERT_EQ(B->check_equal(C), true);
+ ASSERT_EQ(A->check_equal(D), true);
+}
+
+TEST(MathBlobTest, TestGEMV) {
+ float A[5][5] = {};
+ float AT[5][5] = {};
+ float B[5] = {};
+ float Res[5] = {};
+ for(int i = 0; i < 5; i++) {
+ for(int j = 0; j < 5; j++) {
+ A[i][j] = i * j + i - j;
+ AT[j][i] = i * j + i - j;
+ }
+ B[i] = 5*i + 3;
+ Res[i] = i;
+ }
+
+ Blob<float> * BlobA = new Blob<float>(5, 5);
+ Blob<float> * BlobAT = new Blob<float>(5, 5);
+ Blob<float> * BlobB = new Blob<float>(5);
+ Blob<float> * BlobAB = new Blob<float>(5);
+ Blob<float> * BlobATB = new Blob<float>(5);
+ Blob<float> * BlobRes = new Blob<float>(5);
+
+ BlobA->set_cpu_data(A[0]);
+ BlobAT->set_cpu_data(AT[0]);
+ BlobAT->set_transpose(true);
+ BlobB->set_cpu_data(B);
+ BlobAB->set_cpu_data(Res);
+ BlobATB->set_cpu_data(Res);
+
+ for (int i = 0; i < 5; i++) {
+ for(int j = 0; j < 5; j++) {
+ Res[i] += 2*A[i][j] * B[j];
+ }
+ }
+
+ BlobRes->set_cpu_data(Res);
+
+ GEMV<float>(2, 1, *BlobA, *BlobB, BlobAB);
+ GEMV<float>(2, 1, *BlobAT, *BlobB, BlobATB);
+
+ ASSERT_EQ(BlobAB->check_equal(BlobRes), true);
+ ASSERT_EQ(BlobATB->check_equal(BlobRes), true);
+}
+
+TEST(MathBlobTest, TestMVDot) {
+ float A[5][5] = {};
+ float AT[5][5] = {};
+ float B[5] = {};
+ float Res[5] = {};
+ for(int i = 0; i < 5; i++) {
+ for(int j = 0; j < 5; j++) {
+ A[i][j] = i * j + i - j;
+ AT[j][i] = i * j + i - j;
+ }
+ B[i] = 5*i -2;
+ Res[i] = 0;
+ }
+
+ Blob<float> * BlobA = new Blob<float>(5, 5);
+ Blob<float> * BlobAT = new Blob<float>(5, 5);
+ Blob<float> * BlobB = new Blob<float>(5);
+ Blob<float> * BlobAB = new Blob<float>(5);
+ Blob<float> * BlobATB = new Blob<float>(5);
+ Blob<float> * BlobRes = new Blob<float>(5);
+
+ BlobA->set_cpu_data(A[0]);
+ BlobAT->set_cpu_data(AT[0]);
+ BlobAT->set_transpose(true);
+ BlobB->set_cpu_data(B);
+ BlobAB->set_cpu_data(Res);
+ BlobATB->set_cpu_data(Res);
+
+ for (int i = 0; i < 5; i++) {
+ for(int j = 0; j < 5; j++) {
+ Res[i] += A[i][j] * B[j];
+ }
+ }
+
+ BlobRes->set_cpu_data(Res);
+
+ MVDot<float>(*BlobA, *BlobB, BlobAB);
+ MVDot<float>(*BlobAT, *BlobB, BlobATB);
+
+ const float * addrRes = BlobAB->cpu_data();
+ for (int i = 0; i < 5; i++) {
+ ASSERT_EQ(addrRes[i], Res[i]);
+ }
+ ASSERT_EQ(BlobAB->check_equal(BlobRes), true);
+ ASSERT_EQ(BlobAB->check_equal(BlobRes), true);
+ ASSERT_EQ(BlobATB->check_equal(BlobRes), true);
+}
+
+TEST(MathBlobTest, TestGEMM) {
+ float A[5][5] = {};
+ float AT[5][5] = {};
+ float B[5][5]= {};
+ float BT[5][5]= {};
+ float Res[5][5]= {};
+ for(int i = 0; i < 5; i++) {
+ for(int j = 0; j < 5; j++) {
+ A[i][j] = i * j + i - j;
+ AT[j][i] = i * j + i - j;
+ B[i][j] = - i * j + i * i - j * j;
+ BT[j][i] = - i * j + i * i - j * j;
+ Res[i][j] = i * j + i * i + j * j;
+ }
+ }
+
+ Blob<float> * BlobA = new Blob<float>(5, 5);
+ BlobA->set_cpu_data(A[0]);
+ Blob<float> * BlobAT = new Blob<float>(5, 5);
+ BlobAT->set_cpu_data(AT[0]);
+ BlobAT->set_transpose(true);
+ Blob<float> * BlobB = new Blob<float>(5, 5);
+ BlobB->set_cpu_data(B[0]);
+ Blob<float> * BlobBT = new Blob<float>(5, 5);
+ BlobBT->set_cpu_data(BT[0]);
+ BlobBT->set_transpose(true);
+ Blob<float> * BlobAB = new Blob<float>(5, 5);
+ BlobAB->set_cpu_data(Res[0]);
+ Blob<float> * BlobABT = new Blob<float>(5, 5);
+ BlobABT->set_cpu_data(Res[0]);
+ Blob<float> * BlobATB = new Blob<float>(5, 5);
+ BlobATB->set_cpu_data(Res[0]);
+ Blob<float> * BlobATBT = new Blob<float>(5, 5);
+ BlobATBT->set_cpu_data(Res[0]);
+
+ for (int i = 0; i < 5; i++) {
+ for (int j = 0; j < 5; j++) {
+ Res[i][j] *= 2;
+ for (int k = 0; k < 5; k++) {
+ Res[i][j] += 3 * A[i][k]*B[k][j];
+ }
+ }
+ }
+
+ Blob<float> * BlobRes = new Blob<float>(5, 5);
+ BlobRes->set_cpu_data(Res[0]);
+
+ GEMM<float>(3, 2, *BlobA, *BlobB, BlobAB);
+ GEMM<float>(3, 2, *BlobA, *BlobBT, BlobABT);
+ GEMM<float>(3, 2, *BlobAT, *BlobB, BlobATB);
+ GEMM<float>(3, 2, *BlobAT, *BlobBT, BlobATBT);
+
+ ASSERT_EQ(BlobAB->check_equal(BlobRes), true);
+ ASSERT_EQ(BlobATB->check_equal(BlobRes), true);
+ ASSERT_EQ(BlobABT->check_equal(BlobRes), true);
+ ASSERT_EQ(BlobATBT->check_equal(BlobRes), true);
+}
+
+TEST(MathBlobTest, TestMMDot) {
+ float A[5][5] = {};
+ float AT[5][5] = {};
+ float B[5][5]= {};
+ float BT[5][5]= {};
+ float Res[5][5]= {};
+ for(int i = 0; i < 5; i++) {
+ for(int j = 0; j < 5; j++) {
+ A[i][j] = i * j + i - j;
+ AT[j][i] = i * j + i - j;
+ B[i][j] = - i * j + i * i - j * j;
+ BT[j][i] = - i * j + i * i - j * j;
+ Res[i][j] = i * j + i * i + j * j;
+ }
+ }
+
+ Blob<float> * BlobA = new Blob<float>(5, 5);
+ BlobA->set_cpu_data(A[0]);
+ Blob<float> * BlobAT = new Blob<float>(5, 5);
+ BlobAT->set_cpu_data(AT[0]);
+ BlobAT->set_transpose(true);
+ Blob<float> * BlobB = new Blob<float>(5, 5);
+ BlobB->set_cpu_data(B[0]);
+ Blob<float> * BlobBT = new Blob<float>(5, 5);
+ BlobBT->set_cpu_data(BT[0]);
+ BlobBT->set_transpose(true);
+ Blob<float> * BlobAB = new Blob<float>(5, 5);
+ BlobAB->set_cpu_data(Res[0]);
+ Blob<float> * BlobABT = new Blob<float>(5, 5);
+ BlobABT->set_cpu_data(Res[0]);
+ Blob<float> * BlobATB = new Blob<float>(5, 5);
+ BlobATB->set_cpu_data(Res[0]);
+ Blob<float> * BlobATBT = new Blob<float>(5, 5);
+ BlobATBT->set_cpu_data(Res[0]);
+
+ for (int i = 0; i < 5; i++) {
+ for (int j = 0; j < 5; j++) {
+ Res[i][j] = 0;
+ for (int k = 0; k < 5; k++) {
+ Res[i][j] += A[i][k]*B[k][j];
+ }
+ }
+ }
+
+ Blob<float> * BlobRes = new Blob<float>(5, 5);
+ BlobRes->set_cpu_data(Res[0]);
+
+ MMDot<float>(*BlobA, *BlobB, BlobAB);
+ MMDot<float>(*BlobA, *BlobBT, BlobABT);
+ MMDot<float>(*BlobAT, *BlobB, BlobATB);
+ MMDot<float>(*BlobAT, *BlobBT, BlobATBT);
+
+ ASSERT_EQ(BlobAB->check_equal(BlobRes), true);
+ ASSERT_EQ(BlobATB->check_equal(BlobRes), true);
+ ASSERT_EQ(BlobABT->check_equal(BlobRes), true);
+ ASSERT_EQ(BlobATBT->check_equal(BlobRes), true);
+}
+
+TEST(MathBlobTest, TestVVDot) {
+ float A[10] = {};
+ float B[10] = {};
+ float prod = 0;
+ for (int i = 0; i < 10; i++) {
+ A[i] = i * i - 5* (i%2);
+ B[i] = 2* i * i - 3* (i%4);
+ prod += A[i] * B[i];
+ }
+
+ Blob<float> * BlobA = new Blob<float>(10);
+ BlobA->set_cpu_data(A);
+ Blob<float> * BlobB = new Blob<float>(10);
+ BlobB->set_cpu_data(B);
+ float blobprod = VVDot<float>(*BlobA, *BlobB);
+ ASSERT_EQ(blobprod, prod);
+}
+
+TEST(MathBlobTest, TestOuterProduct) {
+ float A[10] = {};
+ float B[10] = {};
+ float AB[10][10] = {};
+ for (int i = 0; i < 10; i++) {
+ A[i] = i * i - 5* (i%2);
+ B[i] = 2* i * i - 3* (i%4);
+ }
+ for(int i = 0; i < 10; i++) {
+ for(int j = 0; j < 10; j++) {
+ AB[i][j] = A[i]*B[j];
+ }
+ }
+ Blob<float> * BlobA = new Blob<float>(10);
+ BlobA->set_cpu_data(A);
+ Blob<float> * BlobB = new Blob<float>(10);
+ BlobB->set_cpu_data(B);
+ Blob<float> * BlobAB = new Blob<float>(10, 10);
+ // BlobAB->SetValue(3);
+ Blob<float> * BlobRes = new Blob<float>(10, 10);
+ BlobRes->set_cpu_data(AB[0]);
+ OuterProduct<float>(*BlobA, *BlobB, BlobAB);
+
+ ASSERT_EQ(BlobAB->check_equal(BlobRes), true);
+}
+
+TEST(MathBlobTest, TestMapAB) {
+ float A[10] = {};
+ float Res[10] = {};
+ for (int i = 0; i < 10; i++) {
+ A[i] = i * i - 5* (i%2);
+ Res[i] = A[i] * A[i];
+ }
+ Blob<float> * BlobA = new Blob<float>(10);
+ BlobA->set_cpu_data(A);
+ Blob<float> * BlobB = new Blob<float>(10);
+ Blob<float> * BlobRes = new Blob<float>(10);
+ BlobRes->set_cpu_data(Res);
+ Map<singa::op::Square<float>, float>(*BlobA, BlobB);
+ ASSERT_EQ(BlobB->check_equal(BlobRes), true);
+}
+
+TEST(MathBlobTest, TestMapABC) {
+ float A[10] = {};
+ float B[10] = {};
+ float Res[10] = {};
+ for (int i = 0; i < 10; i++) {
+ A[i] = i * i - 5* (i%2);
+ B[i] = 2* i * i - 3* (i%4);
+ Res[i] = A[i] * B[i];
+ }
+ Blob<float> * BlobA = new Blob<float>(10);
+ BlobA->set_cpu_data(A);
+ Blob<float> * BlobB = new Blob<float>(10);
+ BlobB->set_cpu_data(B);
+ Blob<float> * BlobC = new Blob<float>(10);
+ Blob<float> * BlobRes = new Blob<float>(10);
+ BlobRes->set_cpu_data(Res);
+ Map<singa::op::Mult<float>, float>(*BlobA, *BlobB, BlobC);
+ ASSERT_EQ(BlobC->check_equal(BlobRes), true);
+}
+
+TEST(MathBlobTest, TestCopy) {
+ Blob<float> *BlobA = new Blob<float>(10);
+ Blob<float> *BlobB = new Blob<float>(10);
+ float A[10] = {};
+ for (int i = 0; i < 10; i++) {
+ A[i] = i * i - 5* (i%2);
+ }
+ BlobA->set_cpu_data(A);
+ Copy<float>(*BlobA, BlobB);
+ ASSERT_EQ(BlobA->check_equal(BlobB), true);
+}
+
+TEST(MathBlobTest, TestAdd) {
+ Blob<float> *A = new Blob<float>(10);
+ Blob<float> *B = new Blob<float>(10);
+ Blob<float> *C = new Blob<float>(10);
+ Blob<float> *D = new Blob<float>(10);
+ A->SetValue(5);
+ B->SetValue(6);
+ D->SetValue(11);
+ Add<float>(*A, *B, C);
+ ASSERT_EQ(C->check_equal(D), true);
+}
+
+TEST(MathBlobTest, TestSub) {
+ Blob<float> *A = new Blob<float>(10);
+ Blob<float> *B = new Blob<float>(10);
+ Blob<float> *C = new Blob<float>(10);
+ Blob<float> *D = new Blob<float>(10);
+ A->SetValue(5);
+ B->SetValue(6);
+ D->SetValue(-1);
+ Sub<float>(*A, *B, C);
+ ASSERT_EQ(C->check_equal(D), true);
+}
+
+TEST(MathBlobTest, TestMVAddCol) {
+ Blob<float> *BlobA = new Blob<float>(10);
+ Blob<float> *BlobB = new Blob<float>(10, 10);
+ Blob<float> *BlobBT = new Blob<float>(10, 10);
+ Blob<float> *BlobRes = new Blob<float>(10, 10);
+ Blob<float> *BlobResT = new Blob<float>(10, 10);
+
+ float A[10] = {};
+ float B[10][10] = {};
+ float BT[10][10] = {};
+ for(int i = 0; i < 10; i++) {
+ A[i] = 5*i -2;
+ for(int j = 0; j < 10; j++) {
+ B[i][j] = i * j + i - j;
+ BT[j][i] = i * j + i - j;
+ }
+ }
+
+ BlobA->set_cpu_data(A);
+ BlobB->set_cpu_data(B[0]);
+ BlobBT->set_cpu_data(BT[0]);
+ BlobBT->set_transpose(true);
+
+ for(int i = 0; i < 10; i++) {
+ for(int j = 0; j < 10; j++) {
+ B[i][j] = 2.0 * A[i] + 3.0 * B[i][j];
+ BT[j][i] = 2.0 * A[i] + 3.0 * BT[j][i];
+ }
+ }
+
+ BlobRes->set_cpu_data(B[0]);
+ BlobResT->set_cpu_data(BT[0]);
+ BlobResT->set_transpose(true);
+
+ MVAddCol<float>(2.0, 3.0, *BlobA, BlobB);
+ MVAddCol<float>(2.0, 3.0, *BlobA, BlobBT);
+
+ ASSERT_EQ(BlobB->check_equal(BlobRes), true);
+ ASSERT_EQ(BlobBT->check_equal(BlobResT), true);
+}
+
+TEST(MathBlobTest, TestMVAddRow) {
+ Blob<float> *BlobA = new Blob<float>(10);
+ Blob<float> *BlobB = new Blob<float>(10, 10);
+ Blob<float> *BlobBT = new Blob<float>(10, 10);
+ Blob<float> *BlobRes = new Blob<float>(10, 10);
+ Blob<float> *BlobResT = new Blob<float>(10, 10);
+
+ float A[10] = {};
+ float B[10][10] = {};
+ float BT[10][10] = {};
+ for(int i = 0; i < 10; i++) {
+ A[i] = 5*i -2;
+ for(int j = 0; j < 10; j++) {
+ B[i][j] = i * j + i - j;
+ BT[j][i] = i * j + i - j;
+ }
+ }
+
+ BlobA->set_cpu_data(A);
+ BlobB->set_cpu_data(B[0]);
+ BlobBT->set_cpu_data(BT[0]);
+ BlobBT->set_transpose(true);
+
+ for(int i = 0; i < 10; i++) {
+ for(int j = 0; j < 10; j++) {
+ B[j][i] = 2.0 * A[i] + 3.0 * B[j][i];
+ BT[i][j] = 2.0 * A[i] + 3.0 * BT[i][j];
+ }
+ }
+
+ BlobRes->set_cpu_data(B[0]);
+ BlobResT->set_cpu_data(BT[0]);
+ BlobResT->set_transpose(true);
+
+ MVAddRow<float>(2.0, 3.0, *BlobA, BlobB);
+ MVAddRow<float>(2.0, 3.0, *BlobA, BlobBT);
+
+ ASSERT_EQ(BlobB->check_equal(BlobRes), true);
+ ASSERT_EQ(BlobBT->check_equal(BlobResT), true);
+}
+
+TEST(MathBlobTest, TestRepmatCol) {
+ Blob<float> *BlobA = new Blob<float>(10);
+ Blob<float> *BlobB = new Blob<float>(10, 10);
+ Blob<float> *BlobBT = new Blob<float>(10, 10);
+ Blob<float> *BlobRes = new Blob<float>(10, 10);
+ Blob<float> *BlobResT = new Blob<float>(10, 10);
+
+ float A[10] = {};
+ float B[10][10] = {};
+ float BT[10][10] = {};
+ for(int i = 0; i < 10; i++) {
+ A[i] = 5*i -2;
+ for(int j = 0; j < 10; j++) {
+ B[i][j] = A[i];
+ BT[j][i] = A[i];
+ }
+ }
+
+ BlobA->set_cpu_data(A);
+ BlobBT->set_transpose(true);
+
+ BlobRes->set_cpu_data(B[0]);
+ BlobResT->set_cpu_data(BT[0]);
+ BlobResT->set_transpose(true);
+
+ RepmatCol<float>(*BlobA, BlobB);
+ RepmatCol<float>(*BlobA, BlobBT);
+
+ ASSERT_EQ(BlobB->check_equal(BlobRes), true);
+ ASSERT_EQ(BlobBT->check_equal(BlobResT), true);
+}
+
+TEST(MathBlobTest, TestRepmatRow) {
+ Blob<float> *BlobA = new Blob<float>(10);
+ Blob<float> *BlobB = new Blob<float>(10, 10);
+ Blob<float> *BlobBT = new Blob<float>(10, 10);
+ Blob<float> *BlobRes = new Blob<float>(10, 10);
+ Blob<float> *BlobResT = new Blob<float>(10, 10);
+
+ float A[10] = {};
+ float B[10][10] = {};
+ float BT[10][10] = {};
+ for(int i = 0; i < 10; i++) {
+ A[i] = 5*i -2;
+ for(int j = 0; j < 10; j++) {
+ B[j][i] = A[i];
+ BT[i][j] = A[i];
+ }
+ }
+
+ BlobA->set_cpu_data(A);
+ BlobBT->set_transpose(true);
+
+ BlobRes->set_cpu_data(B[0]);
+ BlobResT->set_cpu_data(BT[0]);
+ BlobResT->set_transpose(true);
+
+ RepmatRow<float>(*BlobA, BlobB);
+ RepmatRow<float>(*BlobA, BlobBT);
+
+ ASSERT_EQ(BlobB->check_equal(BlobRes), true);
+ ASSERT_EQ(BlobBT->check_equal(BlobResT), true);
+}
+
+TEST(MathBlobTest, TestMVSumCol) {
+ Blob<float> *BlobA = new Blob<float>(10);
+ Blob<float> *BlobACopy = new Blob<float>(10);
+ Blob<float> *BlobB = new Blob<float>(10, 10);
+ Blob<float> *BlobBT = new Blob<float>(10, 10);
+ Blob<float> *BlobRes = new Blob<float>(10);
+
+ float A[10] = {};
+ float B[10][10] = {};
+ float BT[10][10] = {};
+ for(int i = 0; i < 10; i++) {
+ A[i] = 5*i -2;
+ for(int j = 0; j < 10; j++) {
+ B[i][j] = i * j + i - j;
+ BT[j][i] = i * j + i - j;
+ }
+ }
+
+ BlobA->set_cpu_data(A);
+ BlobACopy->set_cpu_data(A);
+ BlobB->set_cpu_data(B[0]);
+ BlobBT->set_cpu_data(BT[0]);
+ BlobBT->set_transpose(true);
+
+ for(int i = 0; i < 10; i++) {
+ A[i] *= 2.0;
+ for(int j = 0; j < 10; j++) {
+ A[i] += 3.0 * B[i][j];
+ }
+ }
+ BlobRes->set_cpu_data(A);
+
+ MVSumCol<float>(2.0, 3.0, *BlobB, BlobA);
+ MVSumCol<float>(2.0, 3.0, *BlobBT, BlobACopy);
+
+ ASSERT_EQ(BlobA->check_equal(BlobRes), true);
+ ASSERT_EQ(BlobACopy->check_equal(BlobRes), true);
+}
+
+TEST(MathBlobTest, TestMVSumRow) {
+ Blob<float> *BlobA = new Blob<float>(10);
+ Blob<float> *BlobACopy = new Blob<float>(10);
+ Blob<float> *BlobB = new Blob<float>(10, 10);
+ Blob<float> *BlobBT = new Blob<float>(10, 10);
+ Blob<float> *BlobRes = new Blob<float>(10);
+
+ float A[10] = {};
+ float B[10][10] = {};
+ float BT[10][10] = {};
+ for(int i = 0; i < 10; i++) {
+ A[i] = 5*i -2;
+ for(int j = 0; j < 10; j++) {
+ B[j][i] = i * j + i - j;
+ BT[i][j] = i * j + i - j;
+ }
+ }
+
+ BlobA->set_cpu_data(A);
+ BlobACopy->set_cpu_data(A);
+ BlobB->set_cpu_data(B[0]);
+ BlobBT->set_cpu_data(BT[0]);
+ BlobBT->set_transpose(true);
+
+ for(int i = 0; i < 10; i++) {
+ A[i] *= 2.0;
+ for(int j = 0; j < 10; j++) {
+ A[i] += 3.0 * B[j][i];
+ }
+ }
+ BlobRes->set_cpu_data(A);
+
+ MVSumRow<float>(2.0, 3.0, *BlobB, BlobA);
+ MVSumRow<float>(2.0, 3.0, *BlobBT, BlobACopy);
+
+ ASSERT_EQ(BlobA->check_equal(BlobRes), true);
+ ASSERT_EQ(BlobACopy->check_equal(BlobRes), true);
+}
+
+TEST(MathBlobTest, TestASum) {
+ float A[10] = {};
+ for(int i = 0; i < 10; i++) {
+ A[i] = ((i % 3) -1) * i;
+ }
+
+ Blob<float> *BlobA = new Blob<float>(10);
+ BlobA->set_cpu_data(A);
+
+ float BlobRes = Asum<float>(*BlobA);
+ float res = cblas_sasum(10, A, 1) / 10;
+
+ ASSERT_EQ(BlobRes, res);
+}
+
TEST(MathTest, TestGemmCPU) {
float A[3][2] = {};
float B[3][2] = {};
[4/4] incubator-singa git commit: SINGA-80 New Blob Level and Address
Level Math Operation Interface
Posted by wa...@apache.org.
SINGA-80 New Blob Level and Address Level Math Operation Interface
Merge PR 134 for SINGA-80
Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/8329aa0c
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/8329aa0c
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/8329aa0c
Branch: refs/heads/master
Commit: 8329aa0c3c613046b2bd8f70c3e43a765f74f602
Parents: 369d879 d452c1f
Author: Wei Wang <wa...@comp.nus.edu.sg>
Authored: Thu Apr 7 21:01:45 2016 +0800
Committer: Wei Wang <wa...@comp.nus.edu.sg>
Committed: Thu Apr 7 21:05:41 2016 +0800
----------------------------------------------------------------------
include/singa/utils/blob.h | 15 +
include/singa/utils/math_addr.h | 12 +-
include/singa/utils/math_blob.h | 88 ++++--
src/test/test_math.cc | 591 ++++++++++++++++++++++++++++++++++-
4 files changed, 667 insertions(+), 39 deletions(-)
----------------------------------------------------------------------
[2/4] incubator-singa git commit: SINGA-80 New Blob Level and Address
Level Math Operation Interface
Posted by wa...@apache.org.
SINGA-80 New Blob Level and Address Level Math Operation Interface
Unify the signatures of the CPU and GPU functions.
Fix some bugs in MVAddRow() and OuterProduct().
All tests pass.
Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/8ade7d76
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/8ade7d76
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/8ade7d76
Branch: refs/heads/master
Commit: 8ade7d76dbe64b75088693febba7019e28d39c30
Parents: 247002d
Author: seaok <se...@gmail.com>
Authored: Fri Mar 18 15:00:44 2016 +0800
Committer: Wei Wang <wa...@comp.nus.edu.sg>
Committed: Sat Apr 2 21:35:39 2016 +0800
----------------------------------------------------------------------
include/singa/utils/math_addr.h | 6 +++---
include/singa/utils/math_blob.h | 8 ++++----
2 files changed, 7 insertions(+), 7 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/8ade7d76/include/singa/utils/math_addr.h
----------------------------------------------------------------------
diff --git a/include/singa/utils/math_addr.h b/include/singa/utils/math_addr.h
index 524e13e..4a05cfd 100644
--- a/include/singa/utils/math_addr.h
+++ b/include/singa/utils/math_addr.h
@@ -234,13 +234,13 @@ void gpu_e_f(const int n, const Dtype * A, const Dtype * B, Dtype * C) {
}
template<typename Op, typename Dtype>
-void gpu_e_f(const int n, const Dtype * A, const Dtype alpha, Dtype * B) {
+void gpu_e_f(const int n, const Dtype alpha, const Dtype * A, Dtype * B) {
Op::CudaMap(alpha, A, B, n);
}
template<typename Op, typename Dtype>
-void gpu_e_f(const int n, const Dtype * A, const Dtype * B,
- const Dtype alpha, const Dtype beta, Dtype * C) {
+void gpu_e_f(const int n, const Dtype alpha, const Dtype beta,
+ const Dtype * A, const Dtype * B, Dtype * C) {
Op::CudaMap(alpha, beta, A, B, C, n);
}
// element-wise generalized operation defined in Op
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/8ade7d76/include/singa/utils/math_blob.h
----------------------------------------------------------------------
diff --git a/include/singa/utils/math_blob.h b/include/singa/utils/math_blob.h
index 35985f1..50da1f0 100644
--- a/include/singa/utils/math_blob.h
+++ b/include/singa/utils/math_blob.h
@@ -258,7 +258,7 @@ void OuterProduct(const Blob<Dtype>& A, const Blob<Dtype>& B, Blob<Dtype> * C) {
} else {
#ifdef USE_GPU
gpu_gemm(context->cublas_handle(device), A.gpu_data(), B.gpu_data(),
- m, n, 1, 1, 0, false, false, C->mutable_gpu_data());
+ m, n, 1, Dtype(1), Dtype(0), false, false, C->mutable_gpu_data());
#else
NO_GPU;
#endif // USE_GPU
@@ -321,7 +321,7 @@ void Map(Dtype alpha, const Blob<Dtype>& A, Blob<Dtype>* B) {
cpu_e_f<Op>(A.count(), alpha, A.cpu_data(), B->mutable_cpu_data());
} else {
#ifdef USE_GPU
- gpu_e_f<Op>(A.count(), A.gpu_data(), alpha, B->mutable_gpu_data());
+ gpu_e_f<Op>(A.count(), alpha, A.gpu_data(), B->mutable_gpu_data());
#else
NO_GPU;
#endif // USE_GPU
@@ -491,8 +491,8 @@ void MVAddCol(Dtype alpha, Dtype beta, const Blob<Dtype> & A, Blob<Dtype> * B) {
B->mutable_cpu_data());
} else {
#ifdef USE_GPU
- singa_gpu_add_vec_row(A.gpu_data(), B->gpu_data(), B->mutable_gpu_data(),
- m, n, n);
+ gpu_gemm(context->cublas_handle(device), A.gpu_data(), one.gpu_data(), m, n, 1,
+ alpha, beta, false, false, B->mutable_gpu_data());
#else
NO_GPU;
#endif // USE_GPU
[3/4] incubator-singa git commit: SINGA-80 New Blob Level and Address
Level Math Operation Interface
Posted by wa...@apache.org.
SINGA-80 New Blob Level and Address Level Math Operation Interface
Clean the files with cpplint.
Add a fatal log for places where a GPU is needed but the code is not compiled with GPU support.
A few TODOs are left in math_blob.h.
Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/d452c1fb
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/d452c1fb
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/d452c1fb
Branch: refs/heads/master
Commit: d452c1fb4128ef8a90198100033160826290b0c3
Parents: 8ade7d7
Author: Wei Wang <wa...@comp.nus.edu.sg>
Authored: Mon Apr 4 11:12:32 2016 +0800
Committer: Wei Wang <wa...@comp.nus.edu.sg>
Committed: Mon Apr 4 11:12:32 2016 +0800
----------------------------------------------------------------------
include/singa/utils/blob.h | 2 +-
include/singa/utils/math_addr.h | 8 ++---
include/singa/utils/math_blob.h | 14 ++++----
src/test/test_math.cc | 70 ++++++++++++++++++------------------
4 files changed, 48 insertions(+), 46 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d452c1fb/include/singa/utils/blob.h
----------------------------------------------------------------------
diff --git a/include/singa/utils/blob.h b/include/singa/utils/blob.h
index 9defeac..1a0a592 100644
--- a/include/singa/utils/blob.h
+++ b/include/singa/utils/blob.h
@@ -281,7 +281,7 @@ class Blob {
if (transpose() != other->transpose()) return false;
if (count() != other->count()) return false;
if (shape().size() != other->shape().size()) return false;
- for (int i = 0; i < shape().size(); i++) {
+ for (unsigned int i = 0; i < shape().size(); i++) {
if (shape(i) != other->shape(i)) return false;
}
const Dtype * a = cpu_data();
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d452c1fb/include/singa/utils/math_addr.h
----------------------------------------------------------------------
diff --git a/include/singa/utils/math_addr.h b/include/singa/utils/math_addr.h
index 4a05cfd..cf1d227 100644
--- a/include/singa/utils/math_addr.h
+++ b/include/singa/utils/math_addr.h
@@ -78,7 +78,7 @@ void cpu_copy(const int n, const Dtype* A, Dtype *B) {
}
template<typename Dtype>
-Dtype cpu_dot(const Dtype * A, const Dtype * B, const int n) {
+Dtype cpu_dot(const int n, const Dtype * A, const Dtype * B) {
Dtype sum = 0;
for (int i = 0 ; i < n ; i++)
sum += A[i] * B[i];
@@ -210,8 +210,8 @@ void gpu_scale(cublasHandle_t handle, const int n, const Dtype alpha,
}
template<typename Dtype>
-Dtype gpu_dot(cublasHandle_t handle, const Dtype * A, const Dtype * B,
- const int n) {
+Dtype gpu_dot(cublasHandle_t handle, const int n, const Dtype * A,
+ const Dtype * B) {
Dtype result = 0.0;
cublasSdot(handle, n, A, 1, B, 1, &result);
return result;
@@ -240,7 +240,7 @@ void gpu_e_f(const int n, const Dtype alpha, const Dtype * A, Dtype * B) {
template<typename Op, typename Dtype>
void gpu_e_f(const int n, const Dtype alpha, const Dtype beta,
- const Dtype * A, const Dtype * B, Dtype * C) {
+ const Dtype * A, const Dtype * B, Dtype * C) {
Op::CudaMap(alpha, beta, A, B, C, n);
}
// element-wise generalized operation defined in Op
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d452c1fb/include/singa/utils/math_blob.h
----------------------------------------------------------------------
diff --git a/include/singa/utils/math_blob.h b/include/singa/utils/math_blob.h
index 50da1f0..abe7722 100644
--- a/include/singa/utils/math_blob.h
+++ b/include/singa/utils/math_blob.h
@@ -223,11 +223,11 @@ Dtype VVDot(const Blob<Dtype> & A, const Blob<Dtype> & B) {
auto context = Singleton<Context>::Instance();
int device = context->device_id(std::this_thread::get_id());
if (device < 0) {
- res = cpu_dot(A.cpu_data(), B.cpu_data(), n);
+ res = cpu_dot(n, A.cpu_data(), B.cpu_data());
} else {
#ifdef USE_GPU
- res = gpu_dot(context->cublas_handle(device), A.gpu_data(), B.gpu_data(),
- n);
+ res = gpu_dot(context->cublas_handle(device), n, A.gpu_data(),
+ B.gpu_data());
#else
NO_GPU;
#endif // USE_GPU
@@ -302,8 +302,9 @@ void Map(const Blob<Dtype> & A, const Blob<Dtype> & B, Blob<Dtype> * C) {
cpu_e_f<Op>(A.count(), A.cpu_data(), B.cpu_data(), C->mutable_cpu_data());
} else {
#ifdef USE_GPU
- // gpu part
gpu_e_f<Op>(A.count(), A.gpu_data(), B.gpu_data(), C->mutable_gpu_data());
+#else
+ NO_GPU;
#endif // USE_GPU
}
}
@@ -491,8 +492,8 @@ void MVAddCol(Dtype alpha, Dtype beta, const Blob<Dtype> & A, Blob<Dtype> * B) {
B->mutable_cpu_data());
} else {
#ifdef USE_GPU
- gpu_gemm(context->cublas_handle(device), A.gpu_data(), one.gpu_data(), m, n, 1,
- alpha, beta, false, false, B->mutable_gpu_data());
+ gpu_gemm(context->cublas_handle(device), A.gpu_data(), one.gpu_data(), m,
+ n, 1, alpha, beta, false, false, B->mutable_gpu_data());
#else
NO_GPU;
#endif // USE_GPU
@@ -737,6 +738,7 @@ void Softmax(int nb_rows, const Blob<Dtype>& A, Blob<Dtype>* B) {
cpu_softmax(nb_rows, A.count() / nb_rows, A.cpu_data(),
B->mutable_cpu_data());
} else {
+ // TODO(wangwei) implement the GPU version.
NO_GPU;
}
}
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d452c1fb/src/test/test_math.cc
----------------------------------------------------------------------
diff --git a/src/test/test_math.cc b/src/test/test_math.cc
index 6bb6001..9830703 100644
--- a/src/test/test_math.cc
+++ b/src/test/test_math.cc
@@ -30,7 +30,7 @@
#ifdef USE_GPU
#include <cuda_runtime.h>
-#include "cublas_v2.h"
+#include <cublas_v2.h>
#endif
using namespace singa;
@@ -64,8 +64,8 @@ TEST(MathBlobTest, TestGEMV) {
float AT[5][5] = {};
float B[5] = {};
float Res[5] = {};
- for(int i = 0; i < 5; i++) {
- for(int j = 0; j < 5; j++) {
+ for (int i = 0; i < 5; i++) {
+ for (int j = 0; j < 5; j++) {
A[i][j] = i * j + i - j;
AT[j][i] = i * j + i - j;
}
@@ -88,7 +88,7 @@ TEST(MathBlobTest, TestGEMV) {
BlobATB->set_cpu_data(Res);
for (int i = 0; i < 5; i++) {
- for(int j = 0; j < 5; j++) {
+ for (int j = 0; j < 5; j++) {
Res[i] += 2*A[i][j] * B[j];
}
}
@@ -107,8 +107,8 @@ TEST(MathBlobTest, TestMVDot) {
float AT[5][5] = {};
float B[5] = {};
float Res[5] = {};
- for(int i = 0; i < 5; i++) {
- for(int j = 0; j < 5; j++) {
+ for (int i = 0; i < 5; i++) {
+ for (int j = 0; j < 5; j++) {
A[i][j] = i * j + i - j;
AT[j][i] = i * j + i - j;
}
@@ -131,7 +131,7 @@ TEST(MathBlobTest, TestMVDot) {
BlobATB->set_cpu_data(Res);
for (int i = 0; i < 5; i++) {
- for(int j = 0; j < 5; j++) {
+ for (int j = 0; j < 5; j++) {
Res[i] += A[i][j] * B[j];
}
}
@@ -156,8 +156,8 @@ TEST(MathBlobTest, TestGEMM) {
float B[5][5]= {};
float BT[5][5]= {};
float Res[5][5]= {};
- for(int i = 0; i < 5; i++) {
- for(int j = 0; j < 5; j++) {
+ for (int i = 0; i < 5; i++) {
+ for (int j = 0; j < 5; j++) {
A[i][j] = i * j + i - j;
AT[j][i] = i * j + i - j;
B[i][j] = - i * j + i * i - j * j;
@@ -214,8 +214,8 @@ TEST(MathBlobTest, TestMMDot) {
float B[5][5]= {};
float BT[5][5]= {};
float Res[5][5]= {};
- for(int i = 0; i < 5; i++) {
- for(int j = 0; j < 5; j++) {
+ for (int i = 0; i < 5; i++) {
+ for (int j = 0; j < 5; j++) {
A[i][j] = i * j + i - j;
AT[j][i] = i * j + i - j;
B[i][j] = - i * j + i * i - j * j;
@@ -292,8 +292,8 @@ TEST(MathBlobTest, TestOuterProduct) {
A[i] = i * i - 5* (i%2);
B[i] = 2* i * i - 3* (i%4);
}
- for(int i = 0; i < 10; i++) {
- for(int j = 0; j < 10; j++) {
+ for (int i = 0; i < 10; i++) {
+ for (int j = 0; j < 10; j++) {
AB[i][j] = A[i]*B[j];
}
}
@@ -392,9 +392,9 @@ TEST(MathBlobTest, TestMVAddCol) {
float A[10] = {};
float B[10][10] = {};
float BT[10][10] = {};
- for(int i = 0; i < 10; i++) {
+ for (int i = 0; i < 10; i++) {
A[i] = 5*i -2;
- for(int j = 0; j < 10; j++) {
+ for (int j = 0; j < 10; j++) {
B[i][j] = i * j + i - j;
BT[j][i] = i * j + i - j;
}
@@ -405,8 +405,8 @@ TEST(MathBlobTest, TestMVAddCol) {
BlobBT->set_cpu_data(BT[0]);
BlobBT->set_transpose(true);
- for(int i = 0; i < 10; i++) {
- for(int j = 0; j < 10; j++) {
+ for (int i = 0; i < 10; i++) {
+ for (int j = 0; j < 10; j++) {
B[i][j] = 2.0 * A[i] + 3.0 * B[i][j];
BT[j][i] = 2.0 * A[i] + 3.0 * BT[j][i];
}
@@ -433,9 +433,9 @@ TEST(MathBlobTest, TestMVAddRow) {
float A[10] = {};
float B[10][10] = {};
float BT[10][10] = {};
- for(int i = 0; i < 10; i++) {
+ for (int i = 0; i < 10; i++) {
A[i] = 5*i -2;
- for(int j = 0; j < 10; j++) {
+ for (int j = 0; j < 10; j++) {
B[i][j] = i * j + i - j;
BT[j][i] = i * j + i - j;
}
@@ -446,8 +446,8 @@ TEST(MathBlobTest, TestMVAddRow) {
BlobBT->set_cpu_data(BT[0]);
BlobBT->set_transpose(true);
- for(int i = 0; i < 10; i++) {
- for(int j = 0; j < 10; j++) {
+ for (int i = 0; i < 10; i++) {
+ for (int j = 0; j < 10; j++) {
B[j][i] = 2.0 * A[i] + 3.0 * B[j][i];
BT[i][j] = 2.0 * A[i] + 3.0 * BT[i][j];
}
@@ -474,9 +474,9 @@ TEST(MathBlobTest, TestRepmatCol) {
float A[10] = {};
float B[10][10] = {};
float BT[10][10] = {};
- for(int i = 0; i < 10; i++) {
+ for (int i = 0; i < 10; i++) {
A[i] = 5*i -2;
- for(int j = 0; j < 10; j++) {
+ for (int j = 0; j < 10; j++) {
B[i][j] = A[i];
BT[j][i] = A[i];
}
@@ -506,9 +506,9 @@ TEST(MathBlobTest, TestRepmatRow) {
float A[10] = {};
float B[10][10] = {};
float BT[10][10] = {};
- for(int i = 0; i < 10; i++) {
+ for (int i = 0; i < 10; i++) {
A[i] = 5*i -2;
- for(int j = 0; j < 10; j++) {
+ for (int j = 0; j < 10; j++) {
B[j][i] = A[i];
BT[i][j] = A[i];
}
@@ -538,9 +538,9 @@ TEST(MathBlobTest, TestMVSumCol) {
float A[10] = {};
float B[10][10] = {};
float BT[10][10] = {};
- for(int i = 0; i < 10; i++) {
+ for (int i = 0; i < 10; i++) {
A[i] = 5*i -2;
- for(int j = 0; j < 10; j++) {
+ for (int j = 0; j < 10; j++) {
B[i][j] = i * j + i - j;
BT[j][i] = i * j + i - j;
}
@@ -552,9 +552,9 @@ TEST(MathBlobTest, TestMVSumCol) {
BlobBT->set_cpu_data(BT[0]);
BlobBT->set_transpose(true);
- for(int i = 0; i < 10; i++) {
+ for (int i = 0; i < 10; i++) {
A[i] *= 2.0;
- for(int j = 0; j < 10; j++) {
+ for (int j = 0; j < 10; j++) {
A[i] += 3.0 * B[i][j];
}
}
@@ -577,9 +577,9 @@ TEST(MathBlobTest, TestMVSumRow) {
float A[10] = {};
float B[10][10] = {};
float BT[10][10] = {};
- for(int i = 0; i < 10; i++) {
+ for (int i = 0; i < 10; i++) {
A[i] = 5*i -2;
- for(int j = 0; j < 10; j++) {
+ for (int j = 0; j < 10; j++) {
B[j][i] = i * j + i - j;
BT[i][j] = i * j + i - j;
}
@@ -591,9 +591,9 @@ TEST(MathBlobTest, TestMVSumRow) {
BlobBT->set_cpu_data(BT[0]);
BlobBT->set_transpose(true);
- for(int i = 0; i < 10; i++) {
+ for (int i = 0; i < 10; i++) {
A[i] *= 2.0;
- for(int j = 0; j < 10; j++) {
+ for (int j = 0; j < 10; j++) {
A[i] += 3.0 * B[j][i];
}
}
@@ -608,7 +608,7 @@ TEST(MathBlobTest, TestMVSumRow) {
TEST(MathBlobTest, TestASum) {
float A[10] = {};
- for(int i = 0; i < 10; i++) {
+ for (int i = 0; i < 10; i++) {
A[i] = ((i % 3) -1) * i;
}
@@ -888,7 +888,7 @@ TEST(MathTest, TestDotGPU) {
cudaMemcpy(B_gpu, B, 12*sizeof(float), cudaMemcpyHostToDevice);
auto context = Singleton<Context>::Instance();
context->SetupDevice(std::this_thread::get_id(), 0);
- float gpu_ret = gpu_dot<float>(context->cublas_handle(0), A_gpu, B_gpu, 12);
+ float gpu_ret = gpu_dot<float>(context->cublas_handle(0), 12, A_gpu, B_gpu);
float cpu_ret = 0.0f;
for (int i = 0; i < 12; i++) {