You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@singa.apache.org by zh...@apache.org on 2016/06/13 13:20:28 UTC
[35/50] [abbrv] incubator-singa git commit: SINGA-182 Clean math function APIs and implementations
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/564c88ad/src/core/tensor/tensor_math_cpp.h
----------------------------------------------------------------------
diff --git a/src/core/tensor/tensor_math_cpp.h b/src/core/tensor/tensor_math_cpp.h
index ec7a892..2c5c272 100644
--- a/src/core/tensor/tensor_math_cpp.h
+++ b/src/core/tensor/tensor_math_cpp.h
@@ -25,12 +25,11 @@
#include <cblas.h>
#endif
-/// TODO(wangwei) Clean the implementations following the comments in
-/// tensor_math.h.
namespace singa {
-template<>
-void Abs<float, lang::Cpp>(const size_t num, const Blob *in, Blob *out, Context *ctx) {
+template <>
+void Abs<float, lang::Cpp>(const size_t num, const Blob *in, Blob *out,
+ Context *ctx) {
float *outPtr = static_cast<float *>(out->mutable_data());
const float *inPtr = static_cast<const float *>(in->data());
for (size_t i = 0; i < num; i++) {
@@ -39,180 +38,150 @@ void Abs<float, lang::Cpp>(const size_t num, const Blob *in, Blob *out, Context
}
template <>
-void Set<float, lang::Cpp>(const size_t num, const float x, Blob *out, Context *ctx) {
+void Add<float, lang::Cpp>(const size_t num, const Blob *in, const float x,
+ Blob *out, Context *ctx) {
float *outPtr = static_cast<float *>(out->mutable_data());
- for (size_t i = 0; i < num; i++) outPtr[i] = x;
+ const float *inPtr = static_cast<const float *>(in->data());
+ for (size_t i = 0; i < num; i++) {
+ outPtr[i] = inPtr[i] + x;
+ }
}
-// sum all elements of input into out
-// TODO(wangwei) optimize using omp
template <>
-void Sum<float, lang::Cpp>(const size_t num, const Blob *in, float *out, Context *ctx) {
- float s = 0.f;
- const float *inPtr = static_cast<const float *>(in->data());
+void Add<float, lang::Cpp>(const size_t num, const Blob *in1, const Blob *in2,
+ Blob *out, Context *ctx) {
+ // CHECK_EQ(ctx->stream, nullptr);
+ float *outPtr = static_cast<float *>(out->mutable_data());
+ const float *in1Ptr = static_cast<const float *>(in1->data());
+ const float *in2Ptr = static_cast<const float *>(in2->data());
for (size_t i = 0; i < num; i++) {
- s += inPtr[i];
+ outPtr[i] = in1Ptr[i] + in2Ptr[i];
}
- *out = s;
}
template <>
-void Sign<float, lang::Cpp>(const size_t num, const Blob *in, Blob *out, Context *ctx) {
+void Clamp<float, lang::Cpp>(const size_t num, const float low,
+ const float high, const Blob *in, Blob *out,
+ Context *ctx) {
float *outPtr = static_cast<float *>(out->mutable_data());
- const float *inPtr = static_cast<const float*>(in->data());
+ const float *inPtr = static_cast<const float *>(in->data());
for (size_t i = 0; i < num; i++) {
- outPtr[i] = inPtr[i] > 0 ? 1.0f : 0.0f;
+ if (inPtr[i] > high) {
+ outPtr[i] = high;
+ } else if (inPtr[i] < low) {
+ outPtr[i] = low;
+ } else {
+ outPtr[i] = inPtr[i];
+ }
}
}
template <>
-void Exp<float, lang::Cpp>(const size_t num, const Blob *in, Blob *out, Context *ctx) {
+void Div<float, lang::Cpp>(const size_t num, const Blob *in1, const Blob *in2,
+ Blob *out, Context *ctx) {
float *outPtr = static_cast<float *>(out->mutable_data());
+ const float *in1Ptr = static_cast<const float *>(in1->data());
+ const float *in2Ptr = static_cast<const float *>(in2->data());
+ for (size_t i = 0; i < num; i++) {
+ CHECK_NE(in2Ptr[i], 0.f);
+ outPtr[i] = in1Ptr[i] / in2Ptr[i];
+ }
+}
+
+template <>
+void Div<float, lang::Cpp>(const size_t num, const float x, const Blob *in,
+ Blob *out, Context *ctx) {
const float *inPtr = static_cast<const float *>(in->data());
+ float *outPtr = static_cast<float *>(out->mutable_data());
for (size_t i = 0; i < num; i++) {
- outPtr[i] = exp(inPtr[i]);
+ CHECK_NE(inPtr[i], 0.f);
+ outPtr[i] = x / inPtr[i];
}
}
template <>
-void Log<float, lang::Cpp>(const size_t num, const Blob *in, Blob *out, Context *ctx) {
+void EltwiseMult<float, lang::Cpp>(const size_t num, const Blob *in,
+ const float x, Blob *out, Context *ctx) {
float *outPtr = static_cast<float *>(out->mutable_data());
const float *inPtr = static_cast<const float *>(in->data());
for (size_t i = 0; i < num; i++) {
- CHECK_GT(inPtr[i], 0.f);
- outPtr[i] = log(inPtr[i]);
+ outPtr[i] = inPtr[i] * x;
}
}
template <>
-void Sqrt<float, lang::Cpp>(const size_t num, const Blob *in, Blob *out, Context *ctx) {
+void EltwiseMult<float, lang::Cpp>(const size_t num, const Blob *in1,
+ const Blob *in2, Blob *out, Context *ctx) {
+ float *outPtr = static_cast<float *>(out->mutable_data());
+ const float *in1Ptr = static_cast<const float *>(in1->data());
+ const float *in2Ptr = static_cast<const float *>(in2->data());
+ for (size_t i = 0; i < num; i++) {
+ outPtr[i] = in1Ptr[i] * in2Ptr[i];
+ }
+}
+template <>
+void Exp<float, lang::Cpp>(const size_t num, const Blob *in, Blob *out,
+ Context *ctx) {
float *outPtr = static_cast<float *>(out->mutable_data());
const float *inPtr = static_cast<const float *>(in->data());
for (size_t i = 0; i < num; i++) {
- CHECK_GT(inPtr[i], 0.f);
- outPtr[i] = sqrt(inPtr[i]);
+ outPtr[i] = exp(inPtr[i]);
}
}
template <>
-void Square<float, lang::Cpp>(const size_t num, const Blob *in, Blob *out, Context *ctx) {
+void GE<float, lang::Cpp>(const size_t num, const Blob *in, const float x,
+ Blob *out, Context *ctx) {
float *outPtr = static_cast<float *>(out->mutable_data());
const float *inPtr = static_cast<const float *>(in->data());
for (size_t i = 0; i < num; i++) {
- outPtr[i] = inPtr[i] * inPtr[i];
+ outPtr[i] = (inPtr[i] >= x) ? 1.f : 0.f;
}
}
template <>
-void Tanh<float, lang::Cpp>(const size_t num, const Blob *in, Blob *out, Context *ctx) {
+void GT<float, lang::Cpp>(const size_t num, const Blob *in, const float x,
+ Blob *out, Context *ctx) {
float *outPtr = static_cast<float *>(out->mutable_data());
const float *inPtr = static_cast<const float *>(in->data());
for (size_t i = 0; i < num; i++) {
- outPtr[i] = tanh(inPtr[i]);
+ outPtr[i] = (inPtr[i] > x) ? 1.f : 0.f;
}
}
-
template <>
-void ReLU<float, lang::Cpp>(const size_t num, const Blob *in, Blob *out, Context *ctx) {
+void LE<float, lang::Cpp>(const size_t num, const Blob *in, const float x,
+ Blob *out, Context *ctx) {
float *outPtr = static_cast<float *>(out->mutable_data());
const float *inPtr = static_cast<const float *>(in->data());
for (size_t i = 0; i < num; i++) {
- outPtr[i] = (inPtr[i] >= 0.f) ? inPtr[i] : 0.f;
+ outPtr[i] = (inPtr[i] <= x) ? 1.f : 0.f;
}
}
-
template <>
-void Sigmoid<float, lang::Cpp>(const size_t num, const Blob *in, Blob *out, Context *ctx) {
+void Log<float, lang::Cpp>(const size_t num, const Blob *in, Blob *out,
+ Context *ctx) {
float *outPtr = static_cast<float *>(out->mutable_data());
const float *inPtr = static_cast<const float *>(in->data());
for (size_t i = 0; i < num; i++) {
- outPtr[i] = 1.f / (1.f + exp(-inPtr[i]));
+ CHECK_GT(inPtr[i], 0.f);
+ outPtr[i] = log(inPtr[i]);
}
}
-
template <>
-void Softmax<float, lang::Cpp>(const size_t nrow, const size_t ncol, const Blob *in,
- Blob *out, Context *ctx) {
+void LT<float, lang::Cpp>(const size_t num, const Blob *in, const float x,
+ Blob *out, Context *ctx) {
float *outPtr = static_cast<float *>(out->mutable_data());
const float *inPtr = static_cast<const float *>(in->data());
- float *bPtr = new float[ncol];
- for (size_t r = 0; r < nrow; r++) {
- size_t offset = r * ncol;
- float denom = 0.f;
- for (size_t c = 0; c < ncol; c++) {
- bPtr[c] = exp(inPtr[offset + c]);
- denom += bPtr[c];
- }
- for (size_t c = 0; c < ncol; c++) {
- size_t idx = offset + c;
- outPtr[idx] = bPtr[c] / denom;
- }
- }
- delete bPtr;
-}
-
-template <>
-void SumRows<float, lang::Cpp>(const size_t nrow, const size_t ncol, const Blob *in,
- Blob *out, Context *ctx) {
- float *outPtr = static_cast<float *>(out->mutable_data());
- const float *inPtr = static_cast<const float *>(in->data());
- for (size_t r = 0; r < nrow; r++) {
- size_t offset = r * ncol;
- outPtr[r] = 0.f;
- for (size_t c = 0; c < ncol; c++) {
- outPtr[r] += inPtr[offset + c];
- }
- }
-}
-
-template <>
-void SumColumns<float, lang::Cpp>(const size_t nrow, const size_t ncol, const Blob *in, Blob *out, Context *ctx) {
- float *outPtr = static_cast<float *>(out->mutable_data());
- const float *inPtr = static_cast<const float *>(in->data());
- for (size_t c = 0; c < ncol; c++) {
- outPtr[c] = 0.f;
- }
- for (size_t r = 0; r < nrow; r++) {
- size_t offset = r * ncol;
- for (size_t c = 0; c < ncol; c++) {
- outPtr[c] += inPtr[offset + c];
- }
- }
-}
-
-template <>
-void AddRow<float, lang::Cpp>(const size_t nrow, const size_t ncol, const Blob *A, const Blob *v,
- Blob *out, Context *ctx) {
- float *outPtr = static_cast<float *>(out->mutable_data());
- const float *APtr = static_cast<const float *>(A->data());
- const float *vPtr = static_cast<const float *>(v->data());
- for (size_t r = 0; r < nrow; r++) {
- size_t offset = r * ncol;
- for (size_t c = 0; c < ncol; c++) {
- outPtr[offset + c] = APtr[offset + c] + vPtr[c];
- }
- }
-}
-
-template <>
-void AddCol<float, lang::Cpp>(const size_t nrow, const size_t ncol, const Blob *A, const Blob *v,
- Blob *out, Context *ctx) {
- float *outPtr = static_cast<float *>(out->mutable_data());
- const float *APtr = static_cast<const float *>(A->data());
- const float *vPtr = static_cast<const float *>(v->data());
- for (size_t r = 0; r < nrow; r++) {
- size_t offset = r * ncol;
- for (size_t c = 0; c < ncol; c++) {
- outPtr[offset + c] = APtr[offset + c] + vPtr[r];
- }
- }
-}
-
-template <>
-void Pow<float, lang::Cpp>(const size_t num, const Blob *in, const float x, Blob *out, Context *ctx) {
- float *outPtr = static_cast<float *>(out->mutable_data());
- const float *inPtr = static_cast<const float *>(in->data());
- for (size_t i = 0; i < num; i++) {
+ for (size_t i = 0; i < num; i++) {
+ outPtr[i] = (inPtr[i] < x) ? 1.f : 0.f;
+ }
+}
+template <>
+void Pow<float, lang::Cpp>(const size_t num, const Blob *in, const float x,
+ Blob *out, Context *ctx) {
+ float *outPtr = static_cast<float *>(out->mutable_data());
+ const float *inPtr = static_cast<const float *>(in->data());
+ for (size_t i = 0; i < num; i++) {
outPtr[i] = pow(inPtr[i], x);
}
}
@@ -220,252 +189,230 @@ void Pow<float, lang::Cpp>(const size_t num, const Blob *in, const float x, Blob
template <>
void Pow<float, lang::Cpp>(const size_t num, const Blob *in1, const Blob *in2,
Blob *out, Context *ctx) {
- float *outPtr= static_cast<float *>(out->mutable_data());
- const float *in1Ptr= static_cast<const float *>(in1->data());
+ float *outPtr = static_cast<float *>(out->mutable_data());
+ const float *in1Ptr = static_cast<const float *>(in1->data());
const float *in2Ptr = static_cast<const float *>(in2->data());
for (size_t i = 0; i < num; i++) {
outPtr[i] = pow(in1Ptr[i], in2Ptr[i]);
}
}
-
template <>
-void Clamp<float, lang::Cpp>(const size_t num, const float low, const float high, const Blob *in,
- Blob *out, Context *ctx) {
- float *outPtr = static_cast<float *>(out->mutable_data());
- const float *inPtr = static_cast<const float *>(in->data());
- for (size_t i = 0; i < num; i++) {
- if (inPtr[i] > high) {
- outPtr[i] = high;
- }
- else if (inPtr[i] < low) {
- outPtr[i] = low;
- }
- else {
- outPtr[i] = inPtr[i];
- }
- }
+void ReLU<float, lang::Cpp>(const size_t num, const Blob *in, Blob *out,
+ Context *ctx) {
+ float *outPtr = static_cast<float *>(out->mutable_data());
+ const float *inPtr = static_cast<const float *>(in->data());
+ for (size_t i = 0; i < num; i++) {
+ outPtr[i] = (inPtr[i] >= 0.f) ? inPtr[i] : 0.f;
+ }
}
-
template <>
-void Add<float, lang::Cpp>(const size_t num, const Blob *in, const float x,
- Blob *out, Context *ctx) {
+void Set<float, lang::Cpp>(const size_t num, const float x, Blob *out,
+ Context *ctx) {
+ float *outPtr = static_cast<float *>(out->mutable_data());
+ for (size_t i = 0; i < num; i++) outPtr[i] = x;
+}
+template <>
+void Sigmoid<float, lang::Cpp>(const size_t num, const Blob *in, Blob *out,
+ Context *ctx) {
float *outPtr = static_cast<float *>(out->mutable_data());
const float *inPtr = static_cast<const float *>(in->data());
for (size_t i = 0; i < num; i++) {
- outPtr[i] = inPtr[i] + x;
+ outPtr[i] = 1.f / (1.f + exp(-inPtr[i]));
}
}
template <>
-void Add<float, lang::Cpp>(const size_t num, const Blob *in1, const Blob *in2,
- Blob *out, Context *ctx) {
- // CHECK_EQ(ctx->stream, nullptr);
- float *outPtr= static_cast<float *>(out->mutable_data());
- const float *in1Ptr = static_cast<const float *>(in1->data());
- const float *in2Ptr = static_cast<const float *>(in2->data());
+void Sign<float, lang::Cpp>(const size_t num, const Blob *in, Blob *out,
+ Context *ctx) {
+ float *outPtr = static_cast<float *>(out->mutable_data());
+ const float *inPtr = static_cast<const float *>(in->data());
for (size_t i = 0; i < num; i++) {
- outPtr[i] = in1Ptr[i] + in2Ptr[i];
+ outPtr[i] = inPtr[i] > 0 ? 1.0f : 0.0f;
}
}
template <>
-void Sub<float, lang::Cpp>(const size_t num, const Blob *in1, const Blob *in2,
- Blob *out, Context *ctx) {
- // CHECK_EQ(ctx->stream, nullptr);
- float *outPtr= static_cast<float *>(out->mutable_data());
- const float *in1Ptr = static_cast<const float *>(in1->data());
- const float *in2Ptr = static_cast<const float *>(in2->data());
+void Sqrt<float, lang::Cpp>(const size_t num, const Blob *in, Blob *out,
+ Context *ctx) {
+ float *outPtr = static_cast<float *>(out->mutable_data());
+ const float *inPtr = static_cast<const float *>(in->data());
for (size_t i = 0; i < num; i++) {
- outPtr[i] = in1Ptr[i] - in2Ptr[i];
+ CHECK_GT(inPtr[i], 0.f);
+ outPtr[i] = sqrt(inPtr[i]);
}
}
template <>
-void EltwiseMult<float, lang::Cpp>(const size_t num, const Blob *in, const float x,
- Blob *out, Context *ctx) {
- float *outPtr= static_cast<float *>(out->mutable_data());
+void Square<float, lang::Cpp>(const size_t num, const Blob *in, Blob *out,
+ Context *ctx) {
+ float *outPtr = static_cast<float *>(out->mutable_data());
const float *inPtr = static_cast<const float *>(in->data());
for (size_t i = 0; i < num; i++) {
- outPtr[i] = inPtr[i] * x;
+ outPtr[i] = inPtr[i] * inPtr[i];
}
}
template <>
-void EltwiseMult<float, lang::Cpp>(const size_t num, const Blob *in1, const Blob *in2,
- Blob *out, Context *ctx) {
- float *outPtr= static_cast<float *>(out->mutable_data());
+void Sub<float, lang::Cpp>(const size_t num, const Blob *in1, const Blob *in2,
+ Blob *out, Context *ctx) {
+ // CHECK_EQ(ctx->stream, nullptr);
+ float *outPtr = static_cast<float *>(out->mutable_data());
const float *in1Ptr = static_cast<const float *>(in1->data());
const float *in2Ptr = static_cast<const float *>(in2->data());
for (size_t i = 0; i < num; i++) {
- outPtr[i] = in1Ptr[i] * in2Ptr[i];
+ outPtr[i] = in1Ptr[i] - in2Ptr[i];
}
}
+// sum all elements of input into out
+// TODO(wangwei) optimize using omp
template <>
-void Div<float, lang::Cpp>(const size_t num, const Blob *in1, const Blob *in2,
- Blob *out, Context *ctx) {
- float *outPtr= static_cast<float *>(out->mutable_data());
- const float *in1Ptr = static_cast<const float *>(in1->data());
- const float *in2Ptr = static_cast<const float *>(in2->data());
+void Sum<float, lang::Cpp>(const size_t num, const Blob *in, float *out,
+ Context *ctx) {
+ float s = 0.f;
+ const float *inPtr = static_cast<const float *>(in->data());
for (size_t i = 0; i < num; i++) {
- CHECK_NE(in2Ptr[i],0.f);
- outPtr[i] = in1Ptr[i] / in2Ptr[i];
+ s += inPtr[i];
}
+ *out = s;
}
template <>
-void Div<float, lang::Cpp>(const size_t num, const float x, const Blob *in,
- Blob *out, Context *ctx) {
- float *outPtr= static_cast<float *>(out->mutable_data());
+void Tanh<float, lang::Cpp>(const size_t num, const Blob *in, Blob *out,
+ Context *ctx) {
+ float *outPtr = static_cast<float *>(out->mutable_data());
const float *inPtr = static_cast<const float *>(in->data());
for (size_t i = 0; i < num; i++) {
- CHECK_NE(inPtr[i],0.f);
- outPtr[i] = x / inPtr[i];
+ outPtr[i] = tanh(inPtr[i]);
}
}
+// =========Matrix operations ================================================
+
template <>
-void Outer<float, lang::Cpp>(const size_t m, const size_t n, const Blob *in1, const Blob *in2,
- Blob *out, Context *ctx) {
- float *outPtr= static_cast<float *>(out->mutable_data());
- const float *in1Ptr = static_cast<const float *>(in1->data());
- const float *in2Ptr = static_cast<const float *>(in2->data());
- for (size_t r = 0; r < m ; r++) {
- size_t offset = r * n;
- for (size_t c = 0; c < n; c++) {
- outPtr[offset + c] = in1Ptr[r] * in2Ptr[c];
- }
- }
+void AddCol<float, lang::Cpp>(const size_t nrow, const size_t ncol,
+ const Blob *A, const Blob *v, Blob *out,
+ Context *ctx) {
+ float *outPtr = static_cast<float *>(out->mutable_data());
+ const float *APtr = static_cast<const float *>(A->data());
+ const float *vPtr = static_cast<const float *>(v->data());
+ for (size_t r = 0; r < nrow; r++) {
+ size_t offset = r * ncol;
+ for (size_t c = 0; c < ncol; c++) {
+ outPtr[offset + c] = APtr[offset + c] + vPtr[r];
+ }
+ }
}
template <>
-void LT<float, lang::Cpp>(const size_t num, const Blob *in, const float x,
- Blob *out, Context *ctx) {
+void AddRow<float, lang::Cpp>(const size_t nrow, const size_t ncol,
+ const Blob *A, const Blob *v, Blob *out,
+ Context *ctx) {
float *outPtr = static_cast<float *>(out->mutable_data());
- const float *inPtr = static_cast<const float *>(in->data());
- for (size_t i = 0; i < num; i++) {
- outPtr[i] = (inPtr[i] < x) ? 1.f : 0.f;
+ const float *APtr = static_cast<const float *>(A->data());
+ const float *vPtr = static_cast<const float *>(v->data());
+ for (size_t r = 0; r < nrow; r++) {
+ size_t offset = r * ncol;
+ for (size_t c = 0; c < ncol; c++) {
+ outPtr[offset + c] = APtr[offset + c] + vPtr[c];
+ }
}
}
-
template <>
-void LE<float, lang::Cpp>(const size_t num, const Blob *in, const float x,
- Blob *out, Context *ctx) {
+void Outer<float, lang::Cpp>(const size_t m, const size_t n, const Blob *in1,
+ const Blob *in2, Blob *out, Context *ctx) {
float *outPtr = static_cast<float *>(out->mutable_data());
- const float *inPtr = static_cast<const float *>(in->data());
- for (size_t i = 0; i < num; i++) {
- outPtr[i] = (inPtr[i] <= x) ? 1.f : 0.f;
+ const float *in1Ptr = static_cast<const float *>(in1->data());
+ const float *in2Ptr = static_cast<const float *>(in2->data());
+ for (size_t r = 0; r < m; r++) {
+ size_t offset = r * n;
+ for (size_t c = 0; c < n; c++) {
+ outPtr[offset + c] = in1Ptr[r] * in2Ptr[c];
+ }
}
}
-
template <>
-void GT<float, lang::Cpp>(const size_t num, const Blob *in, const float x,
- Blob *out, Context *ctx) {
+void Softmax<float, lang::Cpp>(const size_t nrow, const size_t ncol,
+ const Blob *in, Blob *out, Context *ctx) {
float *outPtr = static_cast<float *>(out->mutable_data());
const float *inPtr = static_cast<const float *>(in->data());
- for (size_t i = 0; i < num; i++) {
- outPtr[i] = (inPtr[i] > x) ? 1.f : 0.f;
+ float *bPtr = new float[ncol];
+ for (size_t r = 0; r < nrow; r++) {
+ size_t offset = r * ncol;
+ float denom = 0.f;
+ for (size_t c = 0; c < ncol; c++) {
+ bPtr[c] = exp(inPtr[offset + c]);
+ denom += bPtr[c];
+ }
+ for (size_t c = 0; c < ncol; c++) {
+ size_t idx = offset + c;
+ outPtr[idx] = bPtr[c] / denom;
+ }
}
+ delete bPtr;
}
template <>
-void GE<float, lang::Cpp>(const size_t num, const Blob *in, const float x,
- Blob *out, Context *ctx) {
+void SumColumns<float, lang::Cpp>(const size_t nrow, const size_t ncol,
+ const Blob *in, Blob *out, Context *ctx) {
float *outPtr = static_cast<float *>(out->mutable_data());
const float *inPtr = static_cast<const float *>(in->data());
- for (size_t i = 0; i < num; i++) {
- outPtr[i] = (inPtr[i] >= x) ? 1.f : 0.f;
+ for (size_t c = 0; c < ncol; c++) {
+ outPtr[c] = 0.f;
+ }
+ for (size_t r = 0; r < nrow; r++) {
+ size_t offset = r * ncol;
+ for (size_t c = 0; c < ncol; c++) {
+ outPtr[c] += inPtr[offset + c];
+ }
}
}
template <>
-void Amax<float, lang::Cpp>(const size_t num, const Blob *in, size_t *out, Context *ctx) {
- size_t maxPos = 0;
- float maxVal = 0;
- const float *inPtr = static_cast<const float *>(in->data());
- for (size_t i = 0; i < num; i++) {
- if (i == 0) {
- maxVal = inPtr[i];
- }
- else if (inPtr[i] > maxVal) {
- maxVal = inPtr[i];
- maxPos = i;
- }
- }
- *out = maxPos;
-}
-
-template <>
-void Amin<float, lang::Cpp>(const size_t num, const Blob *in, size_t *out, Context *ctx) {
- size_t minPos = 0;
- float minVal = 0;
+void SumRows<float, lang::Cpp>(const size_t nrow, const size_t ncol,
+ const Blob *in, Blob *out, Context *ctx) {
+ float *outPtr = static_cast<float *>(out->mutable_data());
const float *inPtr = static_cast<const float *>(in->data());
- for (size_t i = 0; i < num; i++) {
- if (i == 0) {
- minVal = inPtr[i];
- }
- else if (inPtr[i] > minVal) {
- minVal = inPtr[i];
- minPos = i;
- }
- }
- *out = minPos;
+ for (size_t r = 0; r < nrow; r++) {
+ size_t offset = r * ncol;
+ outPtr[r] = 0.f;
+ for (size_t c = 0; c < ncol; c++) {
+ outPtr[r] += inPtr[offset + c];
+ }
+ }
}
+// ===============Random operations==========================================
template <>
-void Asum<float, lang::Cpp>(const size_t num, const Blob *in, float *out, Context *ctx) {
- float sum = 0;
- const float *inPtr = static_cast<const float *>(in->data());
- for (size_t i = 0; i < num; i++) {
- sum += fabs(inPtr[i]);
- }
+void Bernoulli<float, lang::Cpp>(const size_t num, const float p, Blob *out,
+ Context *ctx) {
+ std::bernoulli_distribution distribution(p);
+ float *outPtr = static_cast<float *>(out->mutable_data());
+ for (size_t i = 0; i < num; i++) {
+ outPtr[i] = distribution(ctx->random_generator) ? 1.0f : 0.0f;
+ }
}
template <>
-void Axpy<float, lang::Cpp>(const size_t num, const float alpha, const Blob *in,
- Blob *out, Context *ctx) {
+void Gaussian<float, lang::Cpp>(const size_t num, const float mean,
+ const float std, Blob *out, Context *ctx) {
+ std::normal_distribution<float> distribution(mean, std);
float *outPtr = static_cast<float *>(out->mutable_data());
- const float *inPtr = static_cast<const float *>(in->data());
- for (size_t i = 0; i < num; i++) {
- outPtr[i] += alpha * inPtr[i];
- }
+ for (size_t i = 0; i < num; i++) {
+ outPtr[i] = static_cast<float>(distribution(ctx->random_generator));
+ }
}
-
template <>
-void Scale<float, lang::Cpp>(const size_t num, const float x, Blob *out, Context *ctx) {
- float *outPtr = static_cast<float *>(out->mutable_data());
- for (size_t i = 0; i < num; i++) {
- outPtr[i] *= x;
- }
+void Uniform<float, lang::Cpp>(const size_t num, const float low,
+ const float high, Blob *out, Context *ctx) {
+ std::uniform_real_distribution<float> distribution(low, high);
+ float *outPtr = static_cast<float *>(out->mutable_data());
+ for (size_t i = 0; i < num; i++) {
+ outPtr[i] = static_cast<float>(distribution(ctx->random_generator));
+ }
}
-//template <>
-//void Dot<float, lang::Cpp>(const size_t num, const Blob *in1, const Blob *in2,
-// float *out, Context *ctx) {
-// float sum = 0;
-// const float *in1Ptr = static_cast<const float *>(in1->data());
-// const float *in2Ptr = static_cast<const float *>(in2->data());
-// for (size_t i = 0; i < num; i++) {
-// sum += in1Ptr[i] * in2Ptr[i];
-// }
-//}
-
-template <>
-void GEMV<float, lang::Cpp>(bool trans, const size_t m, const size_t n, const float alpha,
- const Blob *A, const Blob *v, const float beta,
- Blob *out, Context *ctx) {
- float *outPtr = static_cast<float *>(out->mutable_data());
- const float* APtr = static_cast<const float *>(A->data());
- const float* vPtr = static_cast<const float *>(v->data());
- for (size_t r = 0; r < m; r++) {
- float sum = 0;
- for (size_t c = 0; c < n; c++) {
- size_t idx = trans ? c * m + r : r * n + c;
- sum += APtr[idx] * vPtr[c];
- }
- outPtr[r] = alpha * sum + beta * outPtr[r];
- }
-}
+// ====================Blas operations======================================
template <>
void DGMM<float, lang::Cpp>(const bool side_right, const size_t nrow,
@@ -491,37 +438,21 @@ void DGMM<float, lang::Cpp>(const bool side_right, const size_t nrow,
}
}
+#ifdef USE_CBLAS
template <>
-void Bernoulli<float, lang::Cpp>(const size_t num, const float p, Blob *out, Context *ctx) {
- std::bernoulli_distribution distribution(p);
+void Axpy<float, lang::Cpp>(const size_t num, const float alpha, const Blob *in,
+ Blob *out, Context *ctx) {
+ const float *inPtr = static_cast<const float *>(in->data());
float *outPtr = static_cast<float *>(out->mutable_data());
- for (size_t i = 0; i < num; i++) {
- outPtr[i] = distribution(ctx->random_generator) ? 1.0f : 0.0f;
- }
+ cblas_saxpy(num, alpha, inPtr, 1, outPtr, 1);
}
-
-template <>
-void Uniform<float, lang::Cpp>(const size_t num, const float low, const float high, Blob *out,
- Context *ctx) {
- std::uniform_real_distribution<float> distribution(low, high);
- float *outPtr= static_cast<float *>(out->mutable_data());
- for (size_t i = 0; i < num; i++) {
- outPtr[i] = static_cast<float>(distribution(ctx->random_generator));
- }
-}
-
template <>
-void Gaussian<float, lang::Cpp>(const size_t num, const float mean, const float std, Blob *out,
- Context *ctx) {
- std::normal_distribution<float> distribution(mean, std);
+void Scale<float, lang::Cpp>(const size_t num, const float x, Blob *out,
+ Context *ctx) {
float *outPtr = static_cast<float *>(out->mutable_data());
- for (size_t i = 0; i < num; i++) {
- outPtr[i] = static_cast<float>(distribution(ctx->random_generator));
- }
+ cblas_sscal(num, x, outPtr, 1);
}
-
-#ifdef USE_CBLAS
template <>
void Dot<float, lang::Cpp>(const size_t num, const Blob *in1, const Blob *in2,
float *out, Context *ctx) {
@@ -529,6 +460,21 @@ void Dot<float, lang::Cpp>(const size_t num, const Blob *in1, const Blob *in2,
const float *in2Ptr = static_cast<const float *>(in2->data());
*out = cblas_sdot(num, in1Ptr, 1, in2Ptr, 1);
}
+template <>
+void GEMV<float, lang::Cpp>(bool trans, const size_t m, const size_t n,
+ const float alpha, const Blob *A, const Blob *v,
+ const float beta, Blob *out, Context *ctx) {
+ const float *APtr = static_cast<const float *>(A->data());
+ const float *vPtr = static_cast<const float *>(v->data());
+ float *outPtr = static_cast<float *>(out->mutable_data());
+ if (!trans) {
+ cblas_sgemv(CblasRowMajor, CblasNoTrans, m, n, alpha, APtr, n, vPtr, 1,
+ beta, outPtr, 1);
+ } else {
+ cblas_sgemv(CblasRowMajor, CblasTrans, n, m, alpha, APtr, m, vPtr, 1, beta,
+ outPtr, 1);
+ }
+}
template <>
void GEMM<float, lang::Cpp>(const bool transA, const bool transB,
@@ -548,6 +494,98 @@ void GEMM<float, lang::Cpp>(const bool transA, const bool transB,
lda, BPtr, ldb, beta, CPtr, ldc);
}
+#else
+
+template <>
+void Amax<float, lang::Cpp>(const size_t num, const Blob *in, size_t *out,
+ Context *ctx) {
+ size_t maxPos = 0;
+ float maxVal = 0;
+ const float *inPtr = static_cast<const float *>(in->data());
+ for (size_t i = 0; i < num; i++) {
+ if (i == 0) {
+ maxVal = inPtr[i];
+ } else if (inPtr[i] > maxVal) {
+ maxVal = inPtr[i];
+ maxPos = i;
+ }
+ }
+ *out = maxPos;
+}
+template <>
+void Amin<float, lang::Cpp>(const size_t num, const Blob *in, size_t *out,
+ Context *ctx) {
+ size_t minPos = 0;
+ float minVal = 0;
+ const float *inPtr = static_cast<const float *>(in->data());
+ for (size_t i = 0; i < num; i++) {
+ if (i == 0) {
+ minVal = inPtr[i];
+ } else if (inPtr[i] > minVal) {
+ minVal = inPtr[i];
+ minPos = i;
+ }
+ }
+ *out = minPos;
+}
+
+template <>
+void Asum<float, lang::Cpp>(const size_t num, const Blob *in, float *out,
+ Context *ctx) {
+ float sum = 0;
+ const float *inPtr = static_cast<const float *>(in->data());
+ for (size_t i = 0; i < num; i++) {
+ sum += fabs(inPtr[i]);
+ }
+}
+
+template <>
+void Axpy<float, lang::Cpp>(const size_t num, const float alpha, const Blob *in,
+ Blob *out, Context *ctx) {
+ float *outPtr = static_cast<float *>(out->mutable_data());
+ const float *inPtr = static_cast<const float *>(in->data());
+ for (size_t i = 0; i < num; i++) {
+ outPtr[i] += alpha * inPtr[i];
+ }
+}
+
+template <>
+void Scale<float, lang::Cpp>(const size_t num, const float x, Blob *out,
+ Context *ctx) {
+ float *outPtr = static_cast<float *>(out->mutable_data());
+ for (size_t i = 0; i < num; i++) {
+ outPtr[i] *= x;
+ }
+}
+
+template <>
+void Dot<float, lang::Cpp>(const size_t num, const Blob *in1, const Blob *in2,
+ float *out, Context *ctx) {
+ float sum = 0;
+ const float *in1Ptr = static_cast<const float *>(in1->data());
+ const float *in2Ptr = static_cast<const float *>(in2->data());
+ for (size_t i = 0; i < num; i++) {
+ sum += in1Ptr[i] * in2Ptr[i];
+ }
+}
+
+template <>
+void GEMV<float, lang::Cpp>(bool trans, const size_t m, const size_t n,
+ const float alpha, const Blob *A, const Blob *v,
+ const float beta, Blob *out, Context *ctx) {
+ float *outPtr = static_cast<float *>(out->mutable_data());
+ const float *APtr = static_cast<const float *>(A->data());
+ const float *vPtr = static_cast<const float *>(v->data());
+ for (size_t r = 0; r < m; r++) {
+ float sum = 0;
+ for (size_t c = 0; c < n; c++) {
+ size_t idx = trans ? c * m + r : r * n + c;
+ sum += APtr[idx] * vPtr[c];
+ }
+ outPtr[r] = alpha * sum + beta * outPtr[r];
+ }
+}
+
#endif // USE_CBLAS
} // namespace singa
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/564c88ad/src/core/tensor/tensor_math_cuda.h
----------------------------------------------------------------------
diff --git a/src/core/tensor/tensor_math_cuda.h b/src/core/tensor/tensor_math_cuda.h
index 4a2ba66..f9841a3 100644
--- a/src/core/tensor/tensor_math_cuda.h
+++ b/src/core/tensor/tensor_math_cuda.h
@@ -26,75 +26,100 @@
#include "singa/core/common.h"
namespace singa {
-
-// TODO(wangwei) Clean implementations following comments in tensor_math_cpp.h.
-// TODO(wangwei) optimize using stream
+// =================Elementwise operations===================================
template <>
-void Add<float, lang::Cuda>(int count, const Blob *lhs, const Blob *rhs,
- Blob *ret, Context *ctx) {
- const float *a = static_cast<const float *>(lhs->data());
- const float *b = static_cast<const float *>(rhs->data());
- float *c = static_cast<float *>(ret->mutable_data());
- cuda::add(count, a, b, c);
+void Add<float, lang::Cuda>(const size_t num, const Blob *in1, const Blob *in2,
+ Blob *out, Context *ctx) {
+ const float *in1Ptr = static_cast<const float *>(in1->data());
+ const float *in2Ptr = static_cast<const float *>(in2->data());
+ float *outPtr = static_cast<float *>(out->mutable_data());
+ cuda::add(num, in1Ptr, in2Ptr, outPtr);
}
-// TODO(wangwei) optimize using stream
+// follow the consistency guide of math API
template <>
-void Sub<float, lang::Cuda>(int count, const Blob *lhs, const Blob *rhs,
- Blob *ret, Context *ctx) {
- const float *a = static_cast<const float *>(lhs->data());
- const float *b = static_cast<const float *>(rhs->data());
- float *c = static_cast<float *>(ret->mutable_data());
- cuda::sub(count, a, b, c);
+void Div<float, lang::Cuda>(const size_t num, const float x, const Blob *in,
+ Blob *out, Context *ctx) {
+ float *outPtr = static_cast<float *>(out->mutable_data());
+ const float *inPtr = static_cast<const float *>(in->data());
+ cuda::Div(num, x, inPtr, outPtr, ctx->stream);
}
template <>
-void EltwiseMult<float, lang::Cuda>(int count, const Blob *input, float x,
- Blob *ret, Context *ctx) {
- float *dptr = static_cast<float *>(ret->mutable_data());
- const float *lptr = static_cast<const float *>(input->data());
- cuda::mult(count, lptr, x, dptr);
+void EltwiseMult<float, lang::Cuda>(const size_t num, const Blob *in,
+ const float x, Blob *out, Context *ctx) {
+ float *outPtr = static_cast<float *>(out->mutable_data());
+ const float *inPtr = static_cast<const float *>(in->data());
+ cuda::mult(num, inPtr, x, outPtr);
}
-// TODO(wangwei) optimize using stream
template <>
-void Square<float, lang::Cuda>(int count, const Blob *input, Blob *ret,
- Context *ctx) {
- const float *in = static_cast<const float *>(input->data());
- float *out = static_cast<float *>(ret->mutable_data());
- cuda::square(count, in, out);
+void GE<float, lang::Cuda>(const size_t num, const Blob *in, const float x,
+ Blob *out, Context *ctx) {
+ float *outPtr = static_cast<float *>(out->mutable_data());
+ const float *inPtr = static_cast<const float *>(in->data());
+ cuda::GE(num, inPtr, x, outPtr, ctx->stream);
}
-
-// sum all elements of input into ret
-// TODO(wangwei) optimize using stream
template <>
-void Sum<float, lang::Cuda>(int count, const Blob *input, float *ret,
- Context *ctx) {
- const float *in = static_cast<const float *>(input->data());
- cuda::sum(count, in, ret);
+void GT<float, lang::Cuda>(const size_t num, const Blob *in, const float x,
+ Blob *out, Context *ctx) {
+ float *outPtr = static_cast<float *>(out->mutable_data());
+ const float *inPtr = static_cast<const float *>(in->data());
+ cuda::GT(num, inPtr, x, outPtr, ctx->stream);
}
-
-// follow the consistency guide of math API
template <>
-void Div<float, lang::Cuda>(const size_t num, const float alpha, const Blob *in,
- Blob *out, Context *ctx) {
+void LE<float, lang::Cuda>(const size_t num, const Blob *in, const float x,
+ Blob *out, Context *ctx) {
float *outPtr = static_cast<float *>(out->mutable_data());
const float *inPtr = static_cast<const float *>(in->data());
- cuda::Div(num, alpha, inPtr, outPtr, ctx->stream);
+ cuda::LE(num, inPtr, x, outPtr, ctx->stream);
+}
+template <>
+void LT<float, lang::Cuda>(const size_t num, const Blob *in, const float x,
+ Blob *out, Context *ctx) {
+ float *outPtr = static_cast<float *>(out->mutable_data());
+ const float *inPtr = static_cast<const float *>(in->data());
+ cuda::LT(num, inPtr, x, outPtr, ctx->stream);
}
-
template <>
void Set<float, lang::Cuda>(const size_t num, const float x, Blob *out,
Context *ctx) {
float *outPtr = static_cast<float *>(out->mutable_data());
cuda::Set(num, x, outPtr, ctx->stream);
}
+// TODO(wangwei) optimize using stream
+template <>
+void Square<float, lang::Cuda>(const size_t num, const Blob *in, Blob *out,
+ Context *ctx) {
+ float *outPtr = static_cast<float *>(out->mutable_data());
+ const float *inPtr = static_cast<const float *>(in->data());
+ cuda::square(num, inPtr, outPtr);
+}
+// TODO(wangwei) optimize using stream
+template <>
+void Sub<float, lang::Cuda>(const size_t num, const Blob *in1, const Blob *in2,
+ Blob *out, Context *ctx) {
+ float *outPtr = static_cast<float *>(out->mutable_data());
+ const float *in1Ptr = static_cast<const float *>(in1->data());
+ const float *in2Ptr = static_cast<const float *>(in2->data());
+ cuda::sub(num, in1Ptr, in2Ptr, outPtr);
+}
+// sum all elements of in into out
+// TODO(wangwei) optimize using stream
+template <>
+void Sum<float, lang::Cuda>(const size_t num, const Blob *in, float *out,
+ Context *ctx) {
+ const float *inPtr = static_cast<const float *>(in->data());
+ cuda::sum(num, inPtr, out);
+}
+
+// =========================Blas operations==================================
// NOTE: cublas uses column major order.
// http://peterwittek.com/cublas-matrix-c-style.html
template <>
void DGMM<float, lang::Cuda>(const bool side_right, const size_t nrow,
const size_t ncol, const Blob *M, const Blob *v,
Blob *out, Context *ctx) {
- auto handle = ctx->cublas_handle; // TODO(wangwei) set cudastream
+ auto handle = ctx->cublas_handle; // TODO(wangwei) set cudastream
const float *MPtr = static_cast<const float *>(M->data());
const float *vPtr = static_cast<const float *>(v->data());
float *outPtr = static_cast<float *>(out->mutable_data());
@@ -106,6 +131,22 @@ void DGMM<float, lang::Cuda>(const bool side_right, const size_t nrow,
vPtr, 1, outPtr, ncol));
}
}
+template <>
+void GEMV<float, lang::Cuda>(bool trans, const size_t m, const size_t n,
+ const float alpha, const Blob *A, const Blob *v,
+ const float beta, Blob *out, Context *ctx) {
+ const float *APtr = static_cast<const float *>(A->data());
+ const float *vPtr = static_cast<const float *>(v->data());
+ float *outPtr = static_cast<float *>(out->mutable_data());
+ auto handle = ctx->cublas_handle; // TODO(wangwei) set cudastream
+ if (!trans)
+ CUBLAS_CHECK(cublasSgemv(handle, CUBLAS_OP_T, n, m, &alpha, APtr, n, vPtr,
+ 1, &beta, outPtr, 1));
+ else
+ CUBLAS_CHECK(cublasSgemv(handle, CUBLAS_OP_N, m, n, &alpha, APtr, m, vPtr,
+ 1, &beta, outPtr, 1));
+}
+
// http://docs.nvidia.com/cuda/cublas/#cublas-lt-t-gt-gemm
template <>
void GEMM<float, lang::Cuda>(const bool transA, const bool transB,
@@ -121,44 +162,11 @@ void GEMM<float, lang::Cuda>(const bool transA, const bool transB,
const float *APtr = static_cast<const float *>(A->data());
const float *BPtr = static_cast<const float *>(B->data());
float *CPtr = static_cast<float *>(C->mutable_data());
- auto handle = ctx->cublas_handle; // TODO(wangwei) set cudastream
+ auto handle = ctx->cublas_handle; // TODO(wangwei) set cudastream
CUBLAS_CHECK(cublasSgemm(handle, transb, transa, ncolB, nrowA, ncolA, &alpha,
BPtr, ldb, APtr, lda, &beta, CPtr, ldc));
}
-template <>
-void GE<float, lang::Cuda>(const size_t num, const Blob* in, const float x,
- Blob* out, Context *ctx) {
- float* outPtr = static_cast<float*>(out->mutable_data());
- const float* inPtr = static_cast<const float*>(in->data());
- cuda::GE(num, inPtr, x, outPtr, ctx->stream);
-}
-template <>
-void GT<float, lang::Cuda>(const size_t num, const Blob* in, const float x,
- Blob* out, Context *ctx) {
- float* outPtr = static_cast<float*>(out->mutable_data());
- const float* inPtr = static_cast<const float*>(in->data());
- cuda::GT(num, inPtr, x, outPtr, ctx->stream);
-}
-template <>
-void LE<float, lang::Cuda>(const size_t num, const Blob* in, const float x,
- Blob* out, Context *ctx) {
- float* outPtr = static_cast<float*>(out->mutable_data());
- const float* inPtr = static_cast<const float*>(in->data());
- cuda::LE(num, inPtr, x, outPtr, ctx->stream);
-}
-template <>
-void LT<float, lang::Cuda>(const size_t num, const Blob* in, const float x,
- Blob* out, Context *ctx) {
- float* outPtr = static_cast<float*>(out->mutable_data());
- const float* inPtr = static_cast<const float*>(in->data());
- cuda::LT(num, inPtr, x, outPtr, ctx->stream);
-}
-
-
-
-
-
} // namespace singa
#endif // USE_CUDA
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/564c88ad/test/singa/test_tensor_math.cc
----------------------------------------------------------------------
diff --git a/test/singa/test_tensor_math.cc b/test/singa/test_tensor_math.cc
index 823445f..94ca283 100644
--- a/test/singa/test_tensor_math.cc
+++ b/test/singa/test_tensor_math.cc
@@ -117,12 +117,11 @@ TEST_F(TestTensorMath, MemberTanh) {
}
TEST_F(TestTensorMath, Sum) {
- Tensor p1(Shape{1,2});
- p1 = Sum(e, 0);
+ Tensor p1 = Sum(e, 0);
const float *dptr1 = p1.data<const float *>();
EXPECT_FLOAT_EQ(9.0f,dptr1[0]);
EXPECT_FLOAT_EQ(12.0f,dptr1[1]);
-
+
Tensor p2(Shape{3,1});
p2 = Sum(e, 1);
const float *dptr2 = p2.data<const float *>();
@@ -143,9 +142,9 @@ TEST_F(TestTensorMath, SoftMax) {
EXPECT_NEAR(exp(2)/sum, dptr1[1],1e-5);
EXPECT_NEAR(exp(4)/sum, dptr1[3],1e-5);
EXPECT_NEAR(exp(6)/sum, dptr1[5],1e-5);
-
+
Tensor p2(Shape{3,2});
- p2 = SoftMax(e,1);
+ p2 = SoftMax(e,1);
const float *dptr2 = p2.data<const float *>();
EXPECT_NEAR(exp(1)/(exp(1)+exp(2)),dptr2[0], 1e-5);
EXPECT_NEAR(exp(2)/(exp(1)+exp(2)),dptr2[1], 1e-5);
@@ -237,12 +236,12 @@ TEST_F(TestTensorMath, MemberDiv) {
TEST_F(TestTensorMath, MemberBernoulli) {
Tensor p1(Shape{10000});
- Bernoulli(0.3,&p1);
+ Bernoulli(0.3f, &p1);
const float* dptr1 = p1.data<const float*>();
float sum = 0;
for(int i = 0; i < 10000; i++) sum += dptr1[i];
float mean = sum/10000;
- EXPECT_NEAR(mean, 0.3, 1e-2);
+ EXPECT_NEAR(mean, 0.3f, 1e-2);
sum = 0;
for(int i = 0; i < 10000; i++) sum += (dptr1[i]-mean)*(dptr1[i]-mean);
@@ -267,7 +266,7 @@ TEST_F(TestTensorMath, MemberUniform) {
TEST_F(TestTensorMath, MemberGaussian) {
Tensor p1(Shape{50000});
- Gaussian(0.0,1.0,&p1);
+ Gaussian(0.0f,1.0f,&p1);
const float* dptr1 = p1.data<const float*>();
float sum = 0;
for(int i = 0; i < 50000; i++) sum += dptr1[i];