You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@singa.apache.org by ka...@apache.org on 2016/06/13 09:19:49 UTC
[1/3] incubator-singa git commit: SINGA-184 Add Cross Entropy loss
computation
Repository: incubator-singa
Updated Branches:
refs/heads/dev 26df5ac03 -> 21e4b2d79
SINGA-184 Add Cross Entropy loss computation
Implement Cross Entropy loss
Pass cpplint.py, test pass compilation
Todo: check test
Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/efd7b627
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/efd7b627
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/efd7b627
Branch: refs/heads/dev
Commit: efd7b627bacb4acd6a3322468350f2b5399f725b
Parents: 3e2507b
Author: kaiping <ka...@comp.nus.edu.sg>
Authored: Fri May 27 12:09:30 2016 +0800
Committer: Wei Wang <wa...@comp.nus.edu.sg>
Committed: Tue May 31 22:14:09 2016 +0800
----------------------------------------------------------------------
src/model/loss/cross_entropy.h | 105 ++++++++++++++++++++++++++++++++++
test/singa/test_cross_entropy.cc | 66 +++++++++++++++++++++
2 files changed, 171 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/efd7b627/src/model/loss/cross_entropy.h
----------------------------------------------------------------------
diff --git a/src/model/loss/cross_entropy.h b/src/model/loss/cross_entropy.h
new file mode 100644
index 0000000..815b795
--- /dev/null
+++ b/src/model/loss/cross_entropy.h
@@ -0,0 +1,105 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef SRC_MODEL_LOSS_CROSS_ENTROPY_H_
+#define SRC_MODEL_LOSS_CROSS_ENTROPY_H_
+#include <stack>
+#include "singa/model/loss.h"
+
+namespace singa {
+
+/// Cross entropy is for cross entropy loss.
+class CrossEntropy : public Loss<Tensor> {
+ public:
+ /// Compute the loss values for each sample/instance given the prediction
+ /// and the target, which is sum {-log(prob_of_truth)}
+ /// Users can call Average(const Tensor&) to get the average
+ /// loss value over all samples in the batch.
+ Tensor Forward(const Tensor& prediction, const Tensor& target) override;
+
+ /// Compute the gradients of the loss values w.r.t. the prediction,
+ /// which is: if the entry x corresponds to ground truth,
+ /// then softmax(x) - 1; else, softmax(x)
+ Tensor Backward() override;
+
+ private:
+ // to buffer intermediate data, i.e., softmax(prediction), target
+ std::stack<Tensor> buf_;
+};
+
+Tensor CrossEntropy::Forward(const Tensor& prediction, const Tensor& target) {
+ CHECK(buf_.empty()) << "Do not call Forward successively for more than twice."
+ << " The calling pattern is [Forward|Evaluate] Backward";
+
+ size_t batchsize = 1;
+ if (prediction.nDim() > 1) batchsize = prediction.shape().at(0);
+ size_t dim = prediction.Size() / batchsize;
+ // a temporal Softmax layer for forward computation
+// LayerConf conf; // TODO(kaiping): this is currently commented
+// Softmax softmax_tmp;
+// softmax_tmp.Setup(conf);
+// Tensor softmax = softmax_tmp.Forward(0, prediction);
+
+ Tensor softmax(Shape{batchsize, dim}); // TODO(kaiping): Delete
+// softmax.SetValue<float>(0.5f); // TODO(kaiping): Delete
+
+ softmax.Reshape(Shape{batchsize, dim});
+ // buffer intermediate data
+ buf_.push(softmax);
+ buf_.push(target);
+
+ // Compute loss for each sample
+ Tensor loss(Shape{batchsize, 1});
+ float * pre_ptr = reinterpret_cast<float*>(softmax.blob()->mutable_data());
+ float * truth_ptr = reinterpret_cast<float*>(target.blob()->mutable_data());
+ float * loss_ptr = reinterpret_cast<float*>(loss.blob()->mutable_data());
+ for (size_t i = 0; i < batchsize; i++) {
+ int ilabel = static_cast<int>(truth_ptr[i]);
+ CHECK_GE(ilabel, 0);
+ float prob_of_truth = pre_ptr[ilabel];
+ loss_ptr[i] = -log(prob_of_truth);
+ pre_ptr += dim; // change to the next sample
+ }
+ return loss;
+}
+
+Tensor CrossEntropy::Backward() {
+ const Tensor& target = buf_.top();
+ buf_.pop();
+ Tensor softmax = buf_.top();
+ buf_.pop();
+
+ size_t batchsize = 1;
+ if (softmax.nDim() > 1)
+ batchsize = softmax.shape().at(0);
+ size_t dim = softmax.Size() / batchsize;
+ float * truth_ptr = reinterpret_cast<float*>(target.blob()->mutable_data());
+ float * pre_ptr = reinterpret_cast<float*>(softmax.blob()->mutable_data());
+ for (size_t i = 0; i < batchsize; i++) {
+ int ilabel = static_cast<int>(truth_ptr[i]);
+ // CHECK_GE(ilabel, 0);
+ pre_ptr[ilabel] -= 1.0;
+ pre_ptr += dim; // change to the next sample
+ }
+ return softmax;
+}
+} // namespace singa
+
+#endif // SRC_MODEL_LOSS_CROSS_ENTROPY_H_
+
+
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/efd7b627/test/singa/test_cross_entropy.cc
----------------------------------------------------------------------
diff --git a/test/singa/test_cross_entropy.cc b/test/singa/test_cross_entropy.cc
new file mode 100644
index 0000000..9bb2321
--- /dev/null
+++ b/test/singa/test_cross_entropy.cc
@@ -0,0 +1,66 @@
+/************************************************************
+*
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements. See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership. The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an
+* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+* KIND, either express or implied. See the License for the
+* specific language governing permissions and limitations
+* under the License.
+*
+*************************************************************/
+
+#include "gtest/gtest.h"
+#include "singa/core/tensor.h"
+#include "singa/core/device.h"
+#include "../src/model/loss/cross_entropy.h"
+
+using singa::Tensor;
+class TestCrossEntropy : public ::testing::Test {
+ protected:
+ virtual void SetUp() {
+ p.Reshape(singa::Shape{2, 4});
+ t.Reshape(singa::Shape{2, 1});
+ p.CopyDataFromHostPtr(pdat, sizeof(pdat) / sizeof(float));
+ t.CopyDataFromHostPtr(tdat, sizeof(pdat) / sizeof(float));
+ }
+ const float pdat[8] = {0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1};
+ const float tdat[2] = {0.0, 2.0};
+
+ singa::Tensor p, t;
+};
+
+TEST_F(TestCrossEntropy, CppForward) {
+ singa::CrossEntropy cross_entropy;
+ const Tensor& loss = cross_entropy.Forward(p, t);
+ auto ldat = loss.data<const float*>();
+
+ const float result_test = -log(0.25);
+ EXPECT_FLOAT_EQ(ldat[0], result_test);
+ EXPECT_FLOAT_EQ(ldat[1], result_test);
+}
+
+TEST_F(TestCrossEntropy, CppBackward) {
+ singa::CrossEntropy cross_entropy;
+ cross_entropy.Forward(p, t);
+ const Tensor& grad = cross_entropy.Backward();
+
+ auto gdat = grad.data<const float*>();
+ EXPECT_FLOAT_EQ(gdat[0], -0.75);
+ EXPECT_FLOAT_EQ(gdat[1], 0.25);
+ EXPECT_FLOAT_EQ(gdat[2], 0.25);
+ EXPECT_FLOAT_EQ(gdat[3], 0.25);
+ EXPECT_FLOAT_EQ(gdat[4], 0.25);
+ EXPECT_FLOAT_EQ(gdat[5], 0.25);
+ EXPECT_FLOAT_EQ(gdat[6], -0.75);
+ EXPECT_FLOAT_EQ(gdat[7], 0.25);
+}
[3/3] incubator-singa git commit: SINGA-184 Add Cross Entropy loss
computation
Posted by ka...@apache.org.
SINGA-184 Add Cross Entropy loss computation
Merge with asf dev.
Fix the bug for cuda crossentropy caused by the target type (should use
int instead of float).
Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/21e4b2d7
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/21e4b2d7
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/21e4b2d7
Branch: refs/heads/dev
Commit: 21e4b2d79a4bd3acfb8b487cf96c197da464ae70
Parents: ec17aca 26df5ac
Author: Wei Wang <wa...@comp.nus.edu.sg>
Authored: Mon Jun 13 13:04:23 2016 +0800
Committer: Wei Wang <wa...@comp.nus.edu.sg>
Committed: Mon Jun 13 13:04:23 2016 +0800
----------------------------------------------------------------------
CMakeLists.txt | 4 +-
cmake/Dependencies.cmake | 7 +-
cmake/ProtoBuf.cmake | 116 ----
include/singa/core/common.h | 2 +-
include/singa/core/tensor.h | 400 +++++++-------
src/CMakeLists.txt | 27 +-
src/core/tensor/math_kernel.cu | 707 +++++++++++++------------
src/core/tensor/math_kernel.h | 93 ++--
src/core/tensor/tensor.cc | 761 ++++++++++++++-------------
src/core/tensor/tensor_math.h | 404 +++++++-------
src/core/tensor/tensor_math_cpp.h | 610 +++++++++++++++------
src/core/tensor/tensor_math_cuda.h | 423 +++++++++++----
src/model/layer/activation.cc | 10 +-
src/model/layer/batchnorm.cc | 70 +++
src/model/layer/batchnorm.h | 84 +++
src/model/layer/cudnn_activation.cc | 13 +-
src/model/layer/cudnn_batchnorm.cc | 214 ++++++++
src/model/layer/cudnn_batchnorm.h | 60 +++
src/model/layer/cudnn_convolution.cc | 183 ++++---
src/model/layer/cudnn_lrn.cc | 118 +++++
src/model/layer/cudnn_lrn.h | 56 ++
src/model/layer/cudnn_pooling.cc | 7 +-
src/model/layer/cudnn_softmax.cc | 4 +-
src/model/layer/dense.cc | 86 +++
src/model/layer/dense.h | 70 +++
src/model/layer/flatten.cc | 55 ++
src/model/layer/flatten.h | 53 ++
src/model/layer/lrn.cc | 59 +++
src/model/layer/lrn.h | 70 +++
src/model/layer/prelu.cc | 139 +++++
src/model/layer/prelu.h | 59 +++
src/model/layer/softmax.cc | 10 +-
src/model/loss/softmax_cross_entropy.cc | 8 +-
src/proto/model.proto | 39 +-
test/CMakeLists.txt | 1 +
test/singa/test_activation.cc | 8 +-
test/singa/test_cross_entropy.cc | 4 +-
test/singa/test_cudnn_activation.cc | 6 +-
test/singa/test_cudnn_batchnorm.cc | 257 +++++++++
test/singa/test_cudnn_convolution.cc | 181 +++++++
test/singa/test_cudnn_lrn.cc | 205 ++++++++
test/singa/test_cudnn_softmax.cc | 6 +-
test/singa/test_dense.cc | 249 +++++++++
test/singa/test_flatten.cc | 144 +++++
test/singa/test_mse.cc | 1 -
test/singa/test_prelu.cc | 137 +++++
test/singa/test_sgd.cc | 2 +-
test/singa/test_softmax.cc | 12 +-
test/singa/test_tensor_math.cc | 303 ++++++++++-
49 files changed, 4873 insertions(+), 1664 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/21e4b2d7/CMakeLists.txt
----------------------------------------------------------------------
diff --cc CMakeLists.txt
index fbe3adc,fbe3adc..a9d9b17
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@@ -10,7 -10,7 +10,9 @@@ LIST(APPEND CMAKE_MODULE_PATH ${PROJECT
IF(UNIX OR APPLE)
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC -Wall")
ENDIF()
--
++IF(CMAKE_BUILD_TYPE=Debug)
++ SET(NVCC_FLAG "${NVCC_FLAG} -g -G ")
++ENDIF()
#message(STATUS "${CMAKE_CXX_FLAGS}")
SET(SINGA_INCLUDE_DIR "${CMAKE_SOURCE_DIR}/include;${PROJECT_BINARY_DIR}")
#message(STATUS "include path: ${SINGA_INCLUDE_DIR}")
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/21e4b2d7/include/singa/core/tensor.h
----------------------------------------------------------------------
diff --cc include/singa/core/tensor.h
index 865e1e4,cd750c5..8cfa705
--- a/include/singa/core/tensor.h
+++ b/include/singa/core/tensor.h
@@@ -212,199 -213,171 +213,180 @@@ Tensor Reshape(const Tensor &in, Shape
/// Copy 'num' elements of src to dst.
/// The first 'src_offset' ('dst_offset') elements will be skipped.
- void CopyDataToFrom(Tensor *dst, const Tensor &src, size_t num,
- size_t src_offset = 0, size_t dst_offset = 0);
-
- // ==================Simple Linear Algebra Operations=========================
- Tensor Abs(const Tensor &t);
- Tensor Exp(const Tensor &t);
- Tensor Log(const Tensor &t);
- Tensor ReLU(const Tensor &t);
- Tensor Sigmoid(const Tensor &t);
- Tensor Sign(const Tensor &t);
- Tensor Sqrt(const Tensor &t);
- Tensor Square(const Tensor &t);
- Tensor Tanh(const Tensor &t);
+ void CopyDataToFrom(Tensor *dst, const Tensor &src, const size_t num,
+ const size_t src_offset = 0, const size_t dst_offset = 0);
+
+ // =============Element-wise operations====================================
+ Tensor Abs(const Tensor &in);
+ Tensor Exp(const Tensor &in);
+ Tensor Log(const Tensor &in);
+ Tensor ReLU(const Tensor &in);
+ Tensor Sigmoid(const Tensor &in);
+ Tensor Sign(const Tensor &in);
+ Tensor Sqrt(const Tensor &in);
+ Tensor Square(const Tensor &in);
+ Tensor Tanh(const Tensor &in);
+
+ /// Element-wise opeartion, out[i]=in[i]^x
+ template <typename SType>
+ Tensor Pow(const Tensor &in, const SType x);
+ /// Element-wise opeartion, out[i]=in[i]^x
+ template <typename SType>
+ void Pow(const Tensor &in, const SType x, Tensor *out);
+ /// Element-wise opeartion, out[i]=baes[i]^exp[i]
+ Tensor Pow(const Tensor &base, const Tensor &exp);
+ /// Element-wise opeartion, out[i]=baes[i]^exp[i]
+ void Pow(const Tensor &base, const Tensor &exp, Tensor *out);
+ /// Element-wise operation, out[i]= (in[i] < x) ? 1.f : 0.f
template <typename SType>
- SType Sum(const Tensor &t);
- /// Sum elements in the Tensor, currently only support vector and matrix.
- /// if 'axis' is 0, sum all rows into a single row
- /// if 'axis' is 1, sum all columns into a single column
- /// TODO(wangwei) support arbitrary Tensor like numpy.sum
- Tensor Sum(const Tensor &t, int axis);
+ Tensor operator<(const Tensor &in, const SType x);
+ template <typename SType>
+ void LT(const Tensor &in, const SType x, Tensor *out);
- /// Average elements in the Tensor, currently only support vector and matrix.
- /// if 'axis' is 0, average all rows into a single row
- /// if 'axis' is 1, average all columns into a single column
- /// TODO(wangwei) support arbitrary Tensor like numpy.average
- Tensor Average(const Tensor &t, int axis);
- /// Do softmax for each row. 'in' could be a 1-d or 2-d Tensor.
- Tensor SoftMax(const Tensor &in);
- /// Do softmax for each row. 'in' could be a 1-d or 2-d Tensor.
- void SoftMax(const Tensor &in, Tensor *out);
+ /// Element-wise operation, out[i]= (in[i] <= x) ? 1.f : 0.f
+ template <typename SType>
+ Tensor operator<=(const Tensor &in, const SType x);
+ template <typename SType>
+ void LE(const Tensor &in, const SType x, Tensor *out);
-
+ /// Element-wise operation, out[i]= (in[i] > x) ? 1.f : 0.f
+ template <typename SType>
+ Tensor operator>(const Tensor &in, const SType x);
+ template <typename SType>
+ void GT(const Tensor &in, const SType x, Tensor *out);
- /// Regarding the internal data as 2d, with shape_[0]*...*shape_[axis] rows,
- /// and shape_[axis+1]*...*shape_[nDim()] columns.
- /// and do softmax along each row.
- // Tensor Softmax(const Tensor& t, int axis = -1);
- // void Softmax(const Tensor& t, Tensor* ret, int axis = -1);
-
- /// Element-wise operation, ret[i]= (t[i] < x) ? 1.f : 0.f
- template <typename DType>
- Tensor operator<(const Tensor &t, const DType x);
- template <typename DType>
- void LT(const Tensor &t, DType x, Tensor *ret);
-
- /// Element-wise operation, ret[i]= (t[i] <= x) ? 1.f : 0.f
- template <typename DType>
- Tensor operator<=(const Tensor &t, const DType x);
- template <typename DType>
- void LE(const Tensor &t, DType x, Tensor *ret);
-
- /// Element-wise operation, ret[i]= (t[i] > x) ? 1.f : 0.f
- template <typename DType>
- Tensor operator>(const Tensor &t, const DType x);
- template <typename DType>
- void GT(const Tensor &t, DType x, Tensor *ret);
-
- /// Element-wise operation, ret[i]= (t[i] >= x) ? 1.f : 0.f
- template <typename DType>
- Tensor operator>=(const Tensor &t, const DType x);
- template <typename DType>
- void GE(const Tensor &t, DType x, Tensor *ret);
-
- /// Element-wise opeartion, ret[i]=t[i]^x
- template <typename DType>
- Tensor Pow(const Tensor &t, DType x);
- /// Element-wise opeartion, ret[i]=t[i]^x
- template <typename DType>
- void Pow(const Tensor &t, DType x, Tensor *ret);
- /// Element-wise opeartion, ret[i]=baes[i]^exp[i]
- Tensor Pow(const Tensor &base, Tensor exp);
- /// Element-wise opeartion, ret[i]=baes[i]^exp[i]
- void Pow(const Tensor &base, const Tensor &exp, Tensor *ret);
+ /// Element-wise operation, out[i]= (in[i] >= x) ? 1.f : 0.f
+ template <typename SType>
+ Tensor operator>=(const Tensor &in, const SType x);
+ template <typename SType>
+ void GE(const Tensor &in, const SType x, Tensor *out);
Tensor operator+(const Tensor &lhs, const Tensor &rhs);
- void Add(const Tensor &lhs, const Tensor &rhs, Tensor *ret);
+ void Add(const Tensor &lhs, const Tensor &rhs, Tensor *out);
Tensor operator-(const Tensor &lhs, const Tensor &rhs);
- void Sub(const Tensor &lhs, const Tensor &rhs, Tensor *ret);
+ void Sub(const Tensor &lhs, const Tensor &rhs, Tensor *out);
Tensor operator*(const Tensor &lhs, const Tensor &rhs);
- void EltwiseMult(const Tensor &lhs, const Tensor &rhs, Tensor *ret);
+ void EltwiseMult(const Tensor &lhs, const Tensor &rhs, Tensor *out);
Tensor operator/(const Tensor &lhs, const Tensor &rhs);
- void Div(const Tensor &lhs, const Tensor &rhs, Tensor *ret);
-
- template <typename DType>
- Tensor operator+(const Tensor &t, DType x);
- template <typename DType>
- void Add(const Tensor &t, DType x, Tensor *ret);
-
- template <typename DType>
- Tensor operator-(const Tensor &t, DType x);
- template <typename DType>
- void Sub(const Tensor &t, DType x, Tensor *ret);
+ void Div(const Tensor &lhs, const Tensor &rhs, Tensor *out);
- template <typename DType>
- Tensor operator*(const Tensor &t, DType x);
- template <typename DType>
- void EltwiseMult(const Tensor &t, DType x, Tensor *ret);
-
- template <typename DType>
- Tensor operator/(const Tensor &t, DType x);
- template <typename DType>
- void Div(const Tensor &t, DType x, Tensor *ret);
-
- // ================Blas operations============================================
- // We fix the scalar argument type to be float.
+ template <typename SType>
+ Tensor operator+(const Tensor &in, const SType x);
+ template <typename SType>
+ void Add(const Tensor &in, const SType x, Tensor *out);
- // ===== Level 1
- // TODO(wangwei) make amax/amin/asum a member function of tensor
- // void Amax(Tensor, Context* ctx); Get the index of the max value in a vector
- // void Asum(Tensor Context* ctx);
+ template <typename SType>
+ Tensor operator-(const Tensor &in, const SType x);
+ template <typename SType>
+ void Sub(const Tensor &in, const SType x, Tensor *out);
- // template <typename DType>
- // void Axpy(DType x, const Blob& t, Blob* ret, Context* ctx);
+ template <typename SType>
+ Tensor operator*(const Tensor &in, const SType x);
+ template <typename SType>
+ void EltwiseMult(const Tensor &in, const SType x, Tensor *out);
- /// Do matrix vector multipication or matrix matrix multiplication depdending
- /// on the Tensor shape. result = A * B
- Tensor Mult(const Tensor &A, const Tensor &B);
- /// Do matrix vector multipication or matrix matrix multiplication depdending
- /// on the Tensor shape. C = A * B
- void Mult(const Tensor &A, const Tensor &B, Tensor *C);
+ /// For each element e of Tensor 'in', compute e / x
+ template <typename SType>
+ Tensor operator/(const Tensor &in, const SType x);
+ /// For each element e of Tensor 'in', compute e / x into out
+ template <typename SType>
+ void Div(const Tensor &in, const SType x, Tensor *out);
- /// Do matrix vector multipication or matrix matrix multiplication depdending
- /// on the Tensor shape. ret = alpha lhs * rhs + beta * ret
- void Mult(const float alpha, const Tensor &lhs, const Tensor &rhs,
- const float beta, Tensor *C);
+ /// For each element e of Tensor 'in', compute x/e
+ template <typename SType>
+ Tensor Div(const SType x, const Tensor &in);
+ /// For each element e of Tensor 'in', compute x/e into 'out'
+ template <typename SType>
+ void Div(const SType x, const Tensor &in, Tensor *out);
- // ================Random operations==========================================
- /// For each element x set x = 1 if random() < p; otherwise x = 1.
- void Bernoulli(float p, Tensor *t);
- /// Fill in Tensor 't' following uniform distribution.
- void Uniform(float low, float high, Tensor *t);
- /// Fill in Tensor 't' following Gaussian distribution.
- void Gaussian(float mean, float std, Tensor *t);
+ template <typename SType>
+ SType Sum(const Tensor &in);
-
+ // ============Matrix (row/column) operations==================================
+ /// Average elements in the Tensor, currently only support vector and matrix.
+ /// if 'axis' is 0, average all rows into a single row
+ /// if 'axis' is 1, average all columns into a single column
+ /// TODO(wangwei) support arbitrary Tensor like numpy.average
+ Tensor Average(const Tensor &in, const int axis);
-/// Sum elements in the Tensor, currently only support vector and matrix.
-/// if 'axis' is 0, sum all rows into a single row
-/// if 'axis' is 1, sum all columns into a single column
-/// TODO(wangwei) support arbitrary Tensor like numpy.sum
-Tensor Sum(const Tensor &in, const int axis);
-/// Regarding the internal data as 2d, with shape_[0]*...*shape_[axis-1] rows,
-/// and shape_[axis]*...*shape_[nDim()] columns.
-/// and do softmax along each row.
-Tensor SoftMax(const Tensor &in, const int axis = 0);
-void SoftMax(const Tensor &in, const int axis, Tensor *out);
- // follow the consistency guide
- // https://issues.apache.org/jira/browse/SINGA-182
- // ============Matrix vector operations=======================================
/// Add column 'v' with each column of matrix M
void AddColumn(const Tensor &v, Tensor *M);
- void AddColumn(const float alpha, const float beta, const Tensor &v,
+ /// For each column 'c' of matrix out, do c=alpha*v + beta*c
+ template <typename SType>
+ void AddColumn(const SType alpha, const SType beta, const Tensor &v,
Tensor *out);
- /// Sub column 'v' by each column of matrix M
- void SubColumn(const Tensor &v, Tensor *M);
- /// Multiply column 'v' and each column of matrix M; write results into 'out'
- void MultColumn(const Tensor &v, Tensor *M);
- /// Divide column 'v' by each column of matrix M; write results into 'out'
- void DivColumn(const Tensor &v, Tensor *M);
-
/// Add row 'v' with each row of matrix M; write results into 'out'
void AddRow(const Tensor &v, Tensor *out);
- void AddRow(const float alpha, const float beta, const Tensor &v, Tensor *M);
- /// Sub row 'v' by each row of matrix M; write results into 'out'
- void SubRow(const Tensor &v, Tensor *M);
- /// Multiply row 'v' with each row of matrix M; write results into 'out'
- void MultRow(const Tensor &v, Tensor *M);
+ /// For each row 'r' of matrix out, do r=alpha*v + beta*r
+ template <typename SType>
+ void AddRow(const SType alpha, const SType beta, const Tensor &v, Tensor *M);
+ /// Divide column 'v' by each column of matrix M; write results into 'out'
+ void DivColumn(const Tensor &v, Tensor *M);
/// Divide row 'v' by each row of matrix M; write results into 'out'
void DivRow(const Tensor &v, Tensor *M);
-
- /// Sum all rows of matrix M into a single row as 'out'
- void SumRows(const Tensor &M, Tensor *out);
+ /// Multiply column 'v' and each column of matrix M; write results into 'out'
+ void MultColumn(const Tensor &v, Tensor *M);
+ /// Multiply row 'v' with each row of matrix M; write results into 'out'
+ void MultRow(const Tensor &v, Tensor *M);
++/// Do softmax for each row. 'in' could be a 1-d or 2-d Tensor.
++Tensor SoftMax(const Tensor &in);
++/// Do softmax for each row. 'in' could be a 1-d or 2-d Tensor.
++void SoftMax(const Tensor &in, Tensor *out);
+ /// Sub column 'v' by each column of matrix M
+ void SubColumn(const Tensor &v, Tensor *M);
+ /// Sub row 'v' by each row of matrix M; write results into 'out'
+ void SubRow(const Tensor &v, Tensor *M);
/// Sum all columns of matrix M into a single column as 'out'
void SumColumns(const Tensor &M, Tensor *out);
+ /// Sum all rows of matrix M into a single row as 'out'
+ void SumRows(const Tensor &M, Tensor *out);
+
++/// Sum elements in the Tensor, currently only support vector and matrix.
++/// if 'axis' is 0, sum all rows into a single row
++/// if 'axis' is 1, sum all columns into a single column
++/// TODO(wangwei) support arbitrary Tensor like numpy.sum
++Tensor Sum(const Tensor &in, const int axis);
++
+ // ================Random operations==========================================
+ /// For each element x set x = 1 if random() < p; otherwise x = 1.
+ template <typename SType>
+ void Bernoulli(const SType p, Tensor *out);
+ /// Fill in Tensor 't' following Gaussian distribution.
+ template <typename SType>
+ void Gaussian(const SType mean, const SType std, Tensor *out);
+ /// Fill in Tensor 't' following uniform distribution.
+ template <typename SType>
+ void Uniform(const SType low, const SType high, Tensor *out);
+
+ // ================Blas operations============================================
+ // TODO(wangwei) make amax/amin/asum a member function of tensor
- /// For each element x of Tensor 'in', compute alpha/x
+ /// out = alpha*in + out
template <typename SType>
- Tensor Div(const SType alpha, const Tensor &in);
+ void Axpy(SType alpha, const Tensor &in, Tensor *out);
- /// For each element x of Tensor 'in', compute alpha/x into 'out'
+ /// Do matrix vector multipication or matrix matrix multiplication depdending
+ /// on the Tensor shape. result = A * B
+ Tensor Mult(const Tensor &A, const Tensor &B);
+ /// Do matrix vector multipication or matrix matrix multiplication depdending
+ /// on the Tensor shape. C = A * B
+ void Mult(const Tensor &A, const Tensor &B, Tensor *C);
-
+ /// Do matrix vector multipication or matrix matrix multiplication depdending
+ /// on the Tensor shape. out = alpha lhs * rhs + beta * out
template <typename SType>
- void Div(const SType alpha, const Tensor &in, Tensor *out);
-
- /*
- /// Multiply each column of the lhs matrix with the rhs column
- Tensor MultColumn(const Tensor &lhs, const Tensor &rhs);
- void MultColumn(const Tensor &lhs, const Tensor &rhs, Tensor *ret);
- /// Multiply each row of the lhs matrix with the rhs row
- Tensor MultRow(const Tensor &lhs, const Tensor &rhs);
- void MultRow(const Tensor &lhs, const Tensor &rhs, Tensor *ret);
- /// Div each row of the lhs matrix with the rhs column
- Tensor DivColumn(const Tensor &lhs, const Tensor &rhs);
- void DivColumn(const Tensor &lhs, const Tensor &rhs, Tensor *ret);
- /// Divide each row of the lhs matrix by the rhs row
- Tensor DivRow(const Tensor &lhs, const Tensor &rhs);
- void DivRow(const Tensor &lhs, const Tensor &rhs, Tensor *ret);
- */
+ void Mult(const SType alpha, const Tensor &A, const Tensor &B, const SType beta,
+ Tensor *C);
+
++// *****************
++// Misc.
++// ****************
+/// Compute the cross entropy loss given the prediction probability 'p' and
+/// the target (ground truth) labels 't'. 'p' and 't' are either 1-d vector
+/// or 2-d matrix. 'loss' is 1-d vector. The loss is computed into p.
- void ComputeCrossEntropy(const Tensor& t, Tensor* p);
++void ComputeCrossEntropy(const Tensor& p, const Tensor& t, Tensor* loss);
+/// Compute the dx, given prediction probability 'p' (p=softmax(x)) and
+/// the target (ground truth) labels 't'. 'p' and 't' are either 1-d vector
+/// or 2-d matrix. 'grad' has the same shape as 'p'. dx is computed into p.
+void SoftmaxCrossEntropyBwd(const Tensor& t, Tensor* p);
} // namespace singa
#endif // SINGA_CORE_TENSOR_H_
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/21e4b2d7/src/CMakeLists.txt
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/21e4b2d7/src/core/tensor/math_kernel.cu
----------------------------------------------------------------------
diff --cc src/core/tensor/math_kernel.cu
index f12763e,b618f9b..21ebdd8
--- a/src/core/tensor/math_kernel.cu
+++ b/src/core/tensor/math_kernel.cu
@@@ -309,142 -258,170 +258,202 @@@ __global__ void KernelLE(const int num
}
}
- __global__ static void kernel_set_value(float *data, float value, int n) {
- int index = blockIdx.x * blockDim.x + threadIdx.x;
- int num_threads = blockDim.x * gridDim.x;
- for (; index < n; index += num_threads) {
- data[index] = value;
+ __global__ void KernelLT(const int num, const float *in, const float x,
+ float *out) {
+ for (size_t idx = blockIdx.x * blockDim.x + threadIdx.x; idx < num;
+ idx += blockDim.x * gridDim.x) {
+ out[idx] = in[idx] < x ? 1.0f : 0.0f;
+ }
+ }
++__global__ void KernelComputeCrossEntropy(const size_t batchsize,
++ const size_t dim, const float *p,
++ const int *t, float *loss) {
++ size_t sample = blockIdx.x * blockDim.x + threadIdx.x;
++ size_t num_threads = blockDim.x * gridDim.x;
++ for (; sample < batchsize; sample += num_threads) {
++ float prob_of_truth = p[sample * dim + t[sample]];
++ loss[sample] = -std::log(max(prob_of_truth, FLT_MIN));
+ }
+}
- __global__ void kernel_threshold(const float *src_data, float *des_data,
- float alpha, int n) {
- int index = blockIdx.x * blockDim.x + threadIdx.x;
- int num_threads = blockDim.x * gridDim.x;
- for (; index < n; index += num_threads) {
- des_data[index] = src_data[index] < alpha ? 1.0f : 0.0f;
++__global__ void KernelSoftmaxCrossEntropyBwd(const size_t batchsize,
++ const size_t dim, const float *p,
++ const int *t, float *grad) {
++ size_t sample = blockIdx.x * blockDim.x + threadIdx.x;
++ size_t num_threads = blockDim.x * gridDim.x;
++ for (; sample < batchsize; sample += num_threads) {
++ size_t pos = sample * dim + t[sample];
++ grad[pos] = p[pos] - 1.0f; // TODO(wangwei) Consider p and grad are diff
+ }
+}
- void sum(int n, const float *in, float *out) {
- int threads_per_block = n > CU1DBLOCK ? CU1DBLOCK : n;
- // here, we only need one block
- int num_blocks = 1;
+ // ********************************
+ // Functions call kernels
+ // ********************************
+
+ void set(const size_t n, const float v, float *out, cudaStream_t s) {
+ KernelSet <<<ceil(n / CU1DBLOCKF), CU1DBLOCKF>>> (n, v, out);
+ }
+
+ void abs(const size_t n, const float *in, float *out, cudaStream_t s) {
+ KernelAbs <<<ceil(n / CU1DBLOCKF), CU1DBLOCKF>>> (n, in, out);
+ }
- kernel_sum_vec << <num_blocks, threads_per_block>>> (in, out, n);
+ void sign(const size_t n, const float *in, float *out, cudaStream_t s) {
+ KernelSign <<<ceil(n / CU1DBLOCKF), CU1DBLOCKF>>> (n, in, out);
}
- void sum_row(int rows, int cols, int stride, const float *in, float *out) {
- int threads_per_block = rows > CU1DBLOCK ? CU1DBLOCK : rows;
- int num_blocks = cols;
+ void exp(const size_t n, const float *in, float *out, cudaStream_t s) {
+ KernelExp <<<ceil(n / CU1DBLOCKF), CU1DBLOCKF>>> (n, in, out);
+ }
+
+ void log(const size_t n, const float *in, float *out, cudaStream_t s) {
+ KernelLog <<<ceil(n / CU1DBLOCKF), CU1DBLOCKF>>> (n, in, out);
+ }
- kernel_sum_row << <num_blocks, threads_per_block>>>
- (in, out, rows, cols, stride);
+ void sqrt(const size_t n, const float *in, float *out, cudaStream_t s) {
+ KernelSqrt <<<ceil(n / CU1DBLOCKF), CU1DBLOCKF>>> (n, in, out);
}
- void sum_col(int rows, int cols, int stride, const float *in, float *out) {
- int threads_per_block = cols > CU1DBLOCK ? CU1DBLOCK : cols;
- int num_blocks = rows;
+ void square(const size_t n, const float *in, float *out, cudaStream_t s) {
+ KernelSquare <<<ceil(n / CU1DBLOCKF), CU1DBLOCKF>>> (n, in, out);
+ }
- kernel_sum_col << <num_blocks, threads_per_block>>>
- (in, out, rows, cols, stride);
+ void tanh(const size_t n, const float *in, float *out, cudaStream_t s) {
+ KernelTanh <<<ceil(n / CU1DBLOCKF), CU1DBLOCKF>>> (n, in, out);
}
- void add_row(int rows, int cols, int stride, const float *in_row,
- const float *in_mat, float *out) {
- dim3 threads_per_block(CU2DBLOCK_X, CU2DBLOCK_Y);
- dim3 num_blocks(
- cols / threads_per_block.x + (cols % threads_per_block.x == 0 ? 0 : 1),
- rows / threads_per_block.y + (rows % threads_per_block.y == 0 ? 0 : 1));
- kernel_add_vec_row << <num_blocks, threads_per_block>>>
- (in_row, in_mat, out, rows, cols, stride);
+
+ void relu(const size_t n, const float *in, float *out, cudaStream_t s) {
+ KernelRelu <<<ceil(n / CU1DBLOCKF), CU1DBLOCKF>>> (n, in, out);
}
- void add(int n, const float *a, const float *b, float *out) {
- kernel_add << <ceil(n / CU1DBLOCKF), CU1DBLOCKF>>> (a, b, out, n);
+ void sigmoid(const int n, const float *in, float *out, cudaStream_t s) {
+ KernelSigmoid <<<ceil(n / CU1DBLOCKF), CU1DBLOCKF>>> (n, in, out);
}
- void sub(int n, const float *a, const float *b, float *out) {
- kernel_sub << <ceil(n / CU1DBLOCKF), CU1DBLOCKF>>> (a, b, out, n);
+ void softplus(const size_t n, const float *in, float *out, cudaStream_t s) {
+ KernelSoftplus <<<ceil(n / CU1DBLOCKF), CU1DBLOCKF>>> (n, in, out);
}
- void exp(int n, const float *in, float *out) {
- kernel_exp << <ceil(n / CU1DBLOCKF), CU1DBLOCKF>>> (in, out, n);
+ void clamp(const size_t n, const float low, const float high, const float *in,
+ float *out, cudaStream_t s) {
+ KernelClamp <<<ceil(n / CU1DBLOCKF), CU1DBLOCKF>>> (n, low, high, in, out);
}
- void log(int n, const float *in, float *out) {
- kernel_log << <ceil(n / CU1DBLOCKF), CU1DBLOCKF>>> (in, out, n);
+ void pow(const size_t n, const float *in, const float x, float *out,
+ cudaStream_t s) {
+ KernelPow <<<ceil(n / CU1DBLOCKF), CU1DBLOCKF>>> (n, in, x, out);
}
- void sigmoid(int n, const float *in, float *out) {
- kernel_sigmoid << <ceil(n / CU1DBLOCKF), CU1DBLOCKF>>> (in, out, n);
+ void add(const size_t n, const float *in, const float x, float *out,
+ cudaStream_t s) {
+ KernelAdd <<<ceil(n / CU1DBLOCKF), CU1DBLOCKF>>> (n, in, x, out);
}
- void sigmoid_grad(int n, const float *in, float *out) {
- kernel_sigmoid_grad << <ceil(n / CU1DBLOCKF), CU1DBLOCKF>>> (in, out, n);
+ void mult(const size_t n, const float *in, const float x, float *out,
+ cudaStream_t s) {
+ KernelMult <<<ceil(n / CU1DBLOCKF), CU1DBLOCKF>>> (n, in, x, out);
}
- void relu(int n, const float *in, float *out) {
- kernel_relu << <ceil(n / CU1DBLOCKF), CU1DBLOCKF>>> (in, out, n);
+ void div(const size_t n, const float x, const float *in, float *out,
+ cudaStream_t s) {
+ KernelDiv <<<ceil(n / CU1DBLOCKF), CU1DBLOCKF>>> (n, x, in, out);
}
- void relu_grad(int n, const float *in, float *out) {
- kernel_relu_grad << <ceil(n / CU1DBLOCKF), CU1DBLOCKF>>> (in, out, n);
+ void threshold(const size_t n, const float x, const float *in, float *out,
+ cudaStream_t s) {
+ KernelThreshold <<<ceil(n / CU1DBLOCKF), CU1DBLOCKF>>> (n, x, in, out);
}
- void tanh(int n, const float *in, float *out) {
- kernel_tanh << <ceil(n / CU1DBLOCKF), CU1DBLOCKF>>> (in, out, n);
+ void gt(const size_t num, const float *in, const float x, float *out,
+ cudaStream_t s) {
+ KernelGT <<<ceil(num / CU1DBLOCKF), CU1DBLOCKF>>> (num, in, x, out);
+ }
+ void ge(const size_t num, const float *in, const float x, float *out,
+ cudaStream_t s) {
+ KernelGE <<<ceil(num / CU1DBLOCKF), CU1DBLOCKF>>> (num, in, x, out);
+ }
+ void lt(const size_t num, const float *in, const float x, float *out,
+ cudaStream_t s) {
+ KernelLT <<<ceil(num / CU1DBLOCKF), CU1DBLOCKF>>> (num, in, x, out);
+ }
+ void le(const size_t num, const float *in, const float x, float *out,
+ cudaStream_t s) {
+ KernelLE <<<ceil(num / CU1DBLOCKF), CU1DBLOCKF>>> (num, in, x, out);
}
- void tanh_grad(int n, const float *in, float *out) {
- kernel_tanh_grad << <ceil(n / CU1DBLOCKF), CU1DBLOCKF>>> (in, out, n);
+ void pow(const size_t n, const float *in1, const float *in2, float *out,
+ cudaStream_t s) {
+ KernelPow <<<ceil(n / CU1DBLOCKF), CU1DBLOCKF>>> (n, in1, in2, out);
}
- void softplus(int n, const float *in, float *out) {
- kernel_softplus << <ceil(n / CU1DBLOCKF), CU1DBLOCKF>>> (in, out, n);
+ void add(const size_t n, const float *in1, const float *in2, float *out,
+ cudaStream_t s) {
+ KernelAdd <<<ceil(n / CU1DBLOCKF), CU1DBLOCKF>>> (n, in1, in2, out);
}
- void softplus_grad(int n, const float *in, float *out) {
- kernel_softplus_grad << <ceil(n / CU1DBLOCKF), CU1DBLOCKF>>> (in, out, n);
+ void sub(const size_t n, const float *in1, const float *in2, float *out,
+ cudaStream_t s) {
+ KernelSub <<<ceil(n / CU1DBLOCKF), CU1DBLOCKF>>> (n, in1, in2, out);
}
- void square(int n, const float *in, float *out) {
- kernel_square << <ceil(n / CU1DBLOCKF), CU1DBLOCKF>>> (in, out, n);
+ void mult(const size_t n, const float *in1, const float *in2, float *out,
+ cudaStream_t s) {
+ KernelMult <<<ceil(n / CU1DBLOCKF), CU1DBLOCKF>>> (n, in1, in2, out);
}
- void square_grad(int n, const float *in, float *out) {
- kernel_square_grad << <ceil(n / CU1DBLOCKF), CU1DBLOCKF>>> (in, out, n);
+ void div(const size_t n, const float *in1, const float *in2, float *out,
+ cudaStream_t s) {
+ KernelDiv <<<ceil(n / CU1DBLOCKF), CU1DBLOCKF>>> (n, in1, in2, out);
}
- void sqrt(int n, const float *in, float *out) {
- kernel_sqrt << <ceil(n / CU1DBLOCKF), CU1DBLOCKF>>> (in, out, n);
+ void sum(const size_t n, const float *in, float *out, cudaStream_t s) {
+ int threads_per_block = n > CU1DBLOCK ? CU1DBLOCK : n;
+ // here, we only need one block
+ int num_blocks = 1;
+ KernelSum <<<num_blocks, threads_per_block>>> (n, in, out);
}
+
- void pow(int n, const float *a, const float *b, float *out) {
- kernel_pow << <ceil(n / CU1DBLOCKF), CU1DBLOCKF>>> (a, b, out, n);
++void ComputeCrossEntropy(size_t batchsize, const size_t dim, const float *p,
++ const int *t, float *loss, cudaStream_t stream) {
++ KernelComputeCrossEntropy <<<ceil(batchsize / CU1DBLOCKF), CU1DBLOCKF>>>
++ (batchsize, dim, p, t, loss);
+}
+
- void mult(int n, const float *a, const float *b, float *out) {
- kernel_mult << <ceil(n / CU1DBLOCKF), CU1DBLOCKF>>> (a, b, out, n);
++void SoftmaxCrossEntropyBwd(size_t batchsize, const size_t dim, const float *p,
++ const int *t, float *grad, cudaStream_t stream) {
++ KernelSoftmaxCrossEntropyBwd <<<ceil(batchsize / CU1DBLOCKF), CU1DBLOCKF>>>
++ (batchsize, dim, p, t, grad);
++}
+ /*
+ void square_grad(int n, const float *in, float *out, cudaStream_t s) {
+ kernel_square_grad <<<ceil(n / CU1DBLOCKF), CU1DBLOCKF>>> (in, out, n);
}
- void mult(int n, const float *a, const float x, float *out) {
- kernel_mult << <ceil(n / CU1DBLOCKF), CU1DBLOCKF>>> (a, x, out, n);
+ void tanh_grad(int n, const float *in, float *out, cudaStream_t s) {
+ kernel_tanh_grad <<<ceil(n / CU1DBLOCKF), CU1DBLOCKF>>> (in, out, n);
}
- void div(int n, const float *a, const float *b, float *out) {
- kernel_div << <ceil(n / CU1DBLOCKF), CU1DBLOCKF>>> (a, b, out, n);
+
+ void relu_grad(int n, const float *in, float *out, cudaStream_t s) {
+ kernel_relu_grad <<<ceil(n / CU1DBLOCKF), CU1DBLOCKF>>> (in, out, n);
}
- void set_value(int n, float v, float *out) {
- kernel_set_value << <ceil(n / CU1DBLOCKF), CU1DBLOCKF>>> (out, v, n);
+
+ void sigmoid_grad(int n, const float *in, float *out, cudaStream_t s) {
+ kernel_sigmoid_grad <<<ceil(n / CU1DBLOCKF), CU1DBLOCKF>>> (in, out, n);
}
- void threshold(int n, float alpha, const float *in, float *out) {
- kernel_threshold << <ceil(n / CU1DBLOCKF), CU1DBLOCKF>>> (in, out, alpha, n);
+ void softplus_grad(int n, const float *in, float *out, cudaStream_t s) {
+ kernel_softplus_grad <<<ceil(n / CU1DBLOCKF), CU1DBLOCKF>>> (in, out, n);
}
- // follow the consistency guide for math API
- __global__ void KernelDiv(const size_t num, const float alpha, const float *in,
- float *out) {
- for (size_t idx = blockIdx.x * blockDim.x + threadIdx.x; idx < num;
- idx += blockDim.x * gridDim.x) {
- out[idx] = alpha / in[idx];
+
+ __global__ void kernel_sum_col(const float *src_mat_data, float *dst_vec_data,
+ int rows, int cols, int stride) {
+ int index = blockIdx.x * blockDim.x + threadIdx.x;
+ int num_threads = blockDim.x * gridDim.x;
+ for (; index < rows; index += num_threads) {
+ dst_vec_data[index] = 0.0f;
+ for (int k = 0; k < cols; k++) {
+ dst_vec_data[index] += src_mat_data[index * stride + k];
+ }
}
}
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/21e4b2d7/src/core/tensor/math_kernel.h
----------------------------------------------------------------------
diff --cc src/core/tensor/math_kernel.h
index 09953e4,d8a58a5..976b78f
--- a/src/core/tensor/math_kernel.h
+++ b/src/core/tensor/math_kernel.h
@@@ -31,72 -31,66 +31,73 @@@ namespace singa
// TODO(wangwei) make all function templates.
namespace cuda {
- void sum(int n, const float *in, float *out);
- void sum_row(int rows, int cols, int stride, const float *in, float *out);
-
- void sum_col(int rows, int cols, int stride, const float *in, float *out);
-
- void add_row(int rows, int cols, int stride, const float *in_row,
- const float *in_mat, float *out);
-
- void add(int n, const float *a, const float *b, float *out);
-
- void sub(int n, const float *a, const float *b, float *out);
-
- void exp(int n, const float *in, float *out);
-
- void log(int n, const float *in, float *out);
-
- void sigmoid(int n, const float *in, float *out);
-
- void sigmoid_grad(int n, const float *in, float *out);
-
- void relu(int n, const float *in, float *out);
-
- void relu_grad(int n, const float *in, float *out);
-
- void tanh(int n, const float *in, float *out);
-
- void tanh_grad(int n, const float *in, float *out);
+ // 0 input
+ void set(const size_t n, const float v, float *out, cudaStream_t s);
+
+ // 1 input
+ void abs(const size_t n, const float *in, float *out, cudaStream_t s);
+ void sign(const size_t n, const float *in, float *out, cudaStream_t s);
+ void exp(const size_t n, const float *in, float *out, cudaStream_t s);
+ void log(const size_t n, const float *in, float *out, cudaStream_t s);
+ void sqrt(const size_t n, const float *in, float *out, cudaStream_t s);
+ void square(const size_t n, const float *in, float *out, cudaStream_t s);
+ void tanh(const size_t n, const float *in, float *out, cudaStream_t s);
+ void relu(const size_t n, const float *in, float *out, cudaStream_t s);
+ void sigmoid(const int n, const float *in, float *out, cudaStream_t s);
+ void softplus(const size_t n, const float *in, float *out, cudaStream_t s);
+ void clamp(const size_t n, const float low, const float high, const float *in,
+ float *out, cudaStream_t s);
+
+ void pow(const size_t n, const float *in, const float x, float *out,
+ cudaStream_t s);
- void softplus(int n, const float *in, float *out);
+ void add(const size_t n, const float *in, const float x, float *out,
+ cudaStream_t s);
- void softplus_grad(int n, const float *in, float *out);
+ void mult(const size_t n, const float *in, const float x, float *out,
+ cudaStream_t s);
- void square(int n, const float *in, float *out);
+ void div(const size_t n, const float x, const float *in, float *out,
+ cudaStream_t s);
- void square_grad(int n, const float *in, float *out);
+ void threshold(const size_t n, const float x, const float *in, float *out,
+ cudaStream_t s);
- void sqrt(int n, const float *in, float *out);
+ void gt(const size_t num, const float *in, const float x, float *out,
+ cudaStream_t s);
+ void ge(const size_t num, const float *in, const float x, float *out,
+ cudaStream_t s);
+ void lt(const size_t num, const float *in, const float x, float *out,
+ cudaStream_t s);
+ void le(const size_t num, const float *in, const float x, float *out,
+ cudaStream_t s);
- void pow(int n, const float *a, const float *b, float *out);
+ // 2 inputs
+ void pow(const size_t n, const float *in1, const float *in2, float *out,
+ cudaStream_t s);
- void mult(int n, const float *a, const float *b, float *out);
+ void add(const size_t n, const float *in1, const float *in2, float *out,
+ cudaStream_t s);
- void mult(int n, const float *a, const float x, float *out);
+ void sub(const size_t n, const float *in1, const float *in2, float *out,
+ cudaStream_t s);
- void div(int n, const float *a, const float *b, float *out);
+ void mult(const size_t n, const float *in1, const float *in2, float *out,
+ cudaStream_t s);
- void set_value(int n, float v, float *out);
+ void div(const size_t n, const float *in1, const float *in2, float *out,
+ cudaStream_t s);
- void threshold(int n, float alpha, const float *in, float *out);
+ void sum(const size_t n, const float *in, float *out, cudaStream_t s);
- // follow the consistency guide for math API
+void ComputeCrossEntropy(const size_t batchsize, const size_t dim,
+ const float *p, const int *t, float *loss,
+ cudaStream_t stream);
- void Div(const size_t num, const float x, const float *in, float *out,
- cudaStream_t s);
- void GT(size_t num, const float *in, const float x, float *out, cudaStream_t s);
- void GE(size_t num, const float *in, const float x, float *out, cudaStream_t s);
- void LT(size_t num, const float *in, const float x, float *out, cudaStream_t s);
- void LE(size_t num, const float *in, const float x, float *out, cudaStream_t s);
- void Set(const size_t num, const float x, float *out, cudaStream_t s);
+void SoftmaxCrossEntropyBwd(const size_t batchsize, const size_t dim,
+ const float *p, const int *t, float *grad,
+ cudaStream_t stream);
+
} // cuda
} // namespace singa
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/21e4b2d7/src/core/tensor/tensor.cc
----------------------------------------------------------------------
diff --cc src/core/tensor/tensor.cc
index 1ac25c6,e62386a..4e0d94b
--- a/src/core/tensor/tensor.cc
+++ b/src/core/tensor/tensor.cc
@@@ -592,42 -557,8 +556,8 @@@ void AddRow(const SType alpha, const ST
Mult(alpha, one, vmat, beta, M);
}
}
- void ComputeCrossEntropy(const Tensor& t, Tensor* p) {
- CHECK_LE(p->nDim(), 2u);
- CHECK_LE(t.nDim(), 2u); // TODO(wangwei) consider multi-labels.
- size_t batchsize = 1;
- if (p->nDim() == 2u) batchsize = p->shape(0);
- size_t dim = p->Size() / batchsize;
- TYPE_LANG_SWITCH(p->data_type(), DType, p->device()->lang(), Lang, {
- p->device()->Exec([batchsize, dim, t, p](Context *ctx) {
- ComputeCrossEntropy<DType, Lang>(batchsize, dim, p->blob(), t.blob(),
- p->blob(), ctx);
- }, {p->blob(), t.blob()}, {p->blob()});
- });
- }
-
- template <typename SType> Tensor Div(const SType alpha, const Tensor &in) {
- Tensor out(in.shape(), in.device(), in.data_type());
- Div(alpha, in, &out);
- return out;
- }
-
- template Tensor Div<float>(const float, const Tensor &);
-
- template <typename SType>
- void Div(const SType alpha, const Tensor &in, Tensor *out) {
- CheckDataTypeAndLang(in, *out);
- CHECK(in.shape() == out->shape());
- TYPE_LANG_SWITCH(in.data_type(), DType, in.device()->lang(), Lang, {
- // TODO(wangwei) type cast SType to DType;
- in.device()->Exec(
- [alpha, in, out](Context *ctx) {
- Div<DType, Lang>(in.Size(), alpha, in.blob(), out->blob(), ctx);
- },
- {in.blob()}, {out->blob()});
- });
- }
- template void Div<float>(const float, const Tensor &, Tensor *);
-template <>
++template
+ void AddRow(const float alpha, const float beta, const Tensor &v, Tensor *M);
/// Divide column 'v' by each column of matrix M; write results into 'out'
void DivColumn(const Tensor &v, Tensor *M) {
@@@ -725,4 -639,92 +638,122 @@@ void SumRows(const Tensor &M, Tensor *v
Mult(X, one, v);
}
}
+ // ====================Random operations=====================================
+ template <typename SType>
+ void Bernoulli(const SType p, Tensor *out) {
+ TYPE_LANG_SWITCH(out->data_type(), DType, out->device()->lang(), Lang, {
+ auto prob = TypeCast<SType, DType>(p);
+ out->device()->Exec([prob, out](Context *ctx) {
+ Bernoulli<DType, Lang>(out->Size(), prob, out->blob(), ctx);
+ }, {}, {out->blob()}, true);
+ });
+ }
+ template void Bernoulli<float>(const float p, Tensor *out);
+
+ template <typename SType>
+ void Uniform(const SType low, const SType high, Tensor *out) {
+ TYPE_LANG_SWITCH(out->data_type(), DType, out->device()->lang(), Lang, {
+ auto l = TypeCast<SType, DType>(low);
+ auto h = TypeCast<SType, DType>(high);
+ out->device()->Exec([l, h, out](Context *ctx) {
+ Uniform<DType, Lang>(out->Size(), l, h, out->blob(), ctx);
+ }, {}, {out->blob()}, true);
+ });
+ }
+ template void Uniform<float>(const float low, const float high, Tensor *out);
+
+ template <typename SType>
+ void Gaussian(const SType mean, const SType std, Tensor *out) {
+ TYPE_LANG_SWITCH(out->data_type(), DType, out->device()->lang(), Lang, {
+ auto m = TypeCast<SType, DType>(mean);
+ auto s = TypeCast<SType, DType>(std);
+ out->device()->Exec([m, s, out](Context *ctx) {
+ Gaussian<DType, Lang>(out->Size(), m, s, out->blob(), ctx);
+ }, {}, {out->blob()}, true);
+ });
+ }
+ template void Gaussian<float>(const float mean, const float std, Tensor *out);
+
+ // ================Blas operations============================================
+ template <typename SType>
+ void Axpy(const SType alpha, const Tensor &in, Tensor *out) {
+ TYPE_LANG_SWITCH(in.data_type(), DType, in.device()->lang(), Lang, {
+ auto a = TypeCast<SType, DType>(alpha);
+ out->device()->Exec([a, in, out](Context *ctx) {
+ Axpy<DType, Lang>(in.Size(), a, in.blob(), out->blob(), ctx);
+ }, {in.blob(), out->blob()}, {out->blob()});
+ });
+ }
-template <>
-void Axpy(const float alpha, const Tensor &in, Tensor *out);
++template void Axpy(const float alpha, const Tensor &in, Tensor *out);
+
+ Tensor Mult(const Tensor &A, const Tensor &B) {
+ Shape s;
+ s.push_back(A.shape(0));
+ if (B.nDim() == 2) s.push_back(B.shape(1));
+ Tensor out(s, A.device(), A.data_type());
+ Mult(A, B, &out);
+ return out;
+ }
+
+ void Mult(const Tensor &A, const Tensor &B, Tensor *out) {
+ Mult(1.0f, A, B, 0.0f, out);
+ }
+
+ template <typename SType>
+ void Mult(const SType alpha, const Tensor &A, const Tensor &B, const SType beta,
+ Tensor *C) {
+ CHECK_EQ(A.shape().size(), 2u);
+ if (B.nDim() == 1u) {
+ TYPE_LANG_SWITCH(A.data_type(), DType, A.device()->lang(), Lang, {
+ auto a = TypeCast<SType, DType>(alpha);
+ auto b = TypeCast<SType, DType>(beta);
+ C->device()->Exec([a, A, b, B, C](Context *ctx) {
+ GEMV<DType, Lang>(A.transpose(), A.shape(0), A.shape(1), a, A.blob(),
+ B.blob(), b, C->blob(), ctx);
+ }, {A.blob(), B.blob()}, {C->blob()});
+ });
+ } else {
+ CHECK(!C->transpose());
+ TYPE_LANG_SWITCH(A.data_type(), DType, A.device()->lang(), Lang, {
+ auto a = TypeCast<SType, DType>(alpha);
+ auto b = TypeCast<SType, DType>(beta);
+ C->device()->Exec([a, A, b, B, C](Context *ctx) {
+ GEMM<DType, Lang>(A.transpose(), B.transpose(), A.shape(0), B.shape(1),
+ A.shape(1), a, A.blob(), B.blob(), b, C->blob(), ctx);
+ }, {A.blob(), B.blob()}, {C->blob()});
+ });
+ }
+ }
+
++
++// ************************
++// Misc.
++// ***********************
++void ComputeCrossEntropy(const Tensor &p, const Tensor &t, Tensor *loss) {
++ CHECK_LE(p.nDim(), 2u);
++ CHECK_LE(t.nDim(), 2u); // TODO(wangwei) consider multi-labels.
++ size_t batchsize = 1;
++ if (p.nDim() == 2u) batchsize = p.shape(0);
++ size_t dim = p.Size() / batchsize;
++ TYPE_LANG_SWITCH(p.data_type(), DType, p.device()->lang(), Lang, {
++ p.device()->Exec([batchsize, dim, t, p, loss](Context *ctx) {
++ ComputeCrossEntropy<DType, Lang>(batchsize, dim, p.blob(), t.blob(),
++ loss->blob(), ctx);
++ }, {p.blob(), t.blob()}, {loss->blob()});
++ });
++}
++void SoftmaxCrossEntropyBwd(const Tensor &t, Tensor *p) {
++ CHECK_LE(p->nDim(), 2u);
++ CHECK_LE(t.nDim(), 2u); // TODO(wangwei) consider multi-labels.
++ size_t batchsize = 1;
++ if (p->nDim() == 2u)
++ batchsize = p->shape(0);
++ size_t dim = p->Size() / batchsize;
++ TYPE_LANG_SWITCH(p->data_type(), DType, p->device()->lang(), Lang, {
++ p->device()->Exec([batchsize, dim, t, p](Context *ctx) {
++ SoftmaxCrossEntropyBwd<DType, Lang>(batchsize, dim, p->blob(), t.blob(),
++ p->blob(), ctx);
++ }, {p->blob(), t.blob()}, {p->blob()});
++ });
++}
} // namespace singa
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/21e4b2d7/src/core/tensor/tensor_math.h
----------------------------------------------------------------------
diff --cc src/core/tensor/tensor_math.h
index bcf4908,b86e1cb..12490d1
--- a/src/core/tensor/tensor_math.h
+++ b/src/core/tensor/tensor_math.h
@@@ -269,109 -293,74 +293,95 @@@ void Scale(const size_t num, const DTyp
template <typename DType, typename Lang>
void Dot(const size_t num, const Blob *in1, const Blob *in2, DType *out,
Context *ctx) {
- LOG(FATAL) << "Not Implemented";
+ LOG(FATAL) << "Dot Not Implemented";
}
- // ===== Level 2
- /// ret = alpha * op(A) * v + beta * ret.
- /// op(A) = A if trans = false; A^T otherwise; rows(op(A)) = m, cols(op(A)) = n.
+ /// out = alpha * A * v + beta * out.
+ /// transA indicates if the internal data layout is transposed of A
template <typename DType, typename Lang>
- void GEMV(bool trans, int m, int n, DType alpha, const Blob *A, const Blob *v,
- DType beta, Blob *ret, Context *ctx) {
- LOG(FATAL) << "Not Implemented";
+ void GEMV(bool trans, const size_t m, const size_t n, const DType alpha,
+ const Blob *A, const Blob *v, const DType beta, Blob *out,
+ Context *ctx) {
+ LOG(FATAL) << "GEMV Not Implemented";
}
- // ===== Level 3
-
- // ================Random functions===========================================
- /// Each element of ret would be 1 with prob p and 0 with 1-p. 0<= p <= 1
- // Get the random generator from 'ctx'
- // If DType is not float, then convert the threshold to DType
- template <typename DType, typename Lang>
- void Bernoulli(int count, float p, Blob *ret, Context *ctx) {
- LOG(FATAL) << "Not Implemented";
- }
- // The random generator should be extracted from ctx.
- // If DType is not float, then convert the low and high to DType
+ /// multiply a matrix with a diagnoal matrix constructed using values from 'v'.
+ /// if matrix_lef_side is true, do M*v; else do v*M
template <typename DType, typename Lang>
- void Uniform(int count, float low, float high, Blob *ret, Context *ctx) {
- LOG(FATAL) << "Not Implemented";
+ void DGMM(const bool side_right, const size_t nrow, const size_t ncol,
+ const Blob *M, const Blob *v, Blob *out, Context *ctx) {
+ LOG(FATAL) << "DGMM Not Implemented";
}
- // The random generator should be extracted from ctx.
- // If DType is not float, then convert the mean and std to DType
+
+ /// C = alpha * A * B + beta * C.
+ /// transA indicates if the internal data layout is transposed of A
template <typename DType, typename Lang>
- void Gaussian(int count, float mean, float std, Blob *ret, Context *ctx) {
- LOG(FATAL) << "Not Implemented";
+ void GEMM(const bool transA, const bool transB, const size_t nrowA,
+ const size_t ncolB, const size_t ncolA, const DType alpha,
+ const Blob *A, const Blob *B, const DType beta, Blob *C,
+ Context *ctx) {
+ LOG(FATAL) << "GEMM Not Implemented";
}
- // ========follow the consistency guide of math API
-
+/// Divide alpha by each element of 'in'.
+// following the consistency guide.
+template <typename DType, typename Lang>
+void ComputeCrossEntropy(const size_t batchsize, const size_t dim,
+ const Blob *p, const Blob *t, Blob *loss,
+ Context *ctx) {
+ LOG(FATAL) << "Not Implemented";
+}
- template <typename DType, typename Lang>
- void Div(const size_t num, const DType alpha, const Blob *in, Blob *out,
- Context *ctx) {
- LOG(FATAL) << "Not Implemented";
- }
+
- /// multiply a matrix with a diagnoal matrix constructed using values from 'v'.
- /// if matrix_lef_side is true, do M*v; else do v*M
+template <typename DType, typename Lang>
- void DGMM(const bool side_right, const size_t nrow, const size_t ncol,
- const Blob *M, const Blob *v, Blob *out, Context *ctx) {
++void SoftmaxCrossEntropyBwd(const size_t batchsize, const size_t dim,
++ const Blob *p, const Blob *t, Blob *grad,
++ Context *ctx) {
+ LOG(FATAL) << "Not Implemented";
+}
+
- /// C = alpha * A * B + beta * C.
- /// transA indicates if the internal data layout is transposed of A
+ // **************************************
+ // Matrix functions
+ // **************************************
+ /*
+ /// Add the vector v to every column of A as the column of out
template <typename DType, typename Lang>
- void GEMM(const bool transA, const bool transB, const size_t nrowA,
- const size_t ncolB, const size_t ncolA, const DType alpha,
- const Blob *A, const Blob *B, const DType beta, Blob *C,
- Context *ctx) {
- LOG(FATAL) << "Not Implemented";
+ void AddCol(const size_t nrow, const size_t ncol, const Blob *A, const Blob *v,
+ Blob *out, Context *ctx) {
+ LOG(FATAL) << "AddCol Not Implemented";
}
- /// ret[i]=(input[i]<x)?1.f:0.f
+ // TODO(wangwei) unify AddRow and AddCol.
+ /// Add the vector v to every row of A as the row of out
template <typename DType, typename Lang>
- void LT(const size_t num, const Blob *in, const DType x, Blob *out,
- Context *ctx) {
- LOG(FATAL) << "Not Implemented";
+ void AddRow(const size_t nrow, const size_t ncol, const Blob *A, const Blob *v,
+ Blob *out, Context *ctx) {
+ LOG(FATAL) << "AddRow Not Implemented";
}
- /// ret[i]=(input[i]<=x)?1.f:0.f
- template <typename DType, typename Lang>
- void LE(const size_t num, const Blob *in, const DType x, Blob *out,
- Context *ctx) {
- LOG(FATAL) << "Not Implemented";
- }
- /// ret[i]=(input[i]>x)?1.f:0.f
+ /// outer-product.
+ /// in1 and in2 are vectors of len m and n. out is matrix of shape m * n
template <typename DType, typename Lang>
- void GT(const size_t num, const Blob *in, const DType x, Blob *out,
- Context *ctx) {
- LOG(FATAL) << "Not Implemented";
+ void Outer(const size_t m, const size_t n, const Blob *in1, const Blob *in2,
+ Blob *out, Context *ctx) {
+ LOG(FATAL) << "Outer Not Implemented";
}
- /// ret[i]=(input[i]>=x)?1.f:0.f
+
+ /// Sum the columns of the in matrix into a vector
template <typename DType, typename Lang>
- void GE(const size_t num, const Blob *in, const DType x, Blob *out,
- Context *ctx) {
- LOG(FATAL) << "Not Implemented";
+ void SumColumns(const size_t nrow, const size_t ncol, const Blob *in, Blob *out,
+ Context *ctx) {
+ LOG(FATAL) << "SumColumns Not Implemented";
}
+template <typename DType, typename Lang>
+void Set(const size_t num, const DType x, Blob *out, Context *ctx) {
+ LOG(FATAL) << "Not Implemented";
+}
+
+ // TODO(wangwei) unify SumRow and SumCol.
+ /// Sum the rows of the in matrix into a vector
template <typename DType, typename Lang>
- void SoftmaxCrossEntropyBwd(const size_t batchsize, const size_t dim,
- const Blob *p, const Blob *t, Blob *grad,
- Context *ctx) {
- LOG(FATAL) << "Not Implemented";
+ void SumRows(const size_t nrow, const size_t ncol, const Blob *in, Blob *out,
+ Context *ctx) {
+ LOG(FATAL) << "SumRows Not Implemented";
}
-
+ */
} // namespace singa
#endif // SINGA_CORE_MATH_H_
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/21e4b2d7/src/core/tensor/tensor_math_cpp.h
----------------------------------------------------------------------
diff --cc src/core/tensor/tensor_math_cpp.h
index 907c656,0b280a3..c5d092b
--- a/src/core/tensor/tensor_math_cpp.h
+++ b/src/core/tensor/tensor_math_cpp.h
@@@ -323,32 -420,196 +422,229 @@@ void GEMM<float, lang::Cpp>(const bool
cblas_sgemm(CblasRowMajor, transa, transb, nrowA, ncolB, ncolA, alpha, APtr,
lda, BPtr, ldb, beta, CPtr, ldc);
}
- #endif // USE_CBLAS
+
+ #else
template <>
- void Set<float, lang::Cpp>(const size_t num, const float x, Blob *out,
- Context *ctx) {
+ void Amax<float, lang::Cpp>(const size_t num, const Blob *in, size_t *out,
+ Context *ctx) {
+ size_t maxPos = 0;
+ float maxVal = 0;
+ const float *inPtr = static_cast<const float *>(in->data());
+ for (size_t i = 0; i < num; i++) {
+ if (i == 0) {
+ maxVal = inPtr[i];
+ } else if (inPtr[i] > maxVal) {
+ maxVal = inPtr[i];
+ maxPos = i;
+ }
+ }
+ *out = maxPos;
+ }
+ template <>
+ void Amin<float, lang::Cpp>(const size_t num, const Blob *in, size_t *out,
+ Context *ctx) {
+ size_t minPos = 0;
+ float minVal = 0;
+ const float *inPtr = static_cast<const float *>(in->data());
+ for (size_t i = 0; i < num; i++) {
+ if (i == 0) {
+ minVal = inPtr[i];
+ } else if (inPtr[i] > minVal) {
+ minVal = inPtr[i];
+ minPos = i;
+ }
+ }
+ *out = minPos;
+ }
+
+ template <>
+ void Asum<float, lang::Cpp>(const size_t num, const Blob *in, float *out,
+ Context *ctx) {
+ float sum = 0;
+ const float *inPtr = static_cast<const float *>(in->data());
+ for (size_t i = 0; i < num; i++) {
+ sum += fabs(inPtr[i]);
+ }
+ }
+
+ template <>
+ void Axpy<float, lang::Cpp>(const size_t num, const float alpha, const Blob *in,
+ Blob *out, Context *ctx) {
float *outPtr = static_cast<float *>(out->mutable_data());
- for (size_t i = 0; i < num; i++) outPtr[i] = x;
+ const float *inPtr = static_cast<const float *>(in->data());
+ for (size_t i = 0; i < num; i++) {
+ outPtr[i] += alpha * inPtr[i];
+ }
}
+
+ template <>
+ void Scale<float, lang::Cpp>(const size_t num, const float x, Blob *out,
+ Context *ctx) {
+ float *outPtr = static_cast<float *>(out->mutable_data());
+ for (size_t i = 0; i < num; i++) {
+ outPtr[i] *= x;
+ }
+ }
+
+ template <>
+ void Dot<float, lang::Cpp>(const size_t num, const Blob *in1, const Blob *in2,
+ float *out, Context *ctx) {
+ float sum = 0;
+ const float *in1Ptr = static_cast<const float *>(in1->data());
+ const float *in2Ptr = static_cast<const float *>(in2->data());
+ for (size_t i = 0; i < num; i++) {
+ sum += in1Ptr[i] * in2Ptr[i];
+ }
+ }
+
+ template <>
+ void GEMV<float, lang::Cpp>(bool trans, const size_t m, const size_t n,
+ const float alpha, const Blob *A, const Blob *v,
+ const float beta, Blob *out, Context *ctx) {
+ float *outPtr = static_cast<float *>(out->mutable_data());
+ const float *APtr = static_cast<const float *>(A->data());
+ const float *vPtr = static_cast<const float *>(v->data());
+ for (size_t r = 0; r < m; r++) {
+ float sum = 0;
+ for (size_t c = 0; c < n; c++) {
+ size_t idx = trans ? c * m + r : r * n + c;
+ sum += APtr[idx] * vPtr[c];
+ }
+ outPtr[r] = alpha * sum + beta * outPtr[r];
+ }
+ }
+
+ #endif // USE_CBLAS
++template <>
++void ComputeCrossEntropy<float, lang::Cpp>(const size_t batchsize,
++ const size_t dim, const Blob *p,
++ const Blob *t, Blob *loss,
++ Context *ctx) {
++ const float *pPtr = static_cast<const float *>(p->data());
++ const int *tPtr = static_cast<const int *>(t->data());
++ float *lossPtr = static_cast<float *>(loss->mutable_data());
++ for (size_t i = 0; i < batchsize; i++) {
++ int truth_idx = tPtr[i];
++ CHECK_GE(truth_idx, 0);
++ float prob_of_truth = pPtr[i * dim + truth_idx];
++ lossPtr[i] = -std::log(std::max(prob_of_truth, FLT_MIN));
++ }
++}
++
+template <>
+void SoftmaxCrossEntropyBwd<float, lang::Cpp>(const size_t batchsize,
+ const size_t dim, const Blob *p,
+ const Blob *t,
+ Blob *grad, Context *ctx) {
+ CHECK_EQ(p, grad) << "Use the same pointer to optimize performance";
+ // const float* pPtr = static_cast<const float*>(p->data());
- const float *tPtr = static_cast<const float *>(t->data());
++ const int *tPtr = static_cast<const int *>(t->data());
+ float *gradPtr = static_cast<float *>(grad->mutable_data());
+
+ for (size_t i = 0; i < batchsize; i++) {
+ int truth_idx = static_cast<int>(tPtr[i]);
+ CHECK_GE(truth_idx, 0);
+ gradPtr[i * dim + truth_idx] -= 1.0;
+ }
+}
+
+ // =========Matrix operations ================================================
+ /*
+ template <>
+ void AddCol<float, lang::Cpp>(const size_t nrow, const size_t ncol,
+ const Blob *A, const Blob *v, Blob *out,
+ Context *ctx) {
+ float *outPtr = static_cast<float *>(out->mutable_data());
+ const float *APtr = static_cast<const float *>(A->data());
+ const float *vPtr = static_cast<const float *>(v->data());
+ for (size_t r = 0; r < nrow; r++) {
+ size_t offset = r * ncol;
+ for (size_t c = 0; c < ncol; c++) {
+ outPtr[offset + c] = APtr[offset + c] + vPtr[r];
+ }
+ }
+ }
+
+ template <>
+ void AddRow<float, lang::Cpp>(const size_t nrow, const size_t ncol,
+ const Blob *A, const Blob *v, Blob *out,
+ Context *ctx) {
+ float *outPtr = static_cast<float *>(out->mutable_data());
+ const float *APtr = static_cast<const float *>(A->data());
+ const float *vPtr = static_cast<const float *>(v->data());
+ for (size_t r = 0; r < nrow; r++) {
+ size_t offset = r * ncol;
+ for (size_t c = 0; c < ncol; c++) {
+ outPtr[offset + c] = APtr[offset + c] + vPtr[c];
+ }
+ }
+ }
+ template <>
+ void Outer<float, lang::Cpp>(const size_t m, const size_t n, const Blob *in1,
+ const Blob *in2, Blob *out, Context *ctx) {
+ float *outPtr = static_cast<float *>(out->mutable_data());
+ const float *in1Ptr = static_cast<const float *>(in1->data());
+ const float *in2Ptr = static_cast<const float *>(in2->data());
+ for (size_t r = 0; r < m; r++) {
+ size_t offset = r * n;
+ for (size_t c = 0; c < n; c++) {
+ outPtr[offset + c] = in1Ptr[r] * in2Ptr[c];
+ }
+ }
+ }
+ template <>
+ void Softmax<float, lang::Cpp>(const size_t nrow, const size_t ncol,
+ const Blob *in, Blob *out, Context *ctx) {
+ float *outPtr = static_cast<float *>(out->mutable_data());
+ const float *inPtr = static_cast<const float *>(in->data());
+ float *bPtr = new float[ncol];
+ for (size_t r = 0; r < nrow; r++) {
+ size_t offset = r * ncol;
+ float denom = 0.f;
+ for (size_t c = 0; c < ncol; c++) {
+ bPtr[c] = exp(inPtr[offset + c]);
+ denom += bPtr[c];
+ }
+ for (size_t c = 0; c < ncol; c++) {
+ size_t idx = offset + c;
+ outPtr[idx] = bPtr[c] / denom;
+ }
+ }
+ delete bPtr;
+ }
+
+ template <>
+ void SumColumns<float, lang::Cpp>(const size_t nrow, const size_t ncol,
+ const Blob *in, Blob *out, Context *ctx) {
+ float *outPtr = static_cast<float *>(out->mutable_data());
+ const float *inPtr = static_cast<const float *>(in->data());
+ for (size_t c = 0; c < ncol; c++) {
+ outPtr[c] = 0.f;
+ }
+ for (size_t r = 0; r < nrow; r++) {
+ size_t offset = r * ncol;
+ for (size_t c = 0; c < ncol; c++) {
+ outPtr[c] += inPtr[offset + c];
+ }
+ }
+ }
+
+ template <>
+ void SumRows<float, lang::Cpp>(const size_t nrow, const size_t ncol,
+ const Blob *in, Blob *out, Context *ctx) {
+ float *outPtr = static_cast<float *>(out->mutable_data());
+ const float *inPtr = static_cast<const float *>(in->data());
+ for (size_t r = 0; r < nrow; r++) {
+ size_t offset = r * ncol;
+ outPtr[r] = 0.f;
+ for (size_t c = 0; c < ncol; c++) {
+ outPtr[r] += inPtr[offset + c];
+ }
+ }
+ }
+ */
} // namespace singa
#endif // SINGA_CORE_TENSOR_TENSOR_MATH_CPP_H_
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/21e4b2d7/src/core/tensor/tensor_math_cuda.h
----------------------------------------------------------------------
diff --cc src/core/tensor/tensor_math_cuda.h
index c69620c,e2597d5..9a8839e
--- a/src/core/tensor/tensor_math_cuda.h
+++ b/src/core/tensor/tensor_math_cuda.h
@@@ -131,55 -398,6 +398,28 @@@ void GEMM<float, lang::Cuda>(const boo
BPtr, ldb, APtr, lda, &beta, CPtr, ldc));
}
+template <>
- void GE<float, lang::Cuda>(const size_t num, const Blob* in, const float x,
- Blob* out, Context *ctx) {
- float* outPtr = static_cast<float*>(out->mutable_data());
- const float* inPtr = static_cast<const float*>(in->data());
- cuda::GE(num, inPtr, x, outPtr, ctx->stream);
- }
- template <>
- void GT<float, lang::Cuda>(const size_t num, const Blob* in, const float x,
- Blob* out, Context *ctx) {
- float* outPtr = static_cast<float*>(out->mutable_data());
- const float* inPtr = static_cast<const float*>(in->data());
- cuda::GT(num, inPtr, x, outPtr, ctx->stream);
- }
- template <>
- void LE<float, lang::Cuda>(const size_t num, const Blob* in, const float x,
- Blob* out, Context *ctx) {
- float* outPtr = static_cast<float*>(out->mutable_data());
- const float* inPtr = static_cast<const float*>(in->data());
- cuda::LE(num, inPtr, x, outPtr, ctx->stream);
- }
- template <>
- void LT<float, lang::Cuda>(const size_t num, const Blob* in, const float x,
- Blob* out, Context *ctx) {
- float* outPtr = static_cast<float*>(out->mutable_data());
- const float* inPtr = static_cast<const float*>(in->data());
- cuda::LT(num, inPtr, x, outPtr, ctx->stream);
- }
-
- template<>
- void Set<float, lang::Cuda>(const size_t num, const float x, Blob *out,
- Context *ctx) {
- float *outPtr = static_cast<float *>(out->mutable_data());
- cuda::Set(num, x, outPtr, ctx->stream);
++void ComputeCrossEntropy<float, lang::Cuda>(const size_t batchsize,
++ const size_t dim, const Blob *p,
++ const Blob *t, Blob *loss,
++ Context *ctx) {
++ const float *pPtr = static_cast<const float *>(p->data());
++ const int *tPtr = static_cast<const int *>(t->data());
++ float *lossPtr = static_cast<float *>(loss->mutable_data());
++ cuda::ComputeCrossEntropy(batchsize, dim, pPtr, tPtr, lossPtr, ctx->stream);
+}
-
+template <>
+void SoftmaxCrossEntropyBwd<float, lang::Cuda>(const size_t batchsize,
+ const size_t dim, const Blob *p,
+ const Blob *t, Blob *grad,
+ Context *ctx) {
+ CHECK_EQ(p, grad) << "Use the same pointer to optimize performance";
+ const float *pPtr = static_cast<const float *>(p->data());
+ const int *tPtr = static_cast<const int *>(t->data());
+ float *gradPtr = static_cast<float *>(grad->mutable_data());
+ cuda::SoftmaxCrossEntropyBwd(batchsize, dim, pPtr, tPtr, gradPtr,
+ ctx->stream);
+}
-
} // namespace singa
#endif // USE_CUDA
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/21e4b2d7/src/model/layer/softmax.cc
----------------------------------------------------------------------
diff --cc src/model/layer/softmax.cc
index 8af1d76,b379fc1..f554f25
--- a/src/model/layer/softmax.cc
+++ b/src/model/layer/softmax.cc
@@@ -25,14 -25,16 +25,17 @@@ void Softmax::Setup(const LayerConf& co
}
const Tensor Softmax::Forward(int flag, const Tensor& input) {
+ Tensor output;
if (input.nDim() == 1) {
- buf_.push(SoftMax(input));
- Tensor tmp = Reshape(input, Shape{1, input.Size()});
- output = SoftMax(tmp, 0);
++ output = SoftMax(input);
} else {
- output = SoftMax(input, axis_);
+ size_t nrow = Product(input.shape(), 0, axis_);
+ const Tensor& tmp = Reshape(input, Shape{nrow, input.Size() / nrow});
- buf_.push(SoftMax(tmp));
++ output = SoftMax(tmp);
}
- return buf_.top();
+ if (flag & kTrain)
+ buf_.push(output);
+ return output;
}
const std::pair<Tensor, vector<Tensor>> Softmax::Backward(int flag,
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/21e4b2d7/src/model/loss/softmax_cross_entropy.cc
----------------------------------------------------------------------
diff --cc src/model/loss/softmax_cross_entropy.cc
index 4ca323a,0000000..bed3348
mode 100644,000000..100644
--- a/src/model/loss/softmax_cross_entropy.cc
+++ b/src/model/loss/softmax_cross_entropy.cc
@@@ -1,53 -1,0 +1,53 @@@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <stack>
+#include "singa/model/loss.h"
+
+namespace singa {
+
-
- Tensor SoftmaxCrossEntropy::Forward(const Tensor& prediction, const Tensor& target) {
++Tensor SoftmaxCrossEntropy::Forward(const Tensor& prediction,
++ const Tensor& target) {
+ CHECK(buf_.empty()) << "Do not call Forward successively for more than twice."
+ << " The calling pattern is [Forward|Evaluate] Backward";
+ size_t batchsize = 1;
+ if (prediction.nDim() > 1) batchsize = prediction.shape().at(0);
+ size_t dim = prediction.Size() / batchsize;
+ const Tensor& input = Reshape(prediction, Shape{batchsize, dim});
+ Tensor prob = SoftMax(input);
+
+ // buffer intermediate data
+ buf_.push(prob);
+ buf_.push(target);
- Tensor loss = prob.Clone();
++ Tensor loss(Shape{batchsize}, prob.device(), prob.data_type());
+
- ComputeCrossEntropy(target, &loss);
++ ComputeCrossEntropy(prob, target, &loss);
+ return loss;
+}
+
+Tensor SoftmaxCrossEntropy::Backward() {
+ const Tensor target = buf_.top();
+ buf_.pop();
+ Tensor prob = buf_.top();
+ buf_.pop();
+ SoftmaxCrossEntropyBwd(target, &prob);
+ return prob;
+}
+} // namespace singa
+
+
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/21e4b2d7/test/singa/test_cross_entropy.cc
----------------------------------------------------------------------
diff --cc test/singa/test_cross_entropy.cc
index 6b8cb69,0000000..0eb36e5
mode 100644,000000..100644
--- a/test/singa/test_cross_entropy.cc
+++ b/test/singa/test_cross_entropy.cc
@@@ -1,114 -1,0 +1,116 @@@
+/************************************************************
+*
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements. See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership. The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an
+* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+* KIND, either express or implied. See the License for the
+* specific language governing permissions and limitations
+* under the License.
+*
+*************************************************************/
+
+#include "gtest/gtest.h"
+#include "singa/core/tensor.h"
+#include "singa/core/device.h"
+#include "singa/model/loss.h"
+#include "singa_config.h"
+
+using singa::Tensor;
+class TestSoftmaxCrossEntropy : public ::testing::Test {
+ protected:
+ virtual void SetUp() {
+ p.Reshape(singa::Shape{2, 4});
+ t.Reshape(singa::Shape{2, 1});
+ }
+ const float pdat[8] = {0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1};
- const float tdat[2] = {0.0, 2.0};
++ const int tdat[2] = {0, 2};
+
+ singa::Tensor p, t;
+};
+
+TEST_F(TestSoftmaxCrossEntropy, CppForward) {
+ p.CopyDataFromHostPtr(pdat, 8);
++ t.AsType(singa::kInt);
+ t.CopyDataFromHostPtr(tdat, 2);
+
+ singa::SoftmaxCrossEntropy cross_entropy;
+ const Tensor& loss = cross_entropy.Forward(p, t);
+ auto ldat = loss.data<const float*>();
+
+ const float result_test = -log(0.25);
+ EXPECT_FLOAT_EQ(ldat[0], result_test);
+ EXPECT_FLOAT_EQ(ldat[1], result_test);
+}
+
+TEST_F(TestSoftmaxCrossEntropy, CppBackward) {
+ p.CopyDataFromHostPtr(pdat, 8);
++ t.AsType(singa::kInt);
+ t.CopyDataFromHostPtr(tdat, 2);
+
+ singa::SoftmaxCrossEntropy cross_entropy;
+ cross_entropy.Forward(p, t);
+ const Tensor& grad = cross_entropy.Backward();
+
+ auto gdat = grad.data<const float*>();
+ EXPECT_FLOAT_EQ(gdat[0], -0.75);
+ EXPECT_FLOAT_EQ(gdat[1], 0.25);
+ EXPECT_FLOAT_EQ(gdat[2], 0.25);
+ EXPECT_FLOAT_EQ(gdat[3], 0.25);
+ EXPECT_FLOAT_EQ(gdat[4], 0.25);
+ EXPECT_FLOAT_EQ(gdat[5], 0.25);
+ EXPECT_FLOAT_EQ(gdat[6], -0.75);
+ EXPECT_FLOAT_EQ(gdat[7], 0.25);
+}
+
+#ifdef USE_CUDA
+
+TEST_F(TestSoftmaxCrossEntropy, CudaForward) {
+ singa::SoftmaxCrossEntropy cross_entropy;
+ singa::CudaGPU dev;
+ p.ToDevice(&dev);
+ t.ToDevice(&dev);
+ p.CopyDataFromHostPtr(pdat, 8);
+ t.CopyDataFromHostPtr(tdat, 2);
+
+ Tensor loss = cross_entropy.Forward(p, t);
+ loss.ToHost();
+ auto ldat = loss.data<const float*>();
+
+ const float result_test = -log(0.25);
+ EXPECT_FLOAT_EQ(ldat[0], result_test);
+ EXPECT_FLOAT_EQ(ldat[1], result_test);
+}
+
+TEST_F(TestSoftmaxCrossEntropy, CudaBackward) {
+ singa::SoftmaxCrossEntropy cross_entropy;
+ singa::CudaGPU dev;
+ p.ToDevice(&dev);
+ t.ToDevice(&dev);
+ p.CopyDataFromHostPtr(pdat, 8);
+ t.CopyDataFromHostPtr(tdat, 2);
+
+ cross_entropy.Forward(p, t);
+ Tensor grad = cross_entropy.Backward();
+
+ grad.ToHost();
+ auto gdat = grad.data<const float*>();
+ EXPECT_FLOAT_EQ(gdat[0], -0.75);
+ EXPECT_FLOAT_EQ(gdat[1], 0.25);
+ EXPECT_FLOAT_EQ(gdat[2], 0.25);
+ EXPECT_FLOAT_EQ(gdat[3], 0.25);
+ EXPECT_FLOAT_EQ(gdat[4], 0.25);
+ EXPECT_FLOAT_EQ(gdat[5], 0.25);
+ EXPECT_FLOAT_EQ(gdat[6], -0.75);
+ EXPECT_FLOAT_EQ(gdat[7], 0.25);
+}
+#endif // USE_CUDA
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/21e4b2d7/test/singa/test_mse.cc
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/21e4b2d7/test/singa/test_softmax.cc
----------------------------------------------------------------------
diff --cc test/singa/test_softmax.cc
index 09dfcd9,c087605..fff8510
--- a/test/singa/test_softmax.cc
+++ b/test/singa/test_softmax.cc
@@@ -36,13 -36,14 +36,14 @@@ TEST(Softmax, Setup)
EXPECT_EQ(2, sft.Axis());
}
+ #ifdef USE_CBLAS
TEST(Softmax, Forward) {
const float x[] = {1.0f, 2.0f, 0.0f, -2.0f, -3.0f, -1.0};
-- size_t n = sizeof(x) / sizeof(float);
size_t row = 2;
size_t col = 3;
++ size_t n = row * col;
singa::Tensor in(singa::Shape{row, col});
-- in.CopyDataFromHostPtr<float>(x, n);
++ in.CopyDataFromHostPtr<float>(x, row * col);
int axis = 1;
Softmax sft;
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/21e4b2d7/test/singa/test_tensor_math.cc
----------------------------------------------------------------------
diff --cc test/singa/test_tensor_math.cc
index 8368c55,38a9291..1092d69
--- a/test/singa/test_tensor_math.cc
+++ b/test/singa/test_tensor_math.cc
@@@ -20,6 -22,263 +22,261 @@@ class TestTensorMath : public ::testing
const float dat2[6] = {1.1f, 2.1f, 3.1f, 4.1f, 5.1f, 6.1f};
};
+ TEST_F(TestTensorMath, MemberAbs) {
+ Tensor aa = a.Clone();
+ Tensor bb = b.Clone();
+ Tensor cc = aa - bb;
+ const float *dptr = cc.data<const float *>();
+ EXPECT_NEAR(-0.1, dptr[0], 1e-5);
+ EXPECT_NEAR(-0.1, dptr[1], 1e-5);
+ EXPECT_NEAR(-0.1, dptr[2], 1e-5);
+
+ Tensor p = Abs(cc);
+ const float *dptr1 = p.data<const float *>();
+ EXPECT_NEAR(0.1, dptr1[0], 1e-5);
+ EXPECT_NEAR(0.1, dptr1[1], 1e-5);
+ EXPECT_NEAR(0.1, dptr1[2], 1e-5);
+ }
+
+ TEST_F(TestTensorMath, MemberExp) {
+ Tensor p = Exp(a);
+ const float *dptr1 = p.data<const float *>();
+ EXPECT_NEAR(exp(1.0f), dptr1[0], 1e-5);
+ EXPECT_NEAR(exp(2.0f), dptr1[1], 1e-5);
+ EXPECT_NEAR(exp(3.0f), dptr1[2], 1e-5);
+ }
+
+ TEST_F(TestTensorMath, MemberLog) {
+ Tensor p = Log(a);
+ const float *dptr1 = p.data<const float *>();
+ EXPECT_NEAR(log(1.0f), dptr1[0], 1e-5);
+ EXPECT_NEAR(log(2.0f), dptr1[1], 1e-5);
+ EXPECT_NEAR(log(3.0f), dptr1[2], 1e-5);
+ }
+
+ TEST_F(TestTensorMath, MemberReLU) {
+ Tensor aa = a.Clone();
+ Tensor cc = aa - 2.0f;
+ const float *dptr = cc.data<const float *>();
+ EXPECT_NEAR(-1.0f, dptr[0], 1e-5);
+ EXPECT_NEAR(0.0f, dptr[1], 1e-5);
+ EXPECT_NEAR(1.0f, dptr[2], 1e-5);
+
+ Tensor p = ReLU(cc);
+ const float *dptr1 = p.data<const float *>();
+ EXPECT_NEAR(0.0f, dptr1[0], 1e-5);
+ EXPECT_NEAR(0.0f, dptr1[1], 1e-5);
+ EXPECT_NEAR(1.0f, dptr1[2], 1e-5);
+ }
+
+ TEST_F(TestTensorMath, MemberSigmoid) {
+ Tensor p = Sigmoid(a);
+ const float *dptr1 = p.data<const float *>();
+ EXPECT_NEAR(1.0f / (1.0f + exp(-1.0f)), dptr1[0], 1e-5);
+ EXPECT_NEAR(1.0f / (1.0f + exp(-2.0f)), dptr1[1], 1e-5);
+ EXPECT_NEAR(1.0f / (1.0f + exp(-3.0f)), dptr1[2], 1e-5);
+ }
+
+ TEST_F(TestTensorMath, MemberSign) {
+ Tensor aa = a.Clone();
+ Tensor cc = aa - 2.0f;
+ const float *dptr = cc.data<const float *>();
+ EXPECT_NEAR(-1.0f, dptr[0], 1e-5);
+ EXPECT_NEAR(0.0f, dptr[1], 1e-5);
+ EXPECT_NEAR(1.0f, dptr[2], 1e-5);
+
+ Tensor p = Sign(cc);
+ const float *dptr1 = p.data<const float *>();
+ EXPECT_EQ(0.0f, dptr1[0]);
+ EXPECT_EQ(0.0f, dptr1[1]);
+ EXPECT_EQ(1.0f, dptr1[2]);
+ }
+
+ TEST_F(TestTensorMath, MemberSqrt) {
+ Tensor p = Sqrt(a);
+ const float *dptr1 = p.data<const float *>();
+ EXPECT_NEAR(sqrt(1.0), dptr1[0], 1e-5);
+ EXPECT_NEAR(sqrt(2.0), dptr1[1], 1e-5);
+ EXPECT_NEAR(sqrt(3.0), dptr1[2], 1e-5);
+ }
+
+ TEST_F(TestTensorMath, MemberSquare) {
+ Tensor p = Square(a);
+ const float *dptr1 = p.data<const float *>();
+ EXPECT_NEAR(1.0, dptr1[0], 1e-5);
+ EXPECT_NEAR(4.0, dptr1[1], 1e-5);
+ EXPECT_NEAR(9.0, dptr1[2], 1e-5);
+ }
+
+ TEST_F(TestTensorMath, MemberTanh) {
+ Tensor p = Tanh(a);
+ const float *dptr1 = p.data<const float *>();
+ EXPECT_NEAR(tanh(1.0), dptr1[0], 1e-5);
+ EXPECT_NEAR(tanh(2.0), dptr1[1], 1e-5);
+ EXPECT_NEAR(tanh(3.0), dptr1[2], 1e-5);
+ }
+
+ TEST_F(TestTensorMath, Sum) {
+ Tensor p1 = Sum(e, 0);
+ const float *dptr1 = p1.data<const float *>();
+ EXPECT_FLOAT_EQ(9.0f, dptr1[0]);
+ EXPECT_FLOAT_EQ(12.0f, dptr1[1]);
+
+ Tensor p2(Shape{3, 1});
+ p2 = Sum(e, 1);
+ const float *dptr2 = p2.data<const float *>();
+ EXPECT_FLOAT_EQ(3.0f, dptr2[0]);
+ EXPECT_FLOAT_EQ(7.0f, dptr2[1]);
+ EXPECT_FLOAT_EQ(11.0f, dptr2[2]);
+ }
+
+ TEST_F(TestTensorMath, SoftMax) {
- Tensor p1(Shape{3, 2});
- p1 = SoftMax(e, 0);
++ Tensor p1 = SoftMax(Reshape(e, Shape{1, 6}));
+ const float *dptr1 = p1.data<const float *>();
+ float sum = 0;
+ for (int i = 0; i < 6; i++) sum += exp(i + 1);
+ EXPECT_NEAR(exp(1) / sum, dptr1[0], 1e-5);
+ EXPECT_NEAR(exp(3) / sum, dptr1[2], 1e-5);
+ EXPECT_NEAR(exp(5) / sum, dptr1[4], 1e-5);
+ EXPECT_NEAR(exp(2) / sum, dptr1[1], 1e-5);
+ EXPECT_NEAR(exp(4) / sum, dptr1[3], 1e-5);
+ EXPECT_NEAR(exp(6) / sum, dptr1[5], 1e-5);
+
- Tensor p2(Shape{3, 2});
- p2 = SoftMax(e, 1);
++ Tensor p2 = SoftMax(e);
+ const float *dptr2 = p2.data<const float *>();
+ EXPECT_NEAR(exp(1) / (exp(1) + exp(2)), dptr2[0], 1e-5);
+ EXPECT_NEAR(exp(2) / (exp(1) + exp(2)), dptr2[1], 1e-5);
+ }
+
+ TEST_F(TestTensorMath, MemberLT) {
+ Tensor p1 = a < 2.0f;
+ const float *dptr1 = p1.data<const float *>();
+ EXPECT_FLOAT_EQ(1.0f, dptr1[0]);
+ EXPECT_FLOAT_EQ(0.0f, dptr1[1]);
+ EXPECT_FLOAT_EQ(0.0f, dptr1[2]);
+ }
+
+ TEST_F(TestTensorMath, MemberLE) {
+ Tensor p1 = a <= 2.0f;
+ const float *dptr1 = p1.data<const float *>();
+ EXPECT_FLOAT_EQ(1.0f, dptr1[0]);
+ EXPECT_FLOAT_EQ(1.0f, dptr1[1]);
+ EXPECT_FLOAT_EQ(0.0f, dptr1[2]);
+ }
+
+ TEST_F(TestTensorMath, MemberGT) {
+ Tensor p1 = a > 2.0f;
+ const float *dptr1 = p1.data<const float *>();
+ EXPECT_FLOAT_EQ(0.0f, dptr1[0]);
+ EXPECT_FLOAT_EQ(0.0f, dptr1[1]);
+ EXPECT_FLOAT_EQ(1.0f, dptr1[2]);
+ }
+
+ TEST_F(TestTensorMath, MemberGE) {
+ Tensor p1 = a >= 2.0f;
+ const float *dptr1 = p1.data<const float *>();
+ EXPECT_FLOAT_EQ(0.0f, dptr1[0]);
+ EXPECT_FLOAT_EQ(1.0f, dptr1[1]);
+ EXPECT_FLOAT_EQ(1.0f, dptr1[2]);
+ }
+
+ TEST_F(TestTensorMath, MemberPow) {
+ Tensor p1 = Pow(b, 3.0f);
+ const float *dptr1 = p1.data<const float *>();
+ EXPECT_FLOAT_EQ(pow(1.1f, 3.0f), dptr1[0]);
+ EXPECT_FLOAT_EQ(pow(2.1f, 3.0f), dptr1[1]);
+ EXPECT_FLOAT_EQ(pow(3.1f, 3.0f), dptr1[2]);
+
+ // TODO(Yuchen): check pow(tensor a, tensor b) and add testcase after the
+ // function is complete
+ // Tensor p2 = Pow(a,b);
+ // const float *dptr2 = p2.data<const float *>();
+ // EXPECT_FLOAT_EQ(pow(1.0f,1.1f), dptr2[0]);
+ // EXPECT_FLOAT_EQ(pow(2.0f,2.1f), dptr2[1]);
+ // EXPECT_FLOAT_EQ(pow(3.0f,3.1f), dptr2[2]);
+ }
+
+ TEST_F(TestTensorMath, MemberSub) {
+ Tensor p1 = a - b;
+ const float *dptr1 = p1.data<const float *>();
+ EXPECT_NEAR(-0.1, dptr1[0], 1e-5);
+ EXPECT_NEAR(-0.1, dptr1[1], 1e-5);
+ EXPECT_NEAR(-0.1, dptr1[2], 1e-5);
+ }
+
+ TEST_F(TestTensorMath, MemberEltwiseMult) {
+ Tensor p1 = a * b;
+ const float *dptr1 = p1.data<const float *>();
+ EXPECT_NEAR(1.0 * 1.1, dptr1[0], 1e-5);
+ EXPECT_NEAR(2.0 * 2.1, dptr1[1], 1e-5);
+ EXPECT_NEAR(3.0 * 3.1, dptr1[2], 1e-5);
+ }
+
+ TEST_F(TestTensorMath, MemberDiv) {
+ Tensor p1 = a / b;
+ const float *dptr1 = p1.data<const float *>();
+ EXPECT_NEAR(1.0 / 1.1, dptr1[0], 1e-5);
+ EXPECT_NEAR(2.0 / 2.1, dptr1[1], 1e-5);
+ EXPECT_NEAR(3.0 / 3.1, dptr1[2], 1e-5);
+
+ Tensor p2 = Div(10.0f, b);
+ const float *dptr2 = p2.data<const float *>();
+ EXPECT_NEAR(10.0 / 1.1, dptr2[0], 1e-5);
+ EXPECT_NEAR(10.0 / 2.1, dptr2[1], 1e-5);
+ EXPECT_NEAR(10.0 / 3.1, dptr2[2], 1e-5);
+
+ Tensor p3 = a / 8.0f;
+ const float *dptr3 = p3.data<const float *>();
+ EXPECT_NEAR(1.0 / 8.0, dptr3[0], 1e-5);
+ EXPECT_NEAR(2.0 / 8.0, dptr3[1], 1e-5);
+ EXPECT_NEAR(3.0 / 8.0, dptr3[2], 1e-5);
+ }
+
+ TEST_F(TestTensorMath, MemberBernoulli) {
+ Tensor p1(Shape{10000});
+ Bernoulli(0.3f, &p1);
+ const float *dptr1 = p1.data<const float *>();
+ float sum = 0;
+ for (int i = 0; i < 10000; i++) sum += dptr1[i];
+ float mean = sum / 10000;
+ EXPECT_NEAR(mean, 0.3f, 1e-2);
+
+ sum = 0;
+ for (int i = 0; i < 10000; i++) sum += (dptr1[i] - mean) * (dptr1[i] - mean);
+ float variance = sum / 9999;
+ EXPECT_NEAR(variance, 0.3 * 0.7, 1e-2);
+ }
+
+ TEST_F(TestTensorMath, MemberUniform) {
+ Tensor p1(Shape{10000});
+ Uniform(0.1f, 0.2f, &p1);
+ const float *dptr1 = p1.data<const float *>();
+ float sum = 0;
+ for (int i = 0; i < 10000; i++) sum += dptr1[i];
+ float mean = sum / 10000;
+ EXPECT_NEAR(mean, 0.15f, 1e-3);
+
+ sum = 0;
+ for (int i = 0; i < 10000; i++) sum += (dptr1[i] - mean) * (dptr1[i] - mean);
+ float variance = sum / 9999;
+ EXPECT_NEAR(variance, 0.01f / 12, 1e-3);
+ }
+
+ TEST_F(TestTensorMath, MemberGaussian) {
+ Tensor p1(Shape{50000});
+ Gaussian(0.0f, 1.0f, &p1);
+ const float *dptr1 = p1.data<const float *>();
+ float sum = 0;
+ for (int i = 0; i < 50000; i++) sum += dptr1[i];
+ float mean = sum / 50000;
+ EXPECT_NEAR(mean, 0.0, 1e-2);
+
+ sum = 0;
+ for (int i = 0; i < 50000; i++) sum += (dptr1[i] - mean) * (dptr1[i] - mean);
+ float variance = sum / 49999;
+ EXPECT_NEAR(variance, 1.0, 1e-2);
+ }
+
TEST_F(TestTensorMath, MemberAddTensor) {
Tensor aa = a.Clone();
aa += a;
[2/3] incubator-singa git commit: SINGA-184 Add Cross Entropy loss
computation
Posted by ka...@apache.org.
SINGA-184 Add Cross Entropy loss computation
Update softmaxcrossentropy layer to support both cpp and cuda devices;
Fix bugs from crossentropy fwd and bwd; need the cuda version exp();
Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/ec17acab
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/ec17acab
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/ec17acab
Branch: refs/heads/dev
Commit: ec17acab49d595fdc48b2dae6f71901b5a4c8191
Parents: efd7b62
Author: Wei Wang <wa...@comp.nus.edu.sg>
Authored: Fri May 27 17:25:01 2016 +0800
Committer: Wei Wang <wa...@comp.nus.edu.sg>
Committed: Mon Jun 13 11:12:05 2016 +0800
----------------------------------------------------------------------
include/singa/core/tensor.h | 17 +++--
include/singa/model/loss.h | 47 ++++++++++++
src/CMakeLists.txt | 3 +-
src/core/tensor/math_kernel.cu | 37 +++++++++-
src/core/tensor/math_kernel.h | 9 ++-
src/core/tensor/tensor.cc | 52 +++++++++----
src/core/tensor/tensor_math.h | 24 ++++--
src/core/tensor/tensor_math_cpp.h | 50 +++++++++++--
src/core/tensor/tensor_math_cuda.h | 41 ++++++++---
src/model/layer/softmax.cc | 7 +-
src/model/loss/cross_entropy.h | 105 ---------------------------
src/model/loss/mse.cc | 41 +++++++++++
src/model/loss/mse.h | 66 -----------------
src/model/loss/softmax_cross_entropy.cc | 53 ++++++++++++++
test/singa/test_cross_entropy.cc | 64 ++++++++++++++--
test/singa/test_mse.cc | 6 +-
test/singa/test_softmax.cc | 9 +--
17 files changed, 393 insertions(+), 238 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ec17acab/include/singa/core/tensor.h
----------------------------------------------------------------------
diff --git a/include/singa/core/tensor.h b/include/singa/core/tensor.h
index bb8d7f8..865e1e4 100644
--- a/include/singa/core/tensor.h
+++ b/include/singa/core/tensor.h
@@ -239,11 +239,10 @@ Tensor Sum(const Tensor &t, int axis);
/// if 'axis' is 1, average all columns into a single column
/// TODO(wangwei) support arbitrary Tensor like numpy.average
Tensor Average(const Tensor &t, int axis);
-/// Regarding the internal data as 2d, with shape_[0]*...*shape_[axis-1] rows,
-/// and shape_[axis]*...*shape_[nDim()] columns.
-/// and do softmax along each row.
-Tensor SoftMax(const Tensor &t, int axis = 0);
-void SoftMax(const Tensor &t, int axis, Tensor *ret);
+/// Do softmax for each row. 'in' could be a 1-d or 2-d Tensor.
+Tensor SoftMax(const Tensor &in);
+/// Do softmax for each row. 'in' could be a 1-d or 2-d Tensor.
+void SoftMax(const Tensor &in, Tensor *out);
/// Regarding the internal data as 2d, with shape_[0]*...*shape_[axis] rows,
/// and shape_[axis+1]*...*shape_[nDim()] columns.
@@ -398,6 +397,14 @@ Tensor DivRow(const Tensor &lhs, const Tensor &rhs);
void DivRow(const Tensor &lhs, const Tensor &rhs, Tensor *ret);
*/
+/// Compute the cross entropy loss given the prediction probability 'p' and
+/// the target (ground truth) labels 't'. 'p' and 't' are either 1-d vector
+/// or 2-d matrix. 'loss' is 1-d vector. The loss is computed into p.
+void ComputeCrossEntropy(const Tensor& t, Tensor* p);
+/// Compute the dx, given prediction probability 'p' (p=softmax(x)) and
+/// the target (ground truth) labels 't'. 'p' and 't' are either 1-d vector
+/// or 2-d matrix. 'grad' has the same shape as 'p'. dx is computed into p.
+void SoftmaxCrossEntropyBwd(const Tensor& t, Tensor* p);
} // namespace singa
#endif // SINGA_CORE_TENSOR_H_
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ec17acab/include/singa/model/loss.h
----------------------------------------------------------------------
diff --git a/include/singa/model/loss.h b/include/singa/model/loss.h
index 6a23067..d188de0 100644
--- a/include/singa/model/loss.h
+++ b/include/singa/model/loss.h
@@ -18,6 +18,7 @@
#ifndef SINGA_MODEL_LOSS_H_
#define SINGA_MODEL_LOSS_H_
+#include <stack>
#include "singa/proto/model.pb.h"
#include "singa/core/tensor.h"
namespace singa {
@@ -54,6 +55,52 @@ class Loss {
/// Compute the gradients of the loss values w.r.t. the prediction.
virtual Tensor Backward() = 0;
};
+
+
+
+// ============= Mean Squared Error ===========================================
+/// MSE is for mean squared error or squared euclidean distance.
+class MSE : public Loss<Tensor> {
+ public:
+ /// Compute the loss values for each sample/instance given the prediction
+ /// and the target, which is 0.5/||prediction-target||^2
+ /// Users can call Average(const Tensor&) to get the average
+ /// loss value over all samples in the batch.
+ Tensor Forward(const Tensor& prediction, const Tensor& target) override;
+
+ /// Compute the gradients of the loss values w.r.t. the prediction,
+ /// which is (prediction-target)/batchsize
+ Tensor Backward() override;
+
+ private:
+ // to buffer intermediate data, i.e., prediction-target
+ std::stack<Tensor> buf_;
+};
+
+
+// ===============Softamx Cross Entropy =======================================
+/// Softmax + cross entropy for multi-category classification
+class SoftmaxCrossEntropy : public Loss<Tensor> {
+ public:
+ /// Compute the loss values for each sample/instance given the prediction
+ /// and the target, which is -log(p[idx_truth]), idx_truth is the truth
+ /// category's index and p[] is the probability for each category, computed
+ /// from Softmax(prediction).
+ /// Users can call Average(const Tensor&) to get the average
+ /// loss value over all samples in the batch.
+ Tensor Forward(const Tensor& prediction, const Tensor& target) override;
+
+ /// Compute the gradients of the loss values w.r.t. the prediction,
+ /// which is: p[idx] - 1 if idx is the truth category's index; else,
+ /// p[idx]
+ Tensor Backward() override;
+
+ private:
+ // to buffer intermediate data, i.e., probability for each category and
+ // the target (ground truth)
+ std::stack<Tensor> buf_;
+};
+
} // namespace singa
#endif // SINGA_MODEL_LOSS_H_
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ec17acab/src/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 28066de..23cae85 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -21,7 +21,7 @@ AUX_SOURCE_DIRECTORY(core/tensor core_source)
FILE(GLOB_RECURSE cuda_source core "*.cu")
set(FLAGS_BACKUP ${CMAKE_CXX_FLAGS})
set(CMAKE_CXX_FLAGS "")
-CUDA_COMPILE(cuda_objs SHARED ${cuda_source} OPTIONS "-Xcompiler -fPIC")
+CUDA_COMPILE(cuda_objs SHARED ${cuda_source} OPTIONS "-Xcompiler -fPIC ")
#message(STATUS "FLAGS ${CMAKE_CXX_FLAGS}")
#message(STATUS "CORE ${cuda_source}")
#message(STATUS "OBJ ${cuda_objs}")
@@ -36,6 +36,7 @@ LIST(APPEND SINGA_LINKER_LIBS singa_core)
AUX_SOURCE_DIRECTORY(model model_source)
AUX_SOURCE_DIRECTORY(model/layer model_source)
AUX_SOURCE_DIRECTORY(model/optimizer model_source)
+AUX_SOURCE_DIRECTORY(model/loss model_source)
#MESSAGE(STATUS "MODEL ${model_source}")
ADD_LIBRARY(singa_model SHARED ${model_source})
TARGET_LINK_LIBRARIES(singa_model ${SINGA_LINKER_LIBS})
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ec17acab/src/core/tensor/math_kernel.cu
----------------------------------------------------------------------
diff --git a/src/core/tensor/math_kernel.cu b/src/core/tensor/math_kernel.cu
index aed6add..f12763e 100644
--- a/src/core/tensor/math_kernel.cu
+++ b/src/core/tensor/math_kernel.cu
@@ -485,8 +485,26 @@ __global__ void KernelSet(const size_t num, const float x, float *out) {
}
}
-void Set(const size_t num, const float x, float *out, cudaStream_t s) {
- KernelSet << <ceil(num / CU1DBLOCKF), CU1DBLOCKF>>> (num, x, out);
+__global__
+void KernelComputeCrossEntropy(const size_t batchsize, const size_t dim, const float* p,
+ const int* t, float* loss) {
+ size_t sample = blockIdx.x * blockDim.x + threadIdx.x;
+ size_t num_threads = blockDim.x * gridDim.x;
+ for (; sample < batchsize; sample += num_threads) {
+ float prob_of_truth = p[sample * dim + t[sample]];
+ loss[sample] -= std::log(max(prob_of_truth, FLT_MIN));
+ }
+}
+
+__global__
+void KernelSoftmaxCrossEntropyBwd(const size_t batchsize, const size_t dim, const float* p,
+ const int* t, float* grad) {
+ size_t sample = blockIdx.x * blockDim.x + threadIdx.x;
+ size_t num_threads = blockDim.x * gridDim.x;
+ for (; sample < batchsize; sample += num_threads) {
+ size_t pos = sample * dim + t[sample];
+ grad[pos] = p[pos] - 1.0f; // TODO(wangwei) Consider p and grad are diff
+ }
}
void Div(const size_t num, float alpha, const float *in, float *out,
cudaStream_t s) {
@@ -510,6 +528,21 @@ void LE(const size_t num, const float *in, const float x, float *out,
KernelLE << <ceil(num / CU1DBLOCKF), CU1DBLOCKF>>> (num, in, x, out);
}
+void ComputeCrossEntropy(size_t batchsize, const size_t dim, const float* p,
+ const int *t, float *loss, cudaStream_t stream) {
+ KernelComputeCrossEntropy<<<ceil(batchsize/CU1DBLOCKF), CU1DBLOCKF>>>(batchsize,
+ dim, p, t, loss);
+}
+
+void Set(const size_t num, const float x, float *out, cudaStream_t s) {
+ KernelSet<<<ceil(num / CU1DBLOCKF), CU1DBLOCKF>>>(num, x, out);
+}
+
+void SoftmaxCrossEntropyBwd(size_t batchsize, const size_t dim, const float* p,
+ const int *t, float *grad, cudaStream_t stream) {
+ KernelSoftmaxCrossEntropyBwd<<<ceil(batchsize/CU1DBLOCKF), CU1DBLOCKF>>>(batchsize,
+ dim, p, t, grad);
+}
} // namespace cuda
} // namespace singa
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ec17acab/src/core/tensor/math_kernel.h
----------------------------------------------------------------------
diff --git a/src/core/tensor/math_kernel.h b/src/core/tensor/math_kernel.h
index 5c906a9..09953e4 100644
--- a/src/core/tensor/math_kernel.h
+++ b/src/core/tensor/math_kernel.h
@@ -83,13 +83,20 @@ void set_value(int n, float v, float *out);
void threshold(int n, float alpha, const float *in, float *out);
// follow the consistency guide for math API
+void ComputeCrossEntropy(const size_t batchsize, const size_t dim,
+ const float *p, const int *t, float *loss,
+ cudaStream_t stream);
void Div(const size_t num, const float x, const float *in, float *out,
cudaStream_t s);
-void Set(const size_t num, const float x, float *out, cudaStream_t s);
void GT(size_t num, const float *in, const float x, float *out, cudaStream_t s);
void GE(size_t num, const float *in, const float x, float *out, cudaStream_t s);
void LT(size_t num, const float *in, const float x, float *out, cudaStream_t s);
void LE(size_t num, const float *in, const float x, float *out, cudaStream_t s);
+void Set(const size_t num, const float x, float *out, cudaStream_t s);
+void SoftmaxCrossEntropyBwd(const size_t batchsize, const size_t dim,
+ const float *p, const int *t, float *grad,
+ cudaStream_t stream);
+
} // cuda
} // namespace singa
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ec17acab/src/core/tensor/tensor.cc
----------------------------------------------------------------------
diff --git a/src/core/tensor/tensor.cc b/src/core/tensor/tensor.cc
index 5ae375c..1ac25c6 100644
--- a/src/core/tensor/tensor.cc
+++ b/src/core/tensor/tensor.cc
@@ -77,10 +77,9 @@ void Tensor::ResetLike(const Tensor &t) {
}
}
-void Tensor::Reshape(const Shape &shape) {
- if (Product(shape_) != Product(shape)) {
- if (blob_ != nullptr && blob_->DecRefCount() == 0)
- device_->FreeBlob(blob_);
+void Tensor::Reshape(const Shape& shape) {
+ if (Product(shape) != Product(shape_)) {
+ if (blob_ != nullptr && blob_->DecRefCount() == 0) device_->FreeBlob(blob_);
blob_ = device_->NewBlob(Product(shape) * SizeOf(data_type_));
}
shape_ = shape;
@@ -403,22 +402,21 @@ Tensor Average(const Tensor &t, int axis) {
}
}
-Tensor SoftMax(const Tensor &in, int axis) {
+Tensor SoftMax(const Tensor &in) {
Tensor out(in.shape(), in.device(), in.data_type());
- SoftMax(in, axis, &out);
+ SoftMax(in, &out);
return out;
}
-void SoftMax(const Tensor &in, int axis, Tensor *out) {
+void SoftMax(const Tensor &in, Tensor *out) {
+ CHECK_LE(in.nDim(), 2u);
+ Exp(in, out);
size_t nrow = 1, ncol = in.Size(), size = ncol;
- CHECK_GE(axis, 0);
- if (axis > 0) {
- nrow = Product(in.shape(), 0, axis);
- CHECK_EQ(size % nrow, 0u) << "Size = " << size << " nrow = " << nrow;
+ if (in.nDim() == 2u) {
+ nrow = in.shape(0);
ncol = size / nrow;
+ out->Reshape(Shape{nrow, ncol});
}
- Exp(in, out);
- out->Reshape(Shape{nrow, ncol});
Tensor sum(Shape{nrow}, in.device(), in.data_type());
SumColumns(*out, &sum);
DivColumn(sum, out);
@@ -594,6 +592,19 @@ void AddRow(const float alpha, const float beta, const Tensor &v, Tensor *M) {
Mult(alpha, one, vmat, beta, M);
}
}
+void ComputeCrossEntropy(const Tensor& t, Tensor* p) {
+ CHECK_LE(p->nDim(), 2u);
+ CHECK_LE(t.nDim(), 2u); // TODO(wangwei) consider multi-labels.
+ size_t batchsize = 1;
+ if (p->nDim() == 2u) batchsize = p->shape(0);
+ size_t dim = p->Size() / batchsize;
+ TYPE_LANG_SWITCH(p->data_type(), DType, p->device()->lang(), Lang, {
+ p->device()->Exec([batchsize, dim, t, p](Context *ctx) {
+ ComputeCrossEntropy<DType, Lang>(batchsize, dim, p->blob(), t.blob(),
+ p->blob(), ctx);
+ }, {p->blob(), t.blob()}, {p->blob()});
+ });
+}
template <typename SType> Tensor Div(const SType alpha, const Tensor &in) {
Tensor out(in.shape(), in.device(), in.data_type());
@@ -665,7 +676,20 @@ void MultRow(const Tensor &v, Tensor *M) {
{M->blob(), v.blob()}, {M->blob()});
});
}
-
+void SoftmaxCrossEntropyBwd(const Tensor &t, Tensor *p) {
+ CHECK_LE(p->nDim(), 2u);
+ CHECK_LE(t.nDim(), 2u); // TODO(wangwei) consider multi-labels.
+ size_t batchsize = 1;
+ if (p->nDim() == 2u)
+ batchsize = p->shape(0);
+ size_t dim = p->Size() / batchsize;
+ TYPE_LANG_SWITCH(p->data_type(), DType, p->device()->lang(), Lang, {
+ p->device()->Exec([batchsize, dim, t, p](Context *ctx) {
+ SoftmaxCrossEntropyBwd<DType, Lang>(batchsize, dim, p->blob(), t.blob(),
+ p->blob(), ctx);
+ }, {p->blob(), t.blob()}, {p->blob()});
+ });
+}
void SubColumn(const Tensor &v, Tensor *M) { AddColumn(-1, 1, v, M); }
void SubRow(const Tensor &v, Tensor *M) { AddRow(-1, 1, v, M); }
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ec17acab/src/core/tensor/tensor_math.h
----------------------------------------------------------------------
diff --git a/src/core/tensor/tensor_math.h b/src/core/tensor/tensor_math.h
index ff865e0..bcf4908 100644
--- a/src/core/tensor/tensor_math.h
+++ b/src/core/tensor/tensor_math.h
@@ -110,12 +110,6 @@ void Sigmoid(int count, const Blob *input, Blob *ret, Context *ctx) {
LOG(FATAL) << "Not Implemented";
}
-/// Do softmax for each row invidually
-template <typename DType, typename Lang>
-void Softmax(int nrow, int ncol, const Blob *input, Blob *ret, Context *ctx) {
- LOG(FATAL) << "Not Implemented";
-}
-
// TODO(wangwei) unify SumRow and SumCol.
/// Sum the rows of the input matrix into a vector
template <typename DType, typename Lang>
@@ -312,11 +306,14 @@ void Gaussian(int count, float mean, float std, Blob *ret, Context *ctx) {
// ========follow the consistency guide of math API
+/// Divide alpha by each element of 'in'.
+// following the consistency guide.
template <typename DType, typename Lang>
-void Set(const size_t num, const DType x, Blob *out, Context *ctx) {
+void ComputeCrossEntropy(const size_t batchsize, const size_t dim,
+ const Blob *p, const Blob *t, Blob *loss,
+ Context *ctx) {
LOG(FATAL) << "Not Implemented";
}
-/// Divide alpha by each element of 'in'.
template <typename DType, typename Lang>
void Div(const size_t num, const DType alpha, const Blob *in, Blob *out,
Context *ctx) {
@@ -364,6 +361,17 @@ void GE(const size_t num, const Blob *in, const DType x, Blob *out,
Context *ctx) {
LOG(FATAL) << "Not Implemented";
}
+template <typename DType, typename Lang>
+void Set(const size_t num, const DType x, Blob *out, Context *ctx) {
+ LOG(FATAL) << "Not Implemented";
+}
+
+template <typename DType, typename Lang>
+void SoftmaxCrossEntropyBwd(const size_t batchsize, const size_t dim,
+ const Blob *p, const Blob *t, Blob *grad,
+ Context *ctx) {
+ LOG(FATAL) << "Not Implemented";
+}
} // namespace singa
#endif // SINGA_CORE_MATH_H_
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ec17acab/src/core/tensor/tensor_math_cpp.h
----------------------------------------------------------------------
diff --git a/src/core/tensor/tensor_math_cpp.h b/src/core/tensor/tensor_math_cpp.h
index 693f09c..907c656 100644
--- a/src/core/tensor/tensor_math_cpp.h
+++ b/src/core/tensor/tensor_math_cpp.h
@@ -17,7 +17,9 @@
*/
#ifndef SINGA_CORE_TENSOR_TENSOR_MATH_CPP_H_
#define SINGA_CORE_TENSOR_TENSOR_MATH_CPP_H_
+
#include "./tensor_math.h"
+#include <cfloat>
#include "singa/core/common.h"
#include <math.h>
@@ -210,6 +212,22 @@ void Gaussian<float, lang::Cpp>(int count, float mean, float std, Blob *ret,
// follow the consistency guide of math API
template <>
+void ComputeCrossEntropy<float, lang::Cpp>(const size_t batchsize,
+ const size_t dim, const Blob *p,
+ const Blob *t, Blob *loss,
+ Context *ctx) {
+ const float *pPtr = static_cast<const float *>(p->data());
+ const float *tPtr = static_cast<const float *>(t->data());
+ float *lossPtr = static_cast<float *>(loss->mutable_data());
+ for (size_t i = 0; i < batchsize; i++) {
+ int truth_idx = static_cast<int>(tPtr[i]);
+ CHECK_GE(truth_idx, 0);
+ float prob_of_truth = pPtr[i * dim + truth_idx];
+ lossPtr[i] = -std::log(std::max(prob_of_truth, FLT_MIN));
+ }
+}
+
+template <>
void Div<float, lang::Cpp>(const size_t num, const float alpha, const Blob *in,
Blob *out, Context *ctx) {
float *outPtr = static_cast<float *>(out->mutable_data());
@@ -249,13 +267,6 @@ void DGMM<float, lang::Cpp>(const bool side_right, const size_t nrow,
}
}
}
-
-template <>
-void Set<float, lang::Cpp>(const size_t num, const float x, Blob *out,
- Context *ctx) {
- float *outPtr = static_cast<float *>(out->mutable_data());
- for (size_t i = 0; i < num; i++) outPtr[i] = x;
-}
template <>
void LE<float, lang::Cpp>(const size_t num, const Blob *in, const float x,
Blob *out, Context *ctx) {
@@ -312,9 +323,32 @@ void GEMM<float, lang::Cpp>(const bool transA, const bool transB,
cblas_sgemm(CblasRowMajor, transa, transb, nrowA, ncolB, ncolA, alpha, APtr,
lda, BPtr, ldb, beta, CPtr, ldc);
}
-
#endif // USE_CBLAS
+template <>
+void Set<float, lang::Cpp>(const size_t num, const float x, Blob *out,
+ Context *ctx) {
+ float *outPtr = static_cast<float *>(out->mutable_data());
+ for (size_t i = 0; i < num; i++) outPtr[i] = x;
+}
+template <>
+void SoftmaxCrossEntropyBwd<float, lang::Cpp>(const size_t batchsize,
+ const size_t dim, const Blob *p,
+ const Blob *t,
+ Blob *grad, Context *ctx) {
+ CHECK_EQ(p, grad) << "Use the same pointer to optimize performance";
+ // const float* pPtr = static_cast<const float*>(p->data());
+ const float *tPtr = static_cast<const float *>(t->data());
+ float *gradPtr = static_cast<float *>(grad->mutable_data());
+
+ for (size_t i = 0; i < batchsize; i++) {
+ int truth_idx = static_cast<int>(tPtr[i]);
+ CHECK_GE(truth_idx, 0);
+ gradPtr[i * dim + truth_idx] -= 1.0;
+ }
+}
+
+
} // namespace singa
#endif // SINGA_CORE_TENSOR_TENSOR_MATH_CPP_H_
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ec17acab/src/core/tensor/tensor_math_cuda.h
----------------------------------------------------------------------
diff --git a/src/core/tensor/tensor_math_cuda.h b/src/core/tensor/tensor_math_cuda.h
index 4a2ba66..c69620c 100644
--- a/src/core/tensor/tensor_math_cuda.h
+++ b/src/core/tensor/tensor_math_cuda.h
@@ -75,6 +75,17 @@ void Sum<float, lang::Cuda>(int count, const Blob *input, float *ret,
// follow the consistency guide of math API
template <>
+void ComputeCrossEntropy<float, lang::Cuda>(const size_t batchsize,
+ const size_t dim, const Blob *p,
+ const Blob *t, Blob *loss,
+ Context *ctx) {
+ const float *pPtr = static_cast<const float *>(p->data());
+ const int *tPtr = static_cast<const int *>(t->data());
+ float *lossPtr = static_cast<float *>(loss->mutable_data());
+ cuda::ComputeCrossEntropy(batchsize, dim, pPtr, tPtr, lossPtr, ctx->stream);
+}
+
+template <>
void Div<float, lang::Cuda>(const size_t num, const float alpha, const Blob *in,
Blob *out, Context *ctx) {
float *outPtr = static_cast<float *>(out->mutable_data());
@@ -82,19 +93,13 @@ void Div<float, lang::Cuda>(const size_t num, const float alpha, const Blob *in,
cuda::Div(num, alpha, inPtr, outPtr, ctx->stream);
}
-template <>
-void Set<float, lang::Cuda>(const size_t num, const float x, Blob *out,
- Context *ctx) {
- float *outPtr = static_cast<float *>(out->mutable_data());
- cuda::Set(num, x, outPtr, ctx->stream);
-}
// NOTE: cublas uses column major order.
// http://peterwittek.com/cublas-matrix-c-style.html
template <>
void DGMM<float, lang::Cuda>(const bool side_right, const size_t nrow,
const size_t ncol, const Blob *M, const Blob *v,
Blob *out, Context *ctx) {
- auto handle = ctx->cublas_handle; // TODO(wangwei) set cudastream
+ auto handle = ctx->cublas_handle; // TODO(wangwei) set cudastream
const float *MPtr = static_cast<const float *>(M->data());
const float *vPtr = static_cast<const float *>(v->data());
float *outPtr = static_cast<float *>(out->mutable_data());
@@ -121,7 +126,7 @@ void GEMM<float, lang::Cuda>(const bool transA, const bool transB,
const float *APtr = static_cast<const float *>(A->data());
const float *BPtr = static_cast<const float *>(B->data());
float *CPtr = static_cast<float *>(C->mutable_data());
- auto handle = ctx->cublas_handle; // TODO(wangwei) set cudastream
+ auto handle = ctx->cublas_handle; // TODO(wangwei) set cudastream
CUBLAS_CHECK(cublasSgemm(handle, transb, transa, ncolB, nrowA, ncolA, &alpha,
BPtr, ldb, APtr, lda, &beta, CPtr, ldc));
}
@@ -155,9 +160,25 @@ void LT<float, lang::Cuda>(const size_t num, const Blob* in, const float x,
cuda::LT(num, inPtr, x, outPtr, ctx->stream);
}
+template<>
+void Set<float, lang::Cuda>(const size_t num, const float x, Blob *out,
+ Context *ctx) {
+ float *outPtr = static_cast<float *>(out->mutable_data());
+ cuda::Set(num, x, outPtr, ctx->stream);
+}
-
-
+template <>
+void SoftmaxCrossEntropyBwd<float, lang::Cuda>(const size_t batchsize,
+ const size_t dim, const Blob *p,
+ const Blob *t, Blob *grad,
+ Context *ctx) {
+ CHECK_EQ(p, grad) << "Use the same pointer to optimize performance";
+ const float *pPtr = static_cast<const float *>(p->data());
+ const int *tPtr = static_cast<const int *>(t->data());
+ float *gradPtr = static_cast<float *>(grad->mutable_data());
+ cuda::SoftmaxCrossEntropyBwd(batchsize, dim, pPtr, tPtr, gradPtr,
+ ctx->stream);
+}
} // namespace singa
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ec17acab/src/model/layer/softmax.cc
----------------------------------------------------------------------
diff --git a/src/model/layer/softmax.cc b/src/model/layer/softmax.cc
index 813ebf0..8af1d76 100644
--- a/src/model/layer/softmax.cc
+++ b/src/model/layer/softmax.cc
@@ -26,10 +26,11 @@ void Softmax::Setup(const LayerConf& conf) {
const Tensor Softmax::Forward(int flag, const Tensor& input) {
if (input.nDim() == 1) {
- Tensor tmp = Reshape(input, Shape{1, input.Size()});
- buf_.push(SoftMax(tmp, 0));
+ buf_.push(SoftMax(input));
} else {
- buf_.push(SoftMax(input, axis_));
+ size_t nrow = Product(input.shape(), 0, axis_);
+ const Tensor& tmp = Reshape(input, Shape{nrow, input.Size() / nrow});
+ buf_.push(SoftMax(tmp));
}
return buf_.top();
}
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ec17acab/src/model/loss/cross_entropy.h
----------------------------------------------------------------------
diff --git a/src/model/loss/cross_entropy.h b/src/model/loss/cross_entropy.h
deleted file mode 100644
index 815b795..0000000
--- a/src/model/loss/cross_entropy.h
+++ /dev/null
@@ -1,105 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef SRC_MODEL_LOSS_CROSS_ENTROPY_H_
-#define SRC_MODEL_LOSS_CROSS_ENTROPY_H_
-#include <stack>
-#include "singa/model/loss.h"
-
-namespace singa {
-
-/// Cross entropy is for cross entropy loss.
-class CrossEntropy : public Loss<Tensor> {
- public:
- /// Compute the loss values for each sample/instance given the prediction
- /// and the target, which is sum {-log(prob_of_truth)}
- /// Users can call Average(const Tensor&) to get the average
- /// loss value over all samples in the batch.
- Tensor Forward(const Tensor& prediction, const Tensor& target) override;
-
- /// Compute the gradients of the loss values w.r.t. the prediction,
- /// which is: if the entry x corresponds to ground truth,
- /// then softmax(x) - 1; else, softmax(x)
- Tensor Backward() override;
-
- private:
- // to buffer intermediate data, i.e., softmax(prediction), target
- std::stack<Tensor> buf_;
-};
-
-Tensor CrossEntropy::Forward(const Tensor& prediction, const Tensor& target) {
- CHECK(buf_.empty()) << "Do not call Forward successively for more than twice."
- << " The calling pattern is [Forward|Evaluate] Backward";
-
- size_t batchsize = 1;
- if (prediction.nDim() > 1) batchsize = prediction.shape().at(0);
- size_t dim = prediction.Size() / batchsize;
- // a temporal Softmax layer for forward computation
-// LayerConf conf; // TODO(kaiping): this is currently commented
-// Softmax softmax_tmp;
-// softmax_tmp.Setup(conf);
-// Tensor softmax = softmax_tmp.Forward(0, prediction);
-
- Tensor softmax(Shape{batchsize, dim}); // TODO(kaiping): Delete
-// softmax.SetValue<float>(0.5f); // TODO(kaiping): Delete
-
- softmax.Reshape(Shape{batchsize, dim});
- // buffer intermediate data
- buf_.push(softmax);
- buf_.push(target);
-
- // Compute loss for each sample
- Tensor loss(Shape{batchsize, 1});
- float * pre_ptr = reinterpret_cast<float*>(softmax.blob()->mutable_data());
- float * truth_ptr = reinterpret_cast<float*>(target.blob()->mutable_data());
- float * loss_ptr = reinterpret_cast<float*>(loss.blob()->mutable_data());
- for (size_t i = 0; i < batchsize; i++) {
- int ilabel = static_cast<int>(truth_ptr[i]);
- CHECK_GE(ilabel, 0);
- float prob_of_truth = pre_ptr[ilabel];
- loss_ptr[i] = -log(prob_of_truth);
- pre_ptr += dim; // change to the next sample
- }
- return loss;
-}
-
-Tensor CrossEntropy::Backward() {
- const Tensor& target = buf_.top();
- buf_.pop();
- Tensor softmax = buf_.top();
- buf_.pop();
-
- size_t batchsize = 1;
- if (softmax.nDim() > 1)
- batchsize = softmax.shape().at(0);
- size_t dim = softmax.Size() / batchsize;
- float * truth_ptr = reinterpret_cast<float*>(target.blob()->mutable_data());
- float * pre_ptr = reinterpret_cast<float*>(softmax.blob()->mutable_data());
- for (size_t i = 0; i < batchsize; i++) {
- int ilabel = static_cast<int>(truth_ptr[i]);
- // CHECK_GE(ilabel, 0);
- pre_ptr[ilabel] -= 1.0;
- pre_ptr += dim; // change to the next sample
- }
- return softmax;
-}
-} // namespace singa
-
-#endif // SRC_MODEL_LOSS_CROSS_ENTROPY_H_
-
-
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ec17acab/src/model/loss/mse.cc
----------------------------------------------------------------------
diff --git a/src/model/loss/mse.cc b/src/model/loss/mse.cc
new file mode 100644
index 0000000..a4bbb72
--- /dev/null
+++ b/src/model/loss/mse.cc
@@ -0,0 +1,41 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "singa/model/loss.h"
+
+namespace singa {
+
+Tensor MSE::Forward(const Tensor& prediction, const Tensor& target) {
+ CHECK(buf_.empty()) << "Do not call Forward successively for more than twice."
+ << " The calling pattern is [Forward|Evaluate] Backward";
+ Tensor t = prediction - target;
+ size_t batchsize = 1;
+ if (t.nDim() > 1) batchsize = t.shape().at(0);
+ size_t dim = t.Size() / batchsize;
+ t.Reshape(Shape{batchsize, dim});
+ buf_.push(t);
+ // TODO(wangwei) use CastType for operator/
+ return Sum(Square(t), 1) * 0.5f;
+}
+
+Tensor MSE::Backward() {
+ Tensor ret = buf_.top();
+ buf_.pop();
+ return ret * (1.0f / ret.shape().at(0));
+}
+} // namespace singa
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ec17acab/src/model/loss/mse.h
----------------------------------------------------------------------
diff --git a/src/model/loss/mse.h b/src/model/loss/mse.h
deleted file mode 100644
index 1a022f9..0000000
--- a/src/model/loss/mse.h
+++ /dev/null
@@ -1,66 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef SINGA_MODEL_LOSS_MSE_H_
-#define SINGA_MODEL_LOSS_MSE_H_
-#include <stack>
-#include "singa/model/loss.h"
-
-namespace singa {
-
-/// MSE is for mean squared error or squared euclidean distance.
-class MSE : public Loss<Tensor> {
- public:
- /// Compute the loss values for each sample/instance given the prediction
- /// and the target, which is 0.5/||prediction-target||^2
- /// Users can call Average(const Tensor&) to get the average
- /// loss value over all samples in the batch.
- Tensor Forward(const Tensor& prediction, const Tensor& target) override;
-
- /// Compute the gradients of the loss values w.r.t. the prediction,
- /// which is (prediction-target)/batchsize
- Tensor Backward() override;
-
- private:
- // to buffer intermediate data, i.e., prediction-target
- std::stack<Tensor> buf_;
-};
-
-Tensor MSE::Forward(const Tensor& prediction, const Tensor& target) {
- CHECK(buf_.empty()) << "Do not call Forward successively for more than twice."
- << " The calling pattern is [Forward|Evaluate] Backward";
- Tensor t = prediction - target;
- size_t batchsize = 1;
- if (t.nDim() > 1) batchsize = t.shape().at(0);
- size_t dim = t.Size() / batchsize;
- t.Reshape(Shape{batchsize, dim});
- buf_.push(t);
- // TODO(wangwei) use CastType for operator/
- return Sum(Square(t), 1) * 0.5f;
-}
-
-Tensor MSE::Backward() {
- Tensor ret = buf_.top();
- buf_.pop();
- return ret * (1.0f / ret.shape().at(0));
-}
-} // namespace singa
-
-#endif // SINGA_MODEL_LOSS_H_
-
-
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ec17acab/src/model/loss/softmax_cross_entropy.cc
----------------------------------------------------------------------
diff --git a/src/model/loss/softmax_cross_entropy.cc b/src/model/loss/softmax_cross_entropy.cc
new file mode 100644
index 0000000..4ca323a
--- /dev/null
+++ b/src/model/loss/softmax_cross_entropy.cc
@@ -0,0 +1,53 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <stack>
+#include "singa/model/loss.h"
+
+namespace singa {
+
+
+Tensor SoftmaxCrossEntropy::Forward(const Tensor& prediction, const Tensor& target) {
+ CHECK(buf_.empty()) << "Do not call Forward successively for more than twice."
+ << " The calling pattern is [Forward|Evaluate] Backward";
+ size_t batchsize = 1;
+ if (prediction.nDim() > 1) batchsize = prediction.shape().at(0);
+ size_t dim = prediction.Size() / batchsize;
+ const Tensor& input = Reshape(prediction, Shape{batchsize, dim});
+ Tensor prob = SoftMax(input);
+
+ // buffer intermediate data
+ buf_.push(prob);
+ buf_.push(target);
+ Tensor loss = prob.Clone();
+
+ ComputeCrossEntropy(target, &loss);
+ return loss;
+}
+
+Tensor SoftmaxCrossEntropy::Backward() {
+ const Tensor target = buf_.top();
+ buf_.pop();
+ Tensor prob = buf_.top();
+ buf_.pop();
+ SoftmaxCrossEntropyBwd(target, &prob);
+ return prob;
+}
+} // namespace singa
+
+
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ec17acab/test/singa/test_cross_entropy.cc
----------------------------------------------------------------------
diff --git a/test/singa/test_cross_entropy.cc b/test/singa/test_cross_entropy.cc
index 9bb2321..6b8cb69 100644
--- a/test/singa/test_cross_entropy.cc
+++ b/test/singa/test_cross_entropy.cc
@@ -22,16 +22,15 @@
#include "gtest/gtest.h"
#include "singa/core/tensor.h"
#include "singa/core/device.h"
-#include "../src/model/loss/cross_entropy.h"
+#include "singa/model/loss.h"
+#include "singa_config.h"
using singa::Tensor;
-class TestCrossEntropy : public ::testing::Test {
+class TestSoftmaxCrossEntropy : public ::testing::Test {
protected:
virtual void SetUp() {
p.Reshape(singa::Shape{2, 4});
t.Reshape(singa::Shape{2, 1});
- p.CopyDataFromHostPtr(pdat, sizeof(pdat) / sizeof(float));
- t.CopyDataFromHostPtr(tdat, sizeof(pdat) / sizeof(float));
}
const float pdat[8] = {0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1};
const float tdat[2] = {0.0, 2.0};
@@ -39,8 +38,11 @@ class TestCrossEntropy : public ::testing::Test {
singa::Tensor p, t;
};
-TEST_F(TestCrossEntropy, CppForward) {
- singa::CrossEntropy cross_entropy;
+TEST_F(TestSoftmaxCrossEntropy, CppForward) {
+ p.CopyDataFromHostPtr(pdat, 8);
+ t.CopyDataFromHostPtr(tdat, 2);
+
+ singa::SoftmaxCrossEntropy cross_entropy;
const Tensor& loss = cross_entropy.Forward(p, t);
auto ldat = loss.data<const float*>();
@@ -49,8 +51,11 @@ TEST_F(TestCrossEntropy, CppForward) {
EXPECT_FLOAT_EQ(ldat[1], result_test);
}
-TEST_F(TestCrossEntropy, CppBackward) {
- singa::CrossEntropy cross_entropy;
+TEST_F(TestSoftmaxCrossEntropy, CppBackward) {
+ p.CopyDataFromHostPtr(pdat, 8);
+ t.CopyDataFromHostPtr(tdat, 2);
+
+ singa::SoftmaxCrossEntropy cross_entropy;
cross_entropy.Forward(p, t);
const Tensor& grad = cross_entropy.Backward();
@@ -64,3 +69,46 @@ TEST_F(TestCrossEntropy, CppBackward) {
EXPECT_FLOAT_EQ(gdat[6], -0.75);
EXPECT_FLOAT_EQ(gdat[7], 0.25);
}
+
+#ifdef USE_CUDA
+
+TEST_F(TestSoftmaxCrossEntropy, CudaForward) {
+ singa::SoftmaxCrossEntropy cross_entropy;
+ singa::CudaGPU dev;
+ p.ToDevice(&dev);
+ t.ToDevice(&dev);
+ p.CopyDataFromHostPtr(pdat, 8);
+ t.CopyDataFromHostPtr(tdat, 2);
+
+ Tensor loss = cross_entropy.Forward(p, t);
+ loss.ToHost();
+ auto ldat = loss.data<const float*>();
+
+ const float result_test = -log(0.25);
+ EXPECT_FLOAT_EQ(ldat[0], result_test);
+ EXPECT_FLOAT_EQ(ldat[1], result_test);
+}
+
+TEST_F(TestSoftmaxCrossEntropy, CudaBackward) {
+ singa::SoftmaxCrossEntropy cross_entropy;
+ singa::CudaGPU dev;
+ p.ToDevice(&dev);
+ t.ToDevice(&dev);
+ p.CopyDataFromHostPtr(pdat, 8);
+ t.CopyDataFromHostPtr(tdat, 2);
+
+ cross_entropy.Forward(p, t);
+ Tensor grad = cross_entropy.Backward();
+
+ grad.ToHost();
+ auto gdat = grad.data<const float*>();
+ EXPECT_FLOAT_EQ(gdat[0], -0.75);
+ EXPECT_FLOAT_EQ(gdat[1], 0.25);
+ EXPECT_FLOAT_EQ(gdat[2], 0.25);
+ EXPECT_FLOAT_EQ(gdat[3], 0.25);
+ EXPECT_FLOAT_EQ(gdat[4], 0.25);
+ EXPECT_FLOAT_EQ(gdat[5], 0.25);
+ EXPECT_FLOAT_EQ(gdat[6], -0.75);
+ EXPECT_FLOAT_EQ(gdat[7], 0.25);
+}
+#endif // USE_CUDA
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ec17acab/test/singa/test_mse.cc
----------------------------------------------------------------------
diff --git a/test/singa/test_mse.cc b/test/singa/test_mse.cc
index 67f583c..a6bd1c3 100644
--- a/test/singa/test_mse.cc
+++ b/test/singa/test_mse.cc
@@ -22,8 +22,9 @@
#include "gtest/gtest.h"
#include "singa/core/tensor.h"
#include "singa/core/device.h"
-#include "../src/model/loss/mse.h"
+#include "singa/model/loss.h"
#include "singa_config.h"
+
using singa::Tensor;
class TestMSE : public ::testing::Test {
protected:
@@ -66,6 +67,8 @@ TEST_F(TestMSE, CppBackward) {
EXPECT_FLOAT_EQ(gdat[i], (1.0f / p.shape().at(0)) * (pdat[i] - tdat[i]));
}
#endif
+
+#ifdef USE_CUDA
TEST_F(TestMSE, CudaForward) {
singa::MSE mse;
singa::CudaGPU dev;
@@ -98,3 +101,4 @@ TEST_F(TestMSE, CudaBackward) {
for (size_t i = 0; i < grad.Size(); i++)
EXPECT_FLOAT_EQ(gdat[i], (1.0f / p.shape().at(0)) * (pdat[i] - tdat[i]));
}
+#endif
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/ec17acab/test/singa/test_softmax.cc
----------------------------------------------------------------------
diff --git a/test/singa/test_softmax.cc b/test/singa/test_softmax.cc
index da2a6ef..09dfcd9 100644
--- a/test/singa/test_softmax.cc
+++ b/test/singa/test_softmax.cc
@@ -55,7 +55,6 @@ TEST(Softmax, Forward) {
const float* yptr = out.data<const float*>();
EXPECT_EQ(n, out.Size());
- float* y = new float[n];
float* sigma = new float[row];
for (size_t i = 0; i < row; i++)
sigma[i] = 0.f;
@@ -63,11 +62,9 @@ TEST(Softmax, Forward) {
sigma[i / col] += exp(x[i]);
//EXPECT_EQ(0, sigma[1]);
for (size_t i = 0; i < row; i++)
- for (size_t j = 0; j < col; j++)
- y[i * col + j] = exp(x[i * col + j]) / sigma[i];
- EXPECT_FLOAT_EQ(y[0], yptr[0]);
- EXPECT_FLOAT_EQ(y[4], yptr[4]);
- EXPECT_FLOAT_EQ(y[5], yptr[5]);
+ for (size_t j = 0; j < col; j++) {
+ EXPECT_FLOAT_EQ(yptr[i * col + j], exp(x[i * col + j]) / sigma[i]);
+ }
}
TEST(Softmax, Backward) {