You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@singa.apache.org by zh...@apache.org on 2016/11/29 05:31:10 UTC
[1/2] incubator-singa git commit: SINGA-275 - Add Cross Entropy Loss
for multiple labels
Repository: incubator-singa
Updated Branches:
refs/heads/master 15a619b25 -> 848111181
SINGA-275 - Add Cross Entropy Loss for multiple labels
Updated the softmax cross entropy loss layer and the tensor functions to allow
the ground truth to be a binary array for each instance;
Added unittests for cross entropy with multiple labels per instance;
For a batch of instances as input, the ground truth tensor can be either an integer array (one value per
instance) or a binary matrix (one row per instance).
For a single instance input, the feature tensor is 1-d array, and the
ground truth tensor is a 1-d array (with a single integer value or a
binary array)
Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/d1110c0b
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/d1110c0b
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/d1110c0b
Branch: refs/heads/master
Commit: d1110c0b7101fff6999db1dd5cccb14bf8370578
Parents: 15a619b
Author: Wei Wang <wa...@gmail.com>
Authored: Sun Nov 27 00:21:14 2016 +0800
Committer: Wei Wang <wa...@gmail.com>
Committed: Sun Nov 27 12:57:39 2016 +0800
----------------------------------------------------------------------
include/singa/core/tensor.h | 9 +++-
include/singa/model/loss.h | 22 +++++---
python/singa/loss.py | 19 +++++++
src/core/tensor/math_kernel.cu | 49 ++++++++++++-----
src/core/tensor/math_kernel.h | 12 ++---
src/core/tensor/tensor.cc | 16 +++---
src/core/tensor/tensor_math.h | 14 +++--
src/core/tensor/tensor_math_cpp.h | 50 ++++++++++++++----
src/core/tensor/tensor_math_cuda.h | 11 ++--
src/model/loss/softmax_cross_entropy.cc | 3 +-
test/python/test_optimizer.py | 1 -
test/singa/test_cross_entropy.cc | 79 +++++++++++++++++++++++++++-
12 files changed, 225 insertions(+), 60 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d1110c0b/include/singa/core/tensor.h
----------------------------------------------------------------------
diff --git a/include/singa/core/tensor.h b/include/singa/core/tensor.h
index a39217b..28d1619 100644
--- a/include/singa/core/tensor.h
+++ b/include/singa/core/tensor.h
@@ -440,8 +440,13 @@ void Mult(const SType alpha, const Tensor &A, const Tensor &B, const SType beta,
// Misc.
// ****************
/// Compute the cross entropy loss given the prediction probability 'p' and
-/// the target (ground truth) labels 't'. 'p' and 't' are either 1-d vector
-/// or 2-d matrix. 'loss' is 1-d vector. The loss is computed into p.
+/// the target (ground truth) labels 't'. 'p' could be either a 1-d vector for
+/// a single instance or a 2-d matrix for a batch of instances. t[i]
+/// could be the ground truth label index or a label weighted
+/// array of the i-th instance. For example, if there are 3 candidate labels for
+/// each instance, t[i] could be 2 or [0, 0, 1]. If one instance could have
+/// multiple labels, then t[i] could be [1, 0, 1].
+/// The loss is computed into 'loss', a 1-d vector with one value per instance.
void ComputeCrossEntropy(const Tensor &p, const Tensor &t, Tensor *loss);
/// Compute the dx, given prediction probability 'p' (p=softmax(x)) and
/// the target (ground truth) labels 't'. 'p' and 't' are either 1-d vector
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d1110c0b/include/singa/model/loss.h
----------------------------------------------------------------------
diff --git a/include/singa/model/loss.h b/include/singa/model/loss.h
index 4ee41cb..0a433e7 100644
--- a/include/singa/model/loss.h
+++ b/include/singa/model/loss.h
@@ -66,7 +66,8 @@ class MSE : public Loss {
/// and the target, which is 0.5/||prediction-target||^2
/// Users can call Average(const Tensor&) to get the average
/// loss value over all samples in the batch.
- Tensor Forward(int flag, const Tensor& prediction, const Tensor& target) override;
+ Tensor Forward(int flag, const Tensor& prediction,
+ const Tensor& target) override;
/// Compute the gradients of the loss values w.r.t. the prediction,
/// which is (prediction-target)/batchsize
@@ -83,16 +84,23 @@ class MSE : public Loss {
class SoftmaxCrossEntropy : public Loss {
public:
/// Compute the loss values for each sample/instance given the prediction
- /// and the target, which is -log(p[idx_truth]), idx_truth is the truth
- /// category's index and p[] is the probability for each category, computed
- /// from Softmax(prediction).
+ /// and the target.
+ ///
+ /// If the target consists of one integer per instance, i.e. the label index
+ /// (denoted as idx_truth), the loss is -log(p[idx_truth]), where p[] is the
+ /// probability for each category, computed from Softmax(prediction).
+ /// If the target consists of one array per instance (e.g., for multiple
+ /// labels), the loss is -\sum_i (t[i] * log(p[i])) / \sum_j t[j], where t[i]
+ /// is the weight of the i-th label (e.g., 1: the instance has this label, 0:
+ /// the instance does not have this label).
+ ///
/// Users can call Average(const Tensor&) to get the average
/// loss value over all samples in the batch.
- Tensor Forward(int flag, const Tensor& prediction, const Tensor& target) override;
+ Tensor Forward(int flag, const Tensor& prediction,
+ const Tensor& target) override;
/// Compute the gradients of the loss values w.r.t. the prediction,
- /// which is: p[idx] - 1 if idx is the truth category's index; else,
- /// p[idx]
+ /// which is: p[i] - t[i]/\sum_j t[j]
Tensor Backward() override;
private:
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d1110c0b/python/singa/loss.py
----------------------------------------------------------------------
diff --git a/python/singa/loss.py b/python/singa/loss.py
index 526e4d0..f3330dc 100644
--- a/python/singa/loss.py
+++ b/python/singa/loss.py
@@ -38,9 +38,11 @@ Example usage::
from . import singa_wrap as singa
+from proto import model_pb2
import tensor
+
class Loss(object):
'''Base loss class.
@@ -64,6 +66,11 @@ class Loss(object):
Returns:
a tensor of floats for the loss values, one per sample
'''
+ if type(flag) is bool:
+ if flag:
+ flag = model_pb2.kTrain
+ else:
+ flag = model_pb2.kEval
return tensor.from_raw_tensor(
self.swig_loss.Forward(flag, x.singa_tensor, y.singa_tensor))
@@ -84,6 +91,12 @@ class Loss(object):
Returns:
the averaged loss for all samples in x.
'''
+ if type(flag) is bool:
+ if flag:
+ flag = model_pb2.kTrain
+ else:
+ flag = model_pb2.kEval
+
return self.swig_loss.Evaluate(flag, x.singa_tensor, y.singa_tensor)
@@ -92,6 +105,12 @@ class SoftmaxCrossEntropy(Loss):
It converts the inputs via SoftMax function and then
computes the cross-entropy loss against the ground truth values.
+
+ For each sample, the ground truth could be an integer as the label index;
+ or a binary array, indicating the label distribution. The ground truth
+ tensor thus could be a 1d or 2d tensor.
+ The data/feature tensor could be 1d (for a single sample) or 2d (for a batch
+ of samples).
'''
def __init__(self):
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d1110c0b/src/core/tensor/math_kernel.cu
----------------------------------------------------------------------
diff --git a/src/core/tensor/math_kernel.cu b/src/core/tensor/math_kernel.cu
index d3f3335..482f223 100644
--- a/src/core/tensor/math_kernel.cu
+++ b/src/core/tensor/math_kernel.cu
@@ -308,25 +308,50 @@ __global__ void KernelRowMax(const size_t nrow, const size_t ncol, const float *
outPtr[idx] = maxval;
}
}
-__global__ void KernelComputeCrossEntropy(const size_t batchsize,
+__global__ void KernelComputeCrossEntropy(const bool int_target, const size_t batchsize,
const size_t dim, const float *p,
const int *t, float *loss) {
size_t sample = blockIdx.x * blockDim.x + threadIdx.x;
size_t num_threads = blockDim.x * gridDim.x;
- for (; sample < batchsize; sample += num_threads) {
- float prob_of_truth = p[sample * dim + t[sample]];
- loss[sample] = -std::log(max(prob_of_truth, FLT_MIN));
+ if (int_target) {
+ for (; sample < batchsize; sample += num_threads) {
+ float prob_of_truth = p[sample * dim + t[sample]];
+ loss[sample] = -std::log(max(prob_of_truth, FLT_MIN));
+ }
+ } else {
+ for (; sample < batchsize; sample += num_threads) {
+ float sum = 0.f;
+ for (size_t j = 0; j < dim; j++) {
+ sum += t[sample * dim + j];
+ }
+ loss[sample] = 0;
+ for (size_t j = 0, offset = sample * dim; j < dim; j++, offset++) {
+ loss[sample] -= t[offset] / sum * std::log(max(p[offset], FLT_MIN));
+ }
+ }
}
}
-__global__ void KernelSoftmaxCrossEntropyBwd(const size_t batchsize,
+__global__ void KernelSoftmaxCrossEntropyBwd(const bool int_target, const size_t batchsize,
const size_t dim, const float *p,
const int *t, float *grad) {
size_t sample = blockIdx.x * blockDim.x + threadIdx.x;
size_t num_threads = blockDim.x * gridDim.x;
- for (; sample < batchsize; sample += num_threads) {
- size_t pos = sample * dim + t[sample];
- grad[pos] = p[pos] - 1.0f; // TODO(wangwei) Consider p and grad are diff
+ if (int_target) {
+ for (; sample < batchsize; sample += num_threads) {
+ size_t pos = sample * dim + t[sample];
+ grad[pos] = p[pos] - 1.0f; // TODO(wangwei) Consider p and grad are diff
+ }
+ } else {
+ for (; sample < batchsize; sample += num_threads) {
+ float sum = 0.f;
+ for (size_t j = 0; j < dim; j++) {
+ sum += t[sample * dim + j];
+ }
+ for (size_t j = 0, offset = sample * dim; j < dim; j++, offset++) {
+ grad[offset] -= t[offset] / sum;
+ }
+ }
}
}
@@ -473,16 +498,16 @@ void sum(const size_t n, const float *in, float *out, cudaStream_t s) {
}
*/
-void ComputeCrossEntropy(size_t batchsize, const size_t dim, const float *p,
+void ComputeCrossEntropy(const bool int_target, size_t batchsize, const size_t dim, const float *p,
const int *t, float *loss, cudaStream_t stream) {
KernelComputeCrossEntropy <<<ceil(batchsize / CU1DBLOCKF), CU1DBLOCKF>>>
- (batchsize, dim, p, t, loss);
+ (int_target, batchsize, dim, p, t, loss);
}
-void SoftmaxCrossEntropyBwd(size_t batchsize, const size_t dim, const float *p,
+void SoftmaxCrossEntropyBwd(const bool int_target, size_t batchsize, const size_t dim, const float *p,
const int *t, float *grad, cudaStream_t stream) {
KernelSoftmaxCrossEntropyBwd <<<ceil(batchsize / CU1DBLOCKF), CU1DBLOCKF>>>
- (batchsize, dim, p, t, grad);
+ (int_target, batchsize, dim, p, t, grad);
}
void RowMax(const size_t nrow, const size_t ncol, const float *inPtr,
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d1110c0b/src/core/tensor/math_kernel.h
----------------------------------------------------------------------
diff --git a/src/core/tensor/math_kernel.h b/src/core/tensor/math_kernel.h
index cb0cb6a..7c7e84c 100644
--- a/src/core/tensor/math_kernel.h
+++ b/src/core/tensor/math_kernel.h
@@ -103,12 +103,12 @@ void div(const size_t n, const float *in1, const float *in2, float *out,
// void sum(const size_t n, const float *in, float *out, cudaStream_t s);
-void ComputeCrossEntropy(const size_t batchsize, const size_t dim,
- const float *p, const int *t, float *loss,
- cudaStream_t stream);
-void SoftmaxCrossEntropyBwd(const size_t batchsize, const size_t dim,
- const float *p, const int *t, float *grad,
- cudaStream_t stream);
+void ComputeCrossEntropy(bool int_target, const size_t batchsize,
+ const size_t dim, const float *p, const int *t,
+ float *loss, cudaStream_t stream);
+void SoftmaxCrossEntropyBwd(bool int_target, const size_t batchsize,
+ const size_t dim, const float *p, const int *t,
+ float *grad, cudaStream_t stream);
void RowMax(const size_t nrow, const size_t ncol, const float *inPtr,
float *outPtr, cudaStream_t stream);
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d1110c0b/src/core/tensor/tensor.cc
----------------------------------------------------------------------
diff --git a/src/core/tensor/tensor.cc b/src/core/tensor/tensor.cc
index 83e1a00..4898594 100644
--- a/src/core/tensor/tensor.cc
+++ b/src/core/tensor/tensor.cc
@@ -995,31 +995,33 @@ void Mult(const SType alpha, const Tensor &A, const Tensor &B, const SType beta,
// ************************
// Misc.
-// ***********************
+// ************************
void ComputeCrossEntropy(const Tensor &p, const Tensor &t, Tensor *loss) {
CHECK_LE(p.nDim(), 2u);
- CHECK_LE(t.nDim(), 2u); // TODO(wangwei) consider multi-labels.
+ CHECK_LE(t.nDim(), 2u);
size_t batchsize = 1;
if (p.nDim() == 2u) batchsize = p.shape(0);
size_t dim = p.Size() / batchsize;
TYPE_LANG_SWITCH(p.data_type(), DType, p.device()->lang(), Lang, {
p.device()->Exec([batchsize, dim, t, p, loss](Context *ctx) {
- ComputeCrossEntropy<DType, Lang>(batchsize, dim, p.block(), t.block(),
- loss->block(), ctx);
+ bool int_target = t.Size() == batchsize;
+ ComputeCrossEntropy<DType, Lang>(int_target, batchsize, dim, p.block(),
+ t.block(), loss->block(), ctx);
}, {p.block(), t.block()}, {loss->block()});
});
}
void SoftmaxCrossEntropyBwd(const Tensor &t, Tensor *p) {
CHECK_LE(p->nDim(), 2u);
- CHECK_LE(t.nDim(), 2u); // TODO(wangwei) consider multi-labels.
+ CHECK_LE(t.nDim(), 2u);
size_t batchsize = 1;
if (p->nDim() == 2u) batchsize = p->shape(0);
size_t dim = p->Size() / batchsize;
TYPE_LANG_SWITCH(p->data_type(), DType, p->device()->lang(), Lang, {
p->device()->Exec([batchsize, dim, t, p](Context *ctx) {
- SoftmaxCrossEntropyBwd<DType, Lang>(batchsize, dim, p->block(), t.block(),
- p->block(), ctx);
+ bool int_target = t.Size() == batchsize;
+ SoftmaxCrossEntropyBwd<DType, Lang>(int_target, batchsize, dim,
+ p->block(), t.block(), p->block(), ctx);
}, {p->block(), t.block()}, {p->block()});
});
}
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d1110c0b/src/core/tensor/tensor_math.h
----------------------------------------------------------------------
diff --git a/src/core/tensor/tensor_math.h b/src/core/tensor/tensor_math.h
index bf913c0..6d42211 100644
--- a/src/core/tensor/tensor_math.h
+++ b/src/core/tensor/tensor_math.h
@@ -347,19 +347,17 @@ void GEMM(const bool transA, const bool transB, const size_t nrowA,
LOG(FATAL) << "GEMM Not Implemented";
}
-/// Divide alpha by each element of 'in'.
-// following the consistency guide.
template <typename DType, typename Lang>
-void ComputeCrossEntropy(const size_t batchsize, const size_t dim,
- const Block *p, const Block *t, Block *loss,
- Context *ctx) {
+void ComputeCrossEntropy(bool int_target, const size_t batchsize,
+ const size_t dim, const Block *p, const Block *t,
+ Block *loss, Context *ctx) {
LOG(FATAL) << "Not Implemented";
}
template <typename DType, typename Lang>
-void SoftmaxCrossEntropyBwd(const size_t batchsize, const size_t dim,
- const Block *p, const Block *t, Block *grad,
- Context *ctx) {
+void SoftmaxCrossEntropyBwd(bool int_target, const size_t batchsize,
+ const size_t dim, const Block *p, const Block *t,
+ Block *grad, Context *ctx) {
LOG(FATAL) << "Not Implemented";
}
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d1110c0b/src/core/tensor/tensor_math_cpp.h
----------------------------------------------------------------------
diff --git a/src/core/tensor/tensor_math_cpp.h b/src/core/tensor/tensor_math_cpp.h
index f7e2b37..5167fba 100644
--- a/src/core/tensor/tensor_math_cpp.h
+++ b/src/core/tensor/tensor_math_cpp.h
@@ -566,23 +566,39 @@ void GEMV<float, lang::Cpp>(bool trans, const size_t m, const size_t n,
#endif // USE_CBLAS
template <>
-void ComputeCrossEntropy<float, lang::Cpp>(const size_t batchsize,
+void ComputeCrossEntropy<float, lang::Cpp>(bool int_target,
+ const size_t batchsize,
const size_t dim, const Block *p,
const Block *t, Block *loss,
Context *ctx) {
const float *pPtr = static_cast<const float *>(p->data());
const int *tPtr = static_cast<const int *>(t->data());
float *lossPtr = static_cast<float *>(loss->mutable_data());
- for (size_t i = 0; i < batchsize; i++) {
- int truth_idx = tPtr[i];
- CHECK_GE(truth_idx, 0);
- float prob_of_truth = pPtr[i * dim + truth_idx];
- lossPtr[i] = -std::log((std::max)(prob_of_truth, FLT_MIN));
+ if (int_target) {
+ for (size_t i = 0; i < batchsize; i++) {
+ int truth_idx = tPtr[i];
+ CHECK_GE(truth_idx, 0);
+ float prob_of_truth = pPtr[i * dim + truth_idx];
+ lossPtr[i] = -std::log((std::max)(prob_of_truth, FLT_MIN));
+ }
+ } else {
+ for (size_t i = 0;i < batchsize; i++) {
+ float sum = 0.f;
+ for (size_t j = 0; j < dim; j++) {
+ sum += tPtr[i * dim + j];
+ }
+ float loss = 0.f;
+ for (size_t j = 0, offset = i * dim; j < dim; j++, offset++) {
+ loss -= tPtr[offset] / sum * std::log((std::max)(pPtr[offset], FLT_MIN));
+ }
+ lossPtr[i] = loss;
+ }
}
}
template <>
-void SoftmaxCrossEntropyBwd<float, lang::Cpp>(const size_t batchsize,
+void SoftmaxCrossEntropyBwd<float, lang::Cpp>(bool int_target,
+ const size_t batchsize,
const size_t dim, const Block *p,
const Block *t, Block *grad,
Context *ctx) {
@@ -591,10 +607,22 @@ void SoftmaxCrossEntropyBwd<float, lang::Cpp>(const size_t batchsize,
const int *tPtr = static_cast<const int *>(t->data());
float *gradPtr = static_cast<float *>(grad->mutable_data());
- for (size_t i = 0; i < batchsize; i++) {
- int truth_idx = static_cast<int>(tPtr[i]);
- CHECK_GE(truth_idx, 0);
- gradPtr[i * dim + truth_idx] -= 1.0;
+ if (int_target) {
+ for (size_t i = 0; i < batchsize; i++) {
+ int truth_idx = static_cast<int>(tPtr[i]);
+ CHECK_GE(truth_idx, 0);
+ gradPtr[i * dim + truth_idx] -= 1.0;
+ }
+ } else {
+ for (size_t i = 0; i < batchsize; i++) {
+ float sum = 0.f;
+ for (size_t j = 0; j < dim; j++) {
+ sum += tPtr[i * dim + j];
+ }
+ for (size_t j = 0, offset = i * dim; j < dim; j++, offset++) {
+ gradPtr[offset] -= tPtr[offset] / sum;
+ }
+ }
}
}
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d1110c0b/src/core/tensor/tensor_math_cuda.h
----------------------------------------------------------------------
diff --git a/src/core/tensor/tensor_math_cuda.h b/src/core/tensor/tensor_math_cuda.h
index 4daa97a..8a9e47a 100644
--- a/src/core/tensor/tensor_math_cuda.h
+++ b/src/core/tensor/tensor_math_cuda.h
@@ -432,17 +432,20 @@ void GEMM<float, lang::Cuda>(const bool transA, const bool transB,
}
template <>
-void ComputeCrossEntropy<float, lang::Cuda>(const size_t batchsize,
+void ComputeCrossEntropy<float, lang::Cuda>(bool int_target,
+ const size_t batchsize,
const size_t dim, const Block* p,
const Block* t, Block* loss,
Context* ctx) {
const float* pPtr = static_cast<const float*>(p->data());
const int* tPtr = static_cast<const int*>(t->data());
float* lossPtr = static_cast<float*>(loss->mutable_data());
- cuda::ComputeCrossEntropy(batchsize, dim, pPtr, tPtr, lossPtr, ctx->stream);
+ cuda::ComputeCrossEntropy(int_target, batchsize, dim, pPtr, tPtr, lossPtr,
+ ctx->stream);
}
template <>
-void SoftmaxCrossEntropyBwd<float, lang::Cuda>(const size_t batchsize,
+void SoftmaxCrossEntropyBwd<float, lang::Cuda>(bool int_target,
+ const size_t batchsize,
const size_t dim, const Block* p,
const Block* t, Block* grad,
Context* ctx) {
@@ -450,7 +453,7 @@ void SoftmaxCrossEntropyBwd<float, lang::Cuda>(const size_t batchsize,
const float* pPtr = static_cast<const float*>(p->data());
const int* tPtr = static_cast<const int*>(t->data());
float* gradPtr = static_cast<float*>(grad->mutable_data());
- cuda::SoftmaxCrossEntropyBwd(batchsize, dim, pPtr, tPtr, gradPtr,
+ cuda::SoftmaxCrossEntropyBwd(int_target, batchsize, dim, pPtr, tPtr, gradPtr,
ctx->stream);
}
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d1110c0b/src/model/loss/softmax_cross_entropy.cc
----------------------------------------------------------------------
diff --git a/src/model/loss/softmax_cross_entropy.cc b/src/model/loss/softmax_cross_entropy.cc
index 3411fbe..6c5d3a8 100644
--- a/src/model/loss/softmax_cross_entropy.cc
+++ b/src/model/loss/softmax_cross_entropy.cc
@@ -26,7 +26,8 @@ Tensor SoftmaxCrossEntropy::Forward(int flag, const Tensor& prediction,
CHECK(buf_.empty()) << "Do not call Forward successively for more than twice."
<< " The calling pattern is [Forward|Evaluate] Backward";
size_t batchsize = 1;
- if (prediction.nDim() > 1) batchsize = prediction.shape().at(0);
+ if (prediction.nDim() == 2)
+ batchsize = prediction.shape(0);
size_t dim = prediction.Size() / batchsize;
const Tensor& input = Reshape(prediction, Shape{batchsize, dim});
Tensor prob = SoftMax(input);
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d1110c0b/test/python/test_optimizer.py
----------------------------------------------------------------------
diff --git a/test/python/test_optimizer.py b/test/python/test_optimizer.py
index afdf337..bb3613d 100644
--- a/test/python/test_optimizer.py
+++ b/test/python/test_optimizer.py
@@ -20,7 +20,6 @@ import os
import unittest
import numpy as np
-sys.path.append(os.path.join(os.path.dirname(__file__), '../../build/python'))
import singa.tensor as tensor
import singa.optimizer as opt
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d1110c0b/test/singa/test_cross_entropy.cc
----------------------------------------------------------------------
diff --git a/test/singa/test_cross_entropy.cc b/test/singa/test_cross_entropy.cc
index d63695e..3d704c8 100644
--- a/test/singa/test_cross_entropy.cc
+++ b/test/singa/test_cross_entropy.cc
@@ -31,11 +31,13 @@ class TestSoftmaxCrossEntropy : public ::testing::Test {
virtual void SetUp() {
p.Reshape(singa::Shape{2, 4});
t.Reshape(singa::Shape{2, 1});
+ ta.Reshape(singa::Shape{2, 4});
}
const float pdat[8] = {0.1f, 0.1f, 0.1f, 0.1f, 0.1f, 0.1f, 0.1f, 0.1f };
const int tdat[2] = {0, 2};
+ const int tary[8] = {1, 0, 0, 0, 0, 0, 1, 0};
- singa::Tensor p, t;
+ singa::Tensor p, t, ta;
};
TEST_F(TestSoftmaxCrossEntropy, CppForward) {
@@ -52,6 +54,20 @@ TEST_F(TestSoftmaxCrossEntropy, CppForward) {
EXPECT_FLOAT_EQ(ldat[1], result_test);
}
+TEST_F(TestSoftmaxCrossEntropy, CppForwardAryTarget) {
+ p.CopyDataFromHostPtr(pdat, 8);
+ ta.AsType(singa::kInt);
+ ta.CopyDataFromHostPtr(tary, 8);
+
+ singa::SoftmaxCrossEntropy cross_entropy;
+ const Tensor& loss = cross_entropy.Forward(singa::kEval, p, ta);
+ auto ldat = loss.data<float>();
+
+ const float result_test = (float) -log(0.25);
+ EXPECT_FLOAT_EQ(ldat[0], result_test);
+ EXPECT_FLOAT_EQ(ldat[1], result_test);
+}
+
TEST_F(TestSoftmaxCrossEntropy, CppBackward) {
p.CopyDataFromHostPtr(pdat, 8);
t.AsType(singa::kInt);
@@ -72,6 +88,25 @@ TEST_F(TestSoftmaxCrossEntropy, CppBackward) {
EXPECT_FLOAT_EQ(gdat[7], 0.25);
}
+TEST_F(TestSoftmaxCrossEntropy, CppBackwardAryTarget) {
+ p.CopyDataFromHostPtr(pdat, 8);
+ ta.AsType(singa::kInt);
+ ta.CopyDataFromHostPtr(tary, 8);
+
+ singa::SoftmaxCrossEntropy cross_entropy;
+ cross_entropy.Forward(singa::kTrain, p, ta);
+ const Tensor& grad = cross_entropy.Backward();
+
+ auto gdat = grad.data<float>();
+ EXPECT_FLOAT_EQ(gdat[0], -0.75);
+ EXPECT_FLOAT_EQ(gdat[1], 0.25);
+ EXPECT_FLOAT_EQ(gdat[2], 0.25);
+ EXPECT_FLOAT_EQ(gdat[3], 0.25);
+ EXPECT_FLOAT_EQ(gdat[4], 0.25);
+ EXPECT_FLOAT_EQ(gdat[5], 0.25);
+ EXPECT_FLOAT_EQ(gdat[6], -0.75);
+ EXPECT_FLOAT_EQ(gdat[7], 0.25);
+}
#ifdef USE_CUDA
TEST_F(TestSoftmaxCrossEntropy, CudaForward) {
@@ -91,6 +126,24 @@ TEST_F(TestSoftmaxCrossEntropy, CudaForward) {
EXPECT_FLOAT_EQ(ldat[1], result_test);
}
+TEST_F(TestSoftmaxCrossEntropy, CudaForwardAryTarget) {
+ singa::SoftmaxCrossEntropy cross_entropy;
+ auto dev = std::make_shared<singa::CudaGPU>();
+ p.ToDevice(dev);
+ ta.ToDevice(dev);
+ p.CopyDataFromHostPtr(pdat, 8);
+ ta.CopyDataFromHostPtr(tary, 8);
+
+ Tensor loss = cross_entropy.Forward(singa::kEval, p, ta);
+ loss.ToHost();
+ auto ldat = loss.data<float>();
+
+ const float result_test = -log(0.25);
+ EXPECT_FLOAT_EQ(ldat[0], result_test);
+ EXPECT_FLOAT_EQ(ldat[1], result_test);
+}
+
+
TEST_F(TestSoftmaxCrossEntropy, CudaBackward) {
singa::SoftmaxCrossEntropy cross_entropy;
auto dev = std::make_shared<singa::CudaGPU>();
@@ -113,4 +166,28 @@ TEST_F(TestSoftmaxCrossEntropy, CudaBackward) {
EXPECT_FLOAT_EQ(gdat[6], -0.75);
EXPECT_FLOAT_EQ(gdat[7], 0.25);
}
+
+TEST_F(TestSoftmaxCrossEntropy, CudaBackwardAryTarget) {
+ singa::SoftmaxCrossEntropy cross_entropy;
+ auto dev = std::make_shared<singa::CudaGPU>();
+ p.ToDevice(dev);
+ ta.ToDevice(dev);
+ p.CopyDataFromHostPtr(pdat, 8);
+ ta.CopyDataFromHostPtr(tary, 8);
+
+ cross_entropy.Forward(singa::kTrain, p, ta);
+ Tensor grad = cross_entropy.Backward();
+
+ grad.ToHost();
+ auto gdat = grad.data<float>();
+ EXPECT_FLOAT_EQ(gdat[0], -0.75);
+ EXPECT_FLOAT_EQ(gdat[1], 0.25);
+ EXPECT_FLOAT_EQ(gdat[2], 0.25);
+ EXPECT_FLOAT_EQ(gdat[3], 0.25);
+ EXPECT_FLOAT_EQ(gdat[4], 0.25);
+ EXPECT_FLOAT_EQ(gdat[5], 0.25);
+ EXPECT_FLOAT_EQ(gdat[6], -0.75);
+ EXPECT_FLOAT_EQ(gdat[7], 0.25);
+}
+
#endif // USE_CUDA
[2/2] incubator-singa git commit: Check and fix cudnn engine for
concat and slice layer
Posted by zh...@apache.org.
Check and fix cudnn engine for concat and slice layer
Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/84811118
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/84811118
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/84811118
Branch: refs/heads/master
Commit: 848111181f7c3d6844c53461c0f9dfd43db47b13
Parents: d1110c0
Author: RUAN0007 <ru...@gmail.com>
Authored: Tue Nov 29 10:46:39 2016 +0800
Committer: RUAN0007 <ru...@gmail.com>
Committed: Tue Nov 29 10:46:39 2016 +0800
----------------------------------------------------------------------
python/singa/layer.py | 10 ++++++++--
1 file changed, 8 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/84811118/python/singa/layer.py
----------------------------------------------------------------------
diff --git a/python/singa/layer.py b/python/singa/layer.py
index 0244454..95b78c9 100644
--- a/python/singa/layer.py
+++ b/python/singa/layer.py
@@ -814,7 +814,10 @@ class Concat(Layer):
self.in_shapes = input_sample_shapes
self.axis = axis
self.conf.concat_conf.axis = axis
- self.layer = _create_layer(engine, 'Concat')
+ if engine == "cudnn":
+ self.layer = _create_layer('singacuda', 'Concat')
+ else:
+ self.layer = _create_layer(engine, 'Concat')
if input_sample_shapes is not None:
self.setup(input_sample_shapes)
@@ -836,7 +839,10 @@ class Slice(Layer):
self.axis = axis
self.conf.slice_conf.axis = axis
self.conf.slice_conf.slice_point.extend(slice_point)
- self.layer = _create_layer(engine, 'Slice')
+ if engine == "cudnn":
+ self.layer = _create_layer('singacuda', 'Slice')
+ else:
+ self.layer = _create_layer(engine, 'Slice')
if input_sample_shape is not None:
self.setup(input_sample_shape)