You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@singa.apache.org by wa...@apache.org on 2018/07/11 08:29:36 UTC
[4/4] incubator-singa git commit: SINGA-380 Fix bugs from Reshape
SINGA-380 Fix bugs from Reshape
Update reshape API in C++ and Python.
The C++ Tensor method reshape changes the original tensor;
all other reshape methods return a new tensor (which shares memory with the original tensor if possible).
APIs for transpose are updated in the same way.
Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/b30d7ea5
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/b30d7ea5
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/b30d7ea5
Branch: refs/heads/master
Commit: b30d7ea55cd58bb0858aa354833c1ba9a3242470
Parents: 58e6640
Author: Wang Wei <wa...@gmail.com>
Authored: Mon Jul 9 23:52:10 2018 +0800
Committer: wang wei <wa...@comp.nus.edu.sg>
Committed: Wed Jul 11 15:24:27 2018 +0800
----------------------------------------------------------------------
examples/autograd/mnist_cnn.py | 17 +-
examples/cifar10/cnn-parallel.cc | 8 +-
examples/cifar10/vgg-parallel.cc | 8 +-
examples/imagenet/alexnet/alexnet.cc | 2 +-
examples/imagenet/alexnet/ilsvrc12.h | 16 +-
include/singa/core/tensor.h | 162 ++++----
python/singa/autograd.py | 273 +++++++-------
python/singa/tensor.py | 109 +++---
src/api/core_tensor.i | 19 +-
src/core/tensor/tensor.cc | 297 ++++-----------
src/core/tensor/tensor_math.h | 2 +-
src/core/tensor/tensor_math_cuda.h | 323 ++++------------
src/io/image_transformer.cc | 573 ++++++++++++++---------------
src/model/layer/batchnorm.cc | 15 +-
src/model/layer/convolution.cc | 8 +-
src/model/layer/cudnn_batchnorm.cc | 4 +-
src/model/layer/dense.cc | 14 +-
src/model/layer/flatten.cc | 3 +-
src/model/layer/lrn.cc | 9 +-
src/model/layer/opencl_convolution.cc | 58 +--
src/model/layer/rnn.cc | 2 +-
src/model/operation/convolution.cc | 67 ++--
src/model/updater/local_updater.cc | 4 +-
23 files changed, 849 insertions(+), 1144 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/b30d7ea5/examples/autograd/mnist_cnn.py
----------------------------------------------------------------------
diff --git a/examples/autograd/mnist_cnn.py b/examples/autograd/mnist_cnn.py
index 43a22ba..f78ccc8 100755
--- a/examples/autograd/mnist_cnn.py
+++ b/examples/autograd/mnist_cnn.py
@@ -84,7 +84,7 @@ if __name__ == '__main__':
dev = device.get_default_device()
else:
print('Using GPU')
- dev = device.create_cuda_gpu()
+ dev = device.create_cuda_gpu_on(1)
train, test = load_data(args.file_path)
@@ -92,7 +92,7 @@ if __name__ == '__main__':
num_classes = 10
epochs = 1
- sgd = optimizer.SGD(0.001)
+ sgd = optimizer.SGD(0.01)
x_train = preprocess(train[0])
y_train = to_categorical(train[1], num_classes)
@@ -111,7 +111,6 @@ if __name__ == '__main__':
def forward(x, t):
-
y = conv1(x)
y = autograd.relu(y)
y = autograd.max_pool_2d(y)
@@ -124,11 +123,11 @@ if __name__ == '__main__':
return loss, y
autograd.training = True
- for epoch in range(50):
+ for epoch in range(epochs):
for i in range(batch_number):
inputs = tensor.Tensor(device=dev, data=x_train[ i * 100:(1 + i) * 100], stores_grad=False)
targets = tensor.Tensor(device=dev, data=y_train[i * 100:(1 + i) * 100], requires_grad=False, stores_grad=False)
-
+
loss, y = forward(inputs, targets)
accuracy_rate = accuracy(tensor.to_numpy(y),
@@ -136,12 +135,6 @@ if __name__ == '__main__':
if (i % 5 == 0):
print('accuracy is:', accuracy_rate, 'loss is:',
tensor.to_numpy(loss)[0])
-
+
for p, gp in autograd.backward(loss):
sgd.apply(epoch, gp, p, '')
-
-
-
-
-
-
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/b30d7ea5/examples/cifar10/cnn-parallel.cc
----------------------------------------------------------------------
diff --git a/examples/cifar10/cnn-parallel.cc b/examples/cifar10/cnn-parallel.cc
index 8cc3352..4bee575 100644
--- a/examples/cifar10/cnn-parallel.cc
+++ b/examples/cifar10/cnn-parallel.cc
@@ -154,20 +154,20 @@ void Train(float lr, int num_epoch, string data_dir) {
train_y = train.second;
LOG(INFO) << "Slicing training data...";
- train_x_1.Reshape(Shape{nsamples / 2, train.first.shape(1),
+ train_x_1 = Tensor(Shape{nsamples / 2, train.first.shape(1),
train.first.shape(2), train.first.shape(3)});
LOG(INFO) << "Copying first data slice...";
CopyDataToFrom(&train_x_1, train_x, train_x.Size() / 2);
- train_x_2.Reshape(Shape{nsamples / 2, train.first.shape(1),
+ train_x_2 = Tensor(Shape{nsamples / 2, train.first.shape(1),
train.first.shape(2), train.first.shape(3)});
LOG(INFO) << "Copying second data slice...";
CopyDataToFrom(&train_x_2, train_x, train_x.Size() / 2, 0,
train_x.Size() / 2);
- train_y_1.Reshape(Shape{nsamples / 2});
+ train_y_1 = Tensor(Shape{nsamples / 2});
train_y_1.AsType(kInt);
LOG(INFO) << "Copying first label slice...";
CopyDataToFrom(&train_y_1, train_y, train_y.Size() / 2);
- train_y_2.Reshape(Shape{nsamples / 2});
+ train_y_2 = Tensor(Shape{nsamples / 2});
train_y_2.AsType(kInt);
LOG(INFO) << "Copying second label slice...";
CopyDataToFrom(&train_y_2, train_y, train_y.Size() / 2, 0,
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/b30d7ea5/examples/cifar10/vgg-parallel.cc
----------------------------------------------------------------------
diff --git a/examples/cifar10/vgg-parallel.cc b/examples/cifar10/vgg-parallel.cc
index 90e9fce..33c533b 100644
--- a/examples/cifar10/vgg-parallel.cc
+++ b/examples/cifar10/vgg-parallel.cc
@@ -223,20 +223,20 @@ void Train(float lr, int num_epoch, string data_dir) {
train_y = train.second;
LOG(INFO) << "Slicing training data...";
- train_x_1.Reshape(Shape{nsamples / 2, train.first.shape(1),
+ train_x_1 = Tensor(Shape{nsamples / 2, train.first.shape(1),
train.first.shape(2), train.first.shape(3)});
LOG(INFO) << "Copying first data slice...";
CopyDataToFrom(&train_x_1, train_x, train_x.Size() / 2);
- train_x_2.Reshape(Shape{nsamples / 2, train.first.shape(1),
+ train_x_2 = Tensor(Shape{nsamples / 2, train.first.shape(1),
train.first.shape(2), train.first.shape(3)});
LOG(INFO) << "Copying second data slice...";
CopyDataToFrom(&train_x_2, train_x, train_x.Size() / 2, 0,
train_x.Size() / 2);
- train_y_1.Reshape(Shape{nsamples / 2});
+ train_y_1 = Tensor(Shape{nsamples / 2});
train_y_1.AsType(kInt);
LOG(INFO) << "Copying first label slice...";
CopyDataToFrom(&train_y_1, train_y, train_y.Size() / 2);
- train_y_2.Reshape(Shape{nsamples / 2});
+ train_y_2 = Tensor(Shape{nsamples / 2});
train_y_2.AsType(kInt);
LOG(INFO) << "Copying second label slice...";
CopyDataToFrom(&train_y_2, train_y, train_y.Size() / 2, 0,
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/b30d7ea5/examples/imagenet/alexnet/alexnet.cc
----------------------------------------------------------------------
diff --git a/examples/imagenet/alexnet/alexnet.cc b/examples/imagenet/alexnet/alexnet.cc
index 4ac1130..2d8db2d 100644
--- a/examples/imagenet/alexnet/alexnet.cc
+++ b/examples/imagenet/alexnet/alexnet.cc
@@ -174,7 +174,7 @@ void TrainOneEpoch(FeedForwardNet &net, ILSVRC &data,
size_t b = 0;
size_t n_read;
Timer timer, ttr;
- Tensor prefetch_x, prefetch_y;
+ Tensor prefetch_x(Shape{batchsize, 3, kCropSize, kCropSize}), prefetch_y(Shape{batchsize}, kInt);
string binfile = bin_folder + "/train1.bin";
timer.Tick();
data.LoadData(kTrain, binfile, batchsize, &prefetch_x, &prefetch_y, &n_read,
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/b30d7ea5/examples/imagenet/alexnet/ilsvrc12.h
----------------------------------------------------------------------
diff --git a/examples/imagenet/alexnet/ilsvrc12.h b/examples/imagenet/alexnet/ilsvrc12.h
index 74fffbb..05b3451 100644
--- a/examples/imagenet/alexnet/ilsvrc12.h
+++ b/examples/imagenet/alexnet/ilsvrc12.h
@@ -43,6 +43,12 @@
using std::string;
using namespace singa::io;
namespace singa {
+
+ /// size for resizing
+const size_t kImageSize = 256;
+const size_t kImageNBytes = 3 * kImageSize * kImageSize;
+/// size for cropping
+const size_t kCropSize = 227;
/// For reading ILSVRC2012 image data as tensors.
class ILSVRC {
public:
@@ -105,11 +111,7 @@ class ILSVRC {
void WriteMean(Tensor &mean, string path);
private:
- /// size for resizing
- const size_t kImageSize = 256;
- const size_t kImageNBytes = 3 * kImageSize * kImageSize;
- /// size for cropping
- const size_t kCropSize = 227;
+
Tensor mean;
string last_read_file = "";
@@ -299,9 +301,7 @@ std::thread ILSVRC::AsyncLoadData(int flag, string file, size_t read_size,
size_t ILSVRC::LoadData(int flag, string file, size_t read_size, Tensor *x,
Tensor *y, size_t *n_read, int nthreads) {
- x->Reshape(Shape{read_size, 3, kCropSize, kCropSize});
- y->AsType(kInt);
- y->Reshape(Shape{read_size});
+
if (file != last_read_file) {
if (reader != nullptr) {
reader->Close();
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/b30d7ea5/include/singa/core/tensor.h
----------------------------------------------------------------------
diff --git a/include/singa/core/tensor.h b/include/singa/core/tensor.h
index 5921762..a73821c 100644
--- a/include/singa/core/tensor.h
+++ b/include/singa/core/tensor.h
@@ -57,47 +57,38 @@ class Tensor {
public:
~Tensor();
Tensor();
- explicit Tensor(Shape &&shape, DataType dtype = kFloat32);
+
+ /// Constructor using default device.
explicit Tensor(const Shape &shape, DataType dtype = kFloat32);
- Tensor(Shape &&shape,
- std::shared_ptr<Device> dev,
- DataType dtype = kFloat32);
+ /// Constructor with shape, device and data type
Tensor(const Shape &shape,
std::shared_ptr<Device> dev,
DataType dtype = kFloat32);
- /// Copy Tensor to share the internal data. No deep copy.
+ /// Copy constructor. No deep copy.
Tensor(const Tensor &from);
- /// Copy Tensor to share the internal data. No deep copy.
- /// For 2 tensors sharing same block but different strides.
- Tensor(const Tensor &from, Shape &new_shape, vector<int> &new_strides);
- /// Copy Tensor to share the internal data. No deep copy.
+
+ /// Move constructor. No deep copy.
Tensor(Tensor &&from);
+ // --------------------------------------------------------------------------
+ // ---Following methods return info of the class without making any changes--
+ // --------------------------------------------------------------------------
+
/// For functions in xx_math.cc to access the block.
/// Users should not operate against Block directly.
/// block_ is allocated in constructors.
Block *block() const { return block_; }
- void SetBlock(Block *block);
std::shared_ptr<Device> device() const { return device_; }
- /// return immutable Tensor values with given type.
+ /// Return immutable Tensor values with given type.
template <typename SType>
const SType *data() const {
return static_cast<const SType *>(block()->data());
}
- /// used for swig code to convert Tensor into numpy array.
- /// It gets data into 'value'
- template <typename SType>
- void GetValue(SType *value, const size_t num) {
- CHECK(device_ == defaultDevice);
- const SType* ptr = data<SType>();
- for (size_t i = 0; i < num; i++) value[i] = ptr[i];
- }
-
/// data type, including kFloat16, kFloat32, kInt
const DataType data_type() const { return data_type_; }
@@ -113,28 +104,55 @@ class Tensor {
bool empty() const { return nDim() == 0; }
/// Check if the tensor's last stride==1
- bool transpose() const { return (strides_.back() != 1); }
+ bool transpose() const {
+ if (!strides_.empty()) {
+ auto last = strides_.front();
+ for (auto s : strides_) {
+ if (s > last)
+ return true;
+ last = s;
+ }
+ }
+ return false;
+ }
const vector<int>& strides() const { return strides_; }
- /// return true if the content of the tensor is initialized
+ /// Return true if the content of the tensor is initialized
bool initailized() const {
return block_ != nullptr && block_->initialized();
}
- /// return number of total elements
+ /// Return number of total elements
size_t Size() const {
if (block_ == nullptr) return 0u;
CHECK_EQ(block_->size() % SizeOf(data_type_), 0u);
return block_->size() / SizeOf(data_type_);
}
- /// return memory size (i.e., Bytes)
+ /// Return memory size (i.e., Bytes)
size_t MemSize() const { return block_->size(); }
- /// Reset the tensor shape, it may reallocate block, if MemSize() changes.
- Tensor Reshape(const Shape &shape);
- Tensor Reshape(Shape &&shape);
+ /// used for swig code to convert Tensor into numpy array.
+ /// It gets data into 'value'
+ template <typename SType>
+ void GetValue(SType *value, const size_t num) {
+ CHECK(device_ == defaultDevice);
+ const SType* ptr = data<SType>();
+ for (size_t i = 0; i < num; i++) value[i] = ptr[i];
+ }
+
+ /// Serialize data, shape and transpose to protobuf object.
+ void ToProto(singa::TensorProto *proto) const;
+
+ /// Return average L1 norm
+ float L1() const;
+
+ /// Return average L2 norm
+ float L2() const;
+ // --------------------------------------------------------------------------
+ // ---Following methods changes the internal members
+ // --------------------------------------------------------------------------
/// Reset the shape, device, and data type as given tensor.
/// If block size changes, then reallocate a new block.
@@ -155,6 +173,8 @@ class Tensor {
template <typename SType>
void SetValue(const SType x);
+ void SetShape(const Shape& shape);
+
/// For init the tensor values, copy 'num' elements from 'src' to the internal
/// memory with 'offset' (elements).
template <typename SType>
@@ -165,46 +185,41 @@ class Tensor {
/// Meta data would not be copied!
void CopyData(const Tensor &other);
- void RepeatData(vector<size_t> repeats, int axis, int total_repeats, const Tensor &other);
-
/// Deserialize data, shape and transpose from protobuf object.
void FromProto(const singa::TensorProto &proto);
- /// Serialize data, shape and transpose to protobuf object.
- void ToProto(singa::TensorProto *proto) const;
- /// return an exactly the same Tensor with data been deep copied to the given
- /// device. If 'device' is nullptr, then clone it one the current device.
- Tensor Clone(std::shared_ptr<Device> device = nullptr) const;
+ /// TODO(wangwei) merge RepeatData into Repeat?
+ void RepeatData(const vector<size_t>& repeats, int axis, int total_repeats,
+ const Tensor &other);
- Tensor Repeat(vector<size_t> repeats, int axis, std::shared_ptr<Device> device = nullptr) ;
+ // --------------------------------------------------------------------------
+ // ---Following methods returns a new Tensor without change original tensor
+ // --------------------------------------------------------------------------
- // Tensor operations
-
- /// Matrix transpose. Valid only if shape.size() == 2.
- /// No data copy, just set the transpose_ filed of the returned tensor.
- Tensor T() const;
-
- /// Reverse the shape vector
- Tensor Transpose() const;
+ Tensor Repeat(const vector<size_t>& repeats, int axis,
+ std::shared_ptr<Device> device = nullptr);
- /// Change the axes
- Tensor Transpose(const vector<size_t> &axes) const;
+ /// return an exactly the same Tensor with data been deep copied to the given
+ /// device. If 'device' is nullptr, then clone it one the current device.
+ Tensor Clone(std::shared_ptr<Device> device = nullptr) const;
- /// Copy the meta info with data block shared.
+ // --------------------------------------------------------------------------
+ // ---Following methods change the tensor and return itself
+ // --------------------------------------------------------------------------
+ /// Copy assignment
Tensor &operator=(const Tensor &in);
- /// Copy the meta info with data block shared.
+ /// Move assignment
Tensor &operator=(Tensor &&in);
Tensor &operator+=(const Tensor &in);
- // void operator+=(Tensor&& in);
+
Tensor &operator-=(const Tensor &in);
- // void operator-=(Tensor&& in);
+
Tensor &operator*=(const Tensor &in);
- // void operator*=(Tensor&& in);
+
Tensor &operator/=(const Tensor &in);
- // void operator/=(Tensor&& in);
// Scalar operations.
@@ -224,10 +239,19 @@ class Tensor {
template <typename SType>
Tensor &operator/=(const SType x);
- /// Return average L1 norm
- float L1() const;
- /// Return average L2 norm
- float L2() const;
+ /// change the shape (and stride); the block may be reallocated.
+ Tensor &Reshape(const Shape &shape);
+
+ /// Matrix transpose. Valid only if shape.size() == 2.
+ Tensor& T();
+
+ /// Reverse the shape vector
+ Tensor& Transpose();
+
+ /// Change the axes
+ Tensor& Transpose(const vector<size_t> &axes);
+
+ protected:
//generate strides automatically if stride field is not passed
void generate_strides() {
@@ -259,10 +283,10 @@ class Tensor {
vector<int> strides_ = {};
}; //end of tensor class
+
inline size_t Product(const Shape &shape, int start = 0, size_t len = 0) {
if (len == 0) len = shape.size();
- if (len == 0)
- return 0;
+ if (len == 0) return 0;
CHECK_LE(len, shape.size());
size_t v = 1;
for (unsigned int i = start; i < len; i++) v *= shape[i];
@@ -275,24 +299,31 @@ inline void CheckDataTypeAndLang(const Tensor &in1, const Tensor &in2) {
CHECK_EQ(in1.device()->lang(), in2.device()->lang());
}
+
template <typename FromType, typename ToType>
ToType TypeCast(const FromType &x) {
// TODO(wangwei) cast fp16; prevent some casts, e.g., float to char
return static_cast<ToType>(x);
}
+
+/// Reshape the given tensor and generate a new tensor,
+/// which shares the memory with in if possible
Tensor Reshape(const Tensor &in, const Shape &s);
-Tensor Reshape(const Tensor &in, Shape &&s);
-// For tensors with sparse content, e.g., missing columns or rows.
-// class SparseTensor : public Tensor {};
+/// Reverse the shape vector
+Tensor Transpose(const Tensor& in);
+
+/// Change the axes
+Tensor Transpose(const Tensor& in, const vector<size_t> &axes);
/// Copy 'num' elements of src to dst.
/// The first 'src_offset' ('dst_offset') elements will be skipped.
void CopyDataToFrom(Tensor *dst, const Tensor &src, const size_t num,
const size_t dst_offset = 0, const size_t src_offset = 0);
-void RepeatDataToFrom(bool broadcast_flag, vector<size_t> repeats, int axis,
+
+void RepeatDataToFrom(bool broadcast_flag, const vector<size_t>& repeats, int axis,
Tensor *dst, const Tensor &in, const size_t num);
// =============Element-wise operations====================================
@@ -411,6 +442,8 @@ void Div(const SType x, const Tensor &in, Tensor *out);
template <typename SType = float>
SType Sum(const Tensor &in);
+
+
// ============Matrix (row/column) operations==================================
/// Average elements in the Tensor, currently only support vector and matrix.
/// if 'axis' is 0, average all rows into a single row
@@ -510,8 +543,8 @@ void SoftmaxCrossEntropyBwd(const Tensor &t, Tensor *p);
/// To be called by pysinga autograd operations;
/// swig ignores the const qualifier http://www.swig.org/Doc3.0/SWIGPlus.html#SWIGPlus_const
-const Tensor CrossEntropyFwd(const Tensor& p, const Tensor& t);
-const Tensor SoftmaxCrossEntropyBwd(const Tensor& p, const Tensor& t);
+Tensor CrossEntropyFwd(const Tensor& p, const Tensor& t);
+Tensor SoftmaxCrossEntropyBwd(const Tensor& p, const Tensor& t);
/// Return a tensor consisting of rows ([start, end)) from 'in'. It copies the
/// values from 'in'. 'in' ia a 2D Tensor.
@@ -519,7 +552,8 @@ Tensor CopyRows(const Tensor &in, const size_t start, const size_t end);
/// Alias of CopyRows
Tensor SliceRows(const Tensor &in, const size_t start, const size_t end);
/// Slice the input tensor along the give axis to generate a new tensor
-Tensor SliceOn(const Tensor &in, const size_t start, const size_t end, int axis);
+Tensor SliceOn(const Tensor &in, const size_t start, const size_t end,
+ int axis);
/// Return a tensor consisting of columns ([start, end)) from 'in'. It copies
/// the values from 'in'. 'in' is a 2D Tensor.
Tensor CopyColumns(const Tensor &in, const size_t start, const size_t end);
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/b30d7ea5/python/singa/autograd.py
----------------------------------------------------------------------
diff --git a/python/singa/autograd.py b/python/singa/autograd.py
index 63698c2..aa6b37a 100755
--- a/python/singa/autograd.py
+++ b/python/singa/autograd.py
@@ -33,6 +33,126 @@ CTensor = singa.Tensor
training = False
+
+def infer_dependency(op):
+ '''
+ Infer the dependency of all operations with the
+ given op as the last operation.
+
+ Operation A is depending on B is A uses the output(s) of B.
+
+ Args:
+ op: an Operation instance, e.g. the loss operation.
+
+ Return:
+ a Counter instance with the operation as the key,
+ and the number of operations that are depending on it as the value
+ '''
+ # dependency = {}
+ dependency_count = Counter()
+ queue = deque([op])
+ while len(queue) > 0:
+ cur_op = queue.pop()
+ for src_op, _, _, _ in cur_op.src:
+ if src_op not in dependency_count and \
+ (not isinstance(src_op, Dummy)):
+ # dependency[src_op] = [Counter() for _ in src_op.y_id2idx]
+ dependency_count[src_op] = 0
+ queue.append(src_op)
+ # y_idx = src_op.y_id2idx[x_id]
+ # dependency[src_op][y_idx][cur_op] += 1
+ dependency_count[src_op] += 1
+ return dependency_count
+
+
+def gradients(y, dy=None):
+ grads = {} # mapping: x->dx if x.stores_grad
+ for p, dp in backward(y, dy):
+ gradients[p] = dp
+ return grads
+
+
+def backward(y, dy=None):
+ '''
+ Run the backward propagation starting at y.
+
+ Args:
+ y: a Tensor instance, usually the loss
+ dy: a number or a Tensor instance, for the gradient of the
+ objective/loss w.r.t y, usually 1.0
+
+ Return:
+ a dictionary storing the gradient tensors of all tensors
+ whose stores_grad is true (e.g. parameter tensors)
+ '''
+ dependency = infer_dependency(y.creator)
+ assert y.size() == 1, 'y must be a Tensor with a single value;'\
+ 'size of y is % d' % y.size()
+
+ # by default the dy is a tensor with 1.0 for each sample;
+ if dy is None:
+ dy = float(1.0)
+ elif isinstance(dy, Tensor):
+ dy = dy.data
+ else:
+ dy = float(dy)
+
+ # ready is a queue of (operation, dy list)
+ ready = deque([(y.creator, (dy,))])
+ not_ready = {} # mapping: op->[dy]
+
+ if y.stores_grad:
+ gradients[y] = dy
+
+ while len(ready) > 0:
+ op, dys = ready.pop()
+ if not op.requires_grad or isinstance(op, Dummy):
+ continue
+ # if not isinstance(op, tensor.Dummy):
+ dxs = op._do_backward(*dys)
+ # TODO src and dx must match
+ assert len(op.src) == len(dxs), \
+ 'the number of src ops (=%d) and dx (=%d) not match' \
+ % (len(op.src), len(dxs))
+ for (src_op, x_id, y, y_stores_grad), dx in zip(op.src, dxs):
+ # prefix x is w.r.t op; prefix y is w.r.t src_op.
+ # x_id is the python id of one input arg of src_op, denoted as x.
+ # y_idx (below) is the index of x among the outputs of src_op.
+ # not_ready[src_op][y_idx] records the intermediate gradient
+ # of the y_idx'th output of src_op. 'intermediate gradient'
+ # indicates that if this output is used in multiple children
+ # operations, then we have to add the graident (dx) from all these
+ # children operations. When src_op is ready, it means that
+ # the gradient of all its outputs are available, i.e. all children
+ # operations have been backwarded.
+ # y is None if y.stores_grad is false; otherwise it is a Tensor
+ y_idx = src_op.y_id2idx[x_id]
+ if src_op not in not_ready:
+ # src_op may have mulitple outputs
+ not_ready[src_op] = [None for _ in src_op.y_id2idx]
+ not_ready[src_op][y_idx] = dx
+ else:
+ dxs = not_ready[src_op]
+ if dxs[y_idx] is None:
+ dxs[y_idx] = dx
+ else:
+ # add the gradient from another children operation that
+ # uses y_idx'th output of src_op as input arg
+ dxs[y_idx] += dx
+ if y_stores_grad:
+ # store the gradient for final return, e.g. if x is parameter
+ g = not_ready[src_op][y_idx]
+ tg = Tensor(device=g.device(), data=g)
+ yield (y, tg)
+ dependency[src_op] -= 1
+ if src_op.requires_grad is True:
+ if dependency[src_op] == 0:
+ if not isinstance(src_op, Dummy):
+ ready.append((src_op, not_ready[src_op]))
+ del not_ready[src_op]
+ del op # delete the operation to free all tensors from this op
+
+
class Operation(object):
'''
An operation includes the forward and backward function of
@@ -194,8 +314,8 @@ class Matmul(Operation):
Returns:
a tuple for (dx, dw)
'''
- return singa.Mult(dy, self.input[1].T()), \
- singa.Mult(self.input[0].T(), dy)
+ return singa.Mult(dy, singa.DefaultTranspose(self.input[1])), \
+ singa.Mult(singa.DefaultTranspose(self.input[0]), dy)
def matmul(x, w):
@@ -268,12 +388,12 @@ class SoftMax(Operation):
the result Tensor
'''
if self.axis == 1:
- x = x.T()
+ x = singa.DefaultTranspose(x)
self.output = singa.SoftMax(x)
if self.axis == 0:
return self.output
elif self.axis == 1:
- return self.output.T()
+ return singa.DefaultTranspose(self.output)
def backward(self, dy):
'''
@@ -286,7 +406,7 @@ class SoftMax(Operation):
'''
# calculations are made on numpy array
if self.axis == 1:
- dy = dy.T()
+ dy = singa.DefaultTranspose(dy)
grad = ctensor2numpy(dy)
output = ctensor2numpy(self.output)
out_1 = np.einsum('ki,ki->ki', grad, output)
@@ -298,14 +418,14 @@ class SoftMax(Operation):
if self.axis == 0:
return dx
elif self.axis == 1:
- return dx.T()
+ return singa.DefaultTranspose(dx)
def soft_max(x, axis=0):
return SoftMax(axis)(x)[0]
-class NLL(Operation):
+class CrossEntropy(Operation):
'''
Calculte negative log likelihood loss for a batch of training data.
@@ -350,12 +470,11 @@ class NLL(Operation):
pass # TODO, broadcast elementwise multiply seems not support
-def nll(y, t):
- return NLL()(y, t)[0]
+def cross_entropy(y, t):
+ return CrossEntropy()(y, t)[0]
class SoftMaxCrossEntropy(Operation):
-
def forward(self, x, t):
self.p = singa.SoftMax(x)
self.t = t
@@ -365,7 +484,8 @@ class SoftMaxCrossEntropy(Operation):
return loss
def backward(self, dy=1.0):
- return singa.SoftmaxCrossEntropyBwd(self.p, self.t), None
+ dx = singa.SoftmaxCrossEntropyBwd(self.p, self.t)
+ return singa.DivFloat(dx, float(self.p.shape()[0])), None
def softmax_cross_entropy(x, t):
@@ -448,11 +568,11 @@ class Flatten(Operation):
def forward(self, x):
# TODO Do flatten start from axis != 1
self.shape = list(x.shape())
- y = x.Reshape((x.shape()[0], x.Size() // x.shape()[0]))
+ y = singa.Reshape(x, (x.shape()[0], x.Size() // x.shape()[0]))
return y
def backward(self, dy):
- dx = dy.Reshape(self.shape)
+ dx = singa.Reshape(dy, self.shape)
return dx
@@ -466,11 +586,7 @@ class _Conv2D(Operation):
self.handle = handle
def forward(self, x, W, b):
- #assert x.nDim() == 4, 'The dimensions of input should be 4D.'
- #assert x.shape()[1] == self.in_channels, 'in_channels dismatched.'
- #assert (xs[0].shape()[2]+2*self.padding[0]-self.kernel_size[0])%self.stride[0] == 0, 'invalid padding.'
- #assert (xs[0].shape()[3]+2*self.padding[1]-self.kernel_size[1])%self.stride[1] == 0, 'invalid padding'
- #assert 0 == 0, 'invalid padding'
+ assert x.nDim() == 4, 'The dimensions of input should be 4D.'
if training:
if self.handle.bias_term:
@@ -517,125 +633,6 @@ def conv2d(x, W, b, handle):
return _Conv2D(handle)(x, W, b)[0]
-def infer_dependency(op):
- '''
- Infer the dependency of all operations with the
- given op as the last operation.
-
- Operation A is depending on B is A uses the output(s) of B.
-
- Args:
- op: an Operation instance, e.g. the loss operation.
-
- Return:
- a Counter instance with the operation as the key,
- and the number of operations that are depending on it as the value
- '''
- # dependency = {}
- dependency_count = Counter()
- queue = deque([op])
- while len(queue) > 0:
- cur_op = queue.pop()
- for src_op, _, _, _ in cur_op.src:
- if src_op not in dependency_count and \
- (not isinstance(src_op, Dummy)):
- # dependency[src_op] = [Counter() for _ in src_op.y_id2idx]
- dependency_count[src_op] = 0
- queue.append(src_op)
- # y_idx = src_op.y_id2idx[x_id]
- # dependency[src_op][y_idx][cur_op] += 1
- dependency_count[src_op] += 1
- return dependency_count
-
-
-def gradients(y, dy=None):
- grads = {} # mapping: x->dx if x.stores_grad
- for p, dp in backward(y, dy):
- gradients[p] = dp
- return grads
-
-
-def backward(y, dy=None):
- '''
- Run the backward propagation starting at y.
-
- Args:
- y: a Tensor instance, usually the loss
- dy: a number or a Tensor instance, for the gradient of the
- objective/loss w.r.t y, usually 1.0
-
- Return:
- a dictionary storing the gradient tensors of all tensors
- whose stores_grad is true (e.g. parameter tensors)
- '''
- dependency = infer_dependency(y.creator)
- assert y.size() == 1, 'y must be a Tensor with a single value;'\
- 'size of y is % d' % y.size()
-
- # by default the dy is a tensor with 1.0 for each sample;
- if dy is None:
- dy = float(1.0)
- elif isinstance(dy, Tensor):
- dy = dy.data
- else:
- dy = float(dy)
-
- # ready is a queue of (operation, dy list)
- ready = deque([(y.creator, (dy,))])
- not_ready = {} # mapping: op->[dy]
-
- if y.stores_grad:
- gradients[y] = dy
-
- while len(ready) > 0:
- op, dys = ready.pop()
- if not op.requires_grad or isinstance(op, Dummy):
- continue
- # if not isinstance(op, tensor.Dummy):
- dxs = op._do_backward(*dys)
- # TODO src and dx must match
- assert len(op.src) == len(dxs), \
- 'the number of src ops (=%d) and dx (=%d) not match' \
- % (len(op.src), len(dxs))
- for (src_op, x_id, y, y_stores_grad), dx in zip(op.src, dxs):
- # prefix x is w.r.t op; prefix y is w.r.t src_op.
- # x_id is the python id of one input arg of src_op, denoted as x.
- # y_idx (below) is the index of x among the outputs of src_op.
- # not_ready[src_op][y_idx] records the intermediate gradient
- # of the y_idx'th output of src_op. 'intermediate gradient'
- # indicates that if this output is used in multiple children
- # operations, then we have to add the graident (dx) from all these
- # children operations. When src_op is ready, it means that
- # the gradient of all its outputs are available, i.e. all children
- # operations have been backwarded.
- # y is None if y.stores_grad is false; otherwise it is a Tensor
- y_idx = src_op.y_id2idx[x_id]
- if src_op not in not_ready:
- # src_op may have mulitple outputs
- not_ready[src_op] = [None for _ in src_op.y_id2idx]
- not_ready[src_op][y_idx] = dx
- else:
- dxs = not_ready[src_op]
- if dxs[y_idx] is None:
- dxs[y_idx] = dx
- else:
- # add the gradient from another children operation that
- # uses y_idx'th output of src_op as input arg
- dxs[y_idx] += dx
- if y_stores_grad:
- # store the gradient for final return, e.g. if x is parameter
- g = not_ready[src_op][y_idx]
- tg = Tensor(device=g.device(), data=g)
- yield (y, tg)
- dependency[src_op] -= 1
- if src_op.requires_grad is True:
- if dependency[src_op] == 0:
- if not isinstance(src_op, Dummy):
- ready.append((src_op, not_ready[src_op]))
- del not_ready[src_op]
- del op # delete the operation to free all tensors from this op
-
-
class Layer(object):
def __init__(self):
@@ -651,8 +648,6 @@ class Layer(object):
class Linear(Layer):
def __init__(self, in_features, out_features, bias=True):
- #self.in_features = in_features
- #self.out_features = out_features
w_shape = (in_features, out_features)
b_shape = (1, out_features)
self.bias = bias
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/b30d7ea5/python/singa/tensor.py
----------------------------------------------------------------------
diff --git a/python/singa/tensor.py b/python/singa/tensor.py
index 0860d9d..46a47b7 100644
--- a/python/singa/tensor.py
+++ b/python/singa/tensor.py
@@ -134,7 +134,7 @@ class Tensor(object):
'''
return self.data.transpose()
- def transpose(self,axes = None):
+ def transpose(self, axes=None):
'''
To transpose the tensor
'''
@@ -142,13 +142,13 @@ class Tensor(object):
if axes == None:
tshape = [self.shape[x] for x in range(len(t.shape))]
t.shape = tuple(tshape)
- t.data = self.data.Transpose()
+ t.data = singa.DefaultTranspose(self.data)
else:
if(len(axes) != len(self.shape)):
raise ValueError('dimensions do not match')
tshape = [self.shape[x] for x in axes]
t.shape = tuple(tshape)
- t.data = self.data.Transpose(list(axes))
+ t.data = singa.Transpose(self.data, list(axes))
return t
def size(self): # TODO(wangwei) compute size
@@ -166,17 +166,18 @@ class Tensor(object):
return self.data.MemSize()
def reshape(self, shape):
- '''Change the tensor shape.
+ '''Return a new tensor with the given shape, and the original
+ tensor is not changed.
Args:
- shape (list<int>): new shape, which should have the same volumn as
- the original shape.
+ shape (list<int>): new shape, which should have the same
+ volume as the original shape.
'''
t = Tensor(self.shape, self.device, self.dtype)
assert product(self.shape) == product(shape), \
'product of shape should be equal'
t.shape = shape
- t.data = self.data.Reshape(list(shape))
+ t.data = singa.Reshape(self.data, shape)
return t
def reset_like(self, t):
@@ -283,38 +284,41 @@ class Tensor(object):
Return:
the tensor which has been repeated
-
+
'''
t = Tensor()
t_ndim = self.ndim()
if isinstance(repeats, int) or isinstance(repeats, long):
if repeats < 0:
- raise ValueError("'repeats' should not be negative: {}".format(repeats))
+ raise ValueError(
+ "'repeats' should not be negative: {}".format(repeats))
if axis != None and axis < 0:
axis += t_ndim
# broadcast = True
if axis == None:
axis = 9999
- t.shape = (product(self.shape)*repeats,)
- Repeats = [repeats,]
+ t.shape = (product(self.shape) * repeats,)
+ Repeats = [repeats, ]
t.data = self.data.Repeat(Repeats, axis)
elif axis >= 0:
t_shape = list(self.shape)
- t_shape[axis] = self.shape[axis]*repeats
+ t_shape[axis] = self.shape[axis] * repeats
t.shape = tuple(t_shape)
- Repeats = [repeats,]
+ Repeats = [repeats, ]
t.data = self.data.Repeat(Repeats, axis)
elif isinstance(repeats, tuple) or isinstance(repeats, list):
for rep in repeats:
if rep < 0:
- raise ValueError("'repeats' should be int or sequence: {}".format(repeats))
+ raise ValueError(
+ "'repeats' should be int or sequence: {}".format(repeats))
if axis != None and axis < 0:
axis += t_ndim
if axis == None:
axis = 9999
- raise ValueError("when axis us None, 'repeats' should be int: {}".format(repeats))
+ raise ValueError(
+ "when axis us None, 'repeats' should be int: {}".format(repeats))
elif axis >= 0:
t_shape = list(self.shape)
t_shape[axis] = sum(repeats)
@@ -323,16 +327,15 @@ class Tensor(object):
else:
raise ValueError('repeats should be int or sequence')
- return t
+ return t
def T(self):
- ''' shallow copy, negate the transpose field.
+ ''' shallow copy.
Returns:
- a new Tensor which shares the underlying data memory (shallow copy)
- but is marked as a transposed version of this tensor.
+ a new Tensor which shares the underlying data memory (shallow copy).
'''
- return _call_singa_func(self.data.T)
+ return _call_singa_func(singa.DefaultTranspose, self.data)
def copy(self):
'''shallow copy calls copy constructor of singa::Tensor
@@ -611,8 +614,9 @@ def sizeof(dtype):
return singa.SizeOf(dtype)
-def reshape(t, s):
- '''Reshape the input tensor with the given shape.
+def reshape(tensor, shape):
+ '''Reshape the input tensor with the given shape and
+ the original tensor is not changed
Args:
t (Tensor): the tensor to be changed
@@ -624,12 +628,8 @@ def reshape(t, s):
'''
return _call_singa_func(singa.Reshape, t.data, s)
-def Reshape(t,s):
-
- ret = t.reshape(s)
- return ret
-def transpose(t,axes = None):
+def transpose(t, axes=None):
'''
Returns:
the transposed tensor
@@ -796,6 +796,7 @@ def tanh(t):
'''
return _call_singa_func(singa.Tanh, t.data)
+
def sum(t, axis=None, out=None):
'''Sum of tensor elements over given axis
@@ -827,24 +828,24 @@ def sum(t, axis=None, out=None):
one.set_value(1.0)
ret = tensordot(t, one, t_ndim)
- if isinstance(axis,int):
+ if isinstance(axis, int):
if axis < 0:
axis += t_ndim
axis_shape = t_shape[axis]
axis_shape = int(axis_shape)
- one = Tensor(shape = (axis_shape, ), device = t.device)
+ one = Tensor(shape=(axis_shape, ), device=t.device)
one.set_value(1.0)
- ret = tensordot(t, one, axes=([axis],[0]))
+ ret = tensordot(t, one, axes=([axis], [0]))
- if isinstance(axis,tuple):
+ if isinstance(axis, tuple):
l_axis = list(axis)
axis_shape = [t_shape[x] for x in axis]
axisshape = tuple(axis_shape)
one = Tensor(axisshape, t.device)
one.set_value(1.0)
one_axis = [x for x in range(one.ndim())]
- ret = tensordot(t, one, (l_axis,one_axis))
+ ret = tensordot(t, one, (l_axis, one_axis))
if out is not None:
if out.shape != ret.shape:
@@ -1181,10 +1182,10 @@ def einsum(ops, *args):
if len(broadcast_a) == 0:
broadcast_a = [1]
if len(broadcast_b) == 0:
- broadcast_b = [1]
+ broadcast_b = [1]
mult_A = repeat(A, product(broadcast_a))
mult_A = mult_A.reshape(reshape_A)
- mult_A = transpose(mult_A,transpose_A)
+ mult_A = transpose(mult_A, transpose_A)
mult_B = repeat(B, product(broadcast_b))
mult_B = mult_B.reshape(reshape_B)
mult_B = transpose(mult_B, transpose_B)
@@ -1199,9 +1200,9 @@ def einsum(ops, *args):
res = transpose(res, transpose_res)
return res
-
-def repeat (t, repeats, axis = None):
+
+def repeat(t, repeats, axis=None):
'''Return the repeated tensor
Args:
t(tensor): the tensor to be repeated
@@ -1213,12 +1214,11 @@ def repeat (t, repeats, axis = None):
Return:
the tensor which has been repeated
'''
- ret = t.repeat(repeats,axis)
+ ret = t.repeat(repeats, axis)
return ret
-
-def tensordot (A,B,axes=2):
+def tensordot(A, B, axes=2):
"""Returns the tensor multiplication of two tensors along specified axes.
This is equivalent to compute dot product along the specified axes which
@@ -1244,30 +1244,33 @@ def tensordot (A,B,axes=2):
# when axes is an integer, axes_A and axes_B represent axes at the last of ''A'' and
# the first of ''B''. For example, when axes is 1, we do the normal multiplication :
# if A is in shape(3,2,4), B is in shape(4,2,5), it will return a matrix in shape(3,2,2,5)
- #when axes is 2 and A,B are shape (3,2,4) and (2,4,5), it will return a matrix in shape(3,5)
+ # when axes is 2 and A,B are shape (3,2,4) and (2,4,5), it will return a
+ # matrix in shape(3,5)
if type(axes) == int or type(axes) == long:
axes_A = list(range(-axes, 0))
axes_B = list(range(0, axes))
axes_B = axes_B
else:
- axes_A,axes_B =axes
+ axes_A, axes_B = axes
# when axes is a pair of sequences of integers. For example, A is in shape(3,2,4),
- #B is in shape(4,2,5), we set axes as ([1,2],[1,0]), it will return a matrix in shape(3,5)
- if isinstance(axes_A,list):
+ # B is in shape(4,2,5), we set axes as ([1,2],[1,0]), it will return a
+ # matrix in shape(3,5)
+ if isinstance(axes_A, list):
na = len(axes_A)
axes_A = list(axes_A)
else:
axes_A = [axes_A]
na = 1
- if isinstance(axes_B,list):
+ if isinstance(axes_B, list):
nb = len(axes_B)
axes_B = list(axes_B)
else:
axes_B = [axes_B]
nb = 1
- # a_shape and b_shape are the shape of tensor A and B, while nda and ndb are the dim of A and B
+ # a_shape and b_shape are the shape of tensor A and B, while nda and ndb
+ # are the dim of A and B
a_shape = A.shape
nda = A.ndim()
b_shape = B.shape
@@ -1277,7 +1280,7 @@ def tensordot (A,B,axes=2):
if na != nb:
equal = False
else:
- # to make the shape match
+ # to make the shape match
for k in range(na):
if a_shape[axes_A[k]] != b_shape[axes_B[k]]:
equal = False
@@ -1291,18 +1294,19 @@ def tensordot (A,B,axes=2):
'''start to do the calculation according to the axes'''
notin = [k for k in range(nda) if k not in axes_A]
- # nda is the dim of A, and axes_a is the axis for A, notin is the axis which is not in axes_A
+ # nda is the dim of A, and axes_a is the axis for A, notin is the axis
+ # which is not in axes_A
newaxes_a = notin + axes_A
N2 = 1
for axis in axes_A:
N2 *= a_shape[axis]
N1 = 1
for ax in notin:
- N1 *=a_shape[ax]
+ N1 *= a_shape[ax]
# newshape_a is the shape to do multiplication.For example, A is in shape(3,2,4),
- #B is in shape(4,2,5), we set axes as ([1,2],[1,0]), then newshape_a should be (3,5)
- #olda is the shape that will be shown in the result.
- newshape_a = (N1,N2)
+ # B is in shape(4,2,5), we set axes as ([1,2],[1,0]), then newshape_a should be (3, 8)
+ # olda is the shape that will be shown in the result.
+ newshape_a = (N1, N2)
olda = [a_shape[axis] for axis in notin]
notin = [k for k in range(ndb) if k not in axes_B]
newaxes_b = axes_B + notin
@@ -1320,7 +1324,7 @@ def tensordot (A,B,axes=2):
at = Reshape(A, newshape_a)
bt = Reshape(B, newshape_b)
- res = mult(at,bt)
+ res = mult(at, bt)
if len(olda + oldb) == 0:
olda = [1]
oldb = [1]
@@ -1330,6 +1334,7 @@ def tensordot (A,B,axes=2):
return res
+
def div(lhs, rhs, ret=None):
'''Elementi-wise division.
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/b30d7ea5/src/api/core_tensor.i
----------------------------------------------------------------------
diff --git a/src/api/core_tensor.i b/src/api/core_tensor.i
index cc72d21..9427b11 100644
--- a/src/api/core_tensor.i
+++ b/src/api/core_tensor.i
@@ -101,12 +101,11 @@ namespace singa{
const std::vector<size_t> &shape() const;
const size_t shape(size_t idx) const;
bool transpose() const;
- size_t nDim() const;
- Tensor Transpose() const;
- Tensor Transpose(const std::vector<size_t> &axes) const;
+ size_t nDim() const;
+
size_t Size() const;
size_t MemSize() const;
- Tensor Reshape(const std::vector<size_t> &shape);
+
void ResetLike(const Tensor &t);
void AsType(DataType type);
void ToDevice(std::shared_ptr<singa::Device> dev);
@@ -122,10 +121,10 @@ namespace singa{
void CopyData(const Tensor &other);
void RepeatData(std::vector<size_t> repeats, int axis, int total_repeats, const Tensor &src);
+
Tensor Clone() const;
Tensor Repeat(std::vector<size_t> repeats, int axis);
- Tensor T() const;
-
+
#if USE_JAVA
%rename(iAdd) operator+=(const Tensor &t);
@@ -166,6 +165,10 @@ namespace singa{
Tensor *dst, const Tensor &src, const size_t num);
Tensor Reshape(const Tensor &in, const std::vector<size_t> &s);
+ Tensor Transpose(const Tensor &in, const std::vector<size_t> &axes);
+
+ %rename(DefaultTranspose) Transpose(const Tensor &in);
+ Tensor Transpose(const Tensor &in);
Tensor Abs(const Tensor &t);
Tensor Exp(const Tensor &t);
@@ -326,6 +329,6 @@ namespace singa{
Tensor SoftMax(const Tensor &in);
void SoftMax(const Tensor &in, Tensor *out);
- const Tensor CrossEntropyFwd(const Tensor& p, const Tensor& t);
- const Tensor SoftmaxCrossEntropyBwd(const Tensor& p, const Tensor& t);
+ Tensor CrossEntropyFwd(const Tensor& p, const Tensor& t);
+ Tensor SoftmaxCrossEntropyBwd(const Tensor& p, const Tensor& t);
}
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/b30d7ea5/src/core/tensor/tensor.cc
----------------------------------------------------------------------
diff --git a/src/core/tensor/tensor.cc b/src/core/tensor/tensor.cc
index e5e8017..1ac1b42 100755
--- a/src/core/tensor/tensor.cc
+++ b/src/core/tensor/tensor.cc
@@ -21,6 +21,7 @@
#include "./tensor_math_cuda.h"
#include "./tensor_math_opencl.h"
#include <utility>
+#include <algorithm>
#define Noaxis 9999
@@ -45,13 +46,7 @@ Tensor::Tensor(const Shape &shape, DataType dtype)
block_ = device_->NewBlock((int)size);
generate_strides();
}
-Tensor::Tensor(Shape &&shape, DataType dtype)
- : data_type_(dtype), device_(defaultDevice), shape_(shape) {
- size_t size = Product(shape_) * SizeOf(data_type_);
- if (size)
- block_ = device_->NewBlock((int)size);
- generate_strides();
-}
+
//non-strided constructors with device
Tensor::Tensor(const Shape &shape, std::shared_ptr<Device> device,
@@ -62,56 +57,24 @@ Tensor::Tensor(const Shape &shape, std::shared_ptr<Device> device,
block_ = device_->NewBlock((int)size);
generate_strides();
}
-Tensor::Tensor(Shape &&shape, std::shared_ptr<Device> device, DataType dtype)
- : data_type_(dtype), device_(device), shape_(shape) {
- size_t size = Product(shape_) * SizeOf(data_type_);
- if (size)
- block_ = device_->NewBlock((int)size);
- generate_strides();
-}
-Tensor::Tensor(const Tensor &in)
- : //transpose_(in.transpose_),
- data_type_(in.data_type_),
- device_(in.device_),
- block_(in.block()),
- shape_(in.shape_),
- strides_(in.strides_) {
+Tensor::Tensor(const Tensor &in) : data_type_(in.data_type_),
+ device_(in.device_), block_(in.block()), shape_(in.shape_),
+ strides_(in.strides_) {
if (block_ != nullptr)
block_->IncRefCount();
}
-//strided constructor taking in a tensor, shape and strides
-Tensor::Tensor(const Tensor &in, Shape &new_shape, vector<int> &new_strides)
- : //transpose_(in.transpose_),
- data_type_(in.data_type_),
- device_(in.device_),
- block_(in.block()),
- shape_(new_shape),
- strides_(new_strides) {
- if (block_ != nullptr)
- block_->IncRefCount();
-}
-Tensor::Tensor(Tensor &&in)
- : //transpose_(in.transpose_),
- data_type_(in.data_type_),
- device_(in.device_),
- shape_(std::move(in.shape_)),
- strides_(in.strides_) {
+Tensor::Tensor(Tensor &&in) : data_type_(in.data_type_),
+ device_(in.device_), shape_(std::move(in.shape_)),
+ strides_(std::move(in.strides_)) {
block_ = in.block_;
in.block_ = nullptr;
}
-void Tensor::SetBlock(Block *block) {
- LOG(WARNING) << "Pls avoid using this function, which may have side-effect.";
- if (block_ != nullptr)
- if (block_->DecRefCount()) device_->FreeBlock(block_);
- block_ = block;
-}
-
void Tensor::ResetLike(const Tensor &in) {
if (block_ == nullptr || device_ != in.device_ || MemSize() != in.MemSize()) {
if (block_ != nullptr && block_->DecRefCount() == 0)
@@ -124,41 +87,16 @@ void Tensor::ResetLike(const Tensor &in) {
strides_ = in.strides_;
}
-// if tensor is not transposed yet i.e strides == 1,
-// then we simply change the shape and generate new default strides
-// if tensor is already transposed i.e strides != 1,
-// it should be copied to a new tensor with newly generated default strides
-// TODO(wangwei) raise error if the shape not match
-
-// void Tensor::Reshape(const Shape &shape) {
-// if (strides_.size() == 0)
-// strides_.push_back(1);
-
-// if (Product(shape_) != Product(shape)) {
-// if (block_ != nullptr && block_->DecRefCount() == 0)
-// device_->FreeBlock(block_);
-// block_ = device_->NewBlock((int)(Product(shape) * SizeOf(data_type_)));
-// } else if (transpose()) {
-// LOG(FATAL) << "Reshape Error: Reshape called on tranposed tensor. Not implemented yet." ;
-// }
-// shape_ = shape;
-// generate_strides();
-// }
-
-// void Tensor::Reshape(Shape &&shape) {
-// if (strides_.size() == 0)
-// strides_.push_back(1);
-
-// if (Product(shape_) != Product(shape)) {
-// if (block_ != nullptr && block_->DecRefCount() == 0)
-// device_->FreeBlock(block_);
-// block_ = device_->NewBlock((int)(Product(shape) * SizeOf(data_type_)));
-// } else if (transpose()) {
-// LOG(FATAL) << "Reshape Error: Reshape called on tranposed tensor. Not implemented yet." ;
-// }
-// shape_ = std::move(shape);
-// generate_strides();
-// }
+void Tensor::SetShape(const Shape& shape) {
+ if (Product(shape_) != Product(shape)) {
+ if (block_ != nullptr && block_->DecRefCount() == 0)
+ device_->FreeBlock(block_);
+ block_ = device_->NewBlock((int)(Product(shape) * SizeOf(data_type_)));
+ }
+ shape_ = shape;
+ generate_strides();
+}
+
void Tensor::AsType(const DataType type) {
if (data_type_ != type) {
@@ -217,7 +155,8 @@ void Tensor::CopyData(const Tensor &src) {
}
}
-void Tensor::RepeatData(vector<size_t> repeats, int axis, int total_repeats, const Tensor &src) {
+void Tensor::RepeatData(const vector<size_t>& repeats, int axis, int total_repeats,
+ const Tensor &src) {
if (repeats.size() == 1) {
CHECK_EQ(Size(), src.Size()*total_repeats);
} else {
@@ -336,7 +275,8 @@ void Tensor::ToProto(singa::TensorProto *proto) const {
}
}
-Tensor Tensor::Repeat(vector<size_t> repeats, int axis, std::shared_ptr<Device> device) {
+Tensor Tensor::Repeat(const vector<size_t>& repeats, int axis,
+ std::shared_ptr<Device> device) {
if (device == nullptr) device = device_;
vector<size_t> tshape;
int total_repeats = 0;
@@ -346,7 +286,7 @@ Tensor Tensor::Repeat(vector<size_t> repeats, int axis, std::shared_ptr<Device>
} else {
if (repeats.size() == 1) {
total_repeats = repeats[0];
- for (size_t i = 0; i < shape_.size(); i++) {
+ for (int i = 0; i < static_cast<int>(shape_.size()); i++) {
if (i == axis) {
tshape.push_back(shape_[i] * total_repeats);
} else {
@@ -363,7 +303,7 @@ Tensor Tensor::Repeat(vector<size_t> repeats, int axis, std::shared_ptr<Device>
}
total_repeats += repeats[i];
}
- for (size_t i = 0; i < shape_.size(); i++) {
+ for (int i = 0; i < static_cast<int>(shape_.size()); i++) {
if (i == axis) {
tshape.push_back(total_repeats);
} else {
@@ -387,68 +327,53 @@ Tensor Tensor::Clone(std::shared_ptr<Device> device) const {
return t;
}
-Tensor Tensor::T() const {
+Tensor& Tensor::T() {
// this function only works for 2d tensors
CHECK_EQ(shape_.size(), 2u);
- Tensor t;
- t.device_ = device_;
- t.data_type_ = data_type_;
- t.shape_.push_back(shape_[1]);
- t.shape_.push_back(shape_[0]);
- t.strides_.clear();
- t.strides_.push_back(strides_[1]);
- t.strides_.push_back(strides_[0]);
- t.block_ = block_;
- block_->IncRefCount();
- return t;
+ Transpose();
+ return *this;
}
//normal transpose without axes
-Tensor Tensor::Transpose() const {
- // if(shape_.size() != strides_.size())
- // generate_strides();
-
- Tensor t;
- t.device_ = device_;
- t.data_type_ = data_type_;
- t.strides_.clear();
- for (size_t n = 0; n < shape_.size(); ++n) {
- t.shape_.push_back(shape_[shape_.size() - n - 1]);
- t.strides_.push_back(strides_[shape_.size() - n - 1]);
- }
- t.block_ = block_;
- block_->IncRefCount();
- return t;
+Tensor& Tensor::Transpose() {
+ std::reverse(shape_.begin(), shape_.end());
+ std::reverse(strides_.begin(), strides_.end());
+ return *this;
}
//transpose with axes
-// TODO(wangwei) the shape and axes should match
-Tensor Tensor::Transpose(const vector<size_t> &axes) const {
- // if(axes.size() != shape_.size()){
- // std::cout << "Warning: Size of input axes doesn't match size of shape" << std::endl;
- // return void();
- // }
- // if(shape_.size() != strides_.size())
- // generate_strides();
+Tensor& Tensor::Transpose(const vector<size_t> &axes) {
+ CHECK_EQ(axes.size(), shape_.size()) <<
+ "Tranpose axes's length should be equal to shape";
- Tensor t;
- t.device_ = device_;
- t.data_type_ = data_type_;
- t.strides_.clear();
+ auto shape = shape_;
+ auto strides = strides_;
+ shape_.clear();
+ strides_.clear();
for (size_t n = 0; n < axes.size(); ++n) {
- t.shape_.push_back(shape_[axes[n]]);
- t.strides_.push_back(strides_[axes[n]]);
+ shape_.push_back(shape[axes[n]]);
+ strides_.push_back(strides[axes[n]]);
}
- t.block_ = block_;
- block_->IncRefCount();
- return t;
+ return *this;
+}
+
+//normal transpose without axes
+Tensor Transpose(const Tensor& in) {
+ Tensor out(in);
+ out.Transpose();
+ return out;
+}
+
+//transpose with axes
+Tensor Transpose(const Tensor& in, const vector<size_t> &axes) {
+ Tensor out(in);
+ out.Transpose(axes);
+ return out;
}
Tensor &Tensor::operator=(const Tensor &in) {
- // LOG(ERROR) << "= const &";
if (block_ != nullptr && block_->DecRefCount() == 0)
device_->FreeBlock(block_);
- //transpose_ = in.transpose_;
strides_ = in.strides_;
data_type_ = in.data_type_;
shape_ = in.shape_;
@@ -460,11 +385,9 @@ Tensor &Tensor::operator=(const Tensor &in) {
}
Tensor &Tensor::operator=(Tensor &&in) {
- // LOG(ERROR) << "= &&";
if (block_ != nullptr && block_->DecRefCount() == 0)
device_->FreeBlock(block_);
- //transpose_ = in.transpose_;
- strides_ = std::move(in.strides_);
+ strides_ = std::move(in.strides_);
data_type_ = in.data_type_;
shape_ = std::move(in.shape_);
device_ = in.device_;
@@ -473,17 +396,6 @@ Tensor &Tensor::operator=(Tensor &&in) {
return *this;
}
-// Tensor Reshape(const Tensor &in, const Shape &s) {
-// // Tensor out(in);
-// // out.Reshape(s);
-// return out;
-// }
-
-// Tensor Reshape(const Tensor &in, Shape &&s) {
-// // Tensor out(in);
-// // out.Reshape(std::move(s));
-// return out;
-// }
#define GenUnaryTensorArgMemberFn(op, fn) \
Tensor &Tensor::op(const Tensor &in) { \
@@ -539,7 +451,7 @@ void CopyDataToFrom(Tensor *dst, const Tensor &src, const size_t num,
}
}
-void RepeatDataToFrom(bool broadcast_flag, vector<size_t> repeats, int axis,
+void RepeatDataToFrom(bool broadcast_flag, const vector<size_t>& repeats, int axis,
Tensor *dst, const Tensor &src, const size_t num) {
if (repeats.size() == 1) {
broadcast_flag = true;
@@ -561,11 +473,11 @@ void RepeatDataToFrom(bool broadcast_flag, vector<size_t> repeats, int axis,
axis_shape = 1;
shape_outer = Product(src.shape());
} else {
- for (size_t i = 0; i < axis; i++) {
+ for (int i = 0; i < axis; i++) {
shape_outer *= src.shape()[i];
}
axis_shape = src.shape()[axis];
- for (size_t i = axis + 1; i < src.nDim(); i++) {
+ for (int i = axis + 1; i < static_cast<int>(src.nDim()); i++) {
chunk *= src.shape()[i];
}
}
@@ -912,7 +824,7 @@ template <typename SType>
void AddColumn(const SType alpha, const SType beta, const Tensor &v,
Tensor *M) {
if (M->transpose()) {
- Tensor X = M->T();
+ Tensor X = Transpose(*M);
AddRow(v, &X);
} else {
CHECK_EQ(M->nDim(), 2u);
@@ -935,7 +847,7 @@ void AddRow(const Tensor &v, Tensor *M) { AddRow(1, 1, v, M); }
template <typename SType>
void AddRow(const SType alpha, const SType beta, const Tensor &v, Tensor *M) {
if (M->transpose()) {
- Tensor X = M->T();
+ Tensor X = Transpose(*M);
AddColumn(v, &X);
} else {
CHECK_EQ(M->nDim(), 2u);
@@ -980,7 +892,7 @@ Tensor ConcatOn(const vector<Tensor> &in, int axis) {
tmp.push_back(Reshape(t, {t.shape(0), t.Size() / t.shape(0)}));
}
auto ret = ConcatenateRows(tmp);
- ret = ret.Reshape(out_shape);
+ ret.Reshape(out_shape);
return ret;
} else {
for (const auto& t : in) {
@@ -990,7 +902,7 @@ Tensor ConcatOn(const vector<Tensor> &in, int axis) {
tmp.push_back(Reshape(t, {nrow, t.Size() / nrow}));
}
auto ret = ConcatenateColumns(tmp);
- ret = ret.Reshape(out_shape);
+ ret.Reshape(out_shape);
return ret;
}
}
@@ -1059,7 +971,8 @@ Tensor CopyRows(const Tensor &in, const size_t start, const size_t end) {
}
-Tensor SliceOn(const Tensor&in, const size_t start, const size_t end, int axis) {
+Tensor SliceOn(const Tensor&in, const size_t start, const size_t end,
+ int axis) {
Shape out_shape = in.shape();
out_shape[axis] = end - start;
if (axis == 0) {
@@ -1074,7 +987,7 @@ Tensor SliceOn(const Tensor&in, const size_t start, const size_t end, int axis)
auto suffix = in.Size() / nrow / in.shape(axis);
auto ret = SliceColumns(Reshape(in, {nrow, in.Size() / nrow}),
start * suffix, end * suffix);
- ret = ret.Reshape(out_shape);
+ ret.Reshape(out_shape);
return ret;
}
}
@@ -1145,7 +1058,7 @@ void SubRow(const Tensor &v, Tensor *M) { AddRow(-1, 1, v, M); }
void SumColumns(const Tensor &M, Tensor *v) {
if (M.transpose()) {
- Tensor X = M.T();
+ Tensor X = Transpose(M);
SumRows(X, v);
} else {
CHECK_EQ(M.nDim(), 2u);
@@ -1160,7 +1073,7 @@ void SumColumns(const Tensor &M, Tensor *v) {
}
void SumRows(const Tensor &M, Tensor *v) {
if (M.transpose()) {
- Tensor X = M.T();
+ Tensor X = Transpose(M);
SumColumns(X, v);
} else {
CHECK_EQ(M.nDim(), 2u);
@@ -1170,7 +1083,7 @@ void SumRows(const Tensor &M, Tensor *v) {
Tensor one(Shape{nb_row}, M.device(), M.data_type());
one.SetValue(1.0f); // TODO(wangwei) cast type
- Tensor X = M.T();
+ Tensor X = Transpose(M);
Mult(X, one, v);
}
}
@@ -1268,13 +1181,13 @@ void Mult(const SType alpha, const Tensor &A, const Tensor &B, const SType beta,
// ************************
// Misc.
// ************************
-const Tensor CrossEntropyFwd(const Tensor& p, const Tensor& t) {
+Tensor CrossEntropyFwd(const Tensor& p, const Tensor& t) {
Tensor loss({p.shape(0)}, p.device(), p.data_type());
ComputeCrossEntropy(p, t, &loss);
return loss;
}
-const Tensor SoftmaxCrossEntropyBwd(const Tensor& p, const Tensor& t) {
+Tensor SoftmaxCrossEntropyBwd(const Tensor& p, const Tensor& t) {
auto g = p.Clone();
SoftmaxCrossEntropyBwd(t, &g);
return g;
@@ -1310,65 +1223,20 @@ void SoftmaxCrossEntropyBwd(const Tensor &t, Tensor *p) {
});
}
-Tensor Tensor::Reshape(const Shape &shape) {
- if (strides_.size() == 0)
- strides_.push_back(1);
- // TODO(wangwei) remove this condition and report error if size changes.
- if (Product(shape_) != Product(shape)) {
- if (block_ != nullptr && block_->DecRefCount() == 0)
- device_->FreeBlock(block_);
- block_ = device_->NewBlock((int)(Product(shape) * SizeOf(data_type_)));
- shape_ = shape;
- generate_strides();
- return *this;
-
- } else if (transpose()) {
- Tensor t(shape_, device_, data_type_);
- t.block_ = t.device()->NewBlock((int)(Product(shape) * SizeOf(data_type_)));
+// if tensor is not transposed yet, we change the shape and generate new strides
+// if tensor is already transposed, we reallocate the memory and generate strides
+Tensor& Tensor::Reshape(const Shape &shape) {
+ if (transpose()) {
+ Tensor t(shape, device_, data_type_);
singa::Transform(*this, &t);
- t.shape_ = shape;
- return t;
+ shape_ = shape;
+ std::swap(t.block_, block_);
} else {
- Tensor t;
- t.shape_ = shape;
- t.device_ = device_;
- t.data_type_ = data_type_;
- t.block_ = block_; // be careful about the block inference (mem leaking)
- t.block_->IncRefCount();
- t.generate_strides();
- return t;
- }
-}
-
-Tensor Tensor::Reshape(Shape &&shape) {
- if (strides_.size() == 0)
- strides_.push_back(1);
-
- if (Product(shape_) != Product(shape)) {
- if (block_ != nullptr && block_->DecRefCount() == 0)
- device_->FreeBlock(block_);
- block_ = device_->NewBlock((int)(Product(shape) * SizeOf(data_type_)));
- shape_ = std::move(shape);
+ shape_ = shape;
generate_strides();
- return *this;
-
- } else if (transpose()) {
- Tensor t(shape_, device_, data_type_);
- t.block_ = t.device()->NewBlock((int)(Product(shape) * SizeOf(data_type_)));
- singa::Transform(*this, &t);
- t.shape_ = shape;
- return t;
- } else {
- Tensor t;
- t.shape_ = shape;
- t.device_ = device_;
- t.data_type_ = data_type_;
- t.block_ = block_; // be careful about the block inference (mem leaking)
- t.block_->IncRefCount();
- t.generate_strides();
- return t;
}
+ return *this;
}
Tensor Reshape(const Tensor &in, const Shape &s) {
@@ -1376,9 +1244,4 @@ Tensor Reshape(const Tensor &in, const Shape &s) {
return out.Reshape(s);
}
-Tensor Reshape(const Tensor &in, Shape &&s) {
- Tensor out(in);
- return out.Reshape(std::move(s));
-}
-
} // namespace singa
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/b30d7ea5/src/core/tensor/tensor_math.h
----------------------------------------------------------------------
diff --git a/src/core/tensor/tensor_math.h b/src/core/tensor/tensor_math.h
index f438fc6..f5fbc84 100644
--- a/src/core/tensor/tensor_math.h
+++ b/src/core/tensor/tensor_math.h
@@ -253,7 +253,7 @@ void Tanh(const Tensor &in, Tensor *out, Context *ctx) {
/// similar to cudnnTransformTensor
/// copies the data from one tensor to another tensor with a different layout
-/// the tensors must have the same dimensions but not necessarily the same strides
+/// the tensors must have the same dimensions but not necessarily the same strides
template <typename DType, typename Lang>
void Transform(const Tensor &in, Tensor *out, Context *ctx) {
LOG(FATAL) << "Transform Not Implemented";
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/b30d7ea5/src/core/tensor/tensor_math_cuda.h
----------------------------------------------------------------------
diff --git a/src/core/tensor/tensor_math_cuda.h b/src/core/tensor/tensor_math_cuda.h
index 2a43468..dfe5724 100644
--- a/src/core/tensor/tensor_math_cuda.h
+++ b/src/core/tensor/tensor_math_cuda.h
@@ -54,34 +54,23 @@ cudnn requires tensor dimensions to fulfill 1 requirement:
Tensor B has shape (2,3,4), cudnn requires shape of {1,2,3,4} to be the input
*/
vector<int> generate_shape_cuda(const Tensor& x) {
- Shape shape_ = x.shape();
+ Shape shape = x.shape();
+ CHECK_LE(shape.size(), 5) << "Dimensions (shape) beyond 5 are currently not supported" ;
vector<int> shape_arr;
- if (shape_.size() <= 4) {
- for (size_t n = 0; n < 4 - shape_.size(); ++n) {
+ if (shape.size() <= 4) {
+ for (int n = 0; n < 4 - shape.size(); ++n) {
shape_arr.push_back(1);
}
- for (size_t n = 0; n < shape_.size(); ++n) {
- shape_arr.push_back(shape_.at(n));
- }
- return shape_arr;
- } else if (shape_.size() == 5) {
- for (size_t n = 0; n < shape_.size(); ++n) {
- shape_arr.push_back(shape_.at(n));
- }
- return shape_arr;
- } else {
- LOG(FATAL) << "Dimensions (shape) beyond 5 are currently not supported" ;
}
+ for(auto x: shape)
+ shape_arr.push_back(static_cast<int>(x));
return shape_arr;
}
int generate_dim_cuda(const Tensor& x) {
+ CHECK_LE(x.nDim(), 5) << "Dimensions (shape) beyond 5 are currently not supported" ;
if (x.shape().size() <= 4) {return 4;}
- else if (x.shape().size() == 5) {return 5;}
- else {
- LOG(FATAL) << "Dimensions (shape) beyond 5 are currently not supported" ;
- }
- return 0;
+ else {return 5;}
}
/*
@@ -94,29 +83,17 @@ int generate_dim_cuda(const Tensor& x) {
and stride {9, 9, 3, 1} or {9, 9, 1, 3} to be the inputs
*/
vector<int> generate_strides_cuda(const Tensor& x) {
- Shape shape_ = x.shape();
- vector<int> strides_ = x.strides();
+ Shape shape = x.shape();
+ auto& strides = x.strides();
vector<int> strides_arr;
- int product = 1;
- for (size_t n = 0; n < (shape_.size()); ++n) {
- product *= shape_[n];
- }
- if (shape_.size() <= 4) {
- for (size_t n = 0; n < 4 - shape_.size(); ++n) {
+ int product = Product(shape);
+ if (shape.size() <= 4) {
+ for (int n = 0; n < 4 - shape.size(); ++n) {
strides_arr.push_back(product);
}
- for (size_t n = 0; n < strides_.size(); ++n) {
- strides_arr.push_back(strides_[n]);
- }
- return strides_arr;
- } else if (shape_.size() == 5) {
- for (size_t n = 0; n < strides_.size(); ++n) {
- strides_arr.push_back(strides_[n]);
- }
- return strides_arr;
- } else {
- LOG(FATAL) << "Dimensions (strides) beyond 5 are currently not supported" ;
}
+ for(auto x : strides)
+ strides_arr.push_back(static_cast<int>(x));
return strides_arr;
}
@@ -241,6 +218,22 @@ void Sub<float, lang::Cuda>(const Tensor& in1,
}
}
+template <>
+void Transform<float, lang::Cuda>(const Tensor& in, Tensor* out,
+ Context* ctx) {
+ const float* inPtr = static_cast<const float*>(in.block()->data());
+ float* outPtr = static_cast<float*>(out->block()->mutable_data());
+
+ float alpha = 1.0;
+ float beta = 0.0;
+
+ check_cudnn(cudnnTransformTensor(ctx->cudnn_handle,
+ (void*)(&alpha), generate_tensor_nd_desc(in), inPtr,
+ (void*)(&beta), generate_tensor_nd_desc(*out), outPtr
+ ));
+
+}
+
/// Element-wise operation, clamp every element into [low, high]
/// if x>high, then x=high; if x<low, then x=low.
template <>
@@ -254,14 +247,7 @@ void Clamp<float, lang::Cuda>(const float low,
if (in.strides() == out->strides()) {
cuda::clamp(num, low, high, inPtr, outPtr, ctx->stream);
} else { //else we transform in to out to store first
- float alpha = 1.0;
- float beta = 0.0;
-
- check_cudnn(cudnnTransformTensor(ctx->cudnn_handle,
- (void*)(&alpha), generate_tensor_nd_desc(in), inPtr,
- (void*)(&beta), generate_tensor_nd_desc(*out), outPtr
- ));
-
+ Transform<float, lang::Cuda>(in, out, ctx);
cuda::clamp(num, low, high, outPtr, outPtr, ctx->stream);
}
}
@@ -280,36 +266,18 @@ void Div<float, lang::Cuda>(const Tensor& in1,
if (!in1.transpose() && !in2.transpose() && (in1.strides() == in2.strides())) {
cuda::div(num, inPtr1, inPtr2, outPtr, ctx->stream);
} else { //else we check whether in1 or in2 or both are transposed
- float alpha = 1.0;
- float beta = 0.0;
-
if (in1.transpose() && in2.transpose()) {
Tensor t(in1.shape(), in1.device(), in1.data_type());
- float* tPtr = static_cast<float*>(t.block()->mutable_data());
-
- check_cudnn(cudnnTransformTensor(ctx->cudnn_handle,
- (void*)(&alpha), generate_tensor_nd_desc(in1), inPtr1,
- (void*)(&beta), generate_tensor_nd_desc(t), tPtr
- ));
+ Transform<float, lang::Cuda>(in1, &t, ctx);
+ Transform<float, lang::Cuda>(in2, out, ctx);
- check_cudnn(cudnnTransformTensor(ctx->cudnn_handle,
- (void*)(&alpha), generate_tensor_nd_desc(in2), inPtr2,
- (void*)(&beta), generate_tensor_nd_desc(*out), outPtr
- ));
+ float* tPtr = static_cast<float*>(t.block()->mutable_data());
cuda::div(num, tPtr, outPtr, outPtr, ctx->stream);
-
} else if (in1.transpose()) {
- check_cudnn(cudnnTransformTensor(ctx->cudnn_handle,
- (void*)(&alpha), generate_tensor_nd_desc(in1), inPtr1,
- (void*)(&beta), generate_tensor_nd_desc(*out), outPtr
- ));
+ Transform<float, lang::Cuda>(in1, out, ctx);
cuda::div(num, outPtr, inPtr2, outPtr, ctx->stream);
-
} else if (in2.transpose()) {
- check_cudnn(cudnnTransformTensor(ctx->cudnn_handle,
- (void*)(&alpha), generate_tensor_nd_desc(in2), inPtr2,
- (void*)(&beta), generate_tensor_nd_desc(*out), outPtr
- ));
+ Transform<float, lang::Cuda>(in2, out, ctx);
cuda::div(num, inPtr1, outPtr, outPtr, ctx->stream);
}
}
@@ -325,14 +293,7 @@ void Div<float, lang::Cuda>(const float x, const Tensor& in,
if (in.strides() == out->strides()) {
cuda::div(num, x, inPtr, outPtr, ctx->stream);
} else { //else we transform in to out to store first
- float alpha = 1.0;
- float beta = 0.0;
-
- check_cudnn(cudnnTransformTensor(ctx->cudnn_handle,
- (void*)(&alpha), generate_tensor_nd_desc(in), inPtr,
- (void*)(&beta), generate_tensor_nd_desc(*out), outPtr
- ));
-
+ Transform<float, lang::Cuda>(in, out, ctx);
cuda::div(num, x, outPtr, outPtr, ctx->stream);
}
}
@@ -366,36 +327,17 @@ void EltwiseMult<float, lang::Cuda>(const Tensor& in1,
if (!in1.transpose() && !in2.transpose() && (in1.strides() == in2.strides())) {
cuda::mult(num, inPtr1, inPtr2, outPtr, ctx->stream);
} else { //else we check whether in1 or in2 or both are transposed
- float alpha = 1.0;
- float beta = 0.0;
-
if (in1.transpose() && in2.transpose()) {
Tensor t(in1.shape(), in1.device(), in1.data_type());
+ Transform<float, lang::Cuda>(in1, &t, ctx);
+ Transform<float, lang::Cuda>(in2, out, ctx);
float* tPtr = static_cast<float*>(t.block()->mutable_data());
-
- check_cudnn(cudnnTransformTensor(ctx->cudnn_handle,
- (void*)(&alpha), generate_tensor_nd_desc(in1), inPtr1,
- (void*)(&beta), generate_tensor_nd_desc(t), tPtr
- ));
-
- check_cudnn(cudnnTransformTensor(ctx->cudnn_handle,
- (void*)(&alpha), generate_tensor_nd_desc(in2), inPtr2,
- (void*)(&beta), generate_tensor_nd_desc(*out), outPtr
- ));
cuda::mult(num, tPtr, outPtr, outPtr, ctx->stream);
-
} else if (in1.transpose()) {
- check_cudnn(cudnnTransformTensor(ctx->cudnn_handle,
- (void*)(&alpha), generate_tensor_nd_desc(in1), inPtr1,
- (void*)(&beta), generate_tensor_nd_desc(*out), outPtr
- ));
+ Transform<float, lang::Cuda>(in1, out, ctx);
cuda::mult(num, outPtr, inPtr2, outPtr, ctx->stream);
-
} else if (in2.transpose()) {
- check_cudnn(cudnnTransformTensor(ctx->cudnn_handle,
- (void*)(&alpha), generate_tensor_nd_desc(in2), inPtr2,
- (void*)(&beta), generate_tensor_nd_desc(*out), outPtr
- ));
+ Transform<float, lang::Cuda>(in2, out, ctx);
cuda::mult(num, inPtr1, outPtr, outPtr, ctx->stream);
}
}
@@ -413,14 +355,7 @@ void Exp<float, lang::Cuda>(const Tensor& in, Tensor* out,
if (in.strides() == out->strides()) {
cuda::exp(num, inPtr, outPtr, ctx->stream);
} else { //else we transform in to out to store first
- float alpha = 1.0;
- float beta = 0.0;
-
- check_cudnn(cudnnTransformTensor(ctx->cudnn_handle,
- (void*)(&alpha), generate_tensor_nd_desc(in), inPtr,
- (void*)(&beta), generate_tensor_nd_desc(*out), outPtr
- ));
-
+ Transform<float, lang::Cuda>(in, out, ctx);
cuda::exp(num, outPtr, outPtr, ctx->stream);
}
}
@@ -435,14 +370,7 @@ void GE<float, lang::Cuda>(const Tensor& in, const float x,
if (in.strides() == out->strides()) {
cuda::ge(num, inPtr, x, outPtr, ctx->stream);
} else { //else we transform in to out to store first
- float alpha = 1.0;
- float beta = 0.0;
-
- check_cudnn(cudnnTransformTensor(ctx->cudnn_handle,
- (void*)(&alpha), generate_tensor_nd_desc(in), inPtr,
- (void*)(&beta), generate_tensor_nd_desc(*out), outPtr
- ));
-
+ Transform<float, lang::Cuda>(in, out, ctx);
cuda::ge(num, outPtr, x, outPtr, ctx->stream);
}
}
@@ -451,10 +379,7 @@ void GE<float, lang::Cuda>(const Tensor& in1, const Tensor& in2,
Tensor* out, Context* ctx) {
Sub<float, lang::Cuda>(in1, in2, out, ctx);
float* outPtr = static_cast<float*>(out->block()->mutable_data());
- // const float* inPtr1 = static_cast<const float*>(in1.block()->data());
- // const float* inPtr2 = static_cast<const float*>(in2.block()->data());
const size_t num = in1.Size();
- //cuda::ge(num, inPtr1, inPtr2, outPtr, ctx->stream);
cuda::ge(num, outPtr, 0.0, outPtr, ctx->stream);
}
@@ -469,14 +394,7 @@ void GT<float, lang::Cuda>(const Tensor& in, const float x,
if (in.strides() == out->strides()) {
cuda::gt(num, inPtr, x, outPtr, ctx->stream);
} else { //else we transform in to out to store first
- float alpha = 1.0;
- float beta = 0.0;
-
- check_cudnn(cudnnTransformTensor(ctx->cudnn_handle,
- (void*)(&alpha), generate_tensor_nd_desc(in), inPtr,
- (void*)(&beta), generate_tensor_nd_desc(*out), outPtr
- ));
-
+ Transform<float, lang::Cuda>(in, out, ctx);
cuda::gt(num, outPtr, x, outPtr, ctx->stream);
}
}
@@ -485,10 +403,7 @@ void GT<float, lang::Cuda>(const Tensor& in1, const Tensor& in2,
Tensor* out, Context* ctx) {
Sub<float, lang::Cuda>(in1, in2, out, ctx);
float* outPtr = static_cast<float*>(out->block()->mutable_data());
- // const float* inPtr1 = static_cast<const float*>(in1.block()->data());
- // const float* inPtr2 = static_cast<const float*>(in2.block()->data());
const size_t num = in1.Size();
- //cuda::gt(num, inPtr1, inPtr2, outPtr, ctx->stream);
cuda::gt(num, outPtr, 0.0, outPtr, ctx->stream);
}
@@ -502,14 +417,7 @@ void LE<float, lang::Cuda>(const Tensor& in, const float x,
if (in.strides() == out->strides()) {
cuda::le(num, inPtr, x, outPtr, ctx->stream);
} else { //else we transform in to out to store first
- float alpha = 1.0;
- float beta = 0.0;
-
- check_cudnn(cudnnTransformTensor(ctx->cudnn_handle,
- (void*)(&alpha), generate_tensor_nd_desc(in), inPtr,
- (void*)(&beta), generate_tensor_nd_desc(*out), outPtr
- ));
-
+ Transform<float, lang::Cuda>(in, out, ctx);
cuda::le(num, outPtr, x, outPtr, ctx->stream);
}
}
@@ -518,10 +426,7 @@ void LE<float, lang::Cuda>(const Tensor& in1, const Tensor& in2,
Tensor* out, Context* ctx) {
Sub<float, lang::Cuda>(in1, in2, out, ctx);
float* outPtr = static_cast<float*>(out->block()->mutable_data());
- // const float* inPtr1 = static_cast<const float*>(in1.block()->data());
- // const float* inPtr2 = static_cast<const float*>(in2.block()->data());
const size_t num = in1.Size();
- //cuda::le(num, inPtr1, inPtr2, outPtr, ctx->stream);
cuda::le(num, outPtr, 0.0, outPtr, ctx->stream);
}
@@ -536,14 +441,7 @@ void Log<float, lang::Cuda>(const Tensor& in, Tensor* out,
if (in.strides() == out->strides()) {
cuda::log(num, inPtr, outPtr, ctx->stream);
} else { //else we transform in to out to store first
- float alpha = 1.0;
- float beta = 0.0;
-
- check_cudnn(cudnnTransformTensor(ctx->cudnn_handle,
- (void*)(&alpha), generate_tensor_nd_desc(in), inPtr,
- (void*)(&beta), generate_tensor_nd_desc(*out), outPtr
- ));
-
+ Transform<float, lang::Cuda>(in, out, ctx);
cuda::log(num, outPtr, outPtr, ctx->stream);
}
}
@@ -558,14 +456,7 @@ void LT<float, lang::Cuda>(const Tensor& in, const float x,
if (in.strides() == out->strides()) {
cuda::lt(num, inPtr, x, outPtr, ctx->stream);
} else { //else we transform in to out to store first
- float alpha = 1.0;
- float beta = 0.0;
-
- check_cudnn(cudnnTransformTensor(ctx->cudnn_handle,
- (void*)(&alpha), generate_tensor_nd_desc(in), inPtr,
- (void*)(&beta), generate_tensor_nd_desc(*out), outPtr
- ));
-
+ Transform<float, lang::Cuda>(in, out, ctx);
cuda::lt(num, outPtr, x, outPtr, ctx->stream);
}
}
@@ -574,10 +465,7 @@ void LT<float, lang::Cuda>(const Tensor& in1, const Tensor& in2,
Tensor* out, Context* ctx) {
Sub<float, lang::Cuda>(in1, in2, out, ctx);
float* outPtr = static_cast<float*>(out->block()->mutable_data());
- // const float* inPtr1 = static_cast<const float*>(in1.block()->data());
- // const float* inPtr2 = static_cast<const float*>(in2.block()->data());
const size_t num = in1.Size();
- //cuda::lt(num, inPtr1, inPtr2, outPtr, ctx->stream);
cuda::lt(num, outPtr, 0.0, outPtr, ctx->stream);
}
@@ -592,14 +480,7 @@ void Pow<float, lang::Cuda>(const Tensor& in, const float x,
if (in.strides() == out->strides()) {
cuda::pow(num, inPtr, x, outPtr, ctx->stream);
} else { //else we transform in to out to store first
- float alpha = 1.0;
- float beta = 0.0;
-
- check_cudnn(cudnnTransformTensor(ctx->cudnn_handle,
- (void*)(&alpha), generate_tensor_nd_desc(in), inPtr,
- (void*)(&beta), generate_tensor_nd_desc(*out), outPtr
- ));
-
+ Transform<float, lang::Cuda>(in, out, ctx);
cuda::pow(num, outPtr, x, outPtr, ctx->stream);
}
}
@@ -617,36 +498,17 @@ void Pow<float, lang::Cuda>(const Tensor& in1,
if (!in1.transpose() && !in2.transpose() && (in1.strides() == in2.strides())) {
cuda::pow(num, inPtr1, inPtr2, outPtr, ctx->stream);
} else { //else we check whether in1 or in2 or both are transposed
- float alpha = 1.0;
- float beta = 0.0;
-
if (in1.transpose() && in2.transpose()) {
Tensor t(in1.shape(), in1.device(), in1.data_type());
float* tPtr = static_cast<float*>(t.block()->mutable_data());
-
- check_cudnn(cudnnTransformTensor(ctx->cudnn_handle,
- (void*)(&alpha), generate_tensor_nd_desc(in1), inPtr1,
- (void*)(&beta), generate_tensor_nd_desc(t), tPtr
- ));
-
- check_cudnn(cudnnTransformTensor(ctx->cudnn_handle,
- (void*)(&alpha), generate_tensor_nd_desc(in2), inPtr2,
- (void*)(&beta), generate_tensor_nd_desc(*out), outPtr
- ));
+ Transform<float, lang::Cuda>(in1, &t, ctx);
+ Transform<float, lang::Cuda>(in2, out, ctx);
cuda::pow(num, tPtr, outPtr, outPtr, ctx->stream);
-
} else if (in1.transpose()) {
- check_cudnn(cudnnTransformTensor(ctx->cudnn_handle,
- (void*)(&alpha), generate_tensor_nd_desc(in1), inPtr1,
- (void*)(&beta), generate_tensor_nd_desc(*out), outPtr
- ));
+ Transform<float, lang::Cuda>(in1, out, ctx);
cuda::pow(num, outPtr, inPtr2, outPtr, ctx->stream);
-
} else if (in2.transpose()) {
- check_cudnn(cudnnTransformTensor(ctx->cudnn_handle,
- (void*)(&alpha), generate_tensor_nd_desc(in2), inPtr2,
- (void*)(&beta), generate_tensor_nd_desc(*out), outPtr
- ));
+ Transform<float, lang::Cuda>(in2, out, ctx);
cuda::pow(num, inPtr1, outPtr, outPtr, ctx->stream);
}
}
@@ -694,14 +556,7 @@ void ReLU<float, lang::Cuda>(const Tensor& in, Tensor* out,
if (in.strides() == out->strides()) {
cuda::relu(num, inPtr, outPtr, ctx->stream);
} else { //else we transform in to out to store first
- float alpha = 1.0;
- float beta = 0.0;
-
- check_cudnn(cudnnTransformTensor(ctx->cudnn_handle,
- (void*)(&alpha), generate_tensor_nd_desc(in), inPtr,
- (void*)(&beta), generate_tensor_nd_desc(*out), outPtr
- ));
-
+ Transform<float, lang::Cuda>(in, out, ctx);
cuda::relu(num, outPtr, outPtr, ctx->stream);
}
}
@@ -749,14 +604,7 @@ void Sigmoid<float, lang::Cuda>(const Tensor& in, Tensor* out,
if (in.strides() == out->strides()) {
cuda::sigmoid(num, inPtr, outPtr, ctx->stream);
} else { //else we transform in to out to store first
- float alpha = 1.0;
- float beta = 0.0;
-
- check_cudnn(cudnnTransformTensor(ctx->cudnn_handle,
- (void*)(&alpha), generate_tensor_nd_desc(in), inPtr,
- (void*)(&beta), generate_tensor_nd_desc(*out), outPtr
- ));
-
+ Transform<float, lang::Cuda>(in, out, ctx);
cuda::sigmoid(num, outPtr, outPtr, ctx->stream);
}
}
@@ -772,14 +620,7 @@ void Sign<float, lang::Cuda>(const Tensor& in, Tensor* out,
if (in.strides() == out->strides()) {
cuda::sign(num, inPtr, outPtr, ctx->stream);
} else { //else we transform in to out to store first
- float alpha = 1.0;
- float beta = 0.0;
-
- check_cudnn(cudnnTransformTensor(ctx->cudnn_handle,
- (void*)(&alpha), generate_tensor_nd_desc(in), inPtr,
- (void*)(&beta), generate_tensor_nd_desc(*out), outPtr
- ));
-
+ Transform<float, lang::Cuda>(in, out, ctx);
cuda::sign(num, outPtr, outPtr, ctx->stream);
}
}
@@ -788,15 +629,14 @@ void Sign<float, lang::Cuda>(const Tensor& in, Tensor* out,
template <>
void Sqrt<float, lang::Cuda>(const Tensor& in, Tensor* out,
Context* ctx) {
- const float* inPtr = static_cast<const float*>(in.block()->data());
float* outPtr = static_cast<float*>(out->block()->mutable_data());
#if CUDNN_MAJOR < 7
+ Transform<float, lang::Cuda>(in, out, ctx);
size_t num = in.Size();
- cuda::sqrt(num, inPtr, outPtr, ctx->stream);
-
+ cuda::sqrt(num, outPtr, outPtr, ctx->stream);
#else
-
+ const float* inPtr = static_cast<const float*>(in.block()->data());
float alpha1 = 1.0;
float alpha2 = 0.0;
float beta = 0.0;
@@ -820,14 +660,7 @@ void Square<float, lang::Cuda>(const Tensor& in, Tensor* out,
if (in.strides() == out->strides()) {
cuda::square(num, inPtr, outPtr, ctx->stream);
} else { //else we transform in to out to store first
- float alpha = 1.0;
- float beta = 0.0;
-
- check_cudnn(cudnnTransformTensor(ctx->cudnn_handle,
- (void*)(&alpha), generate_tensor_nd_desc(in), inPtr,
- (void*)(&beta), generate_tensor_nd_desc(*out), outPtr
- ));
-
+ Transform<float, lang::Cuda>(in, out, ctx);
cuda::square(num, outPtr, outPtr, ctx->stream);
}
}
@@ -883,34 +716,11 @@ void Tanh<float, lang::Cuda>(const Tensor& in, Tensor* out,
if (in.strides() == out->strides()) {
cuda::tanh(num, inPtr, outPtr, ctx->stream);
} else { //else we transform in to out to store first
- float alpha = 1.0;
- float beta = 0.0;
-
- check_cudnn(cudnnTransformTensor(ctx->cudnn_handle,
- (void*)(&alpha), generate_tensor_nd_desc(in), inPtr,
- (void*)(&beta), generate_tensor_nd_desc(*out), outPtr
- ));
-
+ Transform<float, lang::Cuda>(in, out, ctx);
cuda::tanh(num, outPtr, outPtr, ctx->stream);
}
}
-template <>
-void Transform<float, lang::Cuda>(const Tensor& in, Tensor* out,
- Context* ctx) {
- const float* inPtr = static_cast<const float*>(in.block()->data());
- float* outPtr = static_cast<float*>(out->block()->mutable_data());
-
- float alpha = 1.0;
- float beta = 0.0;
-
- check_cudnn(cudnnTransformTensor(ctx->cudnn_handle,
- (void*)(&alpha), generate_tensor_nd_desc(in), inPtr,
- (void*)(&beta), generate_tensor_nd_desc(*out), outPtr
- ));
-
-}
-
// ================Random functions===========================================
/// Each element of out would be 1 with prob p and 0 with 1-p. 0<= p <= 1
// Get the random generator from 'ctx'
@@ -1175,16 +985,7 @@ void RowMax<float, lang::Cuda>(const Tensor& in, Tensor* out,
if (in.transpose()) {
Tensor t(in.shape(), in.device(), in.data_type());
- float* tPtr = static_cast<float*>(t.block()->mutable_data());
-
- float alpha = 1.0;
- float beta = 0.0;
-
- check_cudnn(cudnnTransformTensor(ctx->cudnn_handle,
- (void*)(&alpha), generate_tensor_nd_desc(in), inPtr,
- (void*)(&beta), generate_tensor_nd_desc(t), tPtr
- ));
-
+ Transform<float, lang::Cuda>(in, &t, ctx);
const float* tPtr_const = static_cast<const float*>(t.block()->data());
cuda::RowMax(nrow, ncol, tPtr_const, outPtr, ctx->stream);
} else {