Posted to commits@singa.apache.org by wa...@apache.org on 2018/07/01 13:10:30 UTC

[1/7] incubator-singa git commit: SINGA-362 Add functions to support einsum function

Repository: incubator-singa
Updated Branches:
  refs/heads/master e248e447b -> 7a19e63db


SINGA-362 Add functions to support einsum function


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/16c61112
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/16c61112
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/16c61112

Branch: refs/heads/master
Commit: 16c61112948099fe4279ca11651f88456cbc690d
Parents: c67c3b6
Author: sheyujian <sh...@me.com>
Authored: Mon May 21 15:44:15 2018 +0800
Committer: sheyujian <sh...@me.com>
Committed: Mon May 21 21:19:56 2018 +0800

----------------------------------------------------------------------
 include/singa/core/device.h |   5 ++
 include/singa/core/tensor.h |   8 ++
 python/singa/tensor.py      | 188 ++++++++++++++++++++++++++++++++++++++-
 src/core/device/device.cc   |  23 +++++
 src/core/tensor/tensor.cc   |  89 ++++++++++++++++++
 5 files changed, 312 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/16c61112/include/singa/core/device.h
----------------------------------------------------------------------
diff --git a/include/singa/core/device.h b/include/singa/core/device.h
index 1a960d8..24569f4 100644
--- a/include/singa/core/device.h
+++ b/include/singa/core/device.h
@@ -75,6 +75,11 @@ class Device {
   virtual void CopyDataToFrom(Block* dst, Block* src, size_t nBytes,
                       CopyDirection direction, int dst_offset, int src_offset);
 
+  virtual void RepeatDataToFrom(Block* dst, Block* src, size_t nBytes,
+                                CopyDirection direct, bool broadcast_flag, 
+                                int axis_shape, int shape_outer, int chunk, 
+                                vector<int> repeats, int dst_offset, int src_offset);
+
   void CopyDataFromHostPtr(Block* dst, const void* src, size_t nBytes,
                            size_t dst_offset = 0);
   /// Submit the operation to the device, which may execute it right now or

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/16c61112/include/singa/core/tensor.h
----------------------------------------------------------------------
diff --git a/include/singa/core/tensor.h b/include/singa/core/tensor.h
index 3cc28ff..c7958ff 100644
--- a/include/singa/core/tensor.h
+++ b/include/singa/core/tensor.h
@@ -165,6 +165,8 @@ class Tensor {
   /// Meta data would not be copied!
   void CopyData(const Tensor &other);
 
+  void RepeatData(vector<int> repeats, int axis, int total_repeats, const Tensor &other);
+
   /// Deserialize data, shape and transpose from protobuf object.
   void FromProto(const singa::TensorProto &proto);
 
@@ -175,6 +177,8 @@ class Tensor {
  /// device. If 'device' is nullptr, then clone it on the current device.
   Tensor Clone(std::shared_ptr<Device> device = nullptr) const;
 
+  Tensor Repeat(vector<int> repeats, int axis, std::shared_ptr<Device> device = nullptr) ;
+
   // Tensor operations
 
   /// Matrix transpose.  Valid only if shape.size() == 2.
@@ -287,6 +291,10 @@ Tensor Reshape(const Tensor &in, Shape &&s);
 void CopyDataToFrom(Tensor *dst, const Tensor &src, const size_t num,
                     const size_t dst_offset = 0, const size_t src_offset = 0);
 
+void RepeatDataToFrom(bool broadcast_flag,  vector<int> repeats, int axis, 
+                      Tensor *dst, const Tensor &in, const size_t num, 
+                      const size_t dst_offset = 0, const size_t src_offset = 0);
+
 // =============Element-wise operations====================================
 Tensor Abs(const Tensor &in);
 Tensor Exp(const Tensor &in);
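The three declarations above form the public Repeat API: Tensor::Repeat builds the repeated tensor, Tensor::RepeatData copies the data, and the free RepeatDataToFrom dispatches the device copy. A minimal sketch of driving this from Python through the SWIG binding added later in this thread (t.data is the wrapped CTensor; 9999 is the Noaxis sentinel used throughout the patch; illustration only):

    import numpy as np
    from singa import tensor

    ta = tensor.from_numpy(np.array([1.0, 2.0, 3.0], dtype=np.float32))
    ct = ta.data.Repeat([2], 9999)  # Noaxis: flatten and repeat each element
                                    # twice -> [1, 1, 2, 2, 3, 3]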

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/16c61112/python/singa/tensor.py
----------------------------------------------------------------------
diff --git a/python/singa/tensor.py b/python/singa/tensor.py
index 8f36775..ff7206c 100644
--- a/python/singa/tensor.py
+++ b/python/singa/tensor.py
@@ -71,6 +71,8 @@ float32 = core_pb2.kFloat32
 CTensor = singa.Tensor
 
 
+
+
 class Tensor(object):
     '''Python Tensor, which wraps a SWIG-converted Tensor from the C++ Tensor class.
 
@@ -1073,9 +1075,193 @@ def einsum(ops, *args):
     transpose_res = [sorted(list(outputops)).index(x) for x in list(outputops)]
     res_ = res_.transpose(transpose_res)
     res = from_numpy(res_)
-
     return res
 
+def sum2(t, axis=None, out=None):
+    '''Sum of tensor elements over a given axis.
+
+    Args:
+        t: Singa.tensor
+            The array-like tensor to be summed
+        axis: None or int or tuple of ints, optional
+            Axis or axes along which a sum is performed.
+            The default, axis=None, sums all of the elements of the input tensor.
+            If axis is negative, it counts from the last to the first axis.
+            If axis is a tuple of ints, a sum is performed on all of the axes specified
+            in the tuple instead of a single axis or all the axes as before.
+        out: Singa.tensor, optional
+            Alternative output tensor in which to place the result.
+            It must have the same shape as the expected output,
+            but the type of the output values will be cast if necessary.
+
+    Return: sum_along_axis: tensor
+        A tensor with the same shape as t, but with the specified axis removed.
+        If t is a 0-d tensor, or if axis is None, a scalar is returned.
+        If an output tensor is specified, a reference to out is returned.
+    '''
+
+    t_shape = t.shape
+    t_ndim = t.ndim()
+
+    if axis is None:
+        one = Tensor(t.shape, t.device, t.dtype)
+        one.set_value(1.0)
+        ret = tensordot(t, one, t_ndim)
+
+    if isinstance(axis,int):
+        if axis < 0:
+            axis += 2
+
+        axis_shape = t_shape[axis]
+        one = Tensor(axis_shape, t.device, t.dtype)
+        one.set_value(1.0)
+        ret = tensordot(t, one, axes=([axis],[0]))
+
+    if isinstance(axis,tuple):
+        l_axis = list(axis)
+        axis_shape = [t_shape[x] for x in axis]
+        one = Tensor(axis_shape, t.device, t.dtype)
+        one.set_value(1.0)
+        one_axis = [x for x in range(one.ndim())]
+        ret = tensordot(t, one, (l_axis,one_axis))
+
+    if out is not None:
+        if out.shape != ret.shape:
+            raise ValueError('dimensions do not match')
+        out[:] = ret
+        return out
+    else:
+        return ret
+
+def repeat(t, repeats, axis = None):
+    if isinstance(repeats, int):
+        if repeats < 0:
+            raise ValueError("'repeats' should not be negative: {}".format(repeats))
+        # broadcast = True
+        if axis == None:
+            axis = 9999
+        if axis < 0:
+            axis += 2
+        ret = singa.Repeat(t, list(repeats), axis)
+    elif isinstance(repeats, tuple) or isinstance(repeats, list):
+        for rep in repeats:
+            if rep < 0:
+                raise ValueError("'repeats' should be int or sequence: {}".format(repeats))
+        if axis == None:
+            axis = 9999
+        if axis < 0:
+            axis += 2
+        ret = singa.Repeat(t, list(repeats), axis)
+        t_shape = t.shape
+        t_shape[axis] = sum(repeats)
+        ret = ret.reshape(t_shape)
+    else:
+        raise ValueError('repeats should be int or sequence')
+    return ret
+
+def tensordot (A,B,axes=2):
+
+    """Returns the tensor dot product of two tensors along the specified axes.
+
+    This is equivalent to computing the dot product along the specified axes,
+    which are treated as one axis by reshaping.
+
+    Args:
+        A: Singa.Tensor
+        B: Singa.Tensor
+        axes:
+            - If it is an integer, then the last ''axes'' axes of ''A'' and
+              the first ''axes'' axes of ''B'' are used.
+            - If it is a pair of sequences of integers, then these two
+              sequences specify the lists of axes for ''A'' and ''B''. The
+              corresponding axes are paired for sum-product.
+
+    Return:
+        singa.tensor: The tensor product of ''A'' and ''B'' along the
+        axes specified by ''axes''.
+
+    Adapted from numpy.tensordot:
+    https://github.com/numpy/numpy/blob/v1.14.0/numpy/core/numeric.py#L1123-L1306
+    """
+    # When axes is an integer, axes_A and axes_B are the last ''axes'' axes of ''A''
+    # and the first ''axes'' axes of ''B''. For example, when axes is 1, this is the
+    # ordinary product: if A has shape (3,2,4) and B has shape (4,2,5), the result
+    # has shape (3,2,2,5); when axes is 2 and A, B have shapes (3,2,4) and (2,4,5),
+    # the result has shape (3,5).
+
+    if type(axes) == int:
+        axes_A = list(range(-axes, 0))
+        axes_B = list(range(0, axes))
+        axes_B = axes_B
+    else:
+        axes_A,axes_B =axes
+    # When axes is a pair of sequences of integers, e.g. A has shape (3,2,4),
+    # B has shape (4,2,5) and axes is ([1,2],[1,0]), the result has shape (3,5).
+    if isinstance(axes_A,list):
+        na = len(axes_A)
+        axes_A = list(axes_A)
+    else:
+        axes_A = [axes_A]
+        na = 1
+    if isinstance(axes_B,list):
+        nb = len(axes_B)
+        axes_B = list(axes_B)
+    else:
+        axes_B = [axes_B]
+        nb = 1
+
+    # a_shape and b_shape are the shape of tensor A and B, while nda and ndb are the dim of A and B
+    a_shape = A.shape
+    nda = A.ndim()
+    b_shape = B.shape
+    ndb = B.ndim()
+    equal = True
+    # check that the length of axes_A equals that of axes_B
+    if na != nb:
+        equal = False
+    else:
+    # to make the shape match
+        for k in range(na):
+            if a_shape[axes_A[k]] != b_shape[axes_B[k]]:
+                equal = False
+                break
+            if axes_A[k] < 0:
+                axes_A[k] += nda
+            if axes_B[k] < 0:
+                axes_B[k] += ndb
+    if not equal:
+        raise ValueError("shape-mismatch for sum")
+    # start the calculation according to the axes
+
+    notin = [k for k in range(nda) if k not in axes_A]
+    # nda is the number of dimensions of A; notin collects the axes of A that are not in axes_A
+    newaxes_a = notin + axes_A
+    N2 = 1
+    for axis in axes_A:
+        N2 *= a_shape[axis]
+    N1 = 1
+    for ax in notin:
+        N1 *=a_shape[ax]
+    # newshape_a is the 2D shape used for the multiplication. For example, if A has
+    # shape (3,2,4) and axes is ([1,2],[1,0]), then newshape_a is (3, 8): N1=3 from
+    # the kept axes and N2=2*4 from the contracted axes.
+    # olda is the part of the shape that appears in the result.
+    newshape_a = (N1,N2)
+    olda = [a_shape[axis] for axis in notin]
+    notin = [k for k in range(ndb) if k not in axes_B]
+    newaxes_b = axes_B + notin
+    N2 = 1
+    for axis in axes_B:
+        N2 *= b_shape[axis]
+    N1 = 1
+    for bx in notin:
+        N1 *= b_shape[bx]
+    newshape_b = (N2, N1)
+    oldb = [b_shape[axis] for axis in notin]
+    # do transpose and reshape to get the 2D matrix to do multiplication
+    at = A.transpose(newaxes_a).reshape(newshape_a)
+    bt = B.transpose(newaxes_b).reshape(newshape_b)
+    res = mult(at, bt)
+    #reshape the result
+    return res.reshape(olda + oldb)
 
 def div(lhs, rhs, ret=None):
     '''Element-wise division.
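For readers following the Python additions in this hunk, a minimal usage sketch of sum2 and tensordot checked against the equivalent numpy calls (mirroring the test cases added later in this thread, and assuming the axis handling fixes from the follow-up commits):

    import numpy as np
    from singa import tensor

    a = np.arange(12, dtype=np.float32).reshape(2, 3, 2)
    ta = tensor.from_numpy(a)

    s1 = tensor.sum2(ta, axis=1)           # matches np.sum(a, axis=1)
    d1 = tensor.tensordot(ta, ta, axes=1)  # matches np.tensordot(a, a, axes=1)
    d2 = tensor.tensordot(ta, ta, axes=([0, 1], [2, 1]))
    # d2 matches np.tensordot(a, a, axes=([0, 1], [2, 1]))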

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/16c61112/src/core/device/device.cc
----------------------------------------------------------------------
diff --git a/src/core/device/device.cc b/src/core/device/device.cc
index cda1b9f..d569015 100644
--- a/src/core/device/device.cc
+++ b/src/core/device/device.cc
@@ -64,6 +64,29 @@ void Device::CopyDataToFrom(Block* dst, Block* src, size_t nBytes,
       {src}, {dst});
 }
 
+void Device::RepeatDataToFrom(Block* dst, Block* src, size_t nBytes,
+                              CopyDirection direct, bool broadcast_flag, 
+                              int axis_shape, int shape_outer, int chunk, 
+                              vector<int> repeats, int dst_offset, int src_offset) {
+  const char *src_data = reinterpret_cast<const char*>(src->data()) + dst_offset;
+  char *dst_data = reinterpret_cast<char*>(dst->mutable_data()) + src_offset;
+
+  for (int i = 0; i < shape_outer; i++) {
+    for (int j = 0; j < axis_shape; j++) {
+      int temp = broadcast_flag ? repeats[0] : repeats[j];
+      for (int k = 0; k < temp; k++) {
+        this->Exec(
+            [this, dst_data, src_data, direct, chunk, repeats](Context* ctx) {
+              this->CopyToFrom(dst_data, src_data, chunk, direct, ctx);
+            },
+            {src}, {dst});
+        dst_data += chunk;
+      }
+      src_data += chunk;
+    }
+  }
+}
+
 void Device::CopyDataFromHostPtr(Block* dst, const void* src, size_t nBytes,
                                  size_t dst_offset) {
   auto direct = lang_ == kCpp ? kHostToHost : kHostToDevice;
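A pure-Python rendering of the loop in Device::RepeatDataToFrom above: each index j along the repeat axis contributes repeats[j] copies of one chunk (repeats[0] copies when broadcasting), where a chunk covers everything after the repeat axis. A sketch for illustration (chunk counts list elements here, while the C++ code counts bytes, and shape_outer is taken as the product of the dimensions before the axis, per the fix in a later commit of this thread):

    def repeat_copy(src, shape_outer, axis_shape, chunk, repeats, broadcast):
        dst = []
        pos = 0
        for _ in range(shape_outer):
            for j in range(axis_shape):
                n = repeats[0] if broadcast else repeats[j]
                dst.extend(src[pos:pos + chunk] * n)  # emit the chunk n times
                pos += chunk
        return dst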

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/16c61112/src/core/tensor/tensor.cc
----------------------------------------------------------------------
diff --git a/src/core/tensor/tensor.cc b/src/core/tensor/tensor.cc
index de0d7d2..22541df 100644
--- a/src/core/tensor/tensor.cc
+++ b/src/core/tensor/tensor.cc
@@ -22,6 +22,8 @@
 #include "./tensor_math_opencl.h"
 #include <utility>
 
+#define Noaxis 9999
+
 namespace singa {
 
 Tensor::~Tensor() {
@@ -214,6 +216,20 @@ void Tensor::CopyData(const Tensor &src) {
   }
 }
 
+void Tensor::RepeatData(vector<int> repeats, int axis, int total_repeats, const Tensor &src) {
+  if(axis == Noaxis) {
+    CHECK_EQ(Size(), src.Size()*total_repeats);
+  } else {
+    CHECK_EQ(Size(), src.Size()*total_repeats/src.shape()[axis]);
+  }
+
+  CHECK(block_ != nullptr);
+  // Do repeat only if the src's block is already initialized.
+  if (src.block_ != nullptr) {
+    singa::RepeatDataToFrom(false, repeats, axis, this, src, Size(), 0, 0);
+  }
+}
+
 void Tensor::FromProto(const singa::TensorProto &proto) {
   if (block_ != nullptr && block_->DecRefCount() == 0)
     device_->FreeBlock(block_);
@@ -329,6 +345,29 @@ Tensor Tensor::Clone(std::shared_ptr<Device> device) const {
   return t;
 }
 
+Tensor Tensor::Repeat(vector<int> repeats, int axis, std::shared_ptr<Device> device) {
+  if (device == nullptr) device = device_;
+  Tensor t;
+  int total_repeats = 0;
+  if (axis == Noaxis) {
+    total_repeats = repeats[0];
+    t.shape_.push_back(Product(shape_)*total_repeats);
+  } else {
+    for (size_t i = 0; i < shape_[axis]; i++) {
+      if(repeats[i] < 0) {
+        LOG(FATAL) << "the repeats number is less than zero";
+      }
+      total_repeats += repeats[i];
+      t.shape_.push_back(Product(shape_)/shape_[axis]*total_repeats);
+    }
+  }
+  t.device_ = device_;
+  t.data_type_ = data_type_;
+  t.strides_.push_back(1);
+  t.RepeatData(repeats, axis, total_repeats, *this);
+  return t;
+}
+
 //yisen todo
 Tensor Tensor::T() const {
   // this function only works for 2d tensors
@@ -482,6 +521,56 @@ void CopyDataToFrom(Tensor *dst, const Tensor &src, const size_t num,
     src_dev->CopyDataToFrom(to, from, nBytes, direct, (int)d_offset, (int)s_offset);
   }
 }
+
+void RepeatDataToFrom(bool broadcast_flag, vector<int> repeats, int axis, 
+                      Tensor *dst, const Tensor &src, const size_t num, 
+                      const size_t dst_offset, const size_t src_offset) {
+  if (repeats.size() == 1) {
+    broadcast_flag = true;
+  }
+  if (repeats.size() > 1) {
+    if (axis == Noaxis) {
+      LOG(FATAL) << "When the repeats parameter is a sequence, axis cannot be None";
+    }
+  }
+  for (size_t i = 0; i < repeats.size(); i++){
+    CHECK_GE(repeats[i], 0);
+  }
+  auto width = SizeOf(src.data_type());
+  CHECK_EQ(width, SizeOf(dst->data_type()));
+  size_t nBytes = num * width;
+  auto d_offset = dst_offset * width;
+  auto s_offset = src_offset * width;
+  int chunk = width;
+  int axis_shape = 1;
+  if (axis == Noaxis){
+    axis_shape = 1;
+  } else {
+    axis_shape = src.shape()[axis];
+    for(size_t i = axis + 1; i < src.nDim(); i++) {
+      chunk *= src.shape()[i];
+    }
+  }
+  int shape_outer = Product(src.shape());
+  std::shared_ptr<Device> src_dev = src.device(), dst_dev = dst->device();
+  Block *from = src.block(), *to = dst->block();
+  if (dst_dev->lang() != src_dev->lang()) {
+    // let the non-cpp device conduct the copy op
+    if (dst_dev->lang() == kCpp) {
+      src_dev->RepeatDataToFrom(to, from, nBytes, kDeviceToHost, broadcast_flag, axis_shape, 
+                                shape_outer, chunk, repeats, (int)d_offset, (int)s_offset);
+    } else if (src_dev->lang() == kCpp) {
+      dst_dev->RepeatDataToFrom(to, from, nBytes, kHostToDevice, broadcast_flag, axis_shape, 
+                                shape_outer, chunk, repeats, (int)d_offset, (int)s_offset);
+    } else {
+      LOG(FATAL) << "Repeat mem copy between Cuda and OpenCL devices is not supported";
+    }
+  } else {
+    auto direct = src_dev->lang() == kCpp ? kHostToHost : kDeviceToDevice;
+    src_dev->RepeatDataToFrom(to, from, nBytes, direct, broadcast_flag, axis_shape, 
+                              shape_outer, chunk, repeats, (int)d_offset, (int)s_offset);
+  }
+}
 //============================================================================
 /// typedef DType according to type value.
 /// DType would be used in the code block __VA_ARGS__.
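To see how the free RepeatDataToFrom above decomposes the copy, a worked example with assumed values: repeating along axis 1 of a float32 tensor of shape (2, 3, 4) with repeats = [2, 1, 3]:

    width = 4                  # sizeof(float)
    chunk = 4 * width          # bytes per sub-slice after the axis
    axis_shape = 3             # src.shape()[axis]
    shape_outer = 2            # product of dims before the axis (per the fix
                               # in a later commit of this thread)
    total_repeats = 2 + 1 + 3  # the destination axis grows from 3 to 6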


[6/7] incubator-singa git commit: SINGA-362 Add functions to support einsum function: delete the duplicated reshape and transform, which are the same as yisen's

Posted by wa...@apache.org.
SINGA-362 Add functions to support einsum function
delete the duplicated reshape and transform, which are the same as yisen's


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/10f3aa1d
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/10f3aa1d
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/10f3aa1d

Branch: refs/heads/master
Commit: 10f3aa1d7e41c9f89ee3a7ef90644b492fbff543
Parents: 4940fef
Author: sheyujian <sh...@me.com>
Authored: Sun Jul 1 12:11:15 2018 +0800
Committer: sheyujian <sh...@me.com>
Committed: Sun Jul 1 12:49:00 2018 +0800

----------------------------------------------------------------------
 include/singa/core/tensor.h       |  10 +--
 src/core/tensor/tensor.cc         | 131 ++++++++++++++-------------------
 src/core/tensor/tensor_math.h     |   1 +
 src/core/tensor/tensor_math_cpp.h |  14 ----
 4 files changed, 57 insertions(+), 99 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/10f3aa1d/include/singa/core/tensor.h
----------------------------------------------------------------------
diff --git a/include/singa/core/tensor.h b/include/singa/core/tensor.h
index d9bb069..dca19b0 100644
--- a/include/singa/core/tensor.h
+++ b/include/singa/core/tensor.h
@@ -133,8 +133,8 @@ class Tensor {
   size_t MemSize() const { return block_->size(); }
 
   /// Reset the tensor shape, it may reallocate block, if MemSize() changes.
-  // void Reshape(const Shape &shape);
-  // void Reshape(Shape &&shape);
+  Tensor Reshape(const Shape &shape);
+  Tensor Reshape(Shape &&shape);
 
   /// Reset the shape, device, and data type as given tensor.
   /// If block size changes, then reallocate a new block.
@@ -191,10 +191,6 @@ class Tensor {
   /// Change the axes
   Tensor Transpose(const vector<size_t> &axes) const;
 
-  Tensor Reshape(const Shape &shape);
-
-  Tensor Reshape(Shape &&shape);
-
   /// Copy the meta info with data block shared.
   Tensor &operator=(const Tensor &in);
 
@@ -309,7 +305,6 @@ Tensor Sign(const Tensor &in);
 Tensor Sqrt(const Tensor &in);
 Tensor Square(const Tensor &in);
 Tensor Tanh(const Tensor &in);
-Tensor Transform(const Tensor &in);
 
 void Abs(const Tensor &in, Tensor *out);
 void Exp(const Tensor &in, Tensor *out);
@@ -320,7 +315,6 @@ void Sign(const Tensor &in, Tensor *out);
 void Sqrt(const Tensor &in, Tensor *out);
 void Square(const Tensor &in, Tensor *out);
 void Tanh(const Tensor &in, Tensor *out);
-void Transform(const Tensor &in, Tensor *out);
 
 /// Element-wise opeartion, out[i]=in[i]^x
 template <typename SType>

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/10f3aa1d/src/core/tensor/tensor.cc
----------------------------------------------------------------------
diff --git a/src/core/tensor/tensor.cc b/src/core/tensor/tensor.cc
index 3bf0a77..39ab12d 100644
--- a/src/core/tensor/tensor.cc
+++ b/src/core/tensor/tensor.cc
@@ -124,61 +124,41 @@ void Tensor::ResetLike(const Tensor &in) {
   strides_ = in.strides_;
 }
 
-Tensor Tensor::Reshape(const Shape &shape) {
-  if (strides_.size() == 0)
-    strides_.push_back(1);
-
-  if (Product(shape_) != Product(shape)) {
-    if (block_ != nullptr && block_->DecRefCount() == 0)
-      device_->FreeBlock(block_);
-    block_ = device_->NewBlock((int)(Product(shape) * SizeOf(data_type_)));
-    shape_ = shape;
-    generate_strides();
-    return *this;
-
-  } else if (transpose()) {
-    Tensor t(shape_, device_, data_type_);
-    t.block_ = t.device()->NewBlock((int)(Product(shape) * SizeOf(data_type_)));
-    singa::Transform(*this, &t);
-    t.shape_ = shape;
-    return t;
- }
-
-  shape_ = shape;
-  generate_strides();
-  Tensor t(shape, device_, data_type_);
-  t.block_ = block_;
-  t.block_->IncRefCount();
-  return t;
-}
-
-Tensor Tensor::Reshape(Shape &&shape) {
-  if (strides_.size() == 0)
-    strides_.push_back(1);
-
-  if (Product(shape_) != Product(shape)) {
-    if (block_ != nullptr && block_->DecRefCount() == 0)
-      device_->FreeBlock(block_);
-    block_ = device_->NewBlock((int)(Product(shape) * SizeOf(data_type_)));
-    shape_ = std::move(shape);
-    generate_strides();
-    return *this;
-
-  } else if (transpose()) {
-    Tensor t(shape_, device_, data_type_);
-    t.block_ = t.device()->NewBlock((int)(Product(shape) * SizeOf(data_type_)));
-    singa::Transform(*this, &t);
-    t.shape_ = shape;
-    return t;
- }
-
-  shape_ = shape;
-  generate_strides();
-  Tensor t(shape, device_, data_type_);
-  t.block_ = block_;
-  t.block_->IncRefCount();
-  return t;
-}
+// If the tensor is not transposed yet (i.e. the strides are the defaults),
+// we simply change the shape and generate new default strides.
+// If the tensor is already transposed (i.e. the strides are not the defaults),
+// it should be copied to a new tensor with newly generated default strides.
+// TODO(wangwei) raise an error if the shapes do not match
+
+// void Tensor::Reshape(const Shape &shape) {
+//   if (strides_.size() == 0)
+//     strides_.push_back(1);
+
+//   if (Product(shape_) != Product(shape)) {
+//     if (block_ != nullptr && block_->DecRefCount() == 0)
+//       device_->FreeBlock(block_);
+//     block_ = device_->NewBlock((int)(Product(shape) * SizeOf(data_type_)));
+//   } else if (transpose()) {
+//     LOG(FATAL) << "Reshape Error: Reshape called on transposed tensor. Not implemented yet." ;
+//   }
+//   shape_ = shape;
+//   generate_strides();
+// }
+
+// void Tensor::Reshape(Shape &&shape) {
+//   if (strides_.size() == 0)
+//     strides_.push_back(1);
+
+//   if (Product(shape_) != Product(shape)) {
+//     if (block_ != nullptr && block_->DecRefCount() == 0)
+//       device_->FreeBlock(block_);
+//     block_ = device_->NewBlock((int)(Product(shape) * SizeOf(data_type_)));
+//   } else if (transpose()) {
+//     LOG(FATAL) << "Reshape Error: Reshape called on transposed tensor. Not implemented yet." ;
+//   }
+//   shape_ = std::move(shape);
+//   generate_strides();
+// }
 
 void Tensor::AsType(const DataType type) {
   if (data_type_ != type) {
@@ -356,15 +336,6 @@ void Tensor::ToProto(singa::TensorProto *proto) const {
   }
 }
 
-Tensor Tensor::Clone(std::shared_ptr<Device> device) const {
-  if (device == nullptr) device = device_;
-  Tensor t(shape_, device_, data_type_);
-  //t.transpose_ = transpose_;
-  t.strides_ = strides_;
-  t.CopyData(*this);
-  return t;
-}
-
 Tensor Tensor::Repeat(vector<size_t> repeats, int axis, std::shared_ptr<Device> device) {
   if (device == nullptr) device = device_;
   vector<size_t> tshape;
@@ -407,7 +378,15 @@ Tensor Tensor::Repeat(vector<size_t> repeats, int axis, std::shared_ptr<Device>
   return t;
 }
 
-//yisen todo
+Tensor Tensor::Clone(std::shared_ptr<Device> device) const {
+  if (device == nullptr) device = device_;
+  Tensor t(shape_, device_, data_type_);
+  //t.transpose_ = transpose_;
+  t.strides_ = strides_;
+  t.CopyData(*this);
+  return t;
+}
+
 Tensor Tensor::T() const {
   // this function only works for 2d tensors
   CHECK_EQ(shape_.size(), 2u);
@@ -494,18 +473,17 @@ Tensor &Tensor::operator=(Tensor &&in) {
   return *this;
 }
 
-//yisen todo
-Tensor Reshape(const Tensor &in, const Shape &s) {
-  Tensor out(in);
-  out = out.Reshape(s);
-  return out;
-}
+// Tensor Reshape(const Tensor &in, const Shape &s) {
+//   // Tensor out(in);
+//   // out.Reshape(s);
+//   return out;
+// }
 
-Tensor Reshape(const Tensor &in, Shape &&s) {
-  Tensor out(in);
-  out = out.Reshape(std::move(s));
-  return out;
-}
+// Tensor Reshape(const Tensor &in, Shape &&s) {
+//   // Tensor out(in);
+//   // out.Reshape(std::move(s));
+//   return out;
+// }
 
 #define GenUnaryTensorArgMemberFn(op, fn) \
   Tensor &Tensor::op(const Tensor &in) {  \
@@ -753,7 +731,6 @@ GenUnaryTensorFn(Sign);
 GenUnaryTensorFn(Sqrt);
 GenUnaryTensorFn(Square);
 GenUnaryTensorFn(Tanh);
-GenUnaryTensorFn(Transform);
 
 #define EltwiseBinaryTensorFn(fn, lhs, rhs, ret)                            \
   do {                                                                      \

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/10f3aa1d/src/core/tensor/tensor_math.h
----------------------------------------------------------------------
diff --git a/src/core/tensor/tensor_math.h b/src/core/tensor/tensor_math.h
index 388c010..f438fc6 100644
--- a/src/core/tensor/tensor_math.h
+++ b/src/core/tensor/tensor_math.h
@@ -258,6 +258,7 @@ template <typename DType, typename Lang>
 void Transform(const Tensor &in, Tensor *out, Context *ctx) {
   LOG(FATAL) << "Transform Not Implemented";
 }
+
 // **************************************
 // Random functions
 // **************************************

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/10f3aa1d/src/core/tensor/tensor_math_cpp.h
----------------------------------------------------------------------
diff --git a/src/core/tensor/tensor_math_cpp.h b/src/core/tensor/tensor_math_cpp.h
index e302b04..bfdd026 100644
--- a/src/core/tensor/tensor_math_cpp.h
+++ b/src/core/tensor/tensor_math_cpp.h
@@ -427,20 +427,6 @@ void Tanh<float, lang::Cpp>(const Tensor& in, Tensor* out,
 }
 
 template <>
-void Transform<float, lang::Cpp>(const Tensor& in, Tensor* out,
-                            Context *ctx) {
-  float *outPtr = static_cast<float *>(out->block()->mutable_data());
-  const float *inPtr = static_cast<const float *>(in.block()->data());
-  vector<int> traversal_info = generate_traversal_info(in);
-  vector<int> shape_multipliers = generate_shape_multipliers(in);
-
-  for (size_t i = 0; i < in.Size(); i++) {
-    outPtr[i] = inPtr[traversal_info[in.shape().size()]];
-    traverse_next(in, shape_multipliers, traversal_info, i + 1);
-  }
-}
-
-template <>
 void Bernoulli<float, lang::Cpp>(const float p, Tensor* out,
                                  Context *ctx) {
   std::bernoulli_distribution distribution(p);
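The comment block kept with the disabled Reshape code above states the underlying rule: a reshape is a cheap metadata change only while the strides are the defaults; once a tensor has been transposed, the data must first be copied into a contiguous block with freshly generated strides. A numpy illustration of the same rule (comparison only; numpy's reshape copies implicitly, whereas the Reshape here required default strides):

    import numpy as np

    a = np.arange(6, dtype=np.float32).reshape(2, 3)
    t = a.T                      # transposed view: strides are permuted
    c = np.ascontiguousarray(t)  # copy with freshly generated default strides
    r = c.reshape(2, 3)          # now reshape is a pure metadata change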


[4/7] incubator-singa git commit: SINGA-362 Add functions to support einsum function: 1. fix one problem in device.cc 2. use add(t, 0) to reset the strides so reshape can be used after transpose

Posted by wa...@apache.org.
SINGA-362 Add functions to support einsum function
1. fix one problem in device.cc
2. use add(t, 0) to reset the strides so reshape can be used after transpose


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/8d9eb297
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/8d9eb297
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/8d9eb297

Branch: refs/heads/master
Commit: 8d9eb297dd7c2263b5face4bfcaf80a1a6680be8
Parents: 5e8f6a4
Author: sheyujian <sh...@me.com>
Authored: Fri May 25 10:36:36 2018 +0800
Committer: sheyujian <sh...@me.com>
Committed: Fri May 25 14:51:59 2018 +0800

----------------------------------------------------------------------
 python/singa/tensor.py     | 84 ++++++++++++-----------------------------
 src/core/device/device.cc  |  4 +-
 test/python/test_tensor.py |  2 -
 3 files changed, 27 insertions(+), 63 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/8d9eb297/python/singa/tensor.py
----------------------------------------------------------------------
diff --git a/python/singa/tensor.py b/python/singa/tensor.py
index 5f38ef2..21a362a 100644
--- a/python/singa/tensor.py
+++ b/python/singa/tensor.py
@@ -271,24 +271,18 @@ class Tensor(object):
         return _call_singa_func(self.data.Clone)
 
     def repeat(self, repeats, axis):
-        # ret = CTensor()
-        # if isinstance(repeats, int):
-        #     if axis == 9999:
-        #         Repeats = [repeats,]
-        #         ret = self.data.Repeat(Repeats, axis)
-        #     else:
-        #         Repeats = [repeats,]
-        #         ret = self.data.Repeat(Repeats, axis)
-            
-
-        # elif isinstance(repeats, tuple) or isinstance(repeats, list):
-        #     if axis == 9999:
-        #         ret = self.data.Repeat(list(repeats), axis)
-
-        #     elif axis >= 0:
-        #         ret = self.data.Repeat(list(repeats), axis)
-        # return ret
+        '''Repeat elements of a tensor.
+
+        Args:
+            repeats (int or sequence): the number of times each element is repeated
+            axis (int): the axis along which to repeat.
+                       If it is None, the repeated tensor will be flattened. If it
+                       is not None, repeats may be a sequence, but its length must
+                       match the size of that axis.
+
+        Return:
+            the repeated tensor
+        '''
         t_ndim = self.ndim()
         if isinstance(repeats, int) or isinstance(repeats, long):
             if repeats < 0:
@@ -1144,26 +1138,6 @@ def einsum(ops, *args):
     reshape_A = list(A.shape) + broadcast_a
     reshape_B = list(B.shape) + broadcast_b
 
-    # A_ = to_numpy(A)
-    # B_ = to_numpy(B)
-
-    # mult_A = np.repeat(A_, np.product(broadcast_a)).reshape(
-    #     reshape_A).transpose(transpose_A)
-    # mult_B = np.repeat(B_, np.product(broadcast_b)).reshape(
-    #     reshape_B).transpose(transpose_B)
-
-    # if mult_A.shape != mult_B.shape:
-    #     raise ValueError("Error: matrix dimension mismatch")
-    # res_ = np.multiply(mult_A, mult_B)
-
-    # reduce the axis and find the final transpose for the output
-    # sum_R = sorted(sums, reverse=True)
-    # for i in sum_R:
-    #     res_ = res_.sum(axis=i)
-    # transpose_res = [sorted(list(outputops)).index(x) for x in list(outputops)]
-    # res_ = res_.transpose(transpose_res)
-    # res = from_numpy(res_)
-    # return res
     if len(broadcast_a) == 0:
         broadcast_a = [1]
     if len(broadcast_b) == 0:
@@ -1352,24 +1326,19 @@ def tensordot (A,B,axes=2):
     newshape_b = (N2, N1)
     oldb = [b_shape[axis] for axis in notin]
     # do transpose and reshape to get the 2D matrix to do multiplication
-    A_ = to_numpy(A)
-    B_ = to_numpy(B)
-    at_ = np.transpose(A_,newaxes_a).reshape(newshape_a)
-    bt_ = np.transpose(B_,newaxes_b).reshape(newshape_b)
-    # print(at_)
-    # print(bt_)
-    at = from_numpy(at_)
-    bt = from_numpy(bt_)
-
-    # A = transpose(A, newaxes_a)
-    # B = transpose(B, newaxes_b)
-    # A = 
-    # at = Reshape(A, newshape_a)
-    # bt = Reshape(B, newshape_b)
-    # _at = to_numpy(at)
-    # _bt = to_numpy(bt)
-    # print(_at)
-    # print(_bt)
+    # A_ = to_numpy(A)
+    # B_ = to_numpy(B)
+    # at_ = np.transpose(A_,newaxes_a).reshape(newshape_a)
+    # bt_ = np.transpose(B_,newaxes_b).reshape(newshape_b)
+    # at = from_numpy(at_)
+    # bt = from_numpy(bt_)
+
+    A = transpose(A, newaxes_a)
+    B = transpose(B, newaxes_b)
+    A = add(A, 0)
+    B = add(B, 0)
+    at = Reshape(A, newshape_a)
+    bt = Reshape(B, newshape_b)
 
     res = mult(at,bt)
     if len(olda + oldb) == 0:
@@ -1378,10 +1347,7 @@ def tensordot (A,B,axes=2):
         res.reshape(tuple(olda + oldb))
     else:
         res.reshape(tuple(olda + oldb))
-    # print(res.shape)
-    # res_ = np.dot(at_, bt_)
-    # res = from_numpy(res_.reshape(olda + oldb))
-    #reshape the result
+
     return res
 
 def div(lhs, rhs, ret=None):
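The add(A, 0) calls in the tensordot hunk above are the stride reset described in the commit message: the element-wise pass writes a new contiguous block, so the following Reshape sees default strides. A numpy analogue for illustration (numpy's reshape would copy implicitly; singa's Reshape at this point required default strides, hence the explicit add):

    import numpy as np

    A = np.arange(24, dtype=np.float32).reshape(2, 3, 4)
    At = np.transpose(A, (1, 2, 0))  # view with permuted strides
    At = At + 0.0                    # like add(t, 0): materialize a contiguous copy
    at = At.reshape(12, 2)           # reshape on the contiguous copy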

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/8d9eb297/src/core/device/device.cc
----------------------------------------------------------------------
diff --git a/src/core/device/device.cc b/src/core/device/device.cc
index 135ae3a..0c9c6a2 100644
--- a/src/core/device/device.cc
+++ b/src/core/device/device.cc
@@ -68,8 +68,8 @@ void Device::RepeatDataToFrom(Block* dst, Block* src, size_t nBytes,
                               CopyDirection direct, bool broadcast_flag, 
                               int axis_shape, int shape_outer, int chunk, 
                               vector<size_t> repeats, int dst_offset, int src_offset) {
-  const char *src_data = reinterpret_cast<const char*>(src->data()) + dst_offset;
-  char *dst_data = reinterpret_cast<char*>(dst->mutable_data()) + src_offset;
+  const char *src_data = reinterpret_cast<const char*>(src->data()) + src_offset;
+  char *dst_data = reinterpret_cast<char*>(dst->mutable_data()) + dst_offset;
 
   for (int i = 0; i < shape_outer; i++) {
     for (int j = 0; j < axis_shape; j++) {

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/8d9eb297/test/python/test_tensor.py
----------------------------------------------------------------------
diff --git a/test/python/test_tensor.py b/test/python/test_tensor.py
index 7d83677..098994b 100644
--- a/test/python/test_tensor.py
+++ b/test/python/test_tensor.py
@@ -208,8 +208,6 @@ class TestTensorMethods(unittest.TestCase):
         ta_repeat2 = tensor.repeat(ta, 4, axis = 1)
         a_repeat2 = np.repeat(a, 4, axis = 1)
         Ta_repeat2 = tensor.to_numpy(ta_repeat2)
-        # print(Ta_repeat2)
-        # print(a_repeat2)
 
         self.assertAlmostEqual(np.sum(Ta_repeat1 - a_repeat1), 0., places=3)
         self.assertAlmostEqual(np.sum(Ta_repeat2 - a_repeat2), 0., places=3)
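The docstring added to Tensor.repeat above also allows a per-index sequence of repeats whose length must match the size of the chosen axis. A minimal sketch of that case, assuming it mirrors numpy's per-index behaviour:

    import numpy as np
    from singa import tensor

    a = np.arange(6, dtype=np.float32).reshape(2, 3)
    ta = tensor.from_numpy(a)
    # the sequence length must equal a.shape[1] == 3
    r = tensor.repeat(ta, [1, 0, 2], axis=1)  # like np.repeat(a, [1, 0, 2], axis=1)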


[2/7] incubator-singa git commit: SINGA-362 Add functions to support einsum function: 1. fix the bug and add some test cases 2. still need some fixes and to use the transpose function developed by yisen

Posted by wa...@apache.org.
SINGA-362 Add functions to support einsum function
1. fix the bug and add some test cases
2. still need some fixes and to use the transpose function developed by yisen


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/7d25ed93
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/7d25ed93
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/7d25ed93

Branch: refs/heads/master
Commit: 7d25ed93b4a05ef24ec73b8277945bd24db716d2
Parents: 16c6111
Author: sheyujian <sh...@me.com>
Authored: Tue May 22 10:04:20 2018 +0800
Committer: sheyujian <sh...@me.com>
Committed: Thu May 24 10:43:35 2018 +0800

----------------------------------------------------------------------
 include/singa/core/device.h |   2 +-
 include/singa/core/tensor.h |   6 +--
 python/singa/tensor.py      | 107 ++++++++++++++++++++++++++++++++-------
 src/api/core_tensor.i       |   7 +++
 src/core/device/device.cc   |   2 +-
 src/core/tensor/tensor.cc   |  55 ++++++++++++++------
 test/python/test_tensor.py  |  61 ++++++++++++++++++++--
 test/singa/test_tensor.cc   |  31 ++++++++++++
 8 files changed, 228 insertions(+), 43 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/7d25ed93/include/singa/core/device.h
----------------------------------------------------------------------
diff --git a/include/singa/core/device.h b/include/singa/core/device.h
index 24569f4..d6b8bf3 100644
--- a/include/singa/core/device.h
+++ b/include/singa/core/device.h
@@ -78,7 +78,7 @@ class Device {
   virtual void RepeatDataToFrom(Block* dst, Block* src, size_t nBytes,
                                 CopyDirection direct, bool broadcast_flag, 
                                 int axis_shape, int shape_outer, int chunk, 
-                                vector<int> repeats, int dst_offset, int src_offset);
+                                vector<size_t> repeats, int dst_offset, int src_offset);
 
   void CopyDataFromHostPtr(Block* dst, const void* src, size_t nBytes,
                            size_t dst_offset = 0);

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/7d25ed93/include/singa/core/tensor.h
----------------------------------------------------------------------
diff --git a/include/singa/core/tensor.h b/include/singa/core/tensor.h
index c7958ff..3cfafc5 100644
--- a/include/singa/core/tensor.h
+++ b/include/singa/core/tensor.h
@@ -165,7 +165,7 @@ class Tensor {
   /// Meta data would not be copied!
   void CopyData(const Tensor &other);
 
-  void RepeatData(vector<int> repeats, int axis, int total_repeats, const Tensor &other);
+  void RepeatData(vector<size_t> repeats, int axis, int total_repeats, const Tensor &other);
 
   /// Deserialize data, shape and transpose from protobuf object.
   void FromProto(const singa::TensorProto &proto);
@@ -177,7 +177,7 @@ class Tensor {
   /// device. If 'device' is nullptr, then clone it on the current device.
   Tensor Clone(std::shared_ptr<Device> device = nullptr) const;
 
-  Tensor Repeat(vector<int> repeats, int axis, std::shared_ptr<Device> device = nullptr) ;
+  Tensor Repeat(vector<size_t> repeats, int axis, std::shared_ptr<Device> device = nullptr) ;
 
   // Tensor operations
 
@@ -291,7 +291,7 @@ Tensor Reshape(const Tensor &in, Shape &&s);
 void CopyDataToFrom(Tensor *dst, const Tensor &src, const size_t num,
                     const size_t dst_offset = 0, const size_t src_offset = 0);
 
-void RepeatDataToFrom(bool broadcast_flag,  vector<int> repeats, int axis, 
+void RepeatDataToFrom(bool broadcast_flag, vector<size_t> repeats, int axis, 
                       Tensor *dst, const Tensor &in, const size_t num, 
                       const size_t dst_offset = 0, const size_t src_offset = 0);
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/7d25ed93/python/singa/tensor.py
----------------------------------------------------------------------
diff --git a/python/singa/tensor.py b/python/singa/tensor.py
index ff7206c..d559ecb 100644
--- a/python/singa/tensor.py
+++ b/python/singa/tensor.py
@@ -136,6 +136,10 @@ class Tensor(object):
         '''
         return self.data.transpose()
 
+    # def transpose(self):
+
+    #     return self.data
+
     def size(self):  # TODO(wangwei) compute size
         '''
         Returns:
@@ -255,6 +259,28 @@ class Tensor(object):
         '''
         return _call_singa_func(self.data.Clone)
 
+    def repeat_(self, repeats, axis):
+        ret = CTensor()
+        if isinstance(repeats, int):
+            if axis == 9999:
+                Repeats = [repeats,]
+                ret = self.data.Repeat(Repeats, axis)
+            else:
+                Repeats = [repeats,]
+                ret = self.data.Repeat(Repeats, axis)
+            return ret
+
+        elif isinstance(repeats, tuple) or isinstance(repeats, list):
+            if axis == 9999:
+                ret = self.data.Repeat(list(repeats), axis)
+
+            elif axis >= 0:
+                ret = self.data.Repeat(list(repeats), axis)
+            return ret
+        
+
+        
+
     def T(self):
         ''' shallow copy, negate the transpose field.
 
@@ -1104,23 +1130,25 @@ def sum2(t, axis=None, out=None):
     t_ndim = t.ndim()
 
     if axis is None:
-        one = Tensor(t.shape, t.device, t.dtype)
+        one = Tensor(t.shape, t.device)
         one.set_value(1.0)
         ret = tensordot(t, one, t_ndim)
 
     if isinstance(axis,int):
         if axis < 0:
-            axis += 2
+            axis += t_ndim
 
         axis_shape = t_shape[axis]
-        one = Tensor(axis_shape, t.device, t.dtype)
+        axis_shape = int(axis_shape)
+        one = Tensor(shape = (axis_shape, ), device = t.device)
         one.set_value(1.0)
         ret = tensordot(t, one, axes=([axis],[0]))
 
     if isinstance(axis,tuple):
         l_axis = list(axis)
         axis_shape = [t_shape[x] for x in axis]
-        one = Tensor(axis_shape, t.device, t.dtype)
+        axisshape = tuple(axis_shape)
+        one = Tensor(axisshape, t.device)
         one.set_value(1.0)
         one_axis = [x for x in range(one.ndim())]
         ret = tensordot(t, one, (l_axis,one_axis))
@@ -1133,31 +1161,59 @@ def sum2(t, axis=None, out=None):
     else:
         return ret
 
-def repeat(t, repeats, axis = None):
+def repeat (t, repeats, axis = None):
+    t_ndim = t.ndim()
     if isinstance(repeats, int):
         if repeats < 0:
             raise ValueError("'repeats' should not be negative: {}".format(repeats))
+        if axis != None and axis < 0:
+            axis += t_ndim
         # broadcast = True
         if axis == None:
             axis = 9999
-        if axis < 0:
-            axis += 2
-        ret = singa.Repeat(t, list(repeats), axis)
+            ret = Tensor()
+            ret.shape = (product(t.shape)*repeats,)
+            # Repeats = [repeats,]
+            ret.data = t.repeat_(repeats, axis)
+            # ret.data = t.data.Repeat(Repeats, axis)
+        elif axis >= 0:
+            ret = Tensor()
+            t_shape = list(t.shape)
+            t_shape[axis] = t.shape[axis]*repeats
+            print(t_shape)
+            ret.shape = tuple(t_shape)
+            print(ret.shape)
+            # Repeats = [repeats,]
+            ret.data = t.repeat_(repeats, axis)
+            # ret.data = t.data.Repeat(Repeats, axis)
+            print(ret.shape)
+
     elif isinstance(repeats, tuple) or isinstance(repeats, list):
         for rep in repeats:
             if rep < 0:
                 raise ValueError("'repeats' should be int or sequence: {}".format(repeats))
+
+        if axis != None and axis < 0:
+            axis += t_ndim
         if axis == None:
             axis = 9999
-        if axis < 0:
-            axis += 2
-        ret = singa.Repeat(t, list(repeats), axis)
-        t_shape = t.shape
-        t_shape[axis] = sum(repeats)
-        ret = ret.reshape(t_shape)
+            ret = Tensor()
+            ret.shape = (sum(repeats), )
+            t_shape = list(t.shape)
+            ret.data = t.repeat_(repeats, axis)
+            #ret = t.data.Repeat(list(repeats), axis)
+            
+        elif axis >= 0:
+            ret = Tensor()
+            t_shape = list(t.shape)
+            t_shape[axis] = sum(repeats)
+            ret.shape = tuple(t_shape)
+            ret.data = t.repeat_(repeats, axis)
+            #ret = t.data.Repeat(list(repeats), axis)
     else:
         raise ValueError('repeats should be int or sequence')
     return ret
+        
 
 def tensordot (A,B,axes=2):
 
@@ -1188,7 +1244,7 @@ def tensordot (A,B,axes=2):
     # if A has shape (3,2,4) and B has shape (4,2,5), the result has shape (3,2,2,5)
     # when axes is 2 and A, B have shapes (3,2,4) and (2,4,5), the result has shape (3,5)
 
-    if type(axes) == int:
+    if type(axes) == int or type(axes) == long:
         axes_A = list(range(-axes, 0))
         axes_B = list(range(0, axes))
         axes_B = axes_B
@@ -1257,11 +1313,24 @@ def tensordot (A,B,axes=2):
     newshape_b = (N2, N1)
     oldb = [b_shape[axis] for axis in notin]
     # do transpose and reshape to get the 2D matrix to do multiplication
-    at = A.transpose(newaxes_a).reshape(newshape_a)
-    bt = B.transpose(newaxes_b).reshape(newshape_b)
-    res = mult(at, bt)
+    A_ = to_numpy(A)
+    B_ = to_numpy(B)
+    at_ = np.transpose(A_,newaxes_a).reshape(newshape_a)
+    bt_ = np.transpose(B_,newaxes_b).reshape(newshape_b)
+    at = from_numpy(at_)
+    bt = from_numpy(bt_)
+    res = mult(at,bt)
+    if len(olda + oldb) == 0:
+        olda = [1]
+        oldb = [1]
+        res.reshape(tuple(olda + oldb))
+    else:
+        res.reshape(tuple(olda + oldb))
+    print(res.shape)
+    # res_ = np.dot(at_, bt_)
+    # res = from_numpy(res_.reshape(olda + oldb))
     #reshape the result
-    return res.reshape(olda + oldb)
+    return res
 
 def div(lhs, rhs, ret=None):
     '''Element-wise division.
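The reworked repeat above routes Python calls through Tensor.repeat_ and the SWIG-exposed C++ Repeat. A usage sketch matching the test cases added below in this commit:

    import numpy as np
    from singa import tensor

    a = np.arange(12, dtype=np.float32).reshape(2, 3, 2)
    ta = tensor.from_numpy(a)
    r1 = tensor.repeat(ta, 2)          # flattened, like np.repeat(a, 2)
    r2 = tensor.repeat(ta, 4, axis=1)  # like np.repeat(a, 4, axis=1)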

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/7d25ed93/src/api/core_tensor.i
----------------------------------------------------------------------
diff --git a/src/api/core_tensor.i b/src/api/core_tensor.i
index 31562c9..756fe60 100644
--- a/src/api/core_tensor.i
+++ b/src/api/core_tensor.i
@@ -119,9 +119,12 @@ namespace singa{
     %template(CopyIntDataFromHostPtr) CopyDataFromHostPtr<int>;
 
     void CopyData(const Tensor &other);
+    void RepeatData(std::vector<size_t> repeats, int axis, int total_repeats, const Tensor &src);
     Tensor Clone() const;
+    Tensor Repeat(std::vector<size_t> repeats, int axis);
     Tensor T() const;
 
+
 #if USE_JAVA
     %rename(iAdd) operator+=(const Tensor &t);
     %rename(iSub) operator-=(const Tensor &t);
@@ -157,6 +160,10 @@ namespace singa{
   void CopyDataToFrom(Tensor *dst, const Tensor &src, size_t num,
                       size_t src_offset = 0, size_t dst_offset = 0);
 
+  void RepeatDataToFrom(bool broadcast_flag, std::vector<size_t> repeats, int axis, 
+                        Tensor *dst, const Tensor &src, const size_t num, 
+                        const size_t dst_offset, const size_t src_offset);
+
   Tensor Reshape(const Tensor &in, const std::vector<size_t> &s);
 
   Tensor Abs(const Tensor &t);

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/7d25ed93/src/core/device/device.cc
----------------------------------------------------------------------
diff --git a/src/core/device/device.cc b/src/core/device/device.cc
index d569015..135ae3a 100644
--- a/src/core/device/device.cc
+++ b/src/core/device/device.cc
@@ -67,7 +67,7 @@ void Device::CopyDataToFrom(Block* dst, Block* src, size_t nBytes,
 void Device::RepeatDataToFrom(Block* dst, Block* src, size_t nBytes,
                               CopyDirection direct, bool broadcast_flag, 
                               int axis_shape, int shape_outer, int chunk, 
-                              vector<int> repeats, int dst_offset, int src_offset) {
+                              vector<size_t> repeats, int dst_offset, int src_offset) {
   const char *src_data = reinterpret_cast<const char*>(src->data()) + dst_offset;
   char *dst_data = reinterpret_cast<char*>(dst->mutable_data()) + src_offset;
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/7d25ed93/src/core/tensor/tensor.cc
----------------------------------------------------------------------
diff --git a/src/core/tensor/tensor.cc b/src/core/tensor/tensor.cc
index 22541df..b75ac40 100644
--- a/src/core/tensor/tensor.cc
+++ b/src/core/tensor/tensor.cc
@@ -216,8 +216,8 @@ void Tensor::CopyData(const Tensor &src) {
   }
 }
 
-void Tensor::RepeatData(vector<int> repeats, int axis, int total_repeats, const Tensor &src) {
-  if(axis == Noaxis) {
+void Tensor::RepeatData(vector<size_t> repeats, int axis, int total_repeats, const Tensor &src) {
+  if(repeats.size() == 1) {
     CHECK_EQ(Size(), src.Size()*total_repeats);
   } else {
     CHECK_EQ(Size(), src.Size()*total_repeats/src.shape()[axis]);
@@ -345,25 +345,44 @@ Tensor Tensor::Clone(std::shared_ptr<Device> device) const {
   return t;
 }
 
-Tensor Tensor::Repeat(vector<int> repeats, int axis, std::shared_ptr<Device> device) {
+Tensor Tensor::Repeat(vector<size_t> repeats, int axis, std::shared_ptr<Device> device) {
   if (device == nullptr) device = device_;
-  Tensor t;
+  vector<size_t> tshape;
   int total_repeats = 0;
   if (axis == Noaxis) {
     total_repeats = repeats[0];
-    t.shape_.push_back(Product(shape_)*total_repeats);
+    tshape.push_back(Product(shape_)*total_repeats);
   } else {
-    for (size_t i = 0; i < shape_[axis]; i++) {
-      if(repeats[i] < 0) {
-        LOG(FATAL) << "the repeats number is less than zero";
+    if (repeats.size() == 1){
+      total_repeats = repeats[0];
+      for (int i = 0; i < shape_.size(); i++) {
+        if (i == axis) {
+          tshape.push_back(shape_[i] * total_repeats);
+        } else {
+          tshape.push_back(shape_[i]);
+        }
+      }
+    } else {
+      if (repeats.size() != shape_[axis]) {
+        LOG(FATAL) << "the repeats number doesn't match the axis";
+      }
+      for (size_t i = 0; i < shape_[axis]; i++) {
+        if(repeats[i] < 0) {
+          LOG(FATAL) << "the repeats number is less than zero";
+        }
+        total_repeats += repeats[i];
+      }
+      for (int i = 0; i < shape_.size(); i++){
+        if (i == axis) {
+          tshape.push_back(total_repeats);
+        } else{
+          tshape.push_back(shape_[i]);
+        }
       }
-      total_repeats += repeats[i];
-      t.shape_.push_back(Product(shape_)/shape_[axis]*total_repeats);
     }
   }
-  t.device_ = device_;
-  t.data_type_ = data_type_;
-  t.strides_.push_back(1);
+  Tensor t(tshape, device_);
+  //t.strides_.push_back(1);
   t.RepeatData(repeats, axis, total_repeats, *this);
   return t;
 }
@@ -522,7 +541,7 @@ void CopyDataToFrom(Tensor *dst, const Tensor &src, const size_t num,
   }
 }
 
-void RepeatDataToFrom(bool broadcast_flag, vector<int> repeats, int axis, 
+void RepeatDataToFrom(bool broadcast_flag, vector<size_t> repeats, int axis, 
                       Tensor *dst, const Tensor &src, const size_t num, 
                       const size_t dst_offset, const size_t src_offset) {
   if (repeats.size() == 1) {
@@ -543,15 +562,20 @@ void RepeatDataToFrom(bool broadcast_flag, vector<int> repeats, int axis,
   auto s_offset = src_offset * width;
   int chunk = width;
   int axis_shape = 1;
+  int shape_outer = 1;
   if (axis == Noaxis){
     axis_shape = 1;
+    shape_outer = Product(src.shape());
   } else {
+    for (size_t i = 0; i < axis; i++) {
+      shape_outer *= src.shape()[i];
+    }
     axis_shape = src.shape()[axis];
     for(size_t i = axis + 1; i < src.nDim(); i++) {
       chunk *= src.shape()[i];
     }
   }
-  int shape_outer = Product(src.shape());
+  
   std::shared_ptr<Device> src_dev = src.device(), dst_dev = dst->device();
   Block *from = src.block(), *to = dst->block();
   if (dst_dev->lang() != src_dev->lang()) {
@@ -667,6 +691,7 @@ void Tensor::SetValue(const SType x) {
   CHECK_EQ(sizeof(SType), SizeOf(data_type_));
   //auto size = Size();
   auto ptr = block_;
+  
   TYPE_LANG_SWITCH(data_type_, DType, device_->lang(), Lang, {
     // TODO(wangwei) cast x to DType
     device_->Exec([this, x, ptr](Context * ctx) {

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/7d25ed93/test/python/test_tensor.py
----------------------------------------------------------------------
diff --git a/test/python/test_tensor.py b/test/python/test_tensor.py
index 0b3b85b..a47bbff 100644
--- a/test/python/test_tensor.py
+++ b/test/python/test_tensor.py
@@ -167,9 +167,10 @@ class TestTensorMethods(unittest.TestCase):
 
     def test_einsum(self):
 
-        a = np.arange(12).reshape(3, 2, 2)
-
+        a = np.array([1.1,1.1,1.1,1.1,1.4,1.3,1.1,1.6,1.1,1.1,1.1,1.2])
+        a = np.reshape(a,(2,3,2))
         ta = tensor.from_numpy(a)
+
         res1 = np.einsum('kij,kij->kij', a, a)
         tres1 = tensor.einsum('kij,kij->kij', ta, ta)
         Tres1 = tensor.to_numpy(tres1)
@@ -177,9 +178,61 @@ class TestTensorMethods(unittest.TestCase):
         tres2 = tensor.einsum('kij,kih->kjh', ta, ta)
         Tres2 = tensor.to_numpy(tres2)
         
-        self.assertEqual(np.sum(Tres1 - res1), 0.)
-        self.assertEqual(np.sum(Tres2 - res2), 0.)
+        self.assertAlmostEqual(np.sum(Tres1 - res1), 0.,places=3)
+        self.assertAlmostEqual(np.sum(Tres2 - res2), 0.,places=3)
+
+    def test_repeat(self):
+
+        a = np.array([1.1,1.1,1.1,1.1,1.4,1.3,1.1,1.6,1.1,1.1,1.1,1.2])
+        a = np.reshape(a,(2,3,2))
+        ta = tensor.from_numpy(a)
+
+        ta_repeat1 = tensor.repeat(ta,2,axis = None)
+        a_repeat1 = np.repeat(a,2,axis = None)
+        Ta_repeat1 = tensor.to_numpy(ta_repeat1)
+        ta_repeat2 = tensor.repeat(ta, 4, axis = 1)
+        a_repeat2 = np.repeat(a, 4, axis = 1)
+        Ta_repeat2 = tensor.to_numpy(ta_repeat2)
+        print(Ta_repeat2)
+        print(a_repeat2)
+
+        self.assertAlmostEqual(np.sum(Ta_repeat1 - a_repeat1), 0., places=3)
+        self.assertAlmostEqual(np.sum(Ta_repeat2 - a_repeat2), 0., places=3)
+
+    def test_sum2(self):
+        a = np.array([1.1,1.1,1.1,1.1,1.4,1.3,1.1,1.6,1.1,1.1,1.1,1.2])
+        a = np.reshape(a,(2,3,2))
+        ta = tensor.from_numpy(a)
+
+        a_sum0 = np.sum(a)
+        ta_sum0 = tensor.sum2(ta)
+        Ta_sum0 = tensor.to_numpy(ta_sum0)
+        a_sum1 = np.sum(a, axis = 1)
+        ta_sum1 = tensor.sum2(ta, axis = 1)
+        Ta_sum1 = tensor.to_numpy(ta_sum1)
+        a_sum2 = np.sum(a, axis = 2)
+        ta_sum2 = tensor.sum2(ta, axis = 2)
+        Ta_sum2 = tensor.to_numpy(ta_sum2)
+
+        self.assertAlmostEqual(np.sum(a_sum0 - Ta_sum0), 0., places=3)
+        self.assertAlmostEqual(np.sum(a_sum1 - Ta_sum1), 0., places=3)
+        self.assertAlmostEqual(np.sum(a_sum2 - Ta_sum2), 0., places=3)
+
+    def test_tensordot(self):
+        a = np.array([1.1,1.1,1.1,1.1,1.4,1.3,1.1,1.6,1.1,1.1,1.1,1.2])
+        a = np.reshape(a,(2,3,2))
+
+        ta = tensor.from_numpy(a)
+
+        res1 = np.tensordot(a, a, axes = 1)
+        tres1 = tensor.tensordot(ta, ta, axes = 1)
+        Tres1 = tensor.to_numpy(tres1)
+        res2 = np.tensordot(a, a, axes = ([0,1],[2,1]))
+        tres2 = tensor.tensordot(ta, ta, axes = ([0,1],[2,1]))
+        Tres2 = tensor.to_numpy(tres2)
 
+        self.assertAlmostEqual(np.sum(Tres1 - res1), 0., places=3)
+        self.assertAlmostEqual(np.sum(Tres2 - res2), 0., places=3)
 
 
 if __name__ == '__main__':

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/7d25ed93/test/singa/test_tensor.cc
----------------------------------------------------------------------
diff --git a/test/singa/test_tensor.cc b/test/singa/test_tensor.cc
index 316b996..c8df3ee 100644
--- a/test/singa/test_tensor.cc
+++ b/test/singa/test_tensor.cc
@@ -129,3 +129,34 @@ TEST(TensorClass, T) {
   EXPECT_EQ(t.shape()[1],  o.shape()[0]);
 }
 
+TEST(TensorClass, Repeat) {
+  float data[] = {1.0f, 2.0f, 3.0f};
+  Tensor t(Shape{3});
+  t.CopyDataFromHostPtr(data, 3);
+
+  Tensor o = t.Repeat(vector <size_t>{2},9999);
+  const float* dptr = static_cast<const float*>(o.block()->data());
+  EXPECT_FLOAT_EQ(1.0f, dptr[0]);
+  EXPECT_FLOAT_EQ(1.0f, dptr[1]);
+  EXPECT_FLOAT_EQ(2.0f, dptr[2]);
+  EXPECT_FLOAT_EQ(2.0f, dptr[3]);
+  EXPECT_FLOAT_EQ(3.0f, dptr[4]);
+  EXPECT_FLOAT_EQ(3.0f, dptr[5]);
+}
+
+TEST(TensorClass, RepeatData) {
+  float data[] = {1.0f, 2.0f, 3.0f};
+  Tensor t(Shape{3});
+  t.CopyDataFromHostPtr(data, 3);
+
+  Tensor o(Shape{6});
+  o.RepeatData({2},9999,2,t);
+  const float* dptr = static_cast<const float*>(o.block()->data());
+  EXPECT_FLOAT_EQ(1.0f, dptr[0]);
+  EXPECT_FLOAT_EQ(1.0f, dptr[1]);
+  EXPECT_FLOAT_EQ(2.0f, dptr[2]);
+  EXPECT_FLOAT_EQ(2.0f, dptr[3]);
+  EXPECT_FLOAT_EQ(3.0f, dptr[4]);
+  EXPECT_FLOAT_EQ(3.0f, dptr[5]);
+}
+


[7/7] incubator-singa git commit: Merge branch 'pr385'

Posted by wa...@apache.org.
Merge branch 'pr385'


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/7a19e63d
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/7a19e63d
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/7a19e63d

Branch: refs/heads/master
Commit: 7a19e63db1d4bddf899755380d363553c42b863d
Parents: e248e44 10f3aa1
Author: Wang Wei <wa...@gmail.com>
Authored: Sun Jul 1 21:10:13 2018 +0800
Committer: Wang Wei <wa...@gmail.com>
Committed: Sun Jul 1 21:10:13 2018 +0800

----------------------------------------------------------------------
 examples/cifar10/cnn.cc     |   2 +-
 include/singa/core/tensor.h |  10 +-
 python/singa/tensor.py      | 307 +++++++++++++++++++++++++++++++++++----
 src/api/core_tensor.i       |  12 +-
 src/core/tensor/tensor.cc   | 126 +++++++++++++++-
 src/io/image_transformer.cc |  12 +-
 test/python/test_tensor.py  |  74 +++++++++-
 test/singa/test_tensor.cc   |  31 ++++
 8 files changed, 529 insertions(+), 45 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/7a19e63d/include/singa/core/tensor.h
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/7a19e63d/src/core/tensor/tensor.cc
----------------------------------------------------------------------


[3/7] incubator-singa git commit: SINGA-362 Add functions to support einsum function 1. add the transpose function in Python and use it in existing functions 2. one problem remains to be fixed: the tensor cannot be reshaped after transpose

Posted by wa...@apache.org.
SINGA-362 Add functions to support einsum function
1. add the transpose function in Python and use it in existing functions
2. one problem remains to be fixed: the tensor cannot be reshaped after transpose
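
For illustration, the new axes-aware transpose mirrors numpy.transpose; a short sketch against the module-level transpose helper added to python/singa/tensor.py in this commit (grounded in the test_transpose case below):

    import numpy as np
    from singa import tensor

    a = np.arange(12.0).reshape(2, 3, 2)
    ta = tensor.from_numpy(a)
    tb = tensor.transpose(ta, [0, 2, 1])  # permute the last two axes
    assert tb.shape == (2, 2, 3)
    np.testing.assert_allclose(tensor.to_numpy(tb), np.transpose(a, [0, 2, 1]))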


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/5e8f6a4f
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/5e8f6a4f
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/5e8f6a4f

Branch: refs/heads/master
Commit: 5e8f6a4f5a5903ff76a1a176a3c135514c0299ca
Parents: 7d25ed9
Author: sheyujian <sh...@me.com>
Authored: Thu May 24 14:10:41 2018 +0800
Committer: sheyujian <sh...@me.com>
Committed: Fri May 25 10:12:39 2018 +0800

----------------------------------------------------------------------
 include/singa/core/tensor.h |   2 +-
 python/singa/tensor.py      | 212 ++++++++++++++++++++++++---------------
 src/api/core_tensor.i       |   4 +-
 src/core/tensor/tensor.cc   |   2 +-
 test/python/test_tensor.py  |  19 +++-
 5 files changed, 154 insertions(+), 85 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/5e8f6a4f/include/singa/core/tensor.h
----------------------------------------------------------------------
diff --git a/include/singa/core/tensor.h b/include/singa/core/tensor.h
index 3cfafc5..7947d93 100644
--- a/include/singa/core/tensor.h
+++ b/include/singa/core/tensor.h
@@ -189,7 +189,7 @@ class Tensor {
   Tensor Transpose() const;
 
   /// Change the axes
-  Tensor Transpose(const vector<size_t>& axes) const;
+  Tensor Transpose(const vector<size_t> &axes) const;
 
   /// Copy the meta info with data block shared.
   Tensor &operator=(const Tensor &in);

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/5e8f6a4f/python/singa/tensor.py
----------------------------------------------------------------------
diff --git a/python/singa/tensor.py b/python/singa/tensor.py
index d559ecb..5f38ef2 100644
--- a/python/singa/tensor.py
+++ b/python/singa/tensor.py
@@ -136,9 +136,20 @@ class Tensor(object):
         '''
         return self.data.transpose()
 
-    # def transpose(self):
-
-    #     return self.data
+    def transpose(self,axes = None):
+        '''
+        To transpose the tensor
+        '''
+        if axes == None:
+            tshape = [self.shape[x] for x in range(len(self.shape))]
+            self.shape = tuple(tshape)
+            self.data = self.data.Transpose()
+        else:
+            if(len(axes) != len(self.shape)):
+                raise ValueError('dimensions do not match')
+            tshape = [self.shape[x] for x in axes]
+            self.shape = tuple(tshape)
+            self.data = self.data.Transpose(list(axes))
 
     def size(self):  # TODO(wangwei) compute size
         '''
@@ -259,24 +270,61 @@ class Tensor(object):
         '''
         return _call_singa_func(self.data.Clone)
 
-    def repeat_(self, repeats, axis):
-        ret = CTensor()
-        if isinstance(repeats, int):
-            if axis == 9999:
+    def repeat(self, repeats, axis):
+        # ret = CTensor()
+        # if isinstance(repeats, int):
+        #     if axis == 9999:
+        #         Repeats = [repeats,]
+        #         ret = self.data.Repeat(Repeats, axis)
+        #     else:
+        #         Repeats = [repeats,]
+        #         ret = self.data.Repeat(Repeats, axis)
+            
+
+        # elif isinstance(repeats, tuple) or isinstance(repeats, list):
+        #     if axis == 9999:
+        #         ret = self.data.Repeat(list(repeats), axis)
+
+        #     elif axis >= 0:
+        #         ret = self.data.Repeat(list(repeats), axis)
+        # return ret
+
+        t_ndim = self.ndim()
+        if isinstance(repeats, int) or isinstance(repeats, long):
+            if repeats < 0:
+                raise ValueError("'repeats' should not be negative: {}".format(repeats))
+            if axis != None and axis < 0:
+                axis += t_ndim
+            # broadcast = True
+            if axis == None:
+                axis = 9999
+                self.shape = (product(self.shape)*repeats,)
                 Repeats = [repeats,]
-                ret = self.data.Repeat(Repeats, axis)
-            else:
+                self.data = self.data.Repeat(Repeats, axis)
+            elif axis >= 0:
+                t_shape = list(self.shape)
+                t_shape[axis] = self.shape[axis]*repeats
+                self.shape = tuple(t_shape)
                 Repeats = [repeats,]
-                ret = self.data.Repeat(Repeats, axis)
-            return ret
+                self.data = self.data.Repeat(Repeats, axis)
 
         elif isinstance(repeats, tuple) or isinstance(repeats, list):
-            if axis == 9999:
-                ret = self.data.Repeat(list(repeats), axis)
-
+            for rep in repeats:
+                if rep < 0:
+                    raise ValueError("'repeats' should not contain negative values: {}".format(repeats))
+
+            if axis != None and axis < 0:
+                axis += t_ndim
+            if axis == None:
+                axis = 9999
+                raise ValueError("when axis is None, 'repeats' should be int: {}".format(repeats))
             elif axis >= 0:
-                ret = self.data.Repeat(list(repeats), axis)
-            return ret
+                t_shape = list(self.shape)
+                t_shape[axis] = sum(repeats)
+                self.shape = tuple(t_shape)
+                self.data = self.data.Repeat(list(repeats), axis)
+        else:
+            raise ValueError('repeats should be int or sequence')
         
 
         
@@ -580,6 +628,20 @@ def reshape(t, s):
     '''
     return _call_singa_func(singa.Reshape, t.data, s)
 
+def Reshape(t,s):
+    ret = t.deepcopy()
+    ret.reshape(s)
+    return ret
+
+def transpose(t,axes = None):
+    '''
+    Returns:
+        the transposed tensor 
+    '''
+    ret = t.deepcopy()
+    ret.transpose(axes)
+    return ret
+
 
 def copy_data_to_from(dst, src, size, dst_offset=0, src_offset=0):
     '''Copy the data between two Tensor instances which could be on different
@@ -1082,26 +1144,50 @@ def einsum(ops, *args):
     reshape_A = list(A.shape) + broadcast_a
     reshape_B = list(B.shape) + broadcast_b
 
-    A_ = to_numpy(A)
-    B_ = to_numpy(B)
+    # A_ = to_numpy(A)
+    # B_ = to_numpy(B)
 
-    mult_A = np.repeat(A_, np.product(broadcast_a)).reshape(
-        reshape_A).transpose(transpose_A)
-    mult_B = np.repeat(B_, np.product(broadcast_b)).reshape(
-        reshape_B).transpose(transpose_B)
+    # mult_A = np.repeat(A_, np.product(broadcast_a)).reshape(
+    #     reshape_A).transpose(transpose_A)
+    # mult_B = np.repeat(B_, np.product(broadcast_b)).reshape(
+    #     reshape_B).transpose(transpose_B)
 
-    if mult_A.shape != mult_B.shape:
-        raise ValueError("Error: matrix dimension mismatch")
-    res_ = np.multiply(mult_A, mult_B)
+    # if mult_A.shape != mult_B.shape:
+    #     raise ValueError("Error: matrix dimension mismatch")
+    # res_ = np.multiply(mult_A, mult_B)
 
     # reduce the axis and find the final transpose for the output
+    # sum_R = sorted(sums, reverse=True)
+    # for i in sum_R:
+    #     res_ = res_.sum(axis=i)
+    # transpose_res = [sorted(list(outputops)).index(x) for x in list(outputops)]
+    # res_ = res_.transpose(transpose_res)
+    # res = from_numpy(res_)
+    # return res
+    if len(broadcast_a) == 0:
+        broadcast_a = [1]
+    if len(broadcast_b) == 0:
+        broadcast_b = [1]  
+    mult_A = repeat(A, product(broadcast_a))
+    mult_A.reshape(reshape_A)
+    mult_A = transpose(mult_A,transpose_A)
+    mult_B = repeat(B, product(broadcast_b))
+    mult_B.reshape(reshape_B)
+    mult_B = transpose(mult_B, transpose_B)
+
+    if mult_A.shape != mult_B.shape:
+        raise ValueError("Error: matrix dimension mismatch")
+    res = eltwise_mult(mult_A, mult_B)
     sum_R = sorted(sums, reverse=True)
     for i in sum_R:
-        res_ = res_.sum(axis=i)
+        res = sum2(res, axis=i)
     transpose_res = [sorted(list(outputops)).index(x) for x in list(outputops)]
-    res_ = res_.transpose(transpose_res)
-    res = from_numpy(res_)
+    res = transpose(res, transpose_res)
+
     return res
+    
+
+
 
 def sum2(t, axis=None, out=None):
     '''Sum of tensor elements over given axis
@@ -1162,59 +1248,12 @@ def sum2(t, axis=None, out=None):
         return ret
 
 def repeat (t, repeats, axis = None):
-    t_ndim = t.ndim()
-    if isinstance(repeats, int):
-        if repeats < 0:
-            raise ValueError("'repeats' should not be negative: {}".format(repeats))
-        if axis != None and axis < 0:
-            axis += t_ndim
-        # broadcast = True
-        if axis == None:
-            axis = 9999
-            ret = Tensor()
-            ret.shape = (product(t.shape)*repeats,)
-            # Repeats = [repeats,]
-            ret.data = t.repeat_(repeats, axis)
-            # ret.data = t.data.Repeat(Repeats, axis)
-        elif axis >= 0:
-            ret = Tensor()
-            t_shape = list(t.shape)
-            t_shape[axis] = t.shape[axis]*repeats
-            print(t_shape)
-            ret.shape = tuple(t_shape)
-            print(ret.shape)
-            # Repeats = [repeats,]
-            ret.data = t.repeat_(repeats, axis)
-            # ret.data = t.data.Repeat(Repeats, axis)
-            print(ret.shape)
-
-    elif isinstance(repeats, tuple) or isinstance(repeats, list):
-        for rep in repeats:
-            if rep < 0:
-                raise ValueError("'repeats' should be int or sequence: {}".format(repeats))
-
-        if axis != None and axis < 0:
-            axis += t_ndim
-        if axis == None:
-            axis = 9999
-            ret = Tensor()
-            ret.shape = (sum(repeats), )
-            t_shape = list(t.shape)
-            ret.data = t.repeat_(repeats, axis)
-            #ret = t.data.Repeat(list(repeats), axis)
-            
-        elif axis >= 0:
-            ret = Tensor()
-            t_shape = list(t.shape)
-            t_shape[axis] = sum(repeats)
-            ret.shape = tuple(t_shape)
-            ret.data = t.repeat_(repeats, axis)
-            #ret = t.data.Repeat(list(repeats), axis)
-    else:
-        raise ValueError('repeats should be int or sequence')
+
+    ret = t.deepcopy()
+    ret.repeat(repeats,axis)
     return ret
-        
 
+        
 def tensordot (A,B,axes=2):
 
     """Returns the tensor multiplication of two tensors along specified axes.
@@ -1317,8 +1356,21 @@ def tensordot (A,B,axes=2):
     B_ = to_numpy(B)
     at_ = np.transpose(A_,newaxes_a).reshape(newshape_a)
     bt_ = np.transpose(B_,newaxes_b).reshape(newshape_b)
+    # print(at_)
+    # print(bt_)
     at = from_numpy(at_)
     bt = from_numpy(bt_)
+
+    # A = transpose(A, newaxes_a)
+    # B = transpose(B, newaxes_b)
+    # A = 
+    # at = Reshape(A, newshape_a)
+    # bt = Reshape(B, newshape_b)
+    # _at = to_numpy(at)
+    # _bt = to_numpy(bt)
+    # print(_at)
+    # print(_bt)
+
     res = mult(at,bt)
     if len(olda + oldb) == 0:
         olda = [1]
@@ -1326,7 +1378,7 @@ def tensordot (A,B,axes=2):
         res.reshape(tuple(olda + oldb))
     else:
         res.reshape(tuple(olda + oldb))
-    print(res.shape)
+    # print(res.shape)
     # res_ = np.dot(at_, bt_)
     # res = from_numpy(res_.reshape(olda + oldb))
     #reshape the result
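
The einsum rewrite above follows the classic broadcast-multiply-reduce strategy: repeat each operand until both cover the full index space, multiply elementwise, then sum out the contracted axes. A numpy sketch of that identity for the concrete case 'ij,jk->ik' (for intuition only; the real code derives the reshape and transpose lists from the ops string):

    import numpy as np

    A = np.arange(6.0).reshape(2, 3)
    B = np.arange(12.0).reshape(3, 4)
    mult_A = np.repeat(A, 4).reshape(2, 3, 4)                     # A[i,j] -> [i,j,k]
    mult_B = np.repeat(B, 2).reshape(3, 4, 2).transpose(2, 0, 1)  # B[j,k] -> [i,j,k]
    res = (mult_A * mult_B).sum(axis=1)                           # contract j
    assert np.allclose(res, np.dot(A, B))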

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/5e8f6a4f/src/api/core_tensor.i
----------------------------------------------------------------------
diff --git a/src/api/core_tensor.i b/src/api/core_tensor.i
index 756fe60..587dddd 100644
--- a/src/api/core_tensor.i
+++ b/src/api/core_tensor.i
@@ -100,8 +100,10 @@ namespace singa{
     const DataType data_type() const;
     const std::vector<size_t> &shape() const;
     const size_t shape(size_t idx) const;
-    size_t nDim() const;
     bool transpose() const;
+    size_t nDim() const;
+    Tensor Transpose() const;
+    Tensor Transpose(const std::vector<size_t> &axes) const;
     size_t Size() const;
     size_t MemSize() const;
     void Reshape(const std::vector<size_t> &shape);

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/5e8f6a4f/src/core/tensor/tensor.cc
----------------------------------------------------------------------
diff --git a/src/core/tensor/tensor.cc b/src/core/tensor/tensor.cc
index b75ac40..de2ea8a 100644
--- a/src/core/tensor/tensor.cc
+++ b/src/core/tensor/tensor.cc
@@ -424,7 +424,7 @@ Tensor Tensor::Transpose() const {
 
 //transpose with axes
 // TODO(wangwei) the shape and axes should match
-Tensor Tensor::Transpose(const vector<size_t>& axes) const {
+Tensor Tensor::Transpose(const vector<size_t> &axes) const {
   // if(axes.size() != shape_.size()){
   //   std::cout << "Warning: Size of input axes doesn't match size of shape" << std::endl;
   //   return void();

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/5e8f6a4f/test/python/test_tensor.py
----------------------------------------------------------------------
diff --git a/test/python/test_tensor.py b/test/python/test_tensor.py
index a47bbff..7d83677 100644
--- a/test/python/test_tensor.py
+++ b/test/python/test_tensor.py
@@ -165,6 +165,21 @@ class TestTensorMethods(unittest.TestCase):
         b = tensor.to_numpy(t)
         self.assertEqual(np.sum(a-b), 0.)
 
+    def test_transpose(self):
+        a = np.array([1.1,1.1,1.1,1.1,1.4,1.3,1.1,1.6,1.1,1.1,1.1,1.2])
+        a = np.reshape(a,(2,3,2))
+        ta = tensor.from_numpy(a)
+
+        A1 = np.transpose(a)
+        tA1 = tensor.transpose(ta)
+        TA1 = tensor.to_numpy(tA1)
+        A2 = np.transpose(a,[0,2,1])
+        tA2 = tensor.transpose(ta,[0,2,1])
+        TA2 = tensor.to_numpy(tA2)
+
+        self.assertAlmostEqual(np.sum(TA1 - A1), 0.,places=3)
+        self.assertAlmostEqual(np.sum(TA2 - A2), 0.,places=3)
+
     def test_einsum(self):
 
         a = np.array([1.1,1.1,1.1,1.1,1.4,1.3,1.1,1.6,1.1,1.1,1.1,1.2])
@@ -193,8 +208,8 @@ class TestTensorMethods(unittest.TestCase):
         ta_repeat2 = tensor.repeat(ta, 4, axis = 1)
         a_repeat2 = np.repeat(a, 4, axis = 1)
         Ta_repeat2 = tensor.to_numpy(ta_repeat2)
-        print(Ta_repeat2)
-        print(a_repeat2)
+        # print(Ta_repeat2)
+        # print(a_repeat2)
 
         self.assertAlmostEqual(np.sum(Ta_repeat1 - a_repeat1), 0., places=3)
         self.assertAlmostEqual(np.sum(Ta_repeat2 - a_repeat2), 0., places=3)


[5/7] incubator-singa git commit: SINGA-362 Add functions to support einsum function 1. change the api of repeat, reshape, transpose to be similar to numpy api 2. have some change in the reshape function to make it modified to 'Tensor Reshape' instead of 'void Reshape'

Posted by wa...@apache.org.
SINGA-362 Add functions to support einsum function
1. change the API of repeat, reshape and transpose to be similar to the numpy API
2. change the reshape function to return 'Tensor Reshape' instead of 'void Reshape' (the same as Yisen's revision)
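
The practical consequence of the numpy-style API is that repeat, reshape and transpose now return a new Tensor instead of mutating the receiver, so call sites must capture the result. A minimal sketch (assuming the usual from_numpy helper):

    import numpy as np
    from singa import tensor

    ta = tensor.from_numpy(np.arange(6.0).reshape(2, 3))
    tb = ta.reshape((3, 2))   # returns a new Tensor; must be assigned
    assert tb.shape == (3, 2)

This is why the diffs below rewrite patterns like res.reshape(...) into res = res.reshape(...).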


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/4940fefb
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/4940fefb
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/4940fefb

Branch: refs/heads/master
Commit: 4940fefbf65f0da474aff71b23bc60656aa40dc5
Parents: 8d9eb29
Author: sheyujian <sh...@me.com>
Authored: Thu May 31 10:28:12 2018 +0800
Committer: sheyujian <sh...@me.com>
Committed: Sat Jun 2 01:38:15 2018 +0800

----------------------------------------------------------------------
 examples/cifar10/cnn.cc           |   2 +-
 include/singa/core/device.h       |   5 -
 include/singa/core/tensor.h       |  14 ++-
 python/singa/tensor.py            | 194 +++++++++++++++------------------
 src/api/core_tensor.i             |   5 +-
 src/core/device/device.cc         |  23 ----
 src/core/tensor/tensor.cc         | 111 +++++++++++--------
 src/core/tensor/tensor_math.h     |   7 ++
 src/core/tensor/tensor_math_cpp.h |  14 +++
 src/io/image_transformer.cc       |  12 +-
 test/python/test_tensor.py        |   8 +-
 11 files changed, 199 insertions(+), 196 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/4940fefb/examples/cifar10/cnn.cc
----------------------------------------------------------------------
diff --git a/examples/cifar10/cnn.cc b/examples/cifar10/cnn.cc
index 61097b6..8af8a2f 100644
--- a/examples/cifar10/cnn.cc
+++ b/examples/cifar10/cnn.cc
@@ -144,7 +144,7 @@ void Train(int num_epoch, string data_dir) {
     auto train = data.ReadTrainData();
     size_t nsamples = train.first.shape(0);
     auto mtrain =
-        Reshape(train.first, Shape{nsamples, train.first.Size() / nsamples});
+         Reshape(train.first, Shape{nsamples, train.first.Size() / nsamples});
     const Tensor& mean = Average(mtrain, 0);
     SubRow(mean, &mtrain);
     train_x = Reshape(mtrain, train.first.shape());

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/4940fefb/include/singa/core/device.h
----------------------------------------------------------------------
diff --git a/include/singa/core/device.h b/include/singa/core/device.h
index d6b8bf3..1a960d8 100644
--- a/include/singa/core/device.h
+++ b/include/singa/core/device.h
@@ -75,11 +75,6 @@ class Device {
   virtual void CopyDataToFrom(Block* dst, Block* src, size_t nBytes,
                       CopyDirection direction, int dst_offset, int src_offset);
 
-  virtual void RepeatDataToFrom(Block* dst, Block* src, size_t nBytes,
-                                CopyDirection direct, bool broadcast_flag, 
-                                int axis_shape, int shape_outer, int chunk, 
-                                vector<size_t> repeats, int dst_offset, int src_offset);
-
   void CopyDataFromHostPtr(Block* dst, const void* src, size_t nBytes,
                            size_t dst_offset = 0);
   /// Submit the operation to the device, which may execute it right now or

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/4940fefb/include/singa/core/tensor.h
----------------------------------------------------------------------
diff --git a/include/singa/core/tensor.h b/include/singa/core/tensor.h
index 7947d93..d9bb069 100644
--- a/include/singa/core/tensor.h
+++ b/include/singa/core/tensor.h
@@ -133,8 +133,8 @@ class Tensor {
   size_t MemSize() const { return block_->size(); }
 
   /// Reset the tensor shape, it may reallocate block, if MemSize() changes.
-  void Reshape(const Shape &shape);
-  void Reshape(Shape &&shape);
+  // void Reshape(const Shape &shape);
+  // void Reshape(Shape &&shape);
 
   /// Reset the shape, device, and data type as given tensor.
   /// If block size changes, then reallocate a new block.
@@ -191,6 +191,10 @@ class Tensor {
   /// Change the axes
   Tensor Transpose(const vector<size_t> &axes) const;
 
+  Tensor Reshape(const Shape &shape);
+
+  Tensor Reshape(Shape &&shape);
+
   /// Copy the meta info with data block shared.
   Tensor &operator=(const Tensor &in);
 
@@ -269,6 +273,7 @@ inline size_t Product(const Shape &shape, int start = 0, size_t len = 0) {
   return v;
 }
 
+
 inline void CheckDataTypeAndLang(const Tensor &in1, const Tensor &in2) {
   CHECK_EQ(in1.data_type(), in2.data_type());
   CHECK_EQ(in1.device()->lang(), in2.device()->lang());
@@ -292,8 +297,7 @@ void CopyDataToFrom(Tensor *dst, const Tensor &src, const size_t num,
                     const size_t dst_offset = 0, const size_t src_offset = 0);
 
 void RepeatDataToFrom(bool broadcast_flag, vector<size_t> repeats, int axis, 
-                      Tensor *dst, const Tensor &in, const size_t num, 
-                      const size_t dst_offset = 0, const size_t src_offset = 0);
+                      Tensor *dst, const Tensor &in, const size_t num);
 
 // =============Element-wise operations====================================
 Tensor Abs(const Tensor &in);
@@ -305,6 +309,7 @@ Tensor Sign(const Tensor &in);
 Tensor Sqrt(const Tensor &in);
 Tensor Square(const Tensor &in);
 Tensor Tanh(const Tensor &in);
+Tensor Transform(const Tensor &in);
 
 void Abs(const Tensor &in, Tensor *out);
 void Exp(const Tensor &in, Tensor *out);
@@ -315,6 +320,7 @@ void Sign(const Tensor &in, Tensor *out);
 void Sqrt(const Tensor &in, Tensor *out);
 void Square(const Tensor &in, Tensor *out);
 void Tanh(const Tensor &in, Tensor *out);
+void Transform(const Tensor &in, Tensor *out);
 
 /// Element-wise opeartion, out[i]=in[i]^x
 template <typename SType>

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/4940fefb/python/singa/tensor.py
----------------------------------------------------------------------
diff --git a/python/singa/tensor.py b/python/singa/tensor.py
index 21a362a..ba8d02c 100644
--- a/python/singa/tensor.py
+++ b/python/singa/tensor.py
@@ -71,8 +71,6 @@ float32 = core_pb2.kFloat32
 CTensor = singa.Tensor
 
 
-
-
 class Tensor(object):
     '''Python Tensor, which wraps a swig converted Tensor from CPP Tensor.
 
@@ -140,16 +138,18 @@ class Tensor(object):
         '''
         To transpose the tensor
         '''
+        t = Tensor(self.shape, self.device, self.dtype)
         if axes == None:
-            tshape = [self.shape[x] for x in range(len(self.shape))]
-            self.shape = tuple(tshape)
-            self.data = self.data.Transpose()
+            tshape = [self.shape[x] for x in range(len(t.shape))]
+            t.shape = tuple(tshape)
+            t.data = self.data.Transpose()
         else:
             if(len(axes) != len(self.shape)):
                 raise ValueError('dimensions do not match')
             tshape = [self.shape[x] for x in axes]
-            self.shape = tuple(tshape)
-            self.data = self.data.Transpose(list(axes))
+            t.shape = tuple(tshape)
+            t.data = self.data.Transpose(list(axes))
+        return t
 
     def size(self):  # TODO(wangwei) compute size
         '''
@@ -172,10 +172,12 @@ class Tensor(object):
             shape (list<int>): new shape, which should have the same volume as
                 the original shape.
         '''
+        t = Tensor(self.shape, self.device, self.dtype)
         assert product(self.shape) == product(shape), \
             'product of shape should be equal'
-        self.shape = shape
-        self.data.Reshape(list(shape))
+        t.shape = shape
+        t.data = self.data.Reshape(list(shape))
+        return t
 
     def reset_like(self, t):
         '''Reset the shape, dtype and device as the given tensor.
@@ -283,6 +285,7 @@ class Tensor(object):
             the tensor which has been repeated
         
         '''
+        t = Tensor()
         t_ndim = self.ndim()
         if isinstance(repeats, int) or isinstance(repeats, long):
             if repeats < 0:
@@ -292,15 +295,15 @@ class Tensor(object):
             # broadcast = True
             if axis == None:
                 axis = 9999
-                self.shape = (product(self.shape)*repeats,)
+                t.shape = (product(self.shape)*repeats,)
                 Repeats = [repeats,]
-                self.data = self.data.Repeat(Repeats, axis)
+                t.data = self.data.Repeat(Repeats, axis)
             elif axis >= 0:
                 t_shape = list(self.shape)
                 t_shape[axis] = self.shape[axis]*repeats
-                self.shape = tuple(t_shape)
+                t.shape = tuple(t_shape)
                 Repeats = [repeats,]
-                self.data = self.data.Repeat(Repeats, axis)
+                t.data = self.data.Repeat(Repeats, axis)
 
         elif isinstance(repeats, tuple) or isinstance(repeats, list):
             for rep in repeats:
@@ -315,13 +318,12 @@ class Tensor(object):
             elif axis >= 0:
                 t_shape = list(self.shape)
                 t_shape[axis] = sum(repeats)
-                self.shape = tuple(t_shape)
-                self.data = self.data.Repeat(list(repeats), axis)
+                t.shape = tuple(t_shape)
+                t.data = self.data.Repeat(list(repeats), axis)
         else:
             raise ValueError('repeats should be int or sequence')
-        
 
-        
+        return t     
 
     def T(self):
         ''' shallow copy, negate the transpose field.
@@ -623,8 +625,8 @@ def reshape(t, s):
     return _call_singa_func(singa.Reshape, t.data, s)
 
 def Reshape(t,s):
-    ret = t.deepcopy()
-    ret.reshape(s)
+
+    ret = t.reshape(s)
     return ret
 
 def transpose(t,axes = None):
@@ -632,8 +634,7 @@ def transpose(t,axes = None):
     Returns:
         the transposed tensor 
     '''
-    ret = t.deepcopy()
-    ret.transpose(axes)
+    ret = t.transpose(axes)
     return ret
 
 
@@ -795,24 +796,63 @@ def tanh(t):
     '''
     return _call_singa_func(singa.Tanh, t.data)
 
-
-def sum(t, axis=None):
-    '''Sum elements of the input tensor long the given axis.
+def sum(t, axis=None, out=None):
+    '''Sum of tensor elements over given axis
 
     Args:
-        t (Tensor): input Tensor
-        axis (int, optional): if None, the summation is done over all elements;
-            if axis is provided, then it is calculated along the given axis,
-            e.g. 0 -- sum each column; 1 -- sum each row.
+        t: Singa.tensor
+            The array_like tensor to be summed
+        axis: None or int or tuple of ints, optional
+            Axis or axes along which a sum is performed.
+            The default, axis=None, will sum all of the elements of the input array.
+            If axis is negative it counts from the last to the first axis.
+            If axis is a tuple of ints, a sum is performed on all of the axes specified
+            in the tuple instead of a single axis or all the axes as before.
+        out: Singa.tensor, optional
+            Alternative output array in which to place the result.
+            It must have the same shape as the expected output,
+            but the type of the output values will be cast if necessary.
 
-    Returns:
-        a float value as the sum of all elements, or a new Tensor
+    Return: sum_along_axis: tensor
+        A tensor with the same shape as t, with the specified axis removed.
+        If t is a 0-d tensor, or if axis is None, a scalar is returned.
+        If an output array is specified, a reference to out is returned
     '''
 
+    t_shape = t.shape
+    t_ndim = t.ndim()
+
     if axis is None:
-        return singa.SumAsFloat(t.data)
+        one = Tensor(t.shape, t.device)
+        one.set_value(1.0)
+        ret = tensordot(t, one, t_ndim)
+
+    if isinstance(axis,int):
+        if axis < 0:
+            axis += t_ndim
+
+        axis_shape = t_shape[axis]
+        axis_shape = int(axis_shape)
+        one = Tensor(shape = (axis_shape, ), device = t.device)
+        one.set_value(1.0)
+        ret = tensordot(t, one, axes=([axis],[0]))
+
+    if isinstance(axis,tuple):
+        l_axis = list(axis)
+        axis_shape = [t_shape[x] for x in axis]
+        axisshape = tuple(axis_shape)
+        one = Tensor(axisshape, t.device)
+        one.set_value(1.0)
+        one_axis = [x for x in range(one.ndim())]
+        ret = tensordot(t, one, (l_axis,one_axis))
+
+    if out is not None:
+        if out.shape != ret.shape:
+            raise ValueError('dimensions do not match')
+        out[:] = ret
+        return out
     else:
-        return _call_singa_func(singa.Sum, t.data, axis)
+        return ret
 
 
 def pow(t, x, out=None):
@@ -1143,10 +1183,10 @@ def einsum(ops, *args):
     if len(broadcast_b) == 0:
         broadcast_b = [1]  
     mult_A = repeat(A, product(broadcast_a))
-    mult_A.reshape(reshape_A)
+    mult_A = mult_A.reshape(reshape_A)
     mult_A = transpose(mult_A,transpose_A)
     mult_B = repeat(B, product(broadcast_b))
-    mult_B.reshape(reshape_B)
+    mult_B = mult_B.reshape(reshape_B)
     mult_B = transpose(mult_B, transpose_B)
 
     if mult_A.shape != mult_B.shape:
@@ -1154,77 +1194,26 @@ def einsum(ops, *args):
     res = eltwise_mult(mult_A, mult_B)
     sum_R = sorted(sums, reverse=True)
     for i in sum_R:
-        res = sum2(res, axis=i)
+        res = sum(res, axis=i)
     transpose_res = [sorted(list(outputops)).index(x) for x in list(outputops)]
     res = transpose(res, transpose_res)
 
     return res
     
 
-
-
-def sum2(t, axis=None, out=None):
-    '''Sum of tensor elements over given axis
-
+def repeat (t, repeats, axis = None):
+    '''Return the repeated tensor
     Args:
-        t: Singa.tensor
-            The array_like tensor to be sumed
-        axis: None or int or tuple of ints, optional
-            Axis or axes along which a sum is performed.
-            The default, axis=None, will sum all of the elements of the input array.
-            If axis is negative it counts from the last to the first axis.
-            If axis is a tuple of ints, a sum is performed on all of the axes specified
-            in the tuple instead of a single axis or all the axes as before.
-        out:Singa.tensor optional
-            Alternative output array in which to place the result.
-            It must have the same shape as the expected output,
-            but the type of the output values will be cast if necessary.
+        t(tensor): the tensor to be repeated
+        repeats(int or a sequence): the number of repetitions along the given axis
+        axis (int): the axis along which to repeat
+                    If it is None, the repeated tensor will be flattened. If it isn't None,
+                    repeats may be a sequence, but its size should match the shape of the axis
 
-    Return: sum_along_axis: tensor
-        A tensor with the same shape as t, with the specified axis removed.
-        If a is a 0-d array, or if axis is None, a scalar is returned.
-        If an output array is specified, a reference to out is returned
+    Return:
+        the tensor which has been repeated
     '''
-
-    t_shape = t.shape
-    t_ndim = t.ndim()
-
-    if axis is None:
-        one = Tensor(t.shape, t.device)
-        one.set_value(1.0)
-        ret = tensordot(t, one, t_ndim)
-
-    if isinstance(axis,int):
-        if axis < 0:
-            axis += t_ndim
-
-        axis_shape = t_shape[axis]
-        axis_shape = int(axis_shape)
-        one = Tensor(shape = (axis_shape, ), device = t.device)
-        one.set_value(1.0)
-        ret = tensordot(t, one, axes=([axis],[0]))
-
-    if isinstance(axis,tuple):
-        l_axis = list(axis)
-        axis_shape = [t_shape[x] for x in axis]
-        axisshape = tuple(axis_shape)
-        one = Tensor(axisshape, t.device)
-        one.set_value(1.0)
-        one_axis = [x for x in range(one.ndim())]
-        ret = tensordot(t, one, (l_axis,one_axis))
-
-    if out is not None:
-        if out.shape != ret.shape:
-            raise ValueError('dimensions do not match')
-        out[:] = ret
-        return out
-    else:
-        return ret
-
-def repeat (t, repeats, axis = None):
-
-    ret = t.deepcopy()
-    ret.repeat(repeats,axis)
+    ret = t.repeat(repeats,axis)
     return ret
 
         
@@ -1325,18 +1314,9 @@ def tensordot (A,B,axes=2):
         N1 *= b_shape[bx]
     newshape_b = (N2, N1)
     oldb = [b_shape[axis] for axis in notin]
-    # do transpose and reshape to get the 2D matrix to do multiplication
-    # A_ = to_numpy(A)
-    # B_ = to_numpy(B)
-    # at_ = np.transpose(A_,newaxes_a).reshape(newshape_a)
-    # bt_ = np.transpose(B_,newaxes_b).reshape(newshape_b)
-    # at = from_numpy(at_)
-    # bt = from_numpy(bt_)
 
     A = transpose(A, newaxes_a)
     B = transpose(B, newaxes_b)
-    A = add(A, 0)
-    B = add(B, 0)
     at = Reshape(A, newshape_a)
     bt = Reshape(B, newshape_b)
 
@@ -1344,9 +1324,9 @@ def tensordot (A,B,axes=2):
     if len(olda + oldb) == 0:
         olda = [1]
         oldb = [1]
-        res.reshape(tuple(olda + oldb))
+        res = res.reshape(tuple(olda + oldb))
     else:
-        res.reshape(tuple(olda + oldb))
+        res = res.reshape(tuple(olda + oldb))
 
     return res
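
The rewritten sum above reduces an axis by contracting the input with an all-ones tensor via tensordot. The numpy identity it relies on, as a sketch:

    import numpy as np

    a = np.arange(12.0).reshape(2, 3, 2)
    ones = np.ones(a.shape[1])  # ones along the axis being reduced
    assert np.allclose(np.tensordot(a, ones, axes=([1], [0])), a.sum(axis=1))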
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/4940fefb/src/api/core_tensor.i
----------------------------------------------------------------------
diff --git a/src/api/core_tensor.i b/src/api/core_tensor.i
index 587dddd..d94e506 100644
--- a/src/api/core_tensor.i
+++ b/src/api/core_tensor.i
@@ -106,7 +106,7 @@ namespace singa{
     Tensor Transpose(const std::vector<size_t> &axes) const;
     size_t Size() const;
     size_t MemSize() const;
-    void Reshape(const std::vector<size_t> &shape);
+    Tensor Reshape(const std::vector<size_t> &shape);
     void ResetLike(const Tensor &t);
     void AsType(DataType type);
     void ToDevice(std::shared_ptr<singa::Device> dev);
@@ -163,8 +163,7 @@ namespace singa{
                       size_t src_offset = 0, size_t dst_offset = 0);
 
   void RepeatDataToFrom(bool broadcast_flag, std::vector<size_t> repeats, int axis, 
-                        Tensor *dst, const Tensor &src, const size_t num, 
-                        const size_t dst_offset, const size_t src_offset);
+                        Tensor *dst, const Tensor &src, const size_t num);
 
   Tensor Reshape(const Tensor &in, const std::vector<size_t> &s);
 

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/4940fefb/src/core/device/device.cc
----------------------------------------------------------------------
diff --git a/src/core/device/device.cc b/src/core/device/device.cc
index 0c9c6a2..cda1b9f 100644
--- a/src/core/device/device.cc
+++ b/src/core/device/device.cc
@@ -64,29 +64,6 @@ void Device::CopyDataToFrom(Block* dst, Block* src, size_t nBytes,
       {src}, {dst});
 }
 
-void Device::RepeatDataToFrom(Block* dst, Block* src, size_t nBytes,
-                              CopyDirection direct, bool broadcast_flag, 
-                              int axis_shape, int shape_outer, int chunk, 
-                              vector<size_t> repeats, int dst_offset, int src_offset) {
-  const char *src_data = reinterpret_cast<const char*>(src->data()) + src_offset;
-  char *dst_data = reinterpret_cast<char*>(dst->mutable_data()) + dst_offset;
-
-  for (int i = 0; i < shape_outer; i++) {
-    for (int j = 0; j < axis_shape; j++) {
-      int temp = broadcast_flag ? repeats[0] : repeats[j];
-      for (int k = 0; k < temp; k++) {
-        this->Exec(
-            [this, dst_data, src_data, direct, chunk, repeats](Context* ctx) {
-              this->CopyToFrom(dst_data, src_data, chunk, direct, ctx);
-            },
-            {src}, {dst});
-        dst_data += chunk;
-      }
-      src_data += chunk;
-    }
-  }
-}
-
 void Device::CopyDataFromHostPtr(Block* dst, const void* src, size_t nBytes,
                                  size_t dst_offset) {
   auto direct = lang_ == kCpp ? kHostToHost : kHostToDevice;

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/4940fefb/src/core/tensor/tensor.cc
----------------------------------------------------------------------
diff --git a/src/core/tensor/tensor.cc b/src/core/tensor/tensor.cc
index de2ea8a..3bf0a77 100644
--- a/src/core/tensor/tensor.cc
+++ b/src/core/tensor/tensor.cc
@@ -124,12 +124,7 @@ void Tensor::ResetLike(const Tensor &in) {
   strides_ = in.strides_;
 }
 
-// if tensor is not transposed yet i.e strides == 1,
-// then we simply change the shape and generate new default strides
-// if tensor is already transposed i.e strides != 1,
-// it should be copied to a new tensor with newly generated default strides
-// TODO(wangwei) raise error if the shape not match
-void Tensor::Reshape(const Shape &shape) {
+Tensor Tensor::Reshape(const Shape &shape) {
   if (strides_.size() == 0)
     strides_.push_back(1);
 
@@ -137,14 +132,27 @@ void Tensor::Reshape(const Shape &shape) {
     if (block_ != nullptr && block_->DecRefCount() == 0)
       device_->FreeBlock(block_);
     block_ = device_->NewBlock((int)(Product(shape) * SizeOf(data_type_)));
+    shape_ = shape;
+    generate_strides();
+    return *this;
+
   } else if (transpose()) {
-    LOG(FATAL) << "Reshape Error: Reshape called on tranposed tensor. Not implemented yet." ;
-  }
+    Tensor t(shape_, device_, data_type_);
+    t.block_ = t.device()->NewBlock((int)(Product(shape) * SizeOf(data_type_)));
+    singa::Transform(*this, &t);
+    t.shape_ = shape;
+    return t;
+ }
+
   shape_ = shape;
   generate_strides();
+  Tensor t(shape, device_, data_type_);
+  t.block_ = block_;
+  t.block_->IncRefCount();
+  return t;
 }
 
-void Tensor::Reshape(Shape &&shape) {
+Tensor Tensor::Reshape(Shape &&shape) {
   if (strides_.size() == 0)
     strides_.push_back(1);
 
@@ -152,11 +160,24 @@ void Tensor::Reshape(Shape &&shape) {
     if (block_ != nullptr && block_->DecRefCount() == 0)
       device_->FreeBlock(block_);
     block_ = device_->NewBlock((int)(Product(shape) * SizeOf(data_type_)));
+    shape_ = std::move(shape);
+    generate_strides();
+    return *this;
+
   } else if (transpose()) {
-    LOG(FATAL) << "Reshape Error: Reshape called on tranposed tensor. Not implemented yet." ;
-  }
-  shape_ = std::move(shape);
+    Tensor t(shape_, device_, data_type_);
+    t.block_ = t.device()->NewBlock((int)(Product(shape) * SizeOf(data_type_)));
+    singa::Transform(*this, &t);
+    t.shape_ = shape;
+    return t;
+ }
+
+  shape_ = shape;
   generate_strides();
+  Tensor t(shape, device_, data_type_);
+  t.block_ = block_;
+  t.block_->IncRefCount();
+  return t;
 }
 
 void Tensor::AsType(const DataType type) {
@@ -226,7 +247,7 @@ void Tensor::RepeatData(vector<size_t> repeats, int axis, int total_repeats, con
   CHECK(block_ != nullptr);
   // Do repeat only if the src's block is already initialized.
   if (src.block_ != nullptr) {
-    singa::RepeatDataToFrom(false, repeats, axis, this, src, Size(), 0, 0);
+    singa::RepeatDataToFrom(false, repeats, axis, this, src, Size());
   }
 }
 
@@ -234,10 +255,9 @@ void Tensor::FromProto(const singa::TensorProto &proto) {
   if (block_ != nullptr && block_->DecRefCount() == 0)
     device_->FreeBlock(block_);
   block_ = nullptr;
-  Shape shape;
-  for (uint32_t s : proto.shape()) shape.push_back(s);
+  for (uint32_t s : proto.shape()) shape_.push_back(s);
   data_type_ = proto.data_type();
-  Reshape(shape);
+  block_ = device_->NewBlock((int)(Product(shape()) * SizeOf(data_type_)));
   //transpose_ = proto.transpose();
   strides_.clear();
   for (int32_t s : proto.strides()) strides_.push_back(s);
@@ -477,13 +497,13 @@ Tensor &Tensor::operator=(Tensor &&in) {
 //yisen todo
 Tensor Reshape(const Tensor &in, const Shape &s) {
   Tensor out(in);
-  out.Reshape(s);
+  out = out.Reshape(s);
   return out;
 }
 
 Tensor Reshape(const Tensor &in, Shape &&s) {
   Tensor out(in);
-  out.Reshape(std::move(s));
+  out = out.Reshape(std::move(s));
   return out;
 }
 
@@ -542,12 +562,10 @@ void CopyDataToFrom(Tensor *dst, const Tensor &src, const size_t num,
 }
 
 void RepeatDataToFrom(bool broadcast_flag, vector<size_t> repeats, int axis, 
-                      Tensor *dst, const Tensor &src, const size_t num, 
-                      const size_t dst_offset, const size_t src_offset) {
+                      Tensor *dst, const Tensor &src, const size_t num) {
   if (repeats.size() == 1) {
     broadcast_flag = true;
-  }
-  if (repeats.size() > 1) {
+  } else if (repeats.size() > 1) {
     if (axis == Noaxis) {
       LOG(FATAL) << "When repeats parameter is sequence, axis cannot be None";
     }
@@ -557,9 +575,7 @@ void RepeatDataToFrom(bool broadcast_flag, vector<size_t> repeats, int axis,
   }
   auto width = SizeOf(src.data_type());
   CHECK_EQ(width, SizeOf(dst->data_type()));
-  size_t nBytes = num * width;
-  auto d_offset = dst_offset * width;
-  auto s_offset = src_offset * width;
+  // size_t nBytes = num * width;
   int chunk = width;
   int axis_shape = 1;
   int shape_outer = 1;
@@ -575,26 +591,34 @@ void RepeatDataToFrom(bool broadcast_flag, vector<size_t> repeats, int axis,
       chunk *= src.shape()[i];
     }
   }
-  
+  int dst_offset = 0;
+  int src_offset = 0;
   std::shared_ptr<Device> src_dev = src.device(), dst_dev = dst->device();
   Block *from = src.block(), *to = dst->block();
-  if (dst_dev->lang() != src_dev->lang()) {
-    // let the none cpp device conduct copy op
-    if (dst_dev->lang() == kCpp) {
-      src_dev->RepeatDataToFrom(to, from, nBytes, kDeviceToHost, broadcast_flag, axis_shape, 
-                                shape_outer, chunk, repeats, (int)d_offset, (int)s_offset);
-    } else if (src_dev->lang() == kCpp) {
-      dst_dev->RepeatDataToFrom(to, from, nBytes, kHostToDevice, broadcast_flag, axis_shape, 
-                                shape_outer, chunk, repeats, (int)d_offset, (int)s_offset);
-    } else {
-      LOG(FATAL) << "Not support mem repeat copy betwee Cuda and OpenCL device";
+  for (int i = 0; i < shape_outer; i++) {
+    for (int j = 0; j < axis_shape; j++) {
+      int temp = broadcast_flag ? repeats[0] : repeats[j];
+      for (int k = 0; k < temp; k++) {
+        if (dst_dev->lang() != src_dev->lang()) {
+          // let the none cpp device conduct copy op
+          if (dst_dev->lang() == kCpp) {
+            src_dev->CopyDataToFrom(to, from, chunk, kDeviceToHost, dst_offset, src_offset);
+          } else if (src_dev->lang() == kCpp) {
+            dst_dev->CopyDataToFrom(to, from, chunk, kHostToDevice, dst_offset, src_offset);
+          } else {
+            LOG(FATAL) << "Not support mem repeat copy between Cuda and OpenCL device";
+          }
+        } else {
+          auto direct = src_dev->lang() == kCpp ? kHostToHost : kDeviceToDevice;
+          src_dev->CopyDataToFrom(to, from, chunk, direct, dst_offset, src_offset);
+        }
+        dst_offset += chunk;
+      }
+      src_offset += chunk;
     }
-  } else {
-    auto direct = src_dev->lang() == kCpp ? kHostToHost : kDeviceToDevice;
-    src_dev->RepeatDataToFrom(to, from, nBytes, direct, broadcast_flag, axis_shape, 
-                              shape_outer, chunk, repeats, (int)d_offset, (int)s_offset);
   }
 }
+
 //============================================================================
 /// typedef DType according to type value.
 /// DType would be used in the code block __VA_ARGS__.
@@ -729,6 +753,7 @@ GenUnaryTensorFn(Sign);
 GenUnaryTensorFn(Sqrt);
 GenUnaryTensorFn(Square);
 GenUnaryTensorFn(Tanh);
+GenUnaryTensorFn(Transform);
 
 #define EltwiseBinaryTensorFn(fn, lhs, rhs, ret)                            \
   do {                                                                      \
@@ -977,7 +1002,7 @@ Tensor ConcatOn(const vector<Tensor> &in, int axis) {
       tmp.push_back(Reshape(t, {t.shape(0), t.Size() / t.shape(0)}));
     }
     auto ret = ConcatenateRows(tmp);
-    ret.Reshape(out_shape);
+    ret = ret.Reshape(out_shape);
     return ret;
   } else {
     for (const auto& t : in) {
@@ -987,7 +1012,7 @@ Tensor ConcatOn(const vector<Tensor> &in, int axis) {
       tmp.push_back(Reshape(t, {nrow, t.Size() / nrow}));
     }
     auto ret = ConcatenateColumns(tmp);
-    ret.Reshape(out_shape);
+    ret = ret.Reshape(out_shape);
     return ret;
   }
 }
@@ -1071,7 +1096,7 @@ Tensor SliceOn(const Tensor&in, const size_t start, const size_t end, int axis)
     auto suffix = in.Size() / nrow / in.shape(axis);
     auto ret = SliceColumns(Reshape(in, {nrow, in.Size() / nrow}),
                             start * suffix, end * suffix);
-    ret.Reshape(out_shape);
+    ret = ret.Reshape(out_shape);
     return ret;
   }
 }
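
The inlined repeat loop above walks the source in chunks: everything inside the repeated axis forms one contiguous chunk, the axis itself contributes axis_shape chunks, and everything outside it repeats the whole pattern shape_outer times. A pure-Python sketch of the same indexing (operating on a flat element list rather than device blocks, so chunk counts elements instead of bytes):

    def repeat_copy(src, shape, axis, repeats, broadcast_flag):
        chunk = 1
        for d in shape[axis + 1:]:
            chunk *= d                      # elements per slice along 'axis'
        axis_shape = shape[axis]
        shape_outer = 1
        for d in shape[:axis]:
            shape_outer *= d
        dst, src_offset = [], 0
        for _ in range(shape_outer):
            for j in range(axis_shape):
                n = repeats[0] if broadcast_flag else repeats[j]
                for _ in range(n):          # copy the chunk n times
                    dst.extend(src[src_offset:src_offset + chunk])
                src_offset += chunk
        return dst

    assert repeat_copy([1, 2, 3], [3], 0, [2], True) == [1, 1, 2, 2, 3, 3]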

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/4940fefb/src/core/tensor/tensor_math.h
----------------------------------------------------------------------
diff --git a/src/core/tensor/tensor_math.h b/src/core/tensor/tensor_math.h
index c7fdfe5..388c010 100644
--- a/src/core/tensor/tensor_math.h
+++ b/src/core/tensor/tensor_math.h
@@ -251,6 +251,13 @@ void Tanh(const Tensor &in, Tensor *out, Context *ctx) {
   LOG(FATAL) << "Tanh Not Implemented";
 }
 
+/// similar to cudnnTransformTensor
+/// copies the data from one tensor to another tensor with a different layout
+/// the tensors must have the same dimensions but not necessarily the same strides 
+template <typename DType, typename Lang>
+void Transform(const Tensor &in, Tensor *out, Context *ctx) {
+  LOG(FATAL) << "Transform Not Implemented";
+}
 // **************************************
 // Random functions
 // **************************************
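
Transform materializes a strided view (e.g. a transposed tensor) into a fresh block with the default contiguous layout, which is what makes the new reshape-after-transpose path work. In numpy terms it is roughly the following (an analogy, not the SINGA API):

    import numpy as np

    a = np.arange(12.0).reshape(3, 4)
    b = a.T                          # a view with non-contiguous strides
    c = np.ascontiguousarray(b)      # copy into default row-major layout
    assert not b.flags['C_CONTIGUOUS'] and c.flags['C_CONTIGUOUS']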

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/4940fefb/src/core/tensor/tensor_math_cpp.h
----------------------------------------------------------------------
diff --git a/src/core/tensor/tensor_math_cpp.h b/src/core/tensor/tensor_math_cpp.h
index bfdd026..e302b04 100644
--- a/src/core/tensor/tensor_math_cpp.h
+++ b/src/core/tensor/tensor_math_cpp.h
@@ -427,6 +427,20 @@ void Tanh<float, lang::Cpp>(const Tensor& in, Tensor* out,
 }
 
 template <>
+void Transform<float, lang::Cpp>(const Tensor& in, Tensor* out,
+                            Context *ctx) {
+  float *outPtr = static_cast<float *>(out->block()->mutable_data());
+  const float *inPtr = static_cast<const float *>(in.block()->data());
+  vector<int> traversal_info = generate_traversal_info(in);
+  vector<int> shape_multipliers = generate_shape_multipliers(in);
+
+  for (size_t i = 0; i < in.Size(); i++) {
+    outPtr[i] = inPtr[traversal_info[in.shape().size()]];
+    traverse_next(in, shape_multipliers, traversal_info, i + 1);
+  }
+}
+
+template <>
 void Bernoulli<float, lang::Cpp>(const float p, Tensor* out,
                                  Context *ctx) {
   std::bernoulli_distribution distribution(p);

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/4940fefb/src/io/image_transformer.cc
----------------------------------------------------------------------
diff --git a/src/io/image_transformer.cc b/src/io/image_transformer.cc
index 6e5567d..204ad08 100644
--- a/src/io/image_transformer.cc
+++ b/src/io/image_transformer.cc
@@ -229,7 +229,7 @@ namespace singa {
             }
           }
         }
-        output.Reshape(Shape{channel, crop_height, crop_width});
+        output = Reshape(output, Shape{channel, crop_height, crop_width});
         output.CopyDataFromHostPtr<float>(out, crop_height * crop_width * channel);
         delete[] out;
       } else if (image_dim_order == "HWC") {
@@ -247,7 +247,7 @@ namespace singa {
             }
           }
         }
-        output.Reshape(Shape{crop_height, crop_width, channel});
+        output = Reshape(output, Shape{crop_height, crop_width, channel});
         output.CopyDataFromHostPtr<float>(out, crop_height * crop_width * channel);
         delete[] out;
       } else {
@@ -266,7 +266,7 @@ namespace singa {
           out[out_idx] = in[in_idx];
         }
       }
-      output.Reshape(Shape{crop_height, crop_width});
+      output = Reshape(output, Shape{crop_height, crop_width});
       output.CopyDataFromHostPtr<float>(out, crop_height * crop_width);
       delete[] out;
     }
@@ -304,7 +304,7 @@ namespace singa {
             }
           }
         }
-        output.Reshape(Shape{channel, height, width});
+        output = Reshape(output, Shape{channel, height, width});
         output.CopyDataFromHostPtr<float>(out, height * width * channel);
         delete[] out;
       } else if (image_dim_order == "HWC") {
@@ -325,7 +325,7 @@ namespace singa {
             }
           }
         }
-        output.Reshape(Shape{height, width, channel});
+        output = Reshape(output, Shape{height, width, channel});
         output.CopyDataFromHostPtr<float>(out, height * width * channel);
         delete[] out;
       } else {
@@ -347,7 +347,7 @@ namespace singa {
           out[out_idx] = in[in_idx];
         }
       }
-      output.Reshape(Shape{height, width});
+      output = Reshape(output, Shape{height, width});
       output.CopyDataFromHostPtr<float>(out, height * width);
       delete[] out;
     }

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/4940fefb/test/python/test_tensor.py
----------------------------------------------------------------------
diff --git a/test/python/test_tensor.py b/test/python/test_tensor.py
index 098994b..080dd1f 100644
--- a/test/python/test_tensor.py
+++ b/test/python/test_tensor.py
@@ -212,19 +212,19 @@ class TestTensorMethods(unittest.TestCase):
         self.assertAlmostEqual(np.sum(Ta_repeat1 - a_repeat1), 0., places=3)
         self.assertAlmostEqual(np.sum(Ta_repeat2 - a_repeat2), 0., places=3)
 
-    def test_sum2(self):
+    def test_sum(self):
         a = np.array([1.1,1.1,1.1,1.1,1.4,1.3,1.1,1.6,1.1,1.1,1.1,1.2])
         a = np.reshape(a,(2,3,2))
         ta = tensor.from_numpy(a)
 
         a_sum0 = np.sum(a)
-        ta_sum0 = tensor.sum2(ta)
+        ta_sum0 = tensor.sum(ta)
         Ta_sum0 = tensor.to_numpy(ta_sum0)
         a_sum1 = np.sum(a, axis = 1)
-        ta_sum1 = tensor.sum2(ta, axis = 1)
+        ta_sum1 = tensor.sum(ta, axis = 1)
         Ta_sum1 = tensor.to_numpy(ta_sum1)
         a_sum2 = np.sum(a, axis = 2)
-        ta_sum2 = tensor.sum2(ta, axis = 2)
+        ta_sum2 = tensor.sum(ta, axis = 2)
         Ta_sum2 = tensor.to_numpy(ta_sum2)
 
         self.assertAlmostEqual(np.sum(a_sum0 - Ta_sum0), 0., places=3)