You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mxnet.apache.org by ha...@apache.org on 2020/03/24 07:27:33 UTC

[incubator-mxnet] branch master updated: [Numpy] FFI for diag/diagonal/diag_indices_from (#17789)

This is an automated email from the ASF dual-hosted git repository.

haoj pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
     new d4052fd  [Numpy] FFI for diag/diagonal/diag_indices_from (#17789)
d4052fd is described below

commit d4052fde4a94c8a70f805fe9b44980125afa8686
Author: Minghao Liu <40...@users.noreply.github.com>
AuthorDate: Tue Mar 24 15:26:32 2020 +0800

    [Numpy] FFI for diag/diagonal/diag_indices_from (#17789)
    
    * ffi_diag/diagonal/diag_indices_from
    
    * sanity && benchmark
---
 benchmark/python/ffi/benchmark_ffi.py  |  12 ++++
 python/mxnet/ndarray/numpy/_op.py      | 106 ++++++++++++++++++++++++++++++---
 python/mxnet/numpy/multiarray.py       |  96 ++++++++++++++++++++++++++++-
 python/mxnet/symbol/numpy/_symbol.py   |  56 ++++++++++++++++-
 src/api/operator/numpy/np_matrix_op.cc |  49 +++++++++++++++
 src/operator/numpy/np_matrix_op-inl.h  |  44 +++++++++-----
 src/operator/numpy/np_matrix_op.cc     |  12 ++--
 src/operator/numpy/np_matrix_op.cu     |   8 +--
 8 files changed, 349 insertions(+), 34 deletions(-)

diff --git a/benchmark/python/ffi/benchmark_ffi.py b/benchmark/python/ffi/benchmark_ffi.py
index 4a4c410..96d8e1d 100644
--- a/benchmark/python/ffi/benchmark_ffi.py
+++ b/benchmark/python/ffi/benchmark_ffi.py
@@ -85,6 +85,18 @@ def prepare_workloads():
     OpArgMngr.add_workload("fmin", pool['2x2'], pool['2x2'])
     OpArgMngr.add_workload("fmod", pool['2x2'], pool['2x2'])
     OpArgMngr.add_workload("may_share_memory", pool['2x3'][:0], pool['2x3'][:1])
+    OpArgMngr.add_workload("diag", pool['2x2'], k=1)
+    OpArgMngr.add_workload("diagonal", pool['2x2x2'], offset=-1, axis1=0, axis2=1)
+    OpArgMngr.add_workload("diag_indices_from", pool['2x2'])
+    OpArgMngr.add_workload("bincount", dnp.arange(3, dtype=int), pool['3'], minlength=4)
+    OpArgMngr.add_workload("percentile", pool['2x2x2'], 80, axis=0, out=pool['2x2'],\
+                           interpolation='midpoint')
+    OpArgMngr.add_workload("quantile", pool['2x2x2'], 0.8, axis=0, out=pool['2x2'],\
+                           interpolation='midpoint')
+    OpArgMngr.add_workload("all", pool['2x2x2'], axis=(0, 1),\
+                           out=dnp.array([False, False], dtype=bool), keepdims=False)
+    OpArgMngr.add_workload("any", pool['2x2x2'], axis=(0, 1),\
+                           out=dnp.array([False, False], dtype=bool), keepdims=False)
     OpArgMngr.add_workload("roll", pool["2x2"], 1, axis=0)
     OpArgMngr.add_workload("rot90", pool["2x2"], 2)
 
diff --git a/python/mxnet/ndarray/numpy/_op.py b/python/mxnet/ndarray/numpy/_op.py
index 4bcc4a5..ff0e48d 100644
--- a/python/mxnet/ndarray/numpy/_op.py
+++ b/python/mxnet/ndarray/numpy/_op.py
@@ -47,7 +47,7 @@ __all__ = ['shape', 'zeros', 'zeros_like', 'ones', 'ones_like', 'full', 'full_li
            'equal', 'not_equal', 'greater', 'less', 'greater_equal', 'less_equal', 'roll', 'rot90', 'einsum',
            'true_divide', 'nonzero', 'quantile', 'percentile', 'shares_memory', 'may_share_memory',
            'diff', 'ediff1d', 'resize', 'polyval', 'nan_to_num', 'isnan', 'isinf', 'isposinf', 'isneginf', 'isfinite',
-           'where', 'bincount', 'pad', 'cumsum']
+           'where', 'bincount', 'pad', 'cumsum', 'diag', 'diagonal']
 
 
 @set_module('mxnet.ndarray.numpy')
@@ -5130,6 +5130,7 @@ def ravel(x, order='C'):
         raise TypeError('type {} not supported'.format(str(type(x))))
 
 
+@set_module('mxnet.ndarray.numpy')
 def unravel_index(indices, shape, order='C'): # pylint: disable=redefined-outer-name
     """
     Converts a flat index or array of flat indices into a tuple of coordinate arrays.
@@ -5159,11 +5160,7 @@ def unravel_index(indices, shape, order='C'): # pylint: disable=redefined-outer-
     if order == 'C':
         if isinstance(indices, numeric_types):
             return _np.unravel_index(indices, shape)
-        ret = _npi.unravel_index_fallback(indices, shape=shape)
-        ret_list = []
-        for item in ret:
-            ret_list += [item]
-        return tuple(ret_list)
+        return tuple(_npi.unravel_index_fallback(indices, shape=shape))
     else:
         raise NotImplementedError('Do not support column-major (Fortran-style) order at this moment')
 
@@ -5207,6 +5204,7 @@ def flatnonzero(a):
     return nonzero(ravel(a))[0]
 
 
+@set_module('mxnet.ndarray.numpy')
 def diag_indices_from(arr):
     """
     This returns a tuple of indices that can be used to access the main diagonal of an array
@@ -5243,7 +5241,7 @@ def diag_indices_from(arr):
         [  8,   9, 100,  11],
         [ 12,  13,  14, 100]])
     """
-    return tuple(_npi.diag_indices_from(arr))
+    return tuple(_api_internal.diag_indices_from(arr))
 
 
 @set_module('mxnet.ndarray.numpy')
@@ -7941,3 +7939,97 @@ def cumsum(a, axis=None, dtype=None, out=None):
            [ 4,  9, 15]])
     """
     return _api_internal.cumsum(a, axis, dtype, out)
+
+
+@set_module('mxnet.ndarray.numpy')
+def diag(v, k=0):
+    """
+    Extracts a diagonal or constructs a diagonal array.
+    - 1-D arrays: constructs a 2-D array with the input as its diagonal, all other elements are zero.
+    - 2-D arrays: extracts the k-th Diagonal
+
+    Parameters
+    ----------
+    v : ndarray
+        The array to apply diag method.
+    k : int, optional
+        Offset of the diagonal to extract or construct: positive is above the main diagonal, negative below.
+
+    Returns
+    -------
+    out : ndarray
+        The extracted diagonal or constructed diagonal array.
+
+    Examples
+    --------
+    >>> x = np.arange(9).reshape((3,3))
+    >>> x
+    array([[0, 1, 2],
+           [3, 4, 5],
+           [6, 7, 8]])
+    >>> np.diag(x)
+    array([0, 4, 8])
+    >>> np.diag(x, k=1)
+    array([1, 5])
+    >>> np.diag(x, k=-1)
+    array([3, 7])
+
+    >>> np.diag(np.diag(x))
+    array([[0, 0, 0],
+           [0, 4, 0],
+           [0, 0, 8]])
+    """
+    return _api_internal.diag(v, k)
+
+
+@set_module('mxnet.ndarray.numpy')
+def diagonal(a, offset=0, axis1=0, axis2=1):
+    """
+    If a is 2-D, returns the diagonal of a with the given offset, i.e., the collection of elements of
+    the form a[i, i+offset]. If a has more than two dimensions, then the axes specified by axis1 and
+    axis2 are used to determine the 2-D sub-array whose diagonal is returned. The shape of the
+    resulting array can be determined by removing axis1 and axis2 and appending an index to the
+    right equal to the size of the resulting diagonals.
+
+    Parameters
+    ----------
+    a : ndarray
+        Input data from which diagonal are taken.
+    offset : int, optional
+        Offset of the diagonal from the main diagonal.
+    axis1 : int, optional
+        Axis to be used as the first axis of the 2-D sub-arrays.
+    axis2 : int, optional
+        Axis to be used as the second axis of the 2-D sub-arrays.
+
+    Returns
+    -------
+    out : ndarray
+        Output result
+
+    Raises
+    -------
+    ValueError:  If the dimension of a is less than 2.
+
+    Examples
+    --------
+    >>> a = np.arange(4).reshape(2,2)
+    >>> a
+    array([[0, 1],
+           [2, 3]])
+    >>> np.diagonal(a)
+    array([0, 3])
+    >>> np.diagonal(a, 1)
+    array([1])
+
+    >>> a = np.arange(8).reshape(2,2,2)
+    >>> a
+    array([[[0, 1],
+            [2, 3]],
+           [[4, 5],
+            [6, 7]]])
+    >>> np.diagonal(a, 0, 0, 1)
+    array([[0, 6],
+           [1, 7]])
+    """
+    return _api_internal.diagonal(a, offset, axis1, axis2)
diff --git a/python/mxnet/numpy/multiarray.py b/python/mxnet/numpy/multiarray.py
index 61f0705..281a6f7 100644
--- a/python/mxnet/numpy/multiarray.py
+++ b/python/mxnet/numpy/multiarray.py
@@ -73,7 +73,7 @@ __all__ = ['ndarray', 'empty', 'empty_like', 'array', 'shape',
            'greater', 'less', 'greater_equal', 'less_equal', 'roll', 'rot90', 'einsum', 'true_divide', 'nonzero',
            'quantile', 'percentile', 'shares_memory', 'may_share_memory', 'diff', 'ediff1d', 'resize', 'matmul',
            'nan_to_num', 'isnan', 'isinf', 'isposinf', 'isneginf', 'isfinite', 'polyval', 'where', 'bincount',
-           'pad', 'cumsum']
+           'pad', 'cumsum', 'diag', 'diagonal']
 
 __all__ += fallback.__all__
 
@@ -10102,3 +10102,97 @@ def cumsum(a, axis=None, dtype=None, out=None):
     """
     return _mx_nd_np.cumsum(a, axis=axis, dtype=dtype, out=out)
 # pylint: enable=redefined-outer-name
+
+
+@set_module('mxnet.numpy')
+def diag(v, k=0):
+    """
+    Extracts a diagonal or constructs a diagonal array.
+    - 1-D arrays: constructs a 2-D array with the input as its diagonal, all other elements are zero.
+    - 2-D arrays: extracts the k-th Diagonal
+
+    Parameters
+    ----------
+    v : ndarray
+        The array to apply diag method.
+    k : int, optional
+        Offset of the diagonal to extract or construct: positive is above the main diagonal, negative below.
+
+    Returns
+    -------
+    out : ndarray
+        The extracted diagonal or constructed diagonal array.
+
+    Examples
+    --------
+    >>> x = np.arange(9).reshape((3,3))
+    >>> x
+    array([[0, 1, 2],
+           [3, 4, 5],
+           [6, 7, 8]])
+    >>> np.diag(x)
+    array([0, 4, 8])
+    >>> np.diag(x, k=1)
+    array([1, 5])
+    >>> np.diag(x, k=-1)
+    array([3, 7])
+
+    >>> np.diag(np.diag(x))
+    array([[0, 0, 0],
+           [0, 4, 0],
+           [0, 0, 8]])
+    """
+    return _mx_nd_np.diag(v, k=k)
+
+
+@set_module('mxnet.numpy')
+def diagonal(a, offset=0, axis1=0, axis2=1):
+    """
+    If a is 2-D, returns the diagonal of a with the given offset, i.e., the collection of elements of
+    the form a[i, i+offset]. If a has more than two dimensions, then the axes specified by axis1 and
+    axis2 are used to determine the 2-D sub-array whose diagonal is returned. The shape of the
+    resulting array can be determined by removing axis1 and axis2 and appending an index to the
+    right equal to the size of the resulting diagonals.
+
+    Parameters
+    ----------
+    a : ndarray
+        Input data from which diagonal are taken.
+    offset : int, optional
+        Offset of the diagonal from the main diagonal.
+    axis1 : int, optional
+        Axis to be used as the first axis of the 2-D sub-arrays.
+    axis2 : int, optional
+        Axis to be used as the second axis of the 2-D sub-arrays.
+
+    Returns
+    -------
+    out : ndarray
+        Output result
+
+    Raises
+    -------
+    ValueError:  If the dimension of a is less than 2.
+
+    Examples
+    --------
+    >>> a = np.arange(4).reshape(2,2)
+    >>> a
+    array([[0, 1],
+           [2, 3]])
+    >>> np.diagonal(a)
+    array([0, 3])
+    >>> np.diagonal(a, 1)
+    array([1])
+
+    >>> a = np.arange(8).reshape(2,2,2)
+    >>> a
+    array([[[0, 1],
+            [2, 3]],
+           [[4, 5],
+            [6, 7]]])
+    >>> np.diagonal(a, 0, 0, 1)
+    array([[0, 6],
+           [1, 7]])
+    """
+    return _mx_nd_np.diagonal(a, offset=offset, axis1=axis1, axis2=axis2)
diff --git a/python/mxnet/symbol/numpy/_symbol.py b/python/mxnet/symbol/numpy/_symbol.py
index 897f856..a2a4cd9 100644
--- a/python/mxnet/symbol/numpy/_symbol.py
+++ b/python/mxnet/symbol/numpy/_symbol.py
@@ -53,7 +53,7 @@ __all__ = ['zeros', 'zeros_like', 'ones', 'ones_like', 'full', 'full_like', 'emp
            'equal', 'not_equal', 'greater', 'less', 'greater_equal', 'less_equal', 'roll', 'rot90', 'einsum',
            'true_divide', 'quantile', 'percentile', 'shares_memory', 'may_share_memory', 'diff', 'ediff1d',
            'resize', 'polyval', 'nan_to_num', 'isnan', 'isinf', 'isposinf', 'isneginf', 'isfinite',
-           'where', 'bincount', 'pad', 'cumsum']
+           'where', 'bincount', 'pad', 'cumsum', 'diag', 'diagonal']
 
 
 @set_module('mxnet.symbol.numpy')
@@ -6968,4 +6968,58 @@ def cumsum(a, axis=None, dtype=None, out=None):
     return _npi.cumsum(a, axis=axis, dtype=dtype, out=out)
 
 
+@set_module('mxnet.symbol.numpy')
+def diag(v, k=0):
+    """
+    Extracts a diagonal or constructs a diagonal array.
+    - 1-D arrays: constructs a 2-D array with the input as its diagonal, all other elements are zero.
+    - 2-D arrays: extracts the k-th Diagonal
+
+    Parameters
+    ----------
+    v : _Symbol
+        The array to apply diag method.
+    k : int, optional
+        Offset of the diagonal to extract or construct: positive is above the main diagonal, negative below.
+
+    Returns
+    -------
+    out : _Symbol
+        The extracted diagonal or constructed diagonal array.
+    """
+    return _npi.diag(v, k=k)
+
+
+@set_module('mxnet.symbol.numpy')
+def diagonal(a, offset=0, axis1=0, axis2=1):
+    """
+    If a is 2-D, returns the diagonal of a with the given offset, i.e., the collection of elements of
+    the form a[i, i+offset]. If a has more than two dimensions, then the axes specified by axis1 and
+    axis2 are used to determine the 2-D sub-array whose diagonal is returned. The shape of the
+    resulting array can be determined by removing axis1 and axis2 and appending an index to the
+    right equal to the size of the resulting diagonals.
+
+    Parameters
+    ----------
+    a : _Symbol
+        Input data from which diagonal are taken.
+    offset : int, optional
+        Offset of the diagonal from the main diagonal.
+    axis1 : int, optional
+        Axis to be used as the first axis of the 2-D sub-arrays.
+    axis2 : int, optional
+        Axis to be used as the second axis of the 2-D sub-arrays.
+
+    Returns
+    -------
+    out : _Symbol
+        Output result
+
+    Raises
+    -------
+    ValueError:  If the dimension of a is less than 2.
+    """
+    return _npi.diagonal(a, offset=offset, axis1=axis1, axis2=axis2)
+
+
 _set_np_symbol_class(_Symbol)
diff --git a/src/api/operator/numpy/np_matrix_op.cc b/src/api/operator/numpy/np_matrix_op.cc
index 36d06c7..ae8421a 100644
--- a/src/api/operator/numpy/np_matrix_op.cc
+++ b/src/api/operator/numpy/np_matrix_op.cc
@@ -142,4 +142,53 @@ MXNET_REGISTER_API("_npi.rot90")
   *ret = ndoutputs[0];
 });
 
+MXNET_REGISTER_API("_npi.diag")
+.set_body([](runtime::MXNetArgs args, runtime::MXNetRetValue* ret) {
+  using namespace runtime;
+  const nnvm::Op* op = Op::Get("_npi_diag");
+  nnvm::NodeAttrs attrs;
+  op::NumpyDiagParam param;
+  param.k = args[1].operator int();
+  attrs.parsed = param;
+  attrs.op = op;
+  SetAttrDict<op::NumpyDiagParam>(&attrs);
+  NDArray* inputs[] = {args[0].operator mxnet::NDArray*()};
+  int num_inputs = 1;
+  int num_outputs = 0;
+  auto ndoutputs = Invoke(op, &attrs, num_inputs, inputs, &num_outputs, nullptr);
+  *ret = ndoutputs[0];
+});
+
+MXNET_REGISTER_API("_npi.diagonal")
+.set_body([](runtime::MXNetArgs args, runtime::MXNetRetValue* ret) {
+  using namespace runtime;
+  const nnvm::Op* op = Op::Get("_npi_diagonal");
+  nnvm::NodeAttrs attrs;
+  op::NumpyDiagonalParam param;
+  param.offset = args[1].operator int();
+  param.axis1 = args[2].operator int();
+  param.axis2 = args[3].operator int();
+  attrs.parsed = param;
+  attrs.op = op;
+  SetAttrDict<op::NumpyDiagonalParam>(&attrs);
+  NDArray* inputs[] = {args[0].operator mxnet::NDArray*()};
+  int num_inputs = 1;
+  int num_outputs = 0;
+  auto ndoutputs = Invoke(op, &attrs, num_inputs, inputs, &num_outputs, nullptr);
+  *ret = ndoutputs[0];
+});
+
+MXNET_REGISTER_API("_npi.diag_indices_from")
+.set_body([](runtime::MXNetArgs args, runtime::MXNetRetValue* ret) {
+  using namespace runtime;
+  const nnvm::Op* op = Op::Get("_npi_diag_indices_from");
+  nnvm::NodeAttrs attrs;
+  attrs.op = op;
+  NDArray* inputs[] = {args[0].operator mxnet::NDArray*()};
+  int num_inputs = 1;
+  int num_outputs = 0;
+  auto ndoutputs = Invoke(op, &attrs, num_inputs, inputs, &num_outputs, nullptr);
+  *ret = ndoutputs[0];
+});
+
 }  // namespace mxnet
diff --git a/src/operator/numpy/np_matrix_op-inl.h b/src/operator/numpy/np_matrix_op-inl.h
index 0bbe263..2e48596 100644
--- a/src/operator/numpy/np_matrix_op-inl.h
+++ b/src/operator/numpy/np_matrix_op-inl.h
@@ -983,6 +983,11 @@ struct NumpyDiagParam : public dmlc::Parameter<NumpyDiagParam> {
                 "Use k>0 for diagonals above the main diagonal, "
                 "and k<0 for diagonals below the main diagonal. ");
   }
+  void SetAttrDict(std::unordered_map<std::string, std::string>* dict) {
+    std::ostringstream k_s;
+    k_s << k;
+    (*dict)["k"] = k_s.str();
+  }
 };
 
 inline mxnet::TShape NumpyDiagShapeImpl(const mxnet::TShape &ishape,
@@ -1006,7 +1011,7 @@ inline mxnet::TShape NumpyDiagShapeImpl(const mxnet::TShape &ishape,
   auto s = std::max(std::min(h, w), a);
   // s is the length of diagonal with k as the offset
 
-  int32_t n_dim = ishape.ndim() - 1;
+  int n_dim = ishape.ndim() - 1;
   mxnet::TShape oshape(n_dim, -1);
   oshape[n_dim - 1] = s;
   return oshape;
@@ -1177,8 +1182,8 @@ void NumpyDiagOpBackward(const nnvm::NodeAttrs &attrs,
 
 struct NumpyDiagonalParam : public dmlc::Parameter<NumpyDiagonalParam> {
   int offset;
-  int32_t axis1;
-  int32_t axis2;
+  int axis1;
+  int axis2;
   DMLC_DECLARE_PARAMETER(NumpyDiagonalParam) {
     DMLC_DECLARE_FIELD(offset)
       .set_default(0)
@@ -1195,12 +1200,21 @@ struct NumpyDiagonalParam : public dmlc::Parameter<NumpyDiagonalParam> {
       .describe("The second axis of the sub-arrays of interest. "
                 "Ignored when the input is a 1-D array.");
   }
+  void SetAttrDict(std::unordered_map<std::string, std::string>* dict) {
+    std::ostringstream offset_s, axis1_s, axis2_s;
+    offset_s << offset;
+    axis1_s << axis1;
+    axis2_s << axis2;
+    (*dict)["offset"] = offset_s.str();
+    (*dict)["axis1"] = axis1_s.str();
+    (*dict)["axis2"] = axis2_s.str();
+  }
 };
 
 inline mxnet::TShape NumpyDiagonalShapeImpl(const mxnet::TShape& ishape, const int k,
-                                            const int32_t axis1, const int32_t axis2) {
-  int32_t x1 = CheckAxis(axis1, ishape.ndim());
-  int32_t x2 = CheckAxis(axis2, ishape.ndim());
+                                            const int axis1, const int axis2) {
+  int x1 = CheckAxis(axis1, ishape.ndim());
+  int x2 = CheckAxis(axis2, ishape.ndim());
 
   CHECK_NE(x1, x2) << "axis1 and axis2 cannot refer to the same axis " << x1;
 
@@ -1215,11 +1229,11 @@ inline mxnet::TShape NumpyDiagonalShapeImpl(const mxnet::TShape& ishape, const i
   if (s < 0) s = 0;
   if (x1 > x2) std::swap(x1, x2);
 
-  int32_t n_dim = ishape.ndim() - 1;
+  int n_dim = ishape.ndim() - 1;
   mxnet::TShape oshape(n_dim, -1);
 
   // remove axis1 and axis2 and append the new axis to the end
-  uint32_t idx = 0;
+  int idx = 0;
   for (int i = 0; i <= n_dim; ++i) {
     if (i != x1 && i != x2) {
       oshape[idx++] = ishape[i];
@@ -1292,22 +1306,22 @@ void NumpyDiagonalOpImpl(const TBlob& in_data,
                          const std::vector<OpReqType>& req) {
   using namespace mxnet_op;
   using namespace mshadow;
-  uint32_t x1 = CheckAxis(param.axis1, ishape.ndim());
-  uint32_t x2 = CheckAxis(param.axis2, ishape.ndim());
-  uint32_t idim = ishape.ndim(), odim = oshape.ndim();
-  uint32_t minx = x1, maxx = x2;
+  int x1 = CheckAxis(param.axis1, ishape.ndim());
+  int x2 = CheckAxis(param.axis2, ishape.ndim());
+  int idim = ishape.ndim(), odim = oshape.ndim();
+  int minx = x1, maxx = x2;
   if (minx > maxx) std::swap(minx, maxx);
 
   index_t oleading = 1,
           obody = 1,
           otrailing = 1;
-  for (uint32_t i = 0; i < minx; ++i) {
+  for (int i = 0; i < minx; ++i) {
     oleading *= ishape[i];
   }
-  for (uint32_t i = minx + 1; i < maxx; ++i) {
+  for (int i = minx + 1; i < maxx; ++i) {
     obody *= ishape[i];
   }
-  for (uint32_t i = maxx + 1; i < idim; ++i) {
+  for (int i = maxx + 1; i < idim; ++i) {
     otrailing *= ishape[i];
   }
 
diff --git a/src/operator/numpy/np_matrix_op.cc b/src/operator/numpy/np_matrix_op.cc
index e9d269d..1c0a8a6 100644
--- a/src/operator/numpy/np_matrix_op.cc
+++ b/src/operator/numpy/np_matrix_op.cc
@@ -1435,7 +1435,7 @@ NNVM_REGISTER_OP(_npi_dsplit)
 .add_argument("data", "NDArray-or-Symbol", "The input")
 .add_arguments(SplitParam::__FIELDS__());
 
-NNVM_REGISTER_OP(_np_diag)
+NNVM_REGISTER_OP(_npi_diag)
 .set_attr_parser(ParamParser<NumpyDiagParam>)
 .set_num_inputs(1)
 .set_num_outputs(1)
@@ -1446,18 +1446,18 @@ NNVM_REGISTER_OP(_np_diag)
 .set_attr<mxnet::FInferShape>("FInferShape", NumpyDiagOpShape)
 .set_attr<nnvm::FInferType>("FInferType", NumpyDiagOpType)
 .set_attr<FCompute>("FCompute<cpu>", NumpyDiagOpForward<cpu>)
-.set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseNone{"_backward_diag"})
+.set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseNone{"_backward_npi_diag"})
 .add_argument("data", "NDArray-or-Symbol", "Input ndarray")
 .add_arguments(NumpyDiagParam::__FIELDS__());
 
-NNVM_REGISTER_OP(_backward_np_diag)
+NNVM_REGISTER_OP(_backward_npi_diag)
 .set_attr_parser(ParamParser<NumpyDiagParam>)
 .set_num_inputs(1)
 .set_num_outputs(1)
 .set_attr<nnvm::TIsBackward>("TIsBackward", true)
 .set_attr<FCompute>("FCompute<cpu>", NumpyDiagOpBackward<cpu>);
 
-NNVM_REGISTER_OP(_np_diagonal)
+NNVM_REGISTER_OP(_npi_diagonal)
 .set_attr_parser(ParamParser<NumpyDiagonalParam>)
 .set_num_inputs(1)
 .set_num_outputs(1)
@@ -1468,11 +1468,11 @@ NNVM_REGISTER_OP(_np_diagonal)
 .set_attr<mxnet::FInferShape>("FInferShape", NumpyDiagonalOpShape)
 .set_attr<nnvm::FInferType>("FInferType", NumpyDiagonalOpType)
 .set_attr<FCompute>("FCompute<cpu>", NumpyDiagonalOpForward<cpu>)
-.set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseNone{"_backward_np_diagonal"})
+.set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseNone{"_backward_npi_diagonal"})
 .add_argument("data", "NDArray-or-Symbol", "Input ndarray")
 .add_arguments(NumpyDiagonalParam::__FIELDS__());
 
-NNVM_REGISTER_OP(_backward_np_diagonal)
+NNVM_REGISTER_OP(_backward_npi_diagonal)
 .set_attr_parser(ParamParser<NumpyDiagonalParam>)
 .set_num_inputs(1)
 .set_num_outputs(1)
diff --git a/src/operator/numpy/np_matrix_op.cu b/src/operator/numpy/np_matrix_op.cu
index c9e896b..c4b3290 100644
--- a/src/operator/numpy/np_matrix_op.cu
+++ b/src/operator/numpy/np_matrix_op.cu
@@ -127,16 +127,16 @@ NNVM_REGISTER_OP(_npi_dsplit)
 NNVM_REGISTER_OP(_npx_reshape)
 .set_attr<FCompute>("FCompute<gpu>", UnaryOp::IdentityCompute<gpu>);
 
-NNVM_REGISTER_OP(_np_diag)
+NNVM_REGISTER_OP(_npi_diag)
 .set_attr<FCompute>("FCompute<gpu>", NumpyDiagOpForward<gpu>);
 
-NNVM_REGISTER_OP(_backward_np_diag)
+NNVM_REGISTER_OP(_backward_npi_diag)
 .set_attr<FCompute>("FCompute<gpu>", NumpyDiagOpBackward<gpu>);
 
-NNVM_REGISTER_OP(_np_diagonal)
+NNVM_REGISTER_OP(_npi_diagonal)
 .set_attr<FCompute>("FCompute<gpu>", NumpyDiagonalOpForward<gpu>);
 
-NNVM_REGISTER_OP(_backward_np_diagonal)
+NNVM_REGISTER_OP(_backward_npi_diagonal)
 .set_attr<FCompute>("FCompute<gpu>", NumpyDiagonalOpBackward<gpu>);
 
 NNVM_REGISTER_OP(_np_diagflat)