You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mxnet.apache.org by la...@apache.org on 2021/09/20 18:54:49 UTC

[incubator-mxnet] branch master updated: Split np_elemwise_broadcast_logic_op.cc (#20580)

This is an automated email from the ASF dual-hosted git repository.

lausen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
     new 179e7db  Split np_elemwise_broadcast_logic_op.cc (#20580)
179e7db is described below

commit 179e7db21693162eda5b254ef4ae2c36e577b97e
Author: Zhenghui Jin <69...@users.noreply.github.com>
AuthorDate: Mon Sep 20 11:52:35 2021 -0700

    Split np_elemwise_broadcast_logic_op.cc (#20580)
    
    To avoid build failures due to large source files.
    See https://github.com/apache/incubator-mxnet/issues/19688
---
 .../numpy/np_elemwise_broadcast_logic_op.cu        |  65 ------
 ...ogic_op.cc => np_elemwise_broadcast_logic_op.h} |  93 +++-----
 .../numpy/np_elemwise_broadcast_logic_op_and.cc    |  38 ++++
 .../numpy/np_elemwise_broadcast_logic_op_and.cu    |  35 +++
 .../numpy/np_elemwise_broadcast_logic_op_equal.cc  |  38 ++++
 .../numpy/np_elemwise_broadcast_logic_op_equal.cu  |  35 +++
 .../np_elemwise_broadcast_logic_op_greater.cc      |  38 ++++
 .../np_elemwise_broadcast_logic_op_greater.cu      |  35 +++
 ...np_elemwise_broadcast_logic_op_greater_equal.cc |  38 ++++
 ...np_elemwise_broadcast_logic_op_greater_equal.cu |  35 +++
 .../numpy/np_elemwise_broadcast_logic_op_less.cc   |  38 ++++
 .../numpy/np_elemwise_broadcast_logic_op_less.cu   |  35 +++
 .../np_elemwise_broadcast_logic_op_less_equal.cc   |  38 ++++
 .../np_elemwise_broadcast_logic_op_less_equal.cu   |  35 +++
 .../np_elemwise_broadcast_logic_op_not_equal.cc    |  38 ++++
 .../np_elemwise_broadcast_logic_op_not_equal.cu    |  35 +++
 .../numpy/np_elemwise_broadcast_logic_op_or.cc     |  38 ++++
 .../numpy/np_elemwise_broadcast_logic_op_or.cu     |  35 +++
 .../numpy/np_elemwise_broadcast_logic_op_xor.cc    |  38 ++++
 .../numpy/np_elemwise_broadcast_logic_op_xor.cu    |  35 +++
 src/operator/numpy/np_elemwise_broadcast_op.cc     | 235 ---------------------
 src/operator/numpy/np_elemwise_broadcast_op.cu     |  85 --------
 src/operator/numpy/np_elemwise_broadcast_op.h      |  54 +++++
 src/operator/numpy/np_elemwise_broadcast_op_add.cc |  54 +++++
 src/operator/numpy/np_elemwise_broadcast_op_add.cu |  38 ++++
 src/operator/numpy/np_elemwise_broadcast_op_mod.cc |  54 +++++
 src/operator/numpy/np_elemwise_broadcast_op_mod.cu |  38 ++++
 src/operator/numpy/np_elemwise_broadcast_op_mul.cc |  54 +++++
 src/operator/numpy/np_elemwise_broadcast_op_mul.cu |  38 ++++
 src/operator/numpy/np_elemwise_broadcast_op_pow.cc |  54 +++++
 src/operator/numpy/np_elemwise_broadcast_op_pow.cu |  38 ++++
 .../numpy/np_elemwise_broadcast_op_scalar.cc       |  66 ++++++
 .../numpy/np_elemwise_broadcast_op_scalar.cu       |  56 +++++
 src/operator/numpy/np_elemwise_broadcast_op_sub.cc |  54 +++++
 src/operator/numpy/np_elemwise_broadcast_op_sub.cu |  38 ++++
 35 files changed, 1322 insertions(+), 449 deletions(-)

diff --git a/src/operator/numpy/np_elemwise_broadcast_logic_op.cu b/src/operator/numpy/np_elemwise_broadcast_logic_op.cu
deleted file mode 100644
index 540d2aa..0000000
--- a/src/operator/numpy/np_elemwise_broadcast_logic_op.cu
+++ /dev/null
@@ -1,65 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*!
- *  Copyright (c) 2019 by Contributors
- * \file np_elemwise_broadcast_logic_op.cu
- * \brief GPU Implementation of basic functions for elementwise binary
- * broadcast logic operator.
- */
-#include "../tensor/elemwise_binary_broadcast_op.h"
-#include "../tensor/elemwise_binary_scalar_op.h"
-
-namespace mxnet {
-namespace op {
-
-#if MXNET_USE_TVM_OP == 0
-
-#define MXNET_OPERATOR_REGISTER_NP_BINARY_LOGIC_GPU(name) \
-  NNVM_REGISTER_OP(_npi_##name)                           \
-      .set_attr<FCompute>("FCompute<gpu>", BinaryBroadcastRTCCompute{"np_" #name})
-
-#define MXNET_OPERATOR_REGISTER_NP_BINARY_SCALAR_LOGIC_GPU(name) \
-  NNVM_REGISTER_OP(_npi_##name##_scalar)                         \
-      .set_attr<FCompute>("FCompute<gpu>", BinaryScalarRTCCompute{"np_" #name})
-
-MXNET_OPERATOR_REGISTER_NP_BINARY_LOGIC_GPU(equal);
-MXNET_OPERATOR_REGISTER_NP_BINARY_LOGIC_GPU(not_equal);
-MXNET_OPERATOR_REGISTER_NP_BINARY_LOGIC_GPU(greater);
-MXNET_OPERATOR_REGISTER_NP_BINARY_LOGIC_GPU(less);
-MXNET_OPERATOR_REGISTER_NP_BINARY_LOGIC_GPU(greater_equal);
-MXNET_OPERATOR_REGISTER_NP_BINARY_LOGIC_GPU(less_equal);
-MXNET_OPERATOR_REGISTER_NP_BINARY_LOGIC_GPU(logical_and);
-MXNET_OPERATOR_REGISTER_NP_BINARY_LOGIC_GPU(logical_or);
-MXNET_OPERATOR_REGISTER_NP_BINARY_LOGIC_GPU(logical_xor);
-
-MXNET_OPERATOR_REGISTER_NP_BINARY_SCALAR_LOGIC_GPU(equal);
-MXNET_OPERATOR_REGISTER_NP_BINARY_SCALAR_LOGIC_GPU(not_equal);
-MXNET_OPERATOR_REGISTER_NP_BINARY_SCALAR_LOGIC_GPU(greater);
-MXNET_OPERATOR_REGISTER_NP_BINARY_SCALAR_LOGIC_GPU(less);
-MXNET_OPERATOR_REGISTER_NP_BINARY_SCALAR_LOGIC_GPU(greater_equal);
-MXNET_OPERATOR_REGISTER_NP_BINARY_SCALAR_LOGIC_GPU(less_equal);
-MXNET_OPERATOR_REGISTER_NP_BINARY_SCALAR_LOGIC_GPU(logical_and);
-MXNET_OPERATOR_REGISTER_NP_BINARY_SCALAR_LOGIC_GPU(logical_or);
-MXNET_OPERATOR_REGISTER_NP_BINARY_SCALAR_LOGIC_GPU(logical_xor);
-
-#endif  // MXNET_USE_TVM_OP
-
-}  // namespace op
-}  // namespace mxnet
diff --git a/src/operator/numpy/np_elemwise_broadcast_logic_op.cc b/src/operator/numpy/np_elemwise_broadcast_logic_op.h
similarity index 83%
rename from src/operator/numpy/np_elemwise_broadcast_logic_op.cc
rename to src/operator/numpy/np_elemwise_broadcast_logic_op.h
index 001cbdc..1d7e13f 100644
--- a/src/operator/numpy/np_elemwise_broadcast_logic_op.cc
+++ b/src/operator/numpy/np_elemwise_broadcast_logic_op.h
@@ -19,17 +19,23 @@
 
 /*!
  *  Copyright (c) 2019 by Contributors
- * \file np_elemwise_binary_logic_op.cc
- * \brief CPU Implementation of basic logic functions for elementwise numpy binary
+ * \file np_elemwise_broadcast_logic_op.h
+ * \brief Function definition of basic logic functions for elementwise numpy binary
  * broadcast operator.
  */
 
+#ifndef MXNET_OPERATOR_NUMPY_NP_ELEMWISE_BROADCAST_LOGIC_OP_H_
+#define MXNET_OPERATOR_NUMPY_NP_ELEMWISE_BROADCAST_LOGIC_OP_H_
+
 #if MXNET_USE_TVM_OP
 #include <tvm/runtime/c_runtime_api.h>
 #include <tvm/runtime/packed_func.h>
 #include "../tvmop/op_module.h"
 #endif  // MXNET_USE_TVM_OP
 
+#include <string>
+#include <utility>
+#include <vector>
 #include "../tensor/elemwise_binary_broadcast_op.h"
 #include "../tensor/elemwise_binary_scalar_op.h"
 
@@ -58,7 +64,7 @@ static constexpr char func_logical_xor_cpu[]   = "logical_xor_cpu";
 static constexpr char func_logical_xor_gpu[]   = "logical_xor_gpu";
 #pragma clang diagnostic pop
 
-bool NumpyBinaryLogicOpType(const nnvm::NodeAttrs& attrs,
+inline bool NumpyBinaryLogicOpType(const nnvm::NodeAttrs& attrs,
                             std::vector<int>* in_attrs,
                             std::vector<int>* out_attrs) {
   CHECK_EQ(in_attrs->size(), 2U);
@@ -69,7 +75,7 @@ bool NumpyBinaryLogicOpType(const nnvm::NodeAttrs& attrs,
   return true;
 }
 
-TBlob PrependAxes(const TBlob& src, const int dst_ndim) {
+inline TBlob PrependAxes(const TBlob& src, const int dst_ndim) {
   CHECK_LE(src.shape_.ndim(), dst_ndim);
   const int src_ndim = src.shape_.ndim();
   if (src_ndim == dst_ndim)
@@ -228,16 +234,6 @@ struct GetBinaryBroadcastCompute {
       .add_argument("lhs", "NDArray-or-Symbol", "First input to the function")                    \
       .add_argument("rhs", "NDArray-or-Symbol", "Second input to the function")
 
-MXNET_OPERATOR_REGISTER_NP_BINARY_LOGIC(equal);
-MXNET_OPERATOR_REGISTER_NP_BINARY_LOGIC(not_equal);
-MXNET_OPERATOR_REGISTER_NP_BINARY_LOGIC(greater);
-MXNET_OPERATOR_REGISTER_NP_BINARY_LOGIC(less);
-MXNET_OPERATOR_REGISTER_NP_BINARY_LOGIC(greater_equal);
-MXNET_OPERATOR_REGISTER_NP_BINARY_LOGIC(less_equal);
-MXNET_OPERATOR_REGISTER_NP_BINARY_LOGIC(logical_and);
-MXNET_OPERATOR_REGISTER_NP_BINARY_LOGIC(logical_or);
-MXNET_OPERATOR_REGISTER_NP_BINARY_LOGIC(logical_xor);
-
 #if MXNET_USE_TVM_OP
 
 #define MXNET_OPERATOR_REGISTER_NP_BINARY_LOGIC_CPU(name) \
@@ -253,16 +249,6 @@ MXNET_OPERATOR_REGISTER_NP_BINARY_LOGIC(logical_xor);
       .set_attr<FCompute>("FCompute<gpu>",                \
                           TVMBinaryBroadcastCompute{func_##name##_gpu, "np_" #name})
 
-MXNET_OPERATOR_REGISTER_NP_BINARY_LOGIC_GPU(equal);
-MXNET_OPERATOR_REGISTER_NP_BINARY_LOGIC_GPU(not_equal);
-MXNET_OPERATOR_REGISTER_NP_BINARY_LOGIC_GPU(greater);
-MXNET_OPERATOR_REGISTER_NP_BINARY_LOGIC_GPU(less);
-MXNET_OPERATOR_REGISTER_NP_BINARY_LOGIC_GPU(greater_equal);
-MXNET_OPERATOR_REGISTER_NP_BINARY_LOGIC_GPU(less_equal);
-MXNET_OPERATOR_REGISTER_NP_BINARY_LOGIC_GPU(logical_and);
-MXNET_OPERATOR_REGISTER_NP_BINARY_LOGIC_GPU(logical_or);
-MXNET_OPERATOR_REGISTER_NP_BINARY_LOGIC_GPU(logical_xor);
-
 #endif  // MXNET_USE_CUDA
 
 #else
@@ -272,19 +258,17 @@ MXNET_OPERATOR_REGISTER_NP_BINARY_LOGIC_GPU(logical_xor);
       .set_attr<FCompute>("FCompute<cpu>",                \
                           BinaryBroadcastComputeLogic<cpu, mshadow_op::np_##name>)
 
+#if MXNET_USE_CUDA
+
+#define MXNET_OPERATOR_REGISTER_NP_BINARY_LOGIC_GPU(name)                                     \
+  NNVM_REGISTER_OP(_npi_##name)                                                               \
+  .set_attr<FCompute>("FCompute<gpu>", BinaryBroadcastRTCCompute{"np_" #name})
+
+#endif  // MXNET_USE_CUDA
+
 #endif  // MXNET_USE_TVM_OP
 
-MXNET_OPERATOR_REGISTER_NP_BINARY_LOGIC_CPU(equal);
-MXNET_OPERATOR_REGISTER_NP_BINARY_LOGIC_CPU(not_equal);
-MXNET_OPERATOR_REGISTER_NP_BINARY_LOGIC_CPU(greater);
-MXNET_OPERATOR_REGISTER_NP_BINARY_LOGIC_CPU(less);
-MXNET_OPERATOR_REGISTER_NP_BINARY_LOGIC_CPU(greater_equal);
-MXNET_OPERATOR_REGISTER_NP_BINARY_LOGIC_CPU(less_equal);
-MXNET_OPERATOR_REGISTER_NP_BINARY_LOGIC_CPU(logical_and);
-MXNET_OPERATOR_REGISTER_NP_BINARY_LOGIC_CPU(logical_or);
-MXNET_OPERATOR_REGISTER_NP_BINARY_LOGIC_CPU(logical_xor);
-
-bool NumpyBinaryScalarLogicOpType(const nnvm::NodeAttrs& attrs,
+inline bool NumpyBinaryScalarLogicOpType(const nnvm::NodeAttrs& attrs,
                                   std::vector<int>* in_attrs,
                                   std::vector<int>* out_attrs) {
   CHECK_EQ(in_attrs->size(), 1U);
@@ -358,15 +342,6 @@ struct TVMBinaryBroadcastScalarCompute {
       .add_argument("data", "NDArray-or-Symbol", "First input to the function")           \
       .add_arguments(NumpyBinaryScalarParam::__FIELDS__())
 
-MXNET_OPERATOR_REGISTER_NP_BINARY_SCALAR_LOGIC(equal);
-MXNET_OPERATOR_REGISTER_NP_BINARY_SCALAR_LOGIC(not_equal);
-MXNET_OPERATOR_REGISTER_NP_BINARY_SCALAR_LOGIC(greater);
-MXNET_OPERATOR_REGISTER_NP_BINARY_SCALAR_LOGIC(less);
-MXNET_OPERATOR_REGISTER_NP_BINARY_SCALAR_LOGIC(greater_equal);
-MXNET_OPERATOR_REGISTER_NP_BINARY_SCALAR_LOGIC(less_equal);
-MXNET_OPERATOR_REGISTER_NP_BINARY_SCALAR_LOGIC(logical_and);
-MXNET_OPERATOR_REGISTER_NP_BINARY_SCALAR_LOGIC(logical_or);
-MXNET_OPERATOR_REGISTER_NP_BINARY_SCALAR_LOGIC(logical_xor);
 
 #pragma clang diagnostic push
 #pragma clang diagnostic ignored "-Wunused-const-variable"
@@ -404,16 +379,6 @@ static constexpr char func_logical_xor_scalar_gpu[]   = "logical_xor_scalar_gpu"
       .set_attr<FCompute>("FCompute<gpu>",                       \
                           TVMBinaryBroadcastScalarCompute{func_##name##_scalar_gpu})
 
-MXNET_OPERATOR_REGISTER_NP_BINARY_SCALAR_LOGIC_GPU(equal);
-MXNET_OPERATOR_REGISTER_NP_BINARY_SCALAR_LOGIC_GPU(not_equal);
-MXNET_OPERATOR_REGISTER_NP_BINARY_SCALAR_LOGIC_GPU(greater);
-MXNET_OPERATOR_REGISTER_NP_BINARY_SCALAR_LOGIC_GPU(less);
-MXNET_OPERATOR_REGISTER_NP_BINARY_SCALAR_LOGIC_GPU(greater_equal);
-MXNET_OPERATOR_REGISTER_NP_BINARY_SCALAR_LOGIC_GPU(less_equal);
-MXNET_OPERATOR_REGISTER_NP_BINARY_SCALAR_LOGIC_GPU(logical_and);
-MXNET_OPERATOR_REGISTER_NP_BINARY_SCALAR_LOGIC_GPU(logical_or);
-MXNET_OPERATOR_REGISTER_NP_BINARY_SCALAR_LOGIC_GPU(logical_xor);
-
 #endif  // MXNET_USE_CUDA
 
 #else
@@ -426,17 +391,17 @@ MXNET_OPERATOR_REGISTER_NP_BINARY_SCALAR_LOGIC_GPU(logical_xor);
         return std::vector<ResourceRequest>{ResourceRequest::kTempSpace};           \
       })
 
-#endif  // MXNET_USE_TVM_OP
+#if MXNET_USE_CUDA
+
+#define MXNET_OPERATOR_REGISTER_NP_BINARY_SCALAR_LOGIC_GPU(name)                               \
+  NNVM_REGISTER_OP(_npi_##name##_scalar)                                                       \
+  .set_attr<FCompute>("FCompute<gpu>", BinaryScalarRTCCompute{"np_" #name})
 
-MXNET_OPERATOR_REGISTER_NP_BINARY_SCALAR_LOGIC_CPU(equal);
-MXNET_OPERATOR_REGISTER_NP_BINARY_SCALAR_LOGIC_CPU(not_equal);
-MXNET_OPERATOR_REGISTER_NP_BINARY_SCALAR_LOGIC_CPU(greater);
-MXNET_OPERATOR_REGISTER_NP_BINARY_SCALAR_LOGIC_CPU(less);
-MXNET_OPERATOR_REGISTER_NP_BINARY_SCALAR_LOGIC_CPU(greater_equal);
-MXNET_OPERATOR_REGISTER_NP_BINARY_SCALAR_LOGIC_CPU(less_equal);
-MXNET_OPERATOR_REGISTER_NP_BINARY_SCALAR_LOGIC_CPU(logical_and);
-MXNET_OPERATOR_REGISTER_NP_BINARY_SCALAR_LOGIC_CPU(logical_or);
-MXNET_OPERATOR_REGISTER_NP_BINARY_SCALAR_LOGIC_CPU(logical_xor);
+#endif  // MXNET_USE_CUDA
+
+#endif  // MXNET_USE_TVM_OP
 
 }  // namespace op
 }  // namespace mxnet
+
+#endif  // MXNET_OPERATOR_NUMPY_NP_ELEMWISE_BROADCAST_LOGIC_OP_H_
diff --git a/src/operator/numpy/np_elemwise_broadcast_logic_op_and.cc b/src/operator/numpy/np_elemwise_broadcast_logic_op_and.cc
new file mode 100644
index 0000000..e9d4bb3
--- /dev/null
+++ b/src/operator/numpy/np_elemwise_broadcast_logic_op_and.cc
@@ -0,0 +1,38 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ *  Copyright (c) 2019 by Contributors
+ * \file np_elemwise_binary_logic_op_and.cc
+ * \brief CPU Implementation of basic logic functions for logical_and operation.
+ */
+
+#include "./np_elemwise_broadcast_logic_op.h"
+
+namespace mxnet {
+namespace op {
+
+MXNET_OPERATOR_REGISTER_NP_BINARY_LOGIC(logical_and);
+MXNET_OPERATOR_REGISTER_NP_BINARY_LOGIC_CPU(logical_and);
+
+MXNET_OPERATOR_REGISTER_NP_BINARY_SCALAR_LOGIC(logical_and);
+MXNET_OPERATOR_REGISTER_NP_BINARY_SCALAR_LOGIC_CPU(logical_and);
+
+}  // namespace op
+}  // namespace mxnet
diff --git a/src/operator/numpy/np_elemwise_broadcast_logic_op_and.cu b/src/operator/numpy/np_elemwise_broadcast_logic_op_and.cu
new file mode 100644
index 0000000..9eaa885
--- /dev/null
+++ b/src/operator/numpy/np_elemwise_broadcast_logic_op_and.cu
@@ -0,0 +1,35 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ *  Copyright (c) 2019 by Contributors
+ * \file np_elemwise_broadcast_logic_op_and.cu
+ * \brief GPU Implementation of basic functions for logical_and operation.
+ */
+
+#include "./np_elemwise_broadcast_logic_op.h"
+
+namespace mxnet {
+namespace op {
+
+MXNET_OPERATOR_REGISTER_NP_BINARY_LOGIC_GPU(logical_and);
+MXNET_OPERATOR_REGISTER_NP_BINARY_SCALAR_LOGIC_GPU(logical_and);
+
+}  // namespace op
+}  // namespace mxnet
diff --git a/src/operator/numpy/np_elemwise_broadcast_logic_op_equal.cc b/src/operator/numpy/np_elemwise_broadcast_logic_op_equal.cc
new file mode 100644
index 0000000..fb60541
--- /dev/null
+++ b/src/operator/numpy/np_elemwise_broadcast_logic_op_equal.cc
@@ -0,0 +1,38 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ *  Copyright (c) 2019 by Contributors
+ * \file np_elemwise_binary_logic_op_equal.cc
+ * \brief CPU Implementation of basic logic functions for equal operation.
+ */
+
+#include "./np_elemwise_broadcast_logic_op.h"
+
+namespace mxnet {
+namespace op {
+
+MXNET_OPERATOR_REGISTER_NP_BINARY_LOGIC(equal);
+MXNET_OPERATOR_REGISTER_NP_BINARY_LOGIC_CPU(equal);
+
+MXNET_OPERATOR_REGISTER_NP_BINARY_SCALAR_LOGIC(equal);
+MXNET_OPERATOR_REGISTER_NP_BINARY_SCALAR_LOGIC_CPU(equal);
+
+}  // namespace op
+}  // namespace mxnet
diff --git a/src/operator/numpy/np_elemwise_broadcast_logic_op_equal.cu b/src/operator/numpy/np_elemwise_broadcast_logic_op_equal.cu
new file mode 100644
index 0000000..008e46e
--- /dev/null
+++ b/src/operator/numpy/np_elemwise_broadcast_logic_op_equal.cu
@@ -0,0 +1,35 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ *  Copyright (c) 2019 by Contributors
+ * \file np_elemwise_broadcast_logic_op_equal.cu
+ * \brief GPU Implementation of basic functions for equal operation.
+ */
+
+#include "./np_elemwise_broadcast_logic_op.h"
+
+namespace mxnet {
+namespace op {
+
+MXNET_OPERATOR_REGISTER_NP_BINARY_LOGIC_GPU(equal);
+MXNET_OPERATOR_REGISTER_NP_BINARY_SCALAR_LOGIC_GPU(equal);
+
+}  // namespace op
+}  // namespace mxnet
diff --git a/src/operator/numpy/np_elemwise_broadcast_logic_op_greater.cc b/src/operator/numpy/np_elemwise_broadcast_logic_op_greater.cc
new file mode 100644
index 0000000..c5a535b
--- /dev/null
+++ b/src/operator/numpy/np_elemwise_broadcast_logic_op_greater.cc
@@ -0,0 +1,38 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ *  Copyright (c) 2019 by Contributors
+ * \file np_elemwise_binary_logic_op_greater.cc
+ * \brief CPU Implementation of basic logic functions for greater operation.
+ */
+
+#include "./np_elemwise_broadcast_logic_op.h"
+
+namespace mxnet {
+namespace op {
+
+MXNET_OPERATOR_REGISTER_NP_BINARY_LOGIC(greater);
+MXNET_OPERATOR_REGISTER_NP_BINARY_LOGIC_CPU(greater);
+
+MXNET_OPERATOR_REGISTER_NP_BINARY_SCALAR_LOGIC(greater);
+MXNET_OPERATOR_REGISTER_NP_BINARY_SCALAR_LOGIC_CPU(greater);
+
+}  // namespace op
+}  // namespace mxnet
diff --git a/src/operator/numpy/np_elemwise_broadcast_logic_op_greater.cu b/src/operator/numpy/np_elemwise_broadcast_logic_op_greater.cu
new file mode 100644
index 0000000..736d0b5
--- /dev/null
+++ b/src/operator/numpy/np_elemwise_broadcast_logic_op_greater.cu
@@ -0,0 +1,35 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ *  Copyright (c) 2019 by Contributors
+ * \file np_elemwise_broadcast_logic_op_greater.cu
+ * \brief GPU Implementation of basic functions for greater operation.
+ */
+
+#include "./np_elemwise_broadcast_logic_op.h"
+
+namespace mxnet {
+namespace op {
+
+MXNET_OPERATOR_REGISTER_NP_BINARY_LOGIC_GPU(greater);
+MXNET_OPERATOR_REGISTER_NP_BINARY_SCALAR_LOGIC_GPU(greater);
+
+}  // namespace op
+}  // namespace mxnet
diff --git a/src/operator/numpy/np_elemwise_broadcast_logic_op_greater_equal.cc b/src/operator/numpy/np_elemwise_broadcast_logic_op_greater_equal.cc
new file mode 100644
index 0000000..5535891
--- /dev/null
+++ b/src/operator/numpy/np_elemwise_broadcast_logic_op_greater_equal.cc
@@ -0,0 +1,38 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ *  Copyright (c) 2019 by Contributors
+ * \file np_elemwise_binary_logic_op_greater_equal.cc
+ * \brief CPU Implementation of basic logic functions for greater_equal operation.
+ */
+
+#include "./np_elemwise_broadcast_logic_op.h"
+
+namespace mxnet {
+namespace op {
+
+MXNET_OPERATOR_REGISTER_NP_BINARY_LOGIC(greater_equal);
+MXNET_OPERATOR_REGISTER_NP_BINARY_LOGIC_CPU(greater_equal);
+
+MXNET_OPERATOR_REGISTER_NP_BINARY_SCALAR_LOGIC(greater_equal);
+MXNET_OPERATOR_REGISTER_NP_BINARY_SCALAR_LOGIC_CPU(greater_equal);
+
+}  // namespace op
+}  // namespace mxnet
diff --git a/src/operator/numpy/np_elemwise_broadcast_logic_op_greater_equal.cu b/src/operator/numpy/np_elemwise_broadcast_logic_op_greater_equal.cu
new file mode 100644
index 0000000..9c86546
--- /dev/null
+++ b/src/operator/numpy/np_elemwise_broadcast_logic_op_greater_equal.cu
@@ -0,0 +1,35 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ *  Copyright (c) 2019 by Contributors
+ * \file np_elemwise_broadcast_logic_op_greater_equal.cu
+ * \brief GPU Implementation of basic functions for greater_equal operation.
+ */
+
+#include "./np_elemwise_broadcast_logic_op.h"
+
+namespace mxnet {
+namespace op {
+
+MXNET_OPERATOR_REGISTER_NP_BINARY_LOGIC_GPU(greater_equal);
+MXNET_OPERATOR_REGISTER_NP_BINARY_SCALAR_LOGIC_GPU(greater_equal);
+
+}  // namespace op
+}  // namespace mxnet
diff --git a/src/operator/numpy/np_elemwise_broadcast_logic_op_less.cc b/src/operator/numpy/np_elemwise_broadcast_logic_op_less.cc
new file mode 100644
index 0000000..f2a6bfb
--- /dev/null
+++ b/src/operator/numpy/np_elemwise_broadcast_logic_op_less.cc
@@ -0,0 +1,38 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ *  Copyright (c) 2019 by Contributors
+ * \file np_elemwise_binary_logic_op_less.cc
+ * \brief CPU Implementation of basic logic functions for less operation.
+ */
+
+#include "./np_elemwise_broadcast_logic_op.h"
+
+namespace mxnet {
+namespace op {
+
+MXNET_OPERATOR_REGISTER_NP_BINARY_LOGIC(less);
+MXNET_OPERATOR_REGISTER_NP_BINARY_LOGIC_CPU(less);
+
+MXNET_OPERATOR_REGISTER_NP_BINARY_SCALAR_LOGIC(less);
+MXNET_OPERATOR_REGISTER_NP_BINARY_SCALAR_LOGIC_CPU(less);
+
+}  // namespace op
+}  // namespace mxnet
diff --git a/src/operator/numpy/np_elemwise_broadcast_logic_op_less.cu b/src/operator/numpy/np_elemwise_broadcast_logic_op_less.cu
new file mode 100644
index 0000000..a7a34bf
--- /dev/null
+++ b/src/operator/numpy/np_elemwise_broadcast_logic_op_less.cu
@@ -0,0 +1,35 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ *  Copyright (c) 2019 by Contributors
+ * \file np_elemwise_broadcast_logic_op_less.cu
+ * \brief GPU Implementation of basic functions for less operation.
+ */
+
+#include "./np_elemwise_broadcast_logic_op.h"
+
+namespace mxnet {
+namespace op {
+
+MXNET_OPERATOR_REGISTER_NP_BINARY_LOGIC_GPU(less);
+MXNET_OPERATOR_REGISTER_NP_BINARY_SCALAR_LOGIC_GPU(less);
+
+}  // namespace op
+}  // namespace mxnet
diff --git a/src/operator/numpy/np_elemwise_broadcast_logic_op_less_equal.cc b/src/operator/numpy/np_elemwise_broadcast_logic_op_less_equal.cc
new file mode 100644
index 0000000..cbd0910
--- /dev/null
+++ b/src/operator/numpy/np_elemwise_broadcast_logic_op_less_equal.cc
@@ -0,0 +1,38 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ *  Copyright (c) 2019 by Contributors
+ * \file np_elemwise_binary_logic_op_less_equal.cc
+ * \brief CPU Implementation of basic logic functions for less_equal operation.
+ */
+
+#include "./np_elemwise_broadcast_logic_op.h"
+
+namespace mxnet {
+namespace op {
+
+MXNET_OPERATOR_REGISTER_NP_BINARY_LOGIC(less_equal);
+MXNET_OPERATOR_REGISTER_NP_BINARY_LOGIC_CPU(less_equal);
+
+MXNET_OPERATOR_REGISTER_NP_BINARY_SCALAR_LOGIC(less_equal);
+MXNET_OPERATOR_REGISTER_NP_BINARY_SCALAR_LOGIC_CPU(less_equal);
+
+}  // namespace op
+}  // namespace mxnet
diff --git a/src/operator/numpy/np_elemwise_broadcast_logic_op_less_equal.cu b/src/operator/numpy/np_elemwise_broadcast_logic_op_less_equal.cu
new file mode 100644
index 0000000..48502b7
--- /dev/null
+++ b/src/operator/numpy/np_elemwise_broadcast_logic_op_less_equal.cu
@@ -0,0 +1,35 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ *  Copyright (c) 2019 by Contributors
+ * \file np_elemwise_broadcast_logic_op_less_equal.cu
+ * \brief GPU Implementation of basic logic functions for less_equal operation.
+ */
+
+#include "./np_elemwise_broadcast_logic_op.h"
+
+namespace mxnet {
+namespace op {
+
+MXNET_OPERATOR_REGISTER_NP_BINARY_LOGIC_GPU(less_equal);
+MXNET_OPERATOR_REGISTER_NP_BINARY_SCALAR_LOGIC_GPU(less_equal);
+
+}  // namespace op
+}  // namespace mxnet
diff --git a/src/operator/numpy/np_elemwise_broadcast_logic_op_not_equal.cc b/src/operator/numpy/np_elemwise_broadcast_logic_op_not_equal.cc
new file mode 100644
index 0000000..939de1f
--- /dev/null
+++ b/src/operator/numpy/np_elemwise_broadcast_logic_op_not_equal.cc
@@ -0,0 +1,38 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ *  Copyright (c) 2019 by Contributors
+ * \file np_elemwise_broadcast_logic_op_not_equal.cc
+ * \brief CPU Implementation of basic logic functions for not equal operation.
+ */
+
+#include "./np_elemwise_broadcast_logic_op.h"
+
+namespace mxnet {
+namespace op {
+
+MXNET_OPERATOR_REGISTER_NP_BINARY_LOGIC(not_equal);
+MXNET_OPERATOR_REGISTER_NP_BINARY_LOGIC_CPU(not_equal);
+
+MXNET_OPERATOR_REGISTER_NP_BINARY_SCALAR_LOGIC(not_equal);
+MXNET_OPERATOR_REGISTER_NP_BINARY_SCALAR_LOGIC_CPU(not_equal);
+
+}  // namespace op
+}  // namespace mxnet
diff --git a/src/operator/numpy/np_elemwise_broadcast_logic_op_not_equal.cu b/src/operator/numpy/np_elemwise_broadcast_logic_op_not_equal.cu
new file mode 100644
index 0000000..e37aa89
--- /dev/null
+++ b/src/operator/numpy/np_elemwise_broadcast_logic_op_not_equal.cu
@@ -0,0 +1,35 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ *  Copyright (c) 2019 by Contributors
+ * \file np_elemwise_broadcast_logic_op_not_equal.cu
+ * \brief GPU Implementation of basic logic functions for not equal operation.
+ */
+
+#include "./np_elemwise_broadcast_logic_op.h"
+
+namespace mxnet {
+namespace op {
+
+MXNET_OPERATOR_REGISTER_NP_BINARY_LOGIC_GPU(not_equal);
+MXNET_OPERATOR_REGISTER_NP_BINARY_SCALAR_LOGIC_GPU(not_equal);
+
+}  // namespace op
+}  // namespace mxnet
diff --git a/src/operator/numpy/np_elemwise_broadcast_logic_op_or.cc b/src/operator/numpy/np_elemwise_broadcast_logic_op_or.cc
new file mode 100644
index 0000000..ccf6297
--- /dev/null
+++ b/src/operator/numpy/np_elemwise_broadcast_logic_op_or.cc
@@ -0,0 +1,38 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ *  Copyright (c) 2019 by Contributors
+ * \file np_elemwise_broadcast_logic_op_or.cc
+ * \brief CPU Implementation of basic logic functions for logical_or operation.
+ */
+
+#include "./np_elemwise_broadcast_logic_op.h"
+
+namespace mxnet {
+namespace op {
+
+MXNET_OPERATOR_REGISTER_NP_BINARY_LOGIC(logical_or);
+MXNET_OPERATOR_REGISTER_NP_BINARY_LOGIC_CPU(logical_or);
+
+MXNET_OPERATOR_REGISTER_NP_BINARY_SCALAR_LOGIC(logical_or);
+MXNET_OPERATOR_REGISTER_NP_BINARY_SCALAR_LOGIC_CPU(logical_or);
+
+}  // namespace op
+}  // namespace mxnet
diff --git a/src/operator/numpy/np_elemwise_broadcast_logic_op_or.cu b/src/operator/numpy/np_elemwise_broadcast_logic_op_or.cu
new file mode 100644
index 0000000..15f429a
--- /dev/null
+++ b/src/operator/numpy/np_elemwise_broadcast_logic_op_or.cu
@@ -0,0 +1,35 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ *  Copyright (c) 2019 by Contributors
+ * \file np_elemwise_broadcast_logic_op_or.cu
+ * \brief GPU Implementation of basic logic functions for logical_or operation.
+ */
+
+#include "./np_elemwise_broadcast_logic_op.h"
+
+namespace mxnet {
+namespace op {
+
+MXNET_OPERATOR_REGISTER_NP_BINARY_LOGIC_GPU(logical_or);
+MXNET_OPERATOR_REGISTER_NP_BINARY_SCALAR_LOGIC_GPU(logical_or);
+
+}  // namespace op
+}  // namespace mxnet
diff --git a/src/operator/numpy/np_elemwise_broadcast_logic_op_xor.cc b/src/operator/numpy/np_elemwise_broadcast_logic_op_xor.cc
new file mode 100644
index 0000000..a9886ef
--- /dev/null
+++ b/src/operator/numpy/np_elemwise_broadcast_logic_op_xor.cc
@@ -0,0 +1,38 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ *  Copyright (c) 2019 by Contributors
+ * \file np_elemwise_broadcast_logic_op_xor.cc
+ * \brief CPU Implementation of basic logic functions for logical_xor operation.
+ */
+
+#include "./np_elemwise_broadcast_logic_op.h"
+
+namespace mxnet {
+namespace op {
+
+MXNET_OPERATOR_REGISTER_NP_BINARY_LOGIC(logical_xor);
+MXNET_OPERATOR_REGISTER_NP_BINARY_LOGIC_CPU(logical_xor);
+
+MXNET_OPERATOR_REGISTER_NP_BINARY_SCALAR_LOGIC(logical_xor);
+MXNET_OPERATOR_REGISTER_NP_BINARY_SCALAR_LOGIC_CPU(logical_xor);
+
+}  // namespace op
+}  // namespace mxnet
diff --git a/src/operator/numpy/np_elemwise_broadcast_logic_op_xor.cu b/src/operator/numpy/np_elemwise_broadcast_logic_op_xor.cu
new file mode 100644
index 0000000..9ef3210
--- /dev/null
+++ b/src/operator/numpy/np_elemwise_broadcast_logic_op_xor.cu
@@ -0,0 +1,35 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ *  Copyright (c) 2019 by Contributors
+ * \file np_elemwise_broadcast_logic_op_xor.cu
+ * \brief GPU Implementation of basic logic functions for logical_xor operation.
+ */
+
+#include "./np_elemwise_broadcast_logic_op.h"
+
+namespace mxnet {
+namespace op {
+
+MXNET_OPERATOR_REGISTER_NP_BINARY_LOGIC_GPU(logical_xor);
+MXNET_OPERATOR_REGISTER_NP_BINARY_SCALAR_LOGIC_GPU(logical_xor);
+
+}  // namespace op
+}  // namespace mxnet
diff --git a/src/operator/numpy/np_elemwise_broadcast_op.cc b/src/operator/numpy/np_elemwise_broadcast_op.cc
deleted file mode 100644
index 124b67d..0000000
--- a/src/operator/numpy/np_elemwise_broadcast_op.cc
+++ /dev/null
@@ -1,235 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*!
- *  Copyright (c) 2019 by Contributors
- * \file np_elemwise_binary_op.cc
- * \brief CPU Implementation of basic functions for elementwise numpy binary broadcast operator.
- */
-
-#include "./np_elemwise_broadcast_op.h"
-
-namespace mxnet {
-namespace op {
-
-DMLC_REGISTER_PARAMETER(NumpyBinaryScalarParam);
-
-#define MXNET_OPERATOR_REGISTER_NP_BINARY_SCALAR(name)                        \
-  NNVM_REGISTER_OP(name)                                                      \
-      .set_num_inputs(1)                                                      \
-      .set_num_outputs(1)                                                     \
-      .set_attr_parser(ParamParser<NumpyBinaryScalarParam>)                   \
-      .set_attr<mxnet::FInferShape>("FInferShape", ElemwiseShape<1, 1>)       \
-      .set_attr<nnvm::FInferType>("FInferType", NumpyBinaryScalarType)        \
-      .set_attr<FResourceRequest>(                                            \
-          "FResourceRequest",                                                 \
-          [](const NodeAttrs& attrs) {                                        \
-            return std::vector<ResourceRequest>{ResourceRequest::kTempSpace}; \
-          })                                                                  \
-      .add_argument("data", "NDArray-or-Symbol", "source input")              \
-      .add_arguments(NumpyBinaryScalarParam::__FIELDS__())
-
-bool NumpyBinaryMixedPrecisionType(const nnvm::NodeAttrs& attrs,
-                                   std::vector<int>* in_attrs,
-                                   std::vector<int>* out_attrs) {
-  CHECK_EQ(in_attrs->size(), 2U);
-  CHECK_EQ(out_attrs->size(), 1U);
-  const int ltype = in_attrs->at(0);
-  const int rtype = in_attrs->at(1);
-  if (ltype != -1 && rtype != -1 && (ltype != rtype)) {
-    // Only when both input types are known and not the same, we enter the mixed-precision mode
-    TYPE_ASSIGN_CHECK(*out_attrs, 0, common::np_binary_out_infer_type(ltype, rtype));
-  } else {
-    return ElemwiseType<2, 1>(attrs, in_attrs, out_attrs);
-  }
-  return true;
-}
-
-#define MXNET_OPERATOR_REGISTER_NP_BINARY_MIXED_PRECISION(name)                                   \
-  NNVM_REGISTER_OP(name)                                                                          \
-      .set_num_inputs(2)                                                                          \
-      .set_num_outputs(1)                                                                         \
-      .set_attr<nnvm::FListInputNames>("FListInputNames",                                         \
-                                       [](const NodeAttrs& attrs) {                               \
-                                         return std::vector<std::string>{"lhs", "rhs"};           \
-                                       })                                                         \
-      .set_attr<mxnet::FInferShape>("FInferShape", BinaryBroadcastShape)                          \
-      .set_attr<nnvm::FInferType>("FInferType", NumpyBinaryMixedPrecisionType)                    \
-      .set_attr<nnvm::FInplaceOption>("FInplaceOption",                                           \
-                                      [](const NodeAttrs& attrs) {                                \
-                                        return std::vector<std::pair<int, int> >{{0, 0}, {1, 0}}; \
-                                      })                                                          \
-      .set_attr<FResourceRequest>(                                                                \
-          "FResourceRequest",                                                                     \
-          [](const NodeAttrs& attrs) {                                                            \
-            return std::vector<ResourceRequest>{ResourceRequest::kTempSpace};                     \
-          })                                                                                      \
-      .add_argument("lhs", "NDArray-or-Symbol", "First input to the function")                    \
-      .add_argument("rhs", "NDArray-or-Symbol", "Second input to the function")
-
-MXNET_OPERATOR_REGISTER_NP_BINARY_MIXED_PRECISION(_npi_add)
-    .set_attr<FCompute>("FCompute<cpu>",
-                        NumpyBinaryBroadcastComputeWithBool<cpu,
-                                                            op::mshadow_op::plus,
-                                                            op::mshadow_op::mixed_plus,
-                                                            op::mshadow_op::mixed_plus>)
-    .set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseIn{"_backward_npi_broadcast_add"});
-
-NNVM_REGISTER_OP(_backward_npi_broadcast_add)
-    .set_num_inputs(3)
-    .set_num_outputs(2)
-    .set_attr<nnvm::TIsBackward>("TIsBackward", true)
-    .set_attr<nnvm::FInplaceOption>("FInplaceOption",
-                                    [](const NodeAttrs& attrs) {
-                                      return std::vector<std::pair<int, int> >{{0, 0}, {0, 1}};
-                                    })
-    .set_attr<FResourceRequest>("FResourceRequest",
-                                [](const NodeAttrs& attrs) {
-                                  return std::vector<ResourceRequest>{ResourceRequest::kTempSpace};
-                                })
-    .set_attr<FCompute>("FCompute<cpu>",
-                        NumpyBinaryBackwardUseIn<cpu, mshadow_op::posone, mshadow_op::posone>);
-
-MXNET_OPERATOR_REGISTER_NP_BINARY_MIXED_PRECISION(_npi_subtract)
-    .set_attr<FCompute>("FCompute<cpu>",
-                        NumpyBinaryBroadcastCompute<cpu,
-                                                    op::mshadow_op::minus,
-                                                    op::mshadow_op::mixed_minus,
-                                                    op::mshadow_op::mixed_rminus>)
-    .set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseIn{"_backward_npi_broadcast_sub"});
-
-NNVM_REGISTER_OP(_backward_npi_broadcast_sub)
-    .set_num_inputs(3)
-    .set_num_outputs(2)
-    .set_attr<nnvm::TIsBackward>("TIsBackward", true)
-    .set_attr<nnvm::FInplaceOption>("FInplaceOption",
-                                    [](const NodeAttrs& attrs) {
-                                      return std::vector<std::pair<int, int> >{{0, 0}, {0, 1}};
-                                    })
-    .set_attr<FResourceRequest>("FResourceRequest",
-                                [](const NodeAttrs& attrs) {
-                                  return std::vector<ResourceRequest>{ResourceRequest::kTempSpace};
-                                })
-    .set_attr<FCompute>("FCompute<cpu>",
-                        NumpyBinaryBackwardUseIn<cpu, mshadow_op::posone, mshadow_op::negone>);
-
-MXNET_OPERATOR_REGISTER_NP_BINARY_MIXED_PRECISION(_npi_multiply)
-    .set_attr<FCompute>("FCompute<cpu>",
-                        NumpyBinaryBroadcastComputeWithBool<cpu,
-                                                            op::mshadow_op::mul,
-                                                            op::mshadow_op::mixed_mul,
-                                                            op::mshadow_op::mixed_mul>)
-    .set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseIn{"_backward_npi_broadcast_mul"});
-
-NNVM_REGISTER_OP(_backward_npi_broadcast_mul)
-    .set_num_inputs(3)
-    .set_num_outputs(2)
-    .set_attr<nnvm::TIsBackward>("TIsBackward", true)
-    .set_attr<nnvm::FInplaceOption>("FInplaceOption",
-                                    [](const NodeAttrs& attrs) {
-                                      return std::vector<std::pair<int, int> >{{0, 1}};
-                                    })
-    .set_attr<FResourceRequest>("FResourceRequest",
-                                [](const NodeAttrs& attrs) {
-                                  return std::vector<ResourceRequest>{ResourceRequest::kTempSpace};
-                                })
-    .set_attr<FCompute>("FCompute<cpu>",
-                        NumpyBinaryBackwardUseIn<cpu, mshadow_op::right, mshadow_op::left>);
-
-MXNET_OPERATOR_REGISTER_NP_BINARY_MIXED_PRECISION(_npi_mod)
-    .set_attr<FCompute>("FCompute<cpu>",
-                        NumpyBinaryBroadcastCompute<cpu,
-                                                    op::mshadow_op::mod,
-                                                    op::mshadow_op::mixed_mod,
-                                                    op::mshadow_op::mixed_rmod>)
-    .set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseIn{"_backward_npi_broadcast_mod"});
-
-NNVM_REGISTER_OP(_backward_npi_broadcast_mod)
-    .set_num_inputs(3)
-    .set_num_outputs(2)
-    .set_attr<nnvm::TIsBackward>("TIsBackward", true)
-    .set_attr<nnvm::FInplaceOption>("FInplaceOption",
-                                    [](const NodeAttrs& attrs) {
-                                      return std::vector<std::pair<int, int> >{{0, 1}};
-                                    })
-    .set_attr<FResourceRequest>("FResourceRequest",
-                                [](const NodeAttrs& attrs) {
-                                  return std::vector<ResourceRequest>{ResourceRequest::kTempSpace};
-                                })
-    .set_attr<FCompute>("FCompute<cpu>",
-                        NumpyBinaryBackwardUseIn<cpu, mshadow_op::mod_grad, mshadow_op::mod_rgrad>);
-
-MXNET_OPERATOR_REGISTER_NP_BINARY_MIXED_PRECISION(_npi_power)
-    .set_attr<FCompute>("FCompute<cpu>",
-                        NumpyBinaryBroadcastComputeWithBool<cpu,
-                                                            op::mshadow_op::power,
-                                                            op::mshadow_op::mixed_power,
-                                                            op::mshadow_op::mixed_rpower>)
-    .set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseIn{"_backward_npi_broadcast_power"});
-
-NNVM_REGISTER_OP(_backward_npi_broadcast_power)
-    .set_num_inputs(3)
-    .set_num_outputs(2)
-    .set_attr<nnvm::TIsBackward>("TIsBackward", true)
-    .set_attr<nnvm::FInplaceOption>("FInplaceOption",
-                                    [](const NodeAttrs& attrs) {
-                                      return std::vector<std::pair<int, int> >{{0, 1}};
-                                    })
-    .set_attr<FResourceRequest>("FResourceRequest",
-                                [](const NodeAttrs& attrs) {
-                                  return std::vector<ResourceRequest>{ResourceRequest::kTempSpace};
-                                })
-    .set_attr<FCompute>(
-        "FCompute<cpu>",
-        NumpyBinaryBackwardUseIn<cpu, mshadow_op::power_grad, mshadow_op::power_rgrad>);
-
-MXNET_OPERATOR_REGISTER_NP_BINARY_SCALAR(_npi_add_scalar)
-    .set_attr<FCompute>("FCompute<cpu>", BinaryScalarOp::Compute<cpu, op::mshadow_op::plus>)
-    .set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseNone{"_copy"});
-
-MXNET_OPERATOR_REGISTER_NP_BINARY_SCALAR(_npi_subtract_scalar)
-    .set_attr<FCompute>("FCompute<cpu>", BinaryScalarOp::Compute<cpu, op::mshadow_op::minus>)
-    .set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseNone{"_copy"});
-
-MXNET_OPERATOR_REGISTER_NP_BINARY_SCALAR(_npi_rsubtract_scalar)
-    .set_attr<FCompute>("FCompute<cpu>", BinaryScalarOp::Compute<cpu, mshadow_op::rminus>)
-    .set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseNone{"negative"});
-
-MXNET_OPERATOR_REGISTER_NP_BINARY_SCALAR(_npi_multiply_scalar)
-    .set_attr<FCompute>("FCompute<cpu>", BinaryScalarOp::Compute<cpu, op::mshadow_op::mul>)
-    .set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseNone{"_backward_mul_scalar"});
-
-MXNET_OPERATOR_REGISTER_NP_BINARY_SCALAR(_npi_mod_scalar)
-    .set_attr<FCompute>("FCompute<cpu>", BinaryScalarOp::Compute<cpu, mshadow_op::mod>)
-    .set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseIn{"_backward_mod_scalar"});
-
-MXNET_OPERATOR_REGISTER_NP_BINARY_SCALAR(_npi_rmod_scalar)
-    .set_attr<FCompute>("FCompute<cpu>", BinaryScalarOp::Compute<cpu, mshadow_op::rmod>)
-    .set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseIn{"_backward_rmod_scalar"});
-
-MXNET_OPERATOR_REGISTER_NP_BINARY_SCALAR(_npi_power_scalar)
-    .set_attr<FCompute>("FCompute<cpu>", BinaryScalarOp::Compute<cpu, mshadow_op::power>)
-    .set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseIn{"_backward_power_scalar"});
-
-MXNET_OPERATOR_REGISTER_NP_BINARY_SCALAR(_npi_rpower_scalar)
-    .set_attr<FCompute>("FCompute<cpu>", BinaryScalarOp::Compute<cpu, mshadow_op::rpower>)
-    .set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseOut{"_backward_rpower_scalar"});
-
-}  // namespace op
-}  // namespace mxnet
diff --git a/src/operator/numpy/np_elemwise_broadcast_op.cu b/src/operator/numpy/np_elemwise_broadcast_op.cu
deleted file mode 100644
index 635bc4d..0000000
--- a/src/operator/numpy/np_elemwise_broadcast_op.cu
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*!
- *  Copyright (c) 2019 by Contributors
- * \file np_elemwise_broadcast_op.cu
- * \brief GPU Implementation of basic functions for elementwise binary broadcast operator.
- */
-
-#include "./np_elemwise_broadcast_op.h"
-
-namespace mxnet {
-namespace op {
-
-NNVM_REGISTER_OP(_npi_add).set_attr<FCompute>("FCompute<gpu>", BinaryBroadcastRTCCompute{"add"});
-
-NNVM_REGISTER_OP(_backward_npi_broadcast_add)
-    .set_attr<FCompute>("FCompute<gpu>", BinaryBroadcastRTCBackwardUseIn{"one", "one"});
-
-NNVM_REGISTER_OP(_npi_subtract)
-    .set_attr<FCompute>("FCompute<gpu>", BinaryBroadcastRTCCompute{"sub"});
-
-NNVM_REGISTER_OP(_backward_npi_broadcast_sub)
-    .set_attr<FCompute>("FCompute<gpu>", BinaryBroadcastRTCBackwardUseIn{"one", "negone"});
-
-NNVM_REGISTER_OP(_npi_multiply)
-    .set_attr<FCompute>("FCompute<gpu>", BinaryBroadcastRTCCompute{"mul"});
-
-NNVM_REGISTER_OP(_backward_npi_broadcast_mul)
-    .set_attr<FCompute>("FCompute<gpu>", BinaryBroadcastRTCBackwardUseIn{"right", "left"});
-
-NNVM_REGISTER_OP(_npi_mod).set_attr<FCompute>("FCompute<gpu>", BinaryBroadcastRTCCompute{"mod"});
-
-NNVM_REGISTER_OP(_backward_npi_broadcast_mod)
-    .set_attr<FCompute>("FCompute<gpu>", BinaryBroadcastRTCBackwardUseIn{"mod_grad", "mod_rgrad"});
-
-NNVM_REGISTER_OP(_npi_power)
-    .set_attr<FCompute>("FCompute<gpu>", BinaryBroadcastRTCCompute{"power"});
-
-NNVM_REGISTER_OP(_backward_npi_broadcast_power)
-    .set_attr<FCompute>("FCompute<gpu>",
-                        BinaryBroadcastRTCBackwardUseIn{"power_grad", "power_rgrad"});
-
-NNVM_REGISTER_OP(_npi_add_scalar)
-    .set_attr<FCompute>("FCompute<gpu>", BinaryScalarRTCCompute{"add"});
-
-NNVM_REGISTER_OP(_npi_subtract_scalar)
-    .set_attr<FCompute>("FCompute<gpu>", BinaryScalarRTCCompute{"sub"});
-
-NNVM_REGISTER_OP(_npi_rsubtract_scalar)
-    .set_attr<FCompute>("FCompute<gpu>", BinaryScalarRTCCompute{"rsub"});
-
-NNVM_REGISTER_OP(_npi_multiply_scalar)
-    .set_attr<FCompute>("FCompute<gpu>", BinaryScalarRTCCompute{"mul"});
-
-NNVM_REGISTER_OP(_npi_mod_scalar)
-    .set_attr<FCompute>("FCompute<gpu>", BinaryScalarRTCCompute{"mod"});
-
-NNVM_REGISTER_OP(_npi_rmod_scalar)
-    .set_attr<FCompute>("FCompute<gpu>", BinaryScalarRTCCompute{"rmod"});
-
-NNVM_REGISTER_OP(_npi_power_scalar)
-    .set_attr<FCompute>("FCompute<gpu>", BinaryScalarRTCCompute{"power"});
-
-NNVM_REGISTER_OP(_npi_rpower_scalar)
-    .set_attr<FCompute>("FCompute<gpu>", BinaryScalarRTCCompute{"rpow"});
-
-}  // namespace op
-}  // namespace mxnet
diff --git a/src/operator/numpy/np_elemwise_broadcast_op.h b/src/operator/numpy/np_elemwise_broadcast_op.h
index 1f01bc6..0a4bf8d 100644
--- a/src/operator/numpy/np_elemwise_broadcast_op.h
+++ b/src/operator/numpy/np_elemwise_broadcast_op.h
@@ -26,6 +26,7 @@
 #define MXNET_OPERATOR_NUMPY_NP_ELEMWISE_BROADCAST_OP_H_
 
 #include <algorithm>
+#include <utility>
 #include <vector>
 #include <string>
 
@@ -533,6 +534,59 @@ void NumpyBinaryBackwardUseIn(const nnvm::NodeAttrs& attrs,
   }
 }
 
+#define MXNET_OPERATOR_REGISTER_NP_BINARY_SCALAR(name)                        \
+  NNVM_REGISTER_OP(name)                                                      \
+      .set_num_inputs(1)                                                      \
+      .set_num_outputs(1)                                                     \
+      .set_attr_parser(ParamParser<NumpyBinaryScalarParam>)                   \
+      .set_attr<mxnet::FInferShape>("FInferShape", ElemwiseShape<1, 1>)       \
+      .set_attr<nnvm::FInferType>("FInferType", NumpyBinaryScalarType)        \
+      .set_attr<FResourceRequest>(                                            \
+          "FResourceRequest",                                                 \
+          [](const NodeAttrs& attrs) {                                        \
+            return std::vector<ResourceRequest>{ResourceRequest::kTempSpace}; \
+          })                                                                  \
+      .add_argument("data", "NDArray-or-Symbol", "source input")              \
+      .add_arguments(NumpyBinaryScalarParam::__FIELDS__())
+
+inline bool NumpyBinaryMixedPrecisionType(const nnvm::NodeAttrs& attrs,
+                                   std::vector<int>* in_attrs,
+                                   std::vector<int>* out_attrs) {
+  CHECK_EQ(in_attrs->size(), 2U);
+  CHECK_EQ(out_attrs->size(), 1U);
+  const int ltype = in_attrs->at(0);
+  const int rtype = in_attrs->at(1);
+  if (ltype != -1 && rtype != -1 && (ltype != rtype)) {
+    // Only when both input types are known and not the same, we enter the mixed-precision mode
+    TYPE_ASSIGN_CHECK(*out_attrs, 0, common::np_binary_out_infer_type(ltype, rtype));
+  } else {
+    return ElemwiseType<2, 1>(attrs, in_attrs, out_attrs);
+  }
+  return true;
+}
+
+#define MXNET_OPERATOR_REGISTER_NP_BINARY_MIXED_PRECISION(name)                                   \
+  NNVM_REGISTER_OP(name)                                                                          \
+      .set_num_inputs(2)                                                                          \
+      .set_num_outputs(1)                                                                         \
+      .set_attr<nnvm::FListInputNames>("FListInputNames",                                         \
+                                       [](const NodeAttrs& attrs) {                               \
+                                         return std::vector<std::string>{"lhs", "rhs"};           \
+                                       })                                                         \
+      .set_attr<mxnet::FInferShape>("FInferShape", BinaryBroadcastShape)                          \
+      .set_attr<nnvm::FInferType>("FInferType", NumpyBinaryMixedPrecisionType)                    \
+      .set_attr<nnvm::FInplaceOption>("FInplaceOption",                                           \
+                                      [](const NodeAttrs& attrs) {                                \
+                                        return std::vector<std::pair<int, int> >{{0, 0}, {1, 0}}; \
+                                      })                                                          \
+      .set_attr<FResourceRequest>(                                                                \
+          "FResourceRequest",                                                                     \
+          [](const NodeAttrs& attrs) {                                                            \
+            return std::vector<ResourceRequest>{ResourceRequest::kTempSpace};                     \
+          })                                                                                      \
+      .add_argument("lhs", "NDArray-or-Symbol", "First input to the function")                    \
+      .add_argument("rhs", "NDArray-or-Symbol", "Second input to the function")
+
 }  // namespace op
 }  // namespace mxnet
 #endif  // MXNET_OPERATOR_NUMPY_NP_ELEMWISE_BROADCAST_OP_H_
diff --git a/src/operator/numpy/np_elemwise_broadcast_op_add.cc b/src/operator/numpy/np_elemwise_broadcast_op_add.cc
new file mode 100644
index 0000000..c39d6cd
--- /dev/null
+++ b/src/operator/numpy/np_elemwise_broadcast_op_add.cc
@@ -0,0 +1,54 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ *  Copyright (c) 2019 by Contributors
+ * \file np_elemwise_broadcast_op_add.cc
+ * \brief CPU Implementation of basic functions for elementwise numpy binary add.
+ */
+
+#include "./np_elemwise_broadcast_op.h"
+
+namespace mxnet {
+namespace op {
+
+MXNET_OPERATOR_REGISTER_NP_BINARY_MIXED_PRECISION(_npi_add)
+.set_attr<FCompute>(
+  "FCompute<cpu>",
+  NumpyBinaryBroadcastComputeWithBool<cpu, op::mshadow_op::plus, op::mshadow_op::mixed_plus,
+                                      op::mshadow_op::mixed_plus>)
+.set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseIn{"_backward_npi_broadcast_add"});
+
+NNVM_REGISTER_OP(_backward_npi_broadcast_add)
+.set_num_inputs(3)
+.set_num_outputs(2)
+.set_attr<nnvm::TIsBackward>("TIsBackward", true)
+.set_attr<nnvm::FInplaceOption>("FInplaceOption",
+  [](const NodeAttrs& attrs){
+    return std::vector<std::pair<int, int> >{{0, 0}, {0, 1}};
+  })
+.set_attr<FResourceRequest>("FResourceRequest",
+  [](const NodeAttrs& attrs) {
+    return std::vector<ResourceRequest>{ResourceRequest::kTempSpace};
+  })
+.set_attr<FCompute>("FCompute<cpu>", NumpyBinaryBackwardUseIn<cpu, mshadow_op::posone,
+                                                                mshadow_op::posone>);
+
+}  // namespace op
+}  // namespace mxnet
diff --git a/src/operator/numpy/np_elemwise_broadcast_op_add.cu b/src/operator/numpy/np_elemwise_broadcast_op_add.cu
new file mode 100644
index 0000000..1448230
--- /dev/null
+++ b/src/operator/numpy/np_elemwise_broadcast_op_add.cu
@@ -0,0 +1,38 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ *  Copyright (c) 2019 by Contributors
+ * \file np_elemwise_broadcast_op_add.cu
+ * \brief GPU Implementation of basic functions for elementwise binary broadcast add operator.
+ */
+
+#include "./np_elemwise_broadcast_op.h"
+
+namespace mxnet {
+namespace op {
+
+NNVM_REGISTER_OP(_npi_add)
+.set_attr<FCompute>("FCompute<gpu>", BinaryBroadcastRTCCompute{"add"});
+
+NNVM_REGISTER_OP(_backward_npi_broadcast_add)
+.set_attr<FCompute>("FCompute<gpu>", BinaryBroadcastRTCBackwardUseIn{"one", "one"});
+
+}  // namespace op
+}  // namespace mxnet
diff --git a/src/operator/numpy/np_elemwise_broadcast_op_mod.cc b/src/operator/numpy/np_elemwise_broadcast_op_mod.cc
new file mode 100644
index 0000000..76176b3
--- /dev/null
+++ b/src/operator/numpy/np_elemwise_broadcast_op_mod.cc
@@ -0,0 +1,54 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ *  Copyright (c) 2019 by Contributors
+ * \file np_elemwise_broadcast_op_mod.cc
+ * \brief CPU Implementation of basic functions for elementwise numpy binary mod.
+ */
+
+#include "./np_elemwise_broadcast_op.h"
+
+namespace mxnet {
+namespace op {
+
+MXNET_OPERATOR_REGISTER_NP_BINARY_MIXED_PRECISION(_npi_mod)
+.set_attr<FCompute>(
+  "FCompute<cpu>",
+  NumpyBinaryBroadcastCompute<cpu, op::mshadow_op::mod, op::mshadow_op::mixed_mod,
+                                      op::mshadow_op::mixed_rmod>)
+.set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseIn{"_backward_npi_broadcast_mod"});
+
+NNVM_REGISTER_OP(_backward_npi_broadcast_mod)
+.set_num_inputs(3)
+.set_num_outputs(2)
+.set_attr<nnvm::TIsBackward>("TIsBackward", true)
+.set_attr<nnvm::FInplaceOption>("FInplaceOption",
+  [](const NodeAttrs& attrs){
+    return std::vector<std::pair<int, int> >{{0, 1}};
+  })
+.set_attr<FResourceRequest>("FResourceRequest",
+  [](const NodeAttrs& attrs) {
+    return std::vector<ResourceRequest>{ResourceRequest::kTempSpace};
+  })
+.set_attr<FCompute>("FCompute<cpu>", NumpyBinaryBackwardUseIn<cpu, mshadow_op::mod_grad,
+                                                              mshadow_op::mod_rgrad>);
+
+}  // namespace op
+}  // namespace mxnet
diff --git a/src/operator/numpy/np_elemwise_broadcast_op_mod.cu b/src/operator/numpy/np_elemwise_broadcast_op_mod.cu
new file mode 100644
index 0000000..6cb2ffa
--- /dev/null
+++ b/src/operator/numpy/np_elemwise_broadcast_op_mod.cu
@@ -0,0 +1,38 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ *  Copyright (c) 2019 by Contributors
+ * \file np_elemwise_broadcast_op_mod.cu
+ * \brief GPU Implementation of basic functions for elementwise binary broadcast mod operator.
+ */
+
+#include "./np_elemwise_broadcast_op.h"
+
+namespace mxnet {
+namespace op {
+
+NNVM_REGISTER_OP(_npi_mod)
+.set_attr<FCompute>("FCompute<gpu>", BinaryBroadcastRTCCompute{"mod"});
+
+NNVM_REGISTER_OP(_backward_npi_broadcast_mod)
+.set_attr<FCompute>("FCompute<gpu>", BinaryBroadcastRTCBackwardUseIn{"mod_grad", "mod_rgrad"});
+
+}  // namespace op
+}  // namespace mxnet
diff --git a/src/operator/numpy/np_elemwise_broadcast_op_mul.cc b/src/operator/numpy/np_elemwise_broadcast_op_mul.cc
new file mode 100644
index 0000000..d8207a8
--- /dev/null
+++ b/src/operator/numpy/np_elemwise_broadcast_op_mul.cc
@@ -0,0 +1,54 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ *  Copyright (c) 2019 by Contributors
+ * \file np_elemwise_broadcast_op_mul.cc
+ * \brief CPU Implementation of basic functions for elementwise numpy binary multiply.
+ */
+
+#include "./np_elemwise_broadcast_op.h"
+
+namespace mxnet {
+namespace op {
+
+MXNET_OPERATOR_REGISTER_NP_BINARY_MIXED_PRECISION(_npi_multiply)
+.set_attr<FCompute>(
+  "FCompute<cpu>",
+  NumpyBinaryBroadcastComputeWithBool<cpu, op::mshadow_op::mul, op::mshadow_op::mixed_mul,
+                                      op::mshadow_op::mixed_mul>)
+.set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseIn{"_backward_npi_broadcast_mul"});
+
+NNVM_REGISTER_OP(_backward_npi_broadcast_mul)
+.set_num_inputs(3)
+.set_num_outputs(2)
+.set_attr<nnvm::TIsBackward>("TIsBackward", true)
+.set_attr<nnvm::FInplaceOption>("FInplaceOption",
+  [](const NodeAttrs& attrs){
+    return std::vector<std::pair<int, int> >{{0, 1}};
+  })
+.set_attr<FResourceRequest>("FResourceRequest",
+  [](const NodeAttrs& attrs) {
+    return std::vector<ResourceRequest>{ResourceRequest::kTempSpace};
+  })
+.set_attr<FCompute>("FCompute<cpu>", NumpyBinaryBackwardUseIn<cpu, mshadow_op::right,
+                                                              mshadow_op::left>);
+
+}  // namespace op
+}  // namespace mxnet
diff --git a/src/operator/numpy/np_elemwise_broadcast_op_mul.cu b/src/operator/numpy/np_elemwise_broadcast_op_mul.cu
new file mode 100644
index 0000000..c8235e7
--- /dev/null
+++ b/src/operator/numpy/np_elemwise_broadcast_op_mul.cu
@@ -0,0 +1,38 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ *  Copyright (c) 2019 by Contributors
+ * \file np_elemwise_broadcast_op_mul.cu
+ * \brief GPU Implementation of basic functions for elementwise binary broadcast multiply operator.
+ */
+
+#include "./np_elemwise_broadcast_op.h"
+
+namespace mxnet {
+namespace op {
+
+NNVM_REGISTER_OP(_npi_multiply)
+.set_attr<FCompute>("FCompute<gpu>", BinaryBroadcastRTCCompute{"mul"});
+
+NNVM_REGISTER_OP(_backward_npi_broadcast_mul)
+.set_attr<FCompute>("FCompute<gpu>", BinaryBroadcastRTCBackwardUseIn{"right", "left"});
+
+}  // namespace op
+}  // namespace mxnet
diff --git a/src/operator/numpy/np_elemwise_broadcast_op_pow.cc b/src/operator/numpy/np_elemwise_broadcast_op_pow.cc
new file mode 100644
index 0000000..e7a4f23
--- /dev/null
+++ b/src/operator/numpy/np_elemwise_broadcast_op_pow.cc
@@ -0,0 +1,54 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ *  Copyright (c) 2019 by Contributors
+ * \file np_elemwise_broadcast_op_pow.cc
+ * \brief CPU Implementation of basic functions for elementwise numpy binary power.
+ */
+
+#include "./np_elemwise_broadcast_op.h"
+
+namespace mxnet {
+namespace op {
+
+MXNET_OPERATOR_REGISTER_NP_BINARY_MIXED_PRECISION(_npi_power)
+.set_attr<FCompute>(
+  "FCompute<cpu>",
+  NumpyBinaryBroadcastComputeWithBool<cpu, op::mshadow_op::power, op::mshadow_op::mixed_power,
+                                      op::mshadow_op::mixed_rpower>)
+.set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseIn{"_backward_npi_broadcast_power"});
+
+NNVM_REGISTER_OP(_backward_npi_broadcast_power)
+.set_num_inputs(3)
+.set_num_outputs(2)
+.set_attr<nnvm::TIsBackward>("TIsBackward", true)
+.set_attr<nnvm::FInplaceOption>("FInplaceOption",
+  [](const NodeAttrs& attrs){
+    return std::vector<std::pair<int, int> >{{0, 1}};
+  })
+.set_attr<FResourceRequest>("FResourceRequest",
+  [](const NodeAttrs& attrs) {
+    return std::vector<ResourceRequest>{ResourceRequest::kTempSpace};
+  })
+.set_attr<FCompute>("FCompute<cpu>", NumpyBinaryBackwardUseIn<cpu, mshadow_op::power_grad,
+                                                              mshadow_op::power_rgrad>);
+
+}  // namespace op
+}  // namespace mxnet
diff --git a/src/operator/numpy/np_elemwise_broadcast_op_pow.cu b/src/operator/numpy/np_elemwise_broadcast_op_pow.cu
new file mode 100644
index 0000000..234b5c8
--- /dev/null
+++ b/src/operator/numpy/np_elemwise_broadcast_op_pow.cu
@@ -0,0 +1,38 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ *  Copyright (c) 2019 by Contributors
+ * \file np_elemwise_broadcast_op_pow.cu
+ * \brief GPU Implementation of basic functions for elementwise binary broadcast power operator.
+ */
+
+#include "./np_elemwise_broadcast_op.h"
+
+namespace mxnet {
+namespace op {
+
+NNVM_REGISTER_OP(_npi_power)
+.set_attr<FCompute>("FCompute<gpu>", BinaryBroadcastRTCCompute{"power"});
+
+NNVM_REGISTER_OP(_backward_npi_broadcast_power)
+.set_attr<FCompute>("FCompute<gpu>", BinaryBroadcastRTCBackwardUseIn{"power_grad", "power_rgrad"});
+
+}  // namespace op
+}  // namespace mxnet
diff --git a/src/operator/numpy/np_elemwise_broadcast_op_scalar.cc b/src/operator/numpy/np_elemwise_broadcast_op_scalar.cc
new file mode 100644
index 0000000..c3a4f11
--- /dev/null
+++ b/src/operator/numpy/np_elemwise_broadcast_op_scalar.cc
@@ -0,0 +1,66 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ *  Copyright (c) 2019 by Contributors
+ * \file np_elemwise_broadcast_op_scalar.cc
+ * \brief CPU Implementation of basic functions for elementwise numpy binary scalar operators.
+ */
+
+#include "./np_elemwise_broadcast_op.h"
+
+namespace mxnet {
+namespace op {
+
+DMLC_REGISTER_PARAMETER(NumpyBinaryScalarParam);
+
+MXNET_OPERATOR_REGISTER_NP_BINARY_SCALAR(_npi_add_scalar)
+.set_attr<FCompute>("FCompute<cpu>", BinaryScalarOp::Compute<cpu, op::mshadow_op::plus>)
+.set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseNone{"_copy"});
+
+MXNET_OPERATOR_REGISTER_NP_BINARY_SCALAR(_npi_subtract_scalar)
+.set_attr<FCompute>("FCompute<cpu>", BinaryScalarOp::Compute<cpu, op::mshadow_op::minus>)
+.set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseNone{"_copy"});
+
+MXNET_OPERATOR_REGISTER_NP_BINARY_SCALAR(_npi_rsubtract_scalar)
+.set_attr<FCompute>("FCompute<cpu>", BinaryScalarOp::Compute<cpu, mshadow_op::rminus>)
+.set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseNone{"negative"});
+
+MXNET_OPERATOR_REGISTER_NP_BINARY_SCALAR(_npi_multiply_scalar)
+.set_attr<FCompute>("FCompute<cpu>", BinaryScalarOp::Compute<cpu, op::mshadow_op::mul>)
+.set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseNone{"_backward_mul_scalar"});
+
+MXNET_OPERATOR_REGISTER_NP_BINARY_SCALAR(_npi_mod_scalar)
+.set_attr<FCompute>("FCompute<cpu>", BinaryScalarOp::Compute<cpu, mshadow_op::mod>)
+.set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseIn{"_backward_mod_scalar"});
+
+MXNET_OPERATOR_REGISTER_NP_BINARY_SCALAR(_npi_rmod_scalar)
+.set_attr<FCompute>("FCompute<cpu>", BinaryScalarOp::Compute<cpu, mshadow_op::rmod>)
+.set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseIn{"_backward_rmod_scalar"});
+
+MXNET_OPERATOR_REGISTER_NP_BINARY_SCALAR(_npi_power_scalar)
+.set_attr<FCompute>("FCompute<cpu>", BinaryScalarOp::Compute<cpu, mshadow_op::power>)
+.set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseIn{"_backward_power_scalar"});
+
+MXNET_OPERATOR_REGISTER_NP_BINARY_SCALAR(_npi_rpower_scalar)
+.set_attr<FCompute>("FCompute<cpu>", BinaryScalarOp::Compute<cpu, mshadow_op::rpower>)
+.set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseOut{"_backward_rpower_scalar"});
+
+}  // namespace op
+}  // namespace mxnet
diff --git a/src/operator/numpy/np_elemwise_broadcast_op_scalar.cu b/src/operator/numpy/np_elemwise_broadcast_op_scalar.cu
new file mode 100644
index 0000000..613b026
--- /dev/null
+++ b/src/operator/numpy/np_elemwise_broadcast_op_scalar.cu
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ *  Copyright (c) 2019 by Contributors
+ * \file np_elemwise_broadcast_op_scalar.cu
+ * \brief GPU Implementation of basic functions for elementwise binary scalar operators.
+ */
+
+#include "./np_elemwise_broadcast_op.h"
+
+namespace mxnet {
+namespace op {
+
+NNVM_REGISTER_OP(_npi_add_scalar)
+.set_attr<FCompute>("FCompute<gpu>", BinaryScalarRTCCompute{"add"});
+
+NNVM_REGISTER_OP(_npi_subtract_scalar)
+.set_attr<FCompute>("FCompute<gpu>", BinaryScalarRTCCompute{"sub"});
+
+NNVM_REGISTER_OP(_npi_rsubtract_scalar)
+.set_attr<FCompute>("FCompute<gpu>", BinaryScalarRTCCompute{"rsub"});
+
+NNVM_REGISTER_OP(_npi_multiply_scalar)
+.set_attr<FCompute>("FCompute<gpu>", BinaryScalarRTCCompute{"mul"});
+
+NNVM_REGISTER_OP(_npi_mod_scalar)
+.set_attr<FCompute>("FCompute<gpu>", BinaryScalarRTCCompute{"mod"});
+
+NNVM_REGISTER_OP(_npi_rmod_scalar)
+.set_attr<FCompute>("FCompute<gpu>", BinaryScalarRTCCompute{"rmod"});
+
+NNVM_REGISTER_OP(_npi_power_scalar)
+.set_attr<FCompute>("FCompute<gpu>", BinaryScalarRTCCompute{"power"});
+
+NNVM_REGISTER_OP(_npi_rpower_scalar)
+.set_attr<FCompute>("FCompute<gpu>", BinaryScalarRTCCompute{"rpow"});
+
+}  // namespace op
+}  // namespace mxnet
diff --git a/src/operator/numpy/np_elemwise_broadcast_op_sub.cc b/src/operator/numpy/np_elemwise_broadcast_op_sub.cc
new file mode 100644
index 0000000..f48cd09
--- /dev/null
+++ b/src/operator/numpy/np_elemwise_broadcast_op_sub.cc
@@ -0,0 +1,54 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ *  Copyright (c) 2019 by Contributors
+ * \file np_elemwise_broadcast_op_sub.cc
+ * \brief CPU Implementation of basic functions for elementwise numpy binary subtract.
+ */
+
+#include "./np_elemwise_broadcast_op.h"
+
+namespace mxnet {
+namespace op {
+
+MXNET_OPERATOR_REGISTER_NP_BINARY_MIXED_PRECISION(_npi_subtract)
+.set_attr<FCompute>(
+  "FCompute<cpu>",
+  NumpyBinaryBroadcastCompute<cpu, op::mshadow_op::minus, op::mshadow_op::mixed_minus,
+                              op::mshadow_op::mixed_rminus>)
+.set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseIn{"_backward_npi_broadcast_sub"});
+
+NNVM_REGISTER_OP(_backward_npi_broadcast_sub)
+.set_num_inputs(3)
+.set_num_outputs(2)
+.set_attr<nnvm::TIsBackward>("TIsBackward", true)
+.set_attr<nnvm::FInplaceOption>("FInplaceOption",
+  [](const NodeAttrs& attrs){
+    return std::vector<std::pair<int, int> >{{0, 0}, {0, 1}};
+  })
+.set_attr<FResourceRequest>("FResourceRequest",
+  [](const NodeAttrs& attrs) {
+    return std::vector<ResourceRequest>{ResourceRequest::kTempSpace};
+  })
+.set_attr<FCompute>("FCompute<cpu>", NumpyBinaryBackwardUseIn<cpu, mshadow_op::posone,
+                                                                mshadow_op::negone>);
+
+}  // namespace op
+}  // namespace mxnet
diff --git a/src/operator/numpy/np_elemwise_broadcast_op_sub.cu b/src/operator/numpy/np_elemwise_broadcast_op_sub.cu
new file mode 100644
index 0000000..59d4adf
--- /dev/null
+++ b/src/operator/numpy/np_elemwise_broadcast_op_sub.cu
@@ -0,0 +1,38 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ *  Copyright (c) 2019 by Contributors
+ * \file np_elemwise_broadcast_op_sub.cu
+ * \brief GPU Implementation of basic functions for elementwise binary broadcast subtract operator.
+ */
+
+#include "./np_elemwise_broadcast_op.h"
+
+namespace mxnet {
+namespace op {
+
+NNVM_REGISTER_OP(_npi_subtract)
+.set_attr<FCompute>("FCompute<gpu>", BinaryBroadcastRTCCompute{"sub"});
+
+NNVM_REGISTER_OP(_backward_npi_broadcast_sub)
+.set_attr<FCompute>("FCompute<gpu>", BinaryBroadcastRTCBackwardUseIn{"one", "negone"});
+
+}  // namespace op
+}  // namespace mxnet