Posted to commits@mxnet.apache.org by cj...@apache.org on 2017/11/12 07:34:50 UTC

[incubator-mxnet] branch master updated: Fix changed OMP call in LaunchEx, add broadcast perf test (#8622)

This is an automated email from the ASF dual-hosted git repository.

cjolivier01 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
     new c11a608  Fix changed OMP call in LaunchEx, add broadcast perf test (#8622)
c11a608 is described below

commit c11a608310116678511d9bcad96f372309fbee4e
Author: Chris Olivier <cj...@gmail.com>
AuthorDate: Sat Nov 11 23:34:42 2017 -0800

    Fix changed OMP call in LaunchEx, add broadcast perf test (#8622)
---
 src/operator/mxnet_op.h              |   8 +--
 tests/cpp/operator/broadcast_perf.cc | 113 +++++++++++++++++++++++++++++++++++
 2 files changed, 117 insertions(+), 4 deletions(-)

diff --git a/src/operator/mxnet_op.h b/src/operator/mxnet_op.h
index b2d5011..e5c3b51 100644
--- a/src/operator/mxnet_op.h
+++ b/src/operator/mxnet_op.h
@@ -373,12 +373,12 @@ struct Kernel<OP, cpu> {
   template<typename ...Args>
   inline static void LaunchEx(mshadow::Stream<cpu> *s, const int N, Args... args) {
 #ifdef _OPENMP
-    const int omp_cores = Engine::Get()->num_omp_threads_per_worker();
-    if (omp_cores <= 1) {
+    const int omp_threads = engine::OpenMP::Get()->GetRecommendedOMPThreadCount();
+    if (omp_threads <= 1) {
       OP::Map(0, N, args...);
     } else {
-      int length = (N + omp_cores - 1) / omp_cores;
-      #pragma omp parallel for num_threads(omp_cores)
+      int length = (N + omp_threads - 1) / omp_threads;
+      #pragma omp parallel for num_threads(omp_threads)
       for (int i = 0; i < N; i += length) {
         OP::Map(i, i + length > N ? N - i : length, args...);
       }
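
For context, the chunked launch above can be exercised in isolation. Below is a minimal standalone sketch of the same pattern, assuming only OpenMP; the SquareOp functor, the problem size N, and the use of omp_get_max_threads() in place of GetRecommendedOMPThreadCount() are illustrative stand-ins, not part of this commit:

// Minimal sketch of the chunked OpenMP launch used by Kernel<OP, cpu>::LaunchEx.
// SquareOp is an illustrative stand-in for an MXNet operator kernel, and
// omp_get_max_threads() stands in for GetRecommendedOMPThreadCount().
#include <omp.h>
#include <cstdio>
#include <vector>

struct SquareOp {
  // Map handles the half-open block [i, i + length), mirroring the LaunchEx
  // contract of handing each thread one contiguous range rather than one index.
  static void Map(int i, int length, float *out) {
    for (int j = i; j < i + length; ++j) {
      out[j] = static_cast<float>(j) * static_cast<float>(j);
    }
  }
};

int main() {
  const int N = 1000;  // illustrative problem size
  std::vector<float> data(N);
  const int omp_threads = omp_get_max_threads();
  if (omp_threads <= 1) {
    SquareOp::Map(0, N, data.data());
  } else {
    // Ceiling division: each thread receives one block of roughly N / omp_threads items.
    const int length = (N + omp_threads - 1) / omp_threads;
    #pragma omp parallel for num_threads(omp_threads)
    for (int i = 0; i < N; i += length) {
      SquareOp::Map(i, i + length > N ? N - i : length, data.data());
    }
  }
  std::printf("data[%d] = %g\n", N - 1, data[N - 1]);
  return 0;
}

Note that each OP::Map call in LaunchEx receives a contiguous block of up to length elements rather than a single index, which is what distinguishes it from the per-element Launch path.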
diff --git a/tests/cpp/operator/broadcast_perf.cc b/tests/cpp/operator/broadcast_perf.cc
new file mode 100644
index 0000000..6986c4d
--- /dev/null
+++ b/tests/cpp/operator/broadcast_perf.cc
@@ -0,0 +1,113 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ *  \file broadcast_perf.cc
+ *  \brief Perf/profile run of broadcast kernel
+ *  \author Chris Olivier
+ */
+#include <gtest/gtest.h>
+#include <mxnet/tensor_blob.h>
+#include "../include/test_op_runner.h"
+#include "../include/test_core_op.h"
+
+using namespace mxnet;
+
+using kwargs_t = test::op::kwargs_t;
+
+template<typename DType = float>
+static void RunCoreOpBidirectional(const bool isGPU,
+                                   const kwargs_t& op_kwargs,
+                                   const char *op_name,
+                                   const char *backward_op_name = "") {
+  const std::vector<TShape> shapes = { {2, 3}, {2, 1} };
+  test::op::CoreOpExecutor<DType> op(isGPU, shapes);
+  op.set_verbose(false);
+
+  op.Init(op.ArgsWithOpName(op_kwargs, op_name, backward_op_name));
+
+  PRINT_NDARRAYS(op.ctx().run_ctx, op.inputs());
+  PRINT_NDARRAYS(op.ctx().run_ctx, op.outputs());
+  op.Execute();
+  PRINT_NDARRAYS(op.ctx().run_ctx, op.outputs());
+  if (op.HasBackward()) {
+    PRINT_NDARRAYS(op.ctx().run_ctx, op.bwd_inputs());
+    PRINT_NDARRAYS(op.ctx().run_ctx, op.bwd_outputs());
+    op.ExecuteBackward();
+    PRINT_NDARRAYS(op.ctx().run_ctx, op.bwd_outputs());
+  }
+}
+
+/*!
+ * \brief Generic bidirectional sanity test
+ */
+TEST(BROADCAST_PERF, ExecuteBidirectional) {
+  RunCoreOpBidirectional(false, {}, "broadcast_add", "_backward_broadcast_add");
+}
+
+template<typename DType = float>
+static void RunCoreOpTimingTest(const bool isGPU,
+                                const kwargs_t& op_kwargs,
+                                const char *op_name,
+                                const char *backward_op_name = "") {
+  const kwargs_t kwargs = test::op::CoreOpExecutor<DType>::ArgsWithOpName(
+    op_kwargs, op_name, backward_op_name);
+
+  // warm up the code paths and caches before the timed performance runs
+  test::op::CoreOperatorRunner<DType> runner;
+  runner.RunBidirectional(false, { {2, 3}, {2, 1} }, kwargs, 1);
+
+  // Do the performance runs
+  std::vector<std::vector<TShape>> shapes;
+  if (test::performance_run) {
+    shapes = {
+      { {28,  28},  {28, 1} },
+      { {18,  32},  {18, 1} },
+      { {128, 128}, {128, 1} },
+      { {2560, 1280}, {2560, 1} }
+    };
+  } else {
+    shapes = {
+      { {28,  28},  {28, 1} },
+      { {128, 128}, {128, 1} }
+    };
+  }
+  const char *pu = isGPU ? "GPU" : "CPU";
+  for (const std::vector<TShape> &shape : shapes) {
+    runner.TimingTest(std::string(op_name) + " Operator " + pu, isGPU, false, kwargs,
+                      2, 10, shape);
+  }
+}
+
+/*!
+ * \brief broadcast_add timing test for CPU
+ */
+TEST(BROADCAST_PERF, TimingCPU) {
+  RunCoreOpTimingTest(false, {}, "broadcast_add", "_backward_broadcast_add");
+}
+
+#if MXNET_USE_CUDA == 1
+/*!
+ * \brief broadcast_add timing test for GPU
+ */
+TEST(BROADCAST_PERF, TimingGPU) {
+  RunCoreOpTimingTest(true, {}, "broadcast_add", "_backward_broadcast_add");
+}
+#endif  // MXNET_USE_CUDA == 1
+
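
For readers unfamiliar with the operator being timed: on the {2, 3} / {2, 1} shape pairs used above, broadcast_add expands the size-1 trailing axis of the second operand, adding a column vector to every column of the matrix. The following scalar sketch shows only that arithmetic; it is not the MXNet kernel, and the concrete values are illustrative:

// Illustrative scalar model of broadcast_add for shapes (2, 3) + (2, 1) -> (2, 3).
// This is not the MXNet kernel; it only shows the arithmetic being benchmarked.
#include <cstdio>

int main() {
  const int rows = 2, cols = 3;
  const float lhs[rows][cols] = { {1, 2, 3}, {4, 5, 6} };
  const float rhs[rows][1]    = { {10}, {20} };  // the (2, 1) operand
  float out[rows][cols];
  for (int r = 0; r < rows; ++r) {
    for (int c = 0; c < cols; ++c) {
      // The size-1 axis of rhs is broadcast across every column of lhs.
      out[r][c] = lhs[r][c] + rhs[r][0];
    }
  }
  for (int r = 0; r < rows; ++r) {
    std::printf("%g %g %g\n", out[r][0], out[r][1], out[r][2]);  // 11 12 13 / 24 25 26
  }
  return 0;
}

The larger shape pairs in RunCoreOpTimingTest exercise the same pattern at {128, 128} and, when the test harness sets test::performance_run, up to {2560, 1280}.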

-- 
To stop receiving notification emails like this one, please contact
commits@mxnet.apache.org.