You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mxnet.apache.org by ak...@apache.org on 2021/07/28 16:00:18 UTC
[incubator-mxnet] branch master updated: Add new benchmark function
for single operator comparison (#20388)
This is an automated email from the ASF dual-hosted git repository.
akarbown pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git
The following commit(s) were added to refs/heads/master by this push:
new 7077bc4 Add new benchmark function for single operator comparison (#20388)
7077bc4 is described below
commit 7077bc4c9f70fcb95c5882b9a63ed09ed7cf37f0
Author: AdamGrabowski <ad...@intel.com>
AuthorDate: Wed Jul 28 17:58:13 2021 +0200
Add new benchmark function for single operator comparison (#20388)
---
benchmark/opperf/README.md | 33 +++++++++
benchmark/opperf/rules/default_params.py | 5 ++
benchmark/opperf/utils/benchmark_utils.py | 110 ++++++++++++++++++++++++++----
3 files changed, 133 insertions(+), 15 deletions(-)
diff --git a/benchmark/opperf/README.md b/benchmark/opperf/README.md
index bb3fb8e..806d91a 100644
--- a/benchmark/opperf/README.md
+++ b/benchmark/opperf/README.md
@@ -172,6 +172,39 @@ Output for the above benchmark run, on a CPU machine, would look something like
Currently, opperf supports operators in `mx.nd.*` namespace.
However, locally, one can profile internal operators in `mx.nd.internal.*` namespace.
+## Usecase 6 - Compare performance for chosen operator from both NDArray library and its Numpy/Numpy_extension counterpart
+For example, you want to compare add operator from `mx.nd` and `mx.np`. You just run the following python script.
+
+```
+#!/usr/bin/python
+from benchmark.opperf.utils.benchmark_utils import run_benchmark_operator
+
+run_benchmark_operator(name = "add", run_backward=True)
+```
+
+Output for the above benchmark run, on a CPU machine, would look something like below:
+
+```
+<module 'mxnet.ndarray'>
+[{'add': [{'inputs': {'lhs': (128, 128), 'rhs': (128, 128)},
+ 'max_storage_mem_alloc_cpu/0': 32.768,
+ 'avg_time_forward_add': 0.0496,
+ 'avg_time_backward_add': 0.0793}]}]
+<module 'mxnet.numpy'>
+[{'add': [{'inputs': {'x1': (128, 128), 'x2': (128, 128)},
+ 'max_storage_mem_alloc_cpu/0': 32.768,
+ 'avg_time_forward_add': 0.0484,
+ 'avg_time_backward_add': 0.0898}]}]
+
+```
+This function uses the `run_performance_test` function mentioned in Usecase 3 and Usecase 4, and all of its parameters can be changed here as well.
+All arguments that are of type NDArray will be automatically provided with shape that is passed as `size`.
+If any function requires more arguments or differently shaped NDArrays, provide those arguments as `additional_inputs` as shown below:
+```
+run_benchmark_operator(name = "pick", size = (128,128), additional_inputs = {"index": (128,1)})
+```
+
+
#### Changes
Remove the hasattr check for `op.__name__` to be in `mx.nd`
diff --git a/benchmark/opperf/rules/default_params.py b/benchmark/opperf/rules/default_params.py
index 4c90338..0474eea 100644
--- a/benchmark/opperf/rules/default_params.py
+++ b/benchmark/opperf/rules/default_params.py
@@ -781,3 +781,8 @@ PARAMS_OF_TYPE_NDARRAY = ["lhs", "rhs", "data", "base", "exp", "sample",
"grads_sum_sq", "mhs", "data1", "data2", "loc", "parameters", "state",
"state_cell"]
+PARAMS_OF_TYPE_NP_ARRAY = ["x1", "x2", "prototype", "object", "a", "b", "fill_value", "array", "x", "arr",
+ "values", "ary", "seq", "arrays", "tup", "indices", "m", "ar", "q", "p", "condition",
+ "arys", "v", "A", "xp", "fp", "data", "mask", "gamma", "beta", "running_mean",
+ "running_var", "weight", "index", "lhs", "rhs"]
+
diff --git a/benchmark/opperf/utils/benchmark_utils.py b/benchmark/opperf/utils/benchmark_utils.py
index b3bf821..38a1c15 100644
--- a/benchmark/opperf/utils/benchmark_utils.py
+++ b/benchmark/opperf/utils/benchmark_utils.py
@@ -16,35 +16,90 @@
# under the License.
import logging
+import inspect
import mxnet as mx
from mxnet import nd
+from mxnet import np
from .ndarray_utils import get_mx_ndarray, nd_forward_and_profile, nd_forward_backward_and_profile
from .common_utils import merge_map_list
from .op_registry_utils import prepare_op_inputs
-from benchmark.opperf.rules.default_params import PARAMS_OF_TYPE_NDARRAY
+from benchmark.opperf.rules.default_params import PARAMS_OF_TYPE_NDARRAY, PARAMS_OF_TYPE_NP_ARRAY
from .profiler_utils import cpp_profile, python_profile
no_backward = {'gather_nd', 'softmax_cross_entropy', 'linalg_gelqf', 'linalg_slogdet', 'moments', 'SequenceLast', 'Embedding'}
-def _prepare_op_inputs(inputs, run_backward, dtype, ctx):
+def _prepare_op_inputs(inputs, run_backward, dtype, ctx, module):
mx.random.seed(41)
kwargs_list = []
+ if module == 'mxnet.numpy_extension' or module == 'mxnet.numpy':
+ PARAMS_TYPE = PARAMS_OF_TYPE_NP_ARRAY
+ get_array_fn = get_mx_np_ndarray
+ else:
+ PARAMS_TYPE = PARAMS_OF_TYPE_NDARRAY
+ get_array_fn = get_mx_ndarray
for inp in inputs:
kwargs = {}
for key, value in inp.items():
- if key in PARAMS_OF_TYPE_NDARRAY:
- kwargs[key] = get_mx_ndarray(ctx=ctx, in_tensor=value,
- dtype=dtype,
- initializer=nd.normal,
- attach_grad=run_backward)
+ if key in PARAMS_TYPE:
+ kwargs[key] = get_array_fn(ctx=ctx, in_tensor=value,
+ dtype=dtype,
+ initializer=nd.normal,
+ attach_grad=run_backward)
else:
kwargs[key] = value
kwargs_list.append(kwargs)
return kwargs_list
+def get_mx_np_ndarray(ctx, in_tensor, dtype, initializer, attach_grad=True):
+ """Helper function to prepare a MXNet Numpy NDArray tensor in given Context (ctx) of type (dtype).
+ You can get a new Tensor by providing only "Shape" or "Numpy NDArray" or another MXNet NDArray as
+ "in_tensor".
+
+ NOTE: This is a sync call and waits for the Tensor to be created.
+
+ Parameters
+ ----------
+ ctx: mx.ctx, default mx.cpu()
+ Context of the new MXNet NDArray Tensor.
+ in_tensor: Numpy NDArray or MXNet NDArray or Tuple of shape
+ Can be a tuple of shape or Numpy NDArray or MXNet NDArray.
+ dtype: str
+ Precision or Dtype of the expected Tensor. Ex: "float32", "Int64"
+ initializer:
+ Function reference to the initializer to use. Ex: mx.nd.random.normal, mx.nd.zeros
+ attach_grad: Boolean, default True
+ To attach a gradient for the Tensor. Default is True.
+
+ Returns
+ -------
+ MXNet NDArray Tensor.
+ """
+ if isinstance(in_tensor, int) or isinstance(in_tensor, float):
+ return in_tensor
+
+ if isinstance(in_tensor, tuple):
+ nd_ndarray = get_mx_ndarray(ctx=ctx, in_tensor=in_tensor,
+ dtype="float32",
+ initializer=initializer,
+ attach_grad=attach_grad)
+ tensor = nd_ndarray.as_np_ndarray().astype(dtype=dtype)
+ elif isinstance(in_tensor, list):
+ tensor = np.array(in_tensor, ctx=ctx)
+ elif isinstance(in_tensor, nd.NDArray):
+ tensor = in_tensor.as_np_ndarray()
+ elif isinstance(in_tensor, np.ndarray):
+ tensor = in_tensor.as_in_context(ctx)
+ else:
+ raise ValueError("Invalid input type for creating input tensor. Input can be tuple() of shape or Numpy Array or"
+ " MXNet NDArray. Given - ", in_tensor)
+ if attach_grad:
+ tensor.attach_grad()
+
+ tensor.wait_to_read()
+ return tensor
def parse_input_ndarray(input_dict):
"""Parse input for ndarray and extract array shape for better readability
@@ -96,7 +151,7 @@ def parse_input_ndarray(input_dict):
return no_new_line_input_dict
-def _run_nd_operator_performance_test(op, inputs, run_backward, warmup, runs, kwargs_list, profiler):
+def _run_operator_performance_test(op, inputs, run_backward, warmup, runs, kwargs_list, profiler):
if profiler == 'native':
if run_backward:
benchmark_helper_func = cpp_profile(nd_forward_backward_and_profile)
@@ -141,10 +196,11 @@ def run_performance_test(ops, inputs, run_backward=True,
Parameters
----------
ops: [Str]
- One or list of operators to benchmark. Should be an NDArray operator.
+ One or list of operators to benchmark. Should be an NDArray, Numpy or Numpy_extension operator.
inputs: map
Inputs for operator. Key should be name of parameter for operator.
- Example: inputs = {"lhs": (1024, 1024), "rhs": (1024, 1024)} for mx.nd.add
+ Example: inputs = {"lhs": (1024, 1024), "rhs": (1024, 1024)} for mx.nd.add or
+ inputs = {"x1": (1024, 1024), "x2": (1024, 1024)} for mx.np.add
run_backward: Boolean, Default is True
Should we have backward operator benchmarks.
dtype: Str, default 'float32'
@@ -166,20 +222,44 @@ def run_performance_test(ops, inputs, run_backward=True,
Note: when run_performance_test is called on the nd.Embedding operator with run_backward=True, an error will
be thrown. Track issue here: https://github.com/apache/incubator-mxnet/issues/11314
"""
- kwargs_list = _prepare_op_inputs(inputs, run_backward, dtype, ctx)
-
if not isinstance(ops, list):
ops = [ops]
op_benchmark_result = []
for op in ops:
- if hasattr(mx.nd, op.__name__):
- benchmark_result = _run_nd_operator_performance_test(op, inputs, run_backward, warmup, runs, kwargs_list, profiler)
+ if hasattr(mx.nd, op.__name__) or hasattr(mx.np, op.__name__) or hasattr(mx.npx, op.__name__):
+ kwargs_list = _prepare_op_inputs(inputs, run_backward, dtype, ctx, op.__module__)
+ benchmark_result = _run_operator_performance_test(op, inputs, run_backward, warmup, runs, kwargs_list, profiler)
else:
- raise ValueError("Unknown NDArray operator provided to benchmark. - ", op.__name__)
+ raise ValueError("Unknown {0} operator provided to benchmark. - {1}".format(op.__module__, op.__name__))
op_benchmark_result.append(benchmark_result)
return op_benchmark_result
+def run_benchmark_operator(name, size = (128,128), additional_inputs = {},
+ dtype = 'float32', run_backward = False, ctx = mx.cpu(),
+ warmup=10, runs=50, profiler="native"):
+ arg_list = {mx.nd: PARAMS_OF_TYPE_NDARRAY, mx.np: PARAMS_OF_TYPE_NP_ARRAY, mx.npx: PARAMS_OF_TYPE_NP_ARRAY}
+ modules = [mx.nd, mx.np, mx.npx]
+ responses = []
+ for module in modules:
+ if hasattr(module, name):
+ function = getattr(module, name)
+ args = inspect.getargspec(function).args
+ inputs = {}
+ for arg in args:
+ if arg in additional_inputs.keys():
+ inputs.update({arg: additional_inputs[arg]})
+ elif arg in arg_list[module]:
+ inputs.update({arg:size})
+ res = run_performance_test(function, run_backward=run_backward, dtype=dtype, ctx=ctx,
+ inputs=[inputs],
+ warmup=warmup, runs=runs, profiler=profiler)
+ responses.append(res)
+ else:
+ responses.append(str(module.__name__) + " does not have operator " + name)
+ for i in range(len(modules)):
+ print(modules[i].__name__)
+ print(responses[i])
def run_op_benchmarks(ops, dtype, ctx, profiler, int64_tensor, warmup, runs):
# Running im2col either forwards or backwards on GPU results in errors