You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mxnet.apache.org by ak...@apache.org on 2021/08/26 10:35:51 UTC

[incubator-mxnet] branch v1.x updated: [v1.x] Common directory: mkldnn benchmarks (#20528)

This is an automated email from the ASF dual-hosted git repository.

akarbown pushed a commit to branch v1.x
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/v1.x by this push:
     new ced581d  [v1.x] Common directory: mkldnn benchmarks (#20528)
ced581d is described below

commit ced581d30df9a40d2cb5574b5860f73a0b9aa857
Author: mozga <ma...@intel.com>
AuthorDate: Thu Aug 26 12:33:30 2021 +0200

    [v1.x] Common directory: mkldnn benchmarks (#20528)
---
 .../mkldnn_benchmarks/adaptive_pooling_micro.py    |  64 ++++++++++++
 benchmark/opperf/mkldnn_benchmarks/fc_sum_micro.py | 116 +++++++++++++++++++++
 2 files changed, 180 insertions(+)

diff --git a/benchmark/opperf/mkldnn_benchmarks/adaptive_pooling_micro.py b/benchmark/opperf/mkldnn_benchmarks/adaptive_pooling_micro.py
new file mode 100644
index 0000000..793594c
--- /dev/null
+++ b/benchmark/opperf/mkldnn_benchmarks/adaptive_pooling_micro.py
@@ -0,0 +1,64 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+#!/usr/bin/python
+import mxnet as mx
+import time
+import numpy as np
+
+
def isSupported(x, y):
    """Return True if AdaptiveAvgPooling2D with input shape ``x`` and
    output shape ``y`` (both NCHW lists) reduces to a regular average
    pooling, i.e. each spatial input dim is an exact multiple of the
    corresponding output dim and the derived padding is zero.

    Parameters
    ----------
    x : list of int
        Input shape (N, C, H, W).
    y : list of int
        Requested output shape (N, C, OH, OW).

    Returns
    -------
    bool
        True when the op can be expressed as plain pooling (the fast,
        MKLDNN-friendly case), False otherwise.
    """
    # Every spatial output dim must be non-zero and evenly divide the
    # input dim; a zero output dim would also divide by zero below.
    for i in range(2, len(x)):
        s1 = x[i]
        s2 = y[i]
        if s2 == 0:
            return False
        if s1 % s2 != 0:
            return False

    IH, IW = x[2], x[3]
    OH, OW = y[2], y[3]

    # Exact integer arithmetic instead of np.floor/np.ceil on float
    # true-division: a // b == floor(a / b) and -((-a) // b) == ceil(a / b)
    # for ints, so there is no float rounding error for large dims.
    strides_H = (2 * IH) // OH - IH // OH
    strides_W = (2 * IW) // OW - IW // OW
    kernel_H = -((-2 * IH) // OH) - IH // OH
    kernel_W = -((-2 * IW) // OW) - IW // OW

    # Left/top padding implied by the stride/kernel choice; the op is a
    # plain pooling only when no padding is required.
    pad_l_top = (strides_H * (OH - 1) + kernel_H - IH) / 2
    pad_l_left = (strides_W * (OW - 1) + kernel_W - IW) / 2

    return pad_l_top == 0 and pad_l_left == 0
+
+
def time_procedure(shape, output_height, count):
    """Measure the mean wall-clock seconds of one AdaptiveAvgPooling2D
    call on a random NCHW tensor of the given shape.

    Parameters
    ----------
    shape : tuple of int
        Input tensor shape (N, C, H, W).
    output_height : int
        Square output size passed as ``output_size``.
    count : int
        Number of timed iterations to average over.
    """
    src = mx.nd.random_uniform(shape=shape, low=-1.0, high=1.0)
    # Drain any pending async work so the timer covers only the op.
    mx.nd.waitall()
    start = time.time()
    for _ in range(count):
        mx.nd.contrib.AdaptiveAvgPooling2D(src, output_size=output_height)
        mx.nd.waitall()
    elapsed = time.time() - start
    return elapsed / count
+
+
# Sweep square NCHW inputs (N == C, H == W) against a range of square
# output sizes, printing the timing next to whether the shape pair is
# the MKLDNN-supported (zero-padding) case.
count = 200
for batch in [1, 2, 4, 8, 16, 32]:
    for spatial in [1, 2, 4, 8, 16, 32, 128, 256, 512, 1024, 2048]:
        shape = (batch, batch, spatial, spatial)
        for out_size in [1, 2, 4, 8, 16, 32]:
            timing = time_procedure(shape, out_size, count)
            supported = isSupported(list(shape), [batch, batch, out_size, out_size])
            print("{}x{:5d}:{:5d} | {:.7f}".format(shape, out_size, supported, timing))
diff --git a/benchmark/opperf/mkldnn_benchmarks/fc_sum_micro.py b/benchmark/opperf/mkldnn_benchmarks/fc_sum_micro.py
new file mode 100644
index 0000000..caa21d3
--- /dev/null
+++ b/benchmark/opperf/mkldnn_benchmarks/fc_sum_micro.py
@@ -0,0 +1,116 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+#!/usr/bin/python
+import mxnet as mx
+from mxnet import nd
+from mxnet.gluon import nn
+import time
# (batch, input-features) shapes fed to the FC benchmark.
SHAPES = [(1, 224), (16, 1024), (32, 4096), (512, 512)]
# Output widths tried for the Dense layers.
num_hidden = [512, 1024, 4096]
# Timed iterations per configuration.
rounds = 5000
# Untimed burn-in iterations before the clock starts.
warmup = 100
+
+
class FCWithSum(nn.HybridBlock):
    """Two Dense layers whose outputs are added to a third input tensor
    — the fully-connected + elementwise-sum pattern this benchmark
    exercises.
    """

    def __init__(self, num_in, num_hidden, **kwargs):
        super(FCWithSum, self).__init__(**kwargs)
        # fc0 has its input width fixed; fc1 infers it on first forward.
        self.fc0 = nn.Dense(units=num_hidden, in_units=num_in)
        self.fc1 = nn.Dense(units=num_hidden)

    def hybrid_forward(self, F, data0, data1, data2):
        out0 = self.fc0(data0)
        out1 = self.fc1(data1)
        # Chain two sums: data2 + fc0 first, then fc1 on top.
        partial = data2 + out0
        return out1 + partial
+
+
def benchmark_float():
    """Time the float32 FCWithSum forward pass for every combination of
    SHAPES and num_hidden, printing total and per-iteration times.
    """
    for shape in SHAPES:
        for nhid in num_hidden:
            model = FCWithSum(shape[1], nhid)
            model.initialize()
            model.hybridize(static_alloc=True, static_shape=True)
            in0 = mx.nd.random_uniform(shape=shape, low=-1.0, high=1.0)
            in1 = mx.nd.random_uniform(shape=shape, low=-1.0, high=1.0)
            in2 = mx.nd.random_uniform(shape=(shape[0], nhid), low=-1.0, high=1.0)
            # Untimed burn-in so graph construction/caching is excluded.
            for _ in range(warmup):
                model(in0, in1, in2).wait_to_read()
            tic = time.time()
            for _ in range(rounds):
                model(in0, in1, in2).wait_to_read()
            toc = time.time()
            print("Shape: ({:4}, {:4}) | num_hidden: {:4} | Time: {:8.3f} s | Mean: {:8.3f} ms".format(
                shape[0], shape[1], nhid, toc - tic, 1000 * (toc-tic)/rounds))
+
+
class CalibIter(mx.io.DataIter):
    """Minimal DataIter yielding one pre-built batch, used only to feed
    calibration data to the quantizer.
    """

    def __init__(self, batch, data_shape, batch_size):
        super(CalibIter, self).__init__(batch_size)
        self.batch = batch
        self.data_shape = data_shape
        self.label_shape = (batch_size,)
        self.provide_label = []
        # A bare tuple means a single anonymous 'data' input; otherwise
        # the caller supplies a ready-made list of DataDesc entries.
        self.provide_data = ([('data', data_shape)]
                             if isinstance(data_shape, tuple)
                             else data_shape)

    def __iter__(self):
        yield self.batch
+
+
def benchmark_int8():
    """Quantize FCWithSum to int8 via naive calibration on one random
    batch, then time its forward pass for every combination of SHAPES
    and num_hidden, printing total and per-iteration times.
    """
    for shape in SHAPES:
        for nhid in num_hidden:
            model = FCWithSum(shape[1], nhid)
            model.initialize()
            model.hybridize(static_alloc=True, static_shape=True)

            in0 = mx.nd.random_uniform(shape=shape, low=-1.0, high=1.0)
            in1 = mx.nd.random_uniform(shape=shape, low=-1.0, high=1.0)
            shape2 = (shape[0], nhid)
            in2 = mx.nd.random_uniform(shape=shape2, low=-1.0, high=1.0)
            # One batch of the same random data serves as calibration input.
            batch = mx.io.DataBatch([in0, in1, in2], [])
            descs = [mx.io.DataDesc("data0", shape=shape, dtype='float32'),
                     mx.io.DataDesc("data1", shape=shape, dtype='float32'),
                     mx.io.DataDesc("data2", shape=shape2, dtype='float32')]
            calib_data = CalibIter(batch, descs, 1)
            net_quantized = mx.contrib.quant.quantize_net_v2(model, quantized_dtype='auto',
                                                             exclude_layers=None,
                                                             exclude_layers_match=None,
                                                             calib_data=calib_data,
                                                             calib_mode='naive',
                                                             quantize_mode='smart',
                                                             num_calib_examples=1,
                                                             ctx=mx.current_context())
            # Untimed burn-in so graph construction/caching is excluded.
            for _ in range(warmup):
                net_quantized(in0, in1, in2).wait_to_read()
            tic = time.time()
            for _ in range(rounds):
                net_quantized(in0, in1, in2).wait_to_read()
            toc = time.time()
            print("Shape: ({:4}, {:4}) | num_hidden: {:4} | Time: {:8.3f} s | Mean: {:8.3f} ms".format(
                shape[0], shape[1], nhid, toc - tic, 1000 * (toc-tic)/rounds))
+
+
+benchmark_int8()
+print("------- float: ------")
+benchmark_float()