You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mxnet.apache.org by GitBox <gi...@apache.org> on 2021/09/23 08:15:59 UTC

[GitHub] [incubator-mxnet] bartekkuncer commented on a change in pull request #20567: [Performance] Add oneDNN support for temperature parameter in Softmax

bartekkuncer commented on a change in pull request #20567:
URL: https://github.com/apache/incubator-mxnet/pull/20567#discussion_r714562666



##########
File path: src/operator/nn/mkldnn/mkldnn_softmax.cc
##########
@@ -23,189 +23,208 @@
  * \author Da Zheng
  */
 
-#include "./mkldnn_base-inl.h"
-#include "./mkldnn_ops-inl.h"
+#if MXNET_USE_ONEDNN == 1
 
-#include "../softmax-inl.h"
+#include "./mkldnn_softmax-inl.h"
 
-#if MXNET_USE_ONEDNN == 1
 namespace mxnet {
 namespace op {
 
-static mkldnn::softmax_forward::primitive_desc GetSoftmaxFwdPd(bool is_train,
-                                                               const int axis,
-                                                               const mkldnn::memory& input_mem) {
-  mkldnn::memory::desc data_md = input_mem.get_desc();
-  auto cpu_engine              = CpuEngine::Get()->get_engine();
-  auto prop = is_train ? mkldnn::prop_kind::forward_training : mkldnn::prop_kind::forward_scoring;
-  auto desc = mkldnn::softmax_forward::desc(prop, data_md, axis);
-  return mkldnn::softmax_forward::primitive_desc(desc, cpu_engine);
-}
-
-static mkldnn::softmax_backward::primitive_desc GetSoftmaxBwdPd(
-    const mkldnn::memory& diff_mem,
-    const mkldnn::memory& data_mem,
-    const int axis,
-    const mkldnn::softmax_forward::primitive_desc& hint_fwd_pd) {
-  mkldnn::memory::desc diff_md = diff_mem.get_desc();
-  mkldnn::memory::desc data_md = data_mem.get_desc();
-  auto cpu_engine              = CpuEngine::Get()->get_engine();
-  auto desc                    = mkldnn::softmax_backward::desc(diff_md, data_md, axis);
-  return mkldnn::softmax_backward::primitive_desc(desc, cpu_engine, hint_fwd_pd);
-}
-
 bool SupportMKLDNNSoftmax(const SoftmaxParam& param, const NDArray& data, const NDArray& output) {
-  // MKLDNN does not support temperature argument in their softmax function
-  // now. Need update this once they start to support it.
   const int ndim      = data.shape().ndim();
   const int in_dtype  = data.dtype();
   const int out_dtype = output.dtype();
   const int axis      = CheckAxis(param.axis, ndim);
-  // MKLDNN does not support temperature argument in their softmax function
-  // now. Need update this once they start to support it.
+
+  if (param.temperature.has_value() && param.temperature.value() == 0.0) {
+    return false;
+  }
+
   // Currently, MKLDNN shows bad performance when softmax is not performed on the last dimension
-  if (param.temperature.has_value() || in_dtype != mshadow::kFloat32 || in_dtype != out_dtype ||
-      axis != (ndim - 1)) {
+  if (in_dtype != mshadow::kFloat32 || in_dtype != out_dtype || axis != (ndim - 1)) {
     return false;
   }
 
-  // only supports ndim = 1, 2, 3, 4 for now
-  return (ndim >= 1 && ndim <= 4);
+  // only supports up to 6 ndim
+  return (ndim >= 1 && ndim <= 6);
 }
 
-class MKLDNNSoftmaxFwd {
- public:
-  mkldnn::softmax_forward::primitive_desc pd;
-
-  MKLDNNSoftmaxFwd(const bool is_train, const int axis, const mkldnn::memory& input)
-      : pd(GetSoftmaxFwdPd(is_train, axis, input)) {
-    fwd_ = std::make_shared<mkldnn::softmax_forward>(pd);
-  }
+void MKLDNNSoftmaxForward(const nnvm::NodeAttrs& attrs,
+                          const OpContext& ctx,
+                          const NDArray& in_data,
+                          const OpReqType& req,
+                          const NDArray& out_data) {
+  if (req == kNullOp)
+    return;
+  // same as the FCompute path, softmax only supports kWriteTo and kWriteInplace for now.
+  CHECK_NE(req, kAddTo);
 
-  const mkldnn::softmax_forward& GetFwd() const {
-    return *fwd_;
+  const auto& param = nnvm::get<SoftmaxParam>(attrs.parsed);
+  if (param.temperature.has_value()) {
+    TmpMemMgr::Get()->Init(ctx.requested[0]);
   }
 
- private:
-  std::shared_ptr<mkldnn::softmax_forward> fwd_;
-};
+  const bool is_train = ctx.is_train;
+  const auto tensors  = MKLDNNSoftmaxFwd::Tensors(in_data, out_data);
+  const auto& fwd     = MKLDNNSoftmaxFwd::GetCached(param, tensors, is_train);
+  fwd.Execute(tensors);
+}
 
 typedef ParamOpSign<SoftmaxParam> MKLDNNSoftmaxSignature;
-
-static MKLDNNSoftmaxFwd& GetSoftmaxFwd(const SoftmaxParam& param,
-                                       const int real_axis,
-                                       const bool is_train,
-                                       const NDArray& data,
-                                       const NDArray& output) {
+MKLDNNSoftmaxFwd& MKLDNNSoftmaxFwd::GetCached(const SoftmaxParam& param,
+                                              const Tensors& tensors,
+                                              const bool is_train) {
 #if DMLC_CXX11_THREAD_LOCAL
   static thread_local std::unordered_map<MKLDNNSoftmaxSignature, MKLDNNSoftmaxFwd, OpHash> fwds;
 #else
   static MX_THREAD_LOCAL std::unordered_map<MKLDNNSoftmaxSignature, MKLDNNSoftmaxFwd, OpHash> fwds;
 #endif
 
   MKLDNNSoftmaxSignature key(param);
-  key.AddSign(real_axis);
+  float temperature = param.temperature.has_value() ? param.temperature.value() : 1.0f;

Review comment:
       Could this variable be declared `const`?

##########
File path: src/operator/nn/mkldnn/mkldnn_softmax-inl.h
##########
@@ -0,0 +1,154 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file mkldnn_softmax-inl.h
+ * Naming convention:
+ *                  ________
+ *                 |Softmax|
+ *  data  -------->|  FWD  |---> out
+ *                 |_______|
+ *                 ________
+ *                |Softmax|<--- out
+ *  data_grad <---|  BWD  |
+ *                |_______|<--- out_grad
+ */
+
+#ifndef MXNET_OPERATOR_NN_MKLDNN_MKLDNN_SOFTMAX_INL_H_
+#define MXNET_OPERATOR_NN_MKLDNN_MKLDNN_SOFTMAX_INL_H_
+
+#if MXNET_USE_ONEDNN == 1
+#include <vector>
+
+#include "./mkldnn_base-inl.h"
+#include "./mkldnn_ops-inl.h"
+
+#include "../softmax-inl.h"
+
+namespace mxnet {
+namespace op {
+
+using softmax_fwd_t    = mkldnn::softmax_forward;
+using softmax_fwd_pd_t = mkldnn::softmax_forward::primitive_desc;
+
+using softmax_bwd_t    = mkldnn::softmax_backward;
+using softmax_bwd_pd_t = mkldnn::softmax_backward::primitive_desc;
+
+using linear_t    = mkldnn::eltwise_forward;
+using linear_pd_t = mkldnn::eltwise_forward::primitive_desc;
+
+class MKLDNNSoftmaxFwd {
+ public:
+  struct Tensors {
+    Tensors(const NDArray& data, const NDArray& out);
+
+    const NDArray& data;
+    const NDArray& out;
+  };
+
+  static MKLDNNSoftmaxFwd& GetCached(const SoftmaxParam& param,
+                                     const Tensors& tensors,
+                                     const bool is_train);
+
+  static softmax_fwd_pd_t GetSoftmaxFwdPd(const mkldnn::memory& input_mem,
+                                          const int axis,
+                                          const bool is_train);
+
+  static linear_pd_t GetTemperaturePd(const mkldnn::memory& input_mem, const float temperature);
+
+  MKLDNNSoftmaxFwd(const SoftmaxParam& param, const Tensors& tensors, const bool is_train);
+  void Execute(const Tensors& tensors) const;
+
+ private:
+  std::shared_ptr<softmax_fwd_pd_t> softmax_pd;
+  std::shared_ptr<softmax_fwd_t> softmax_fwd;
+  std::shared_ptr<linear_pd_t> temperature_pd;
+  std::shared_ptr<linear_t> temperature_fwd;
+};
+
+MKLDNNSoftmaxFwd::Tensors::Tensors(const NDArray& data, const NDArray& output)
+    : data(data), out(output) {}
+
+MKLDNNSoftmaxFwd::MKLDNNSoftmaxFwd(const SoftmaxParam& param,
+                                   const Tensors& tensors,
+                                   const bool is_train) {
+  float temperature = param.temperature.has_value() ? param.temperature.value() : 1.0f;
+  int axis          = CheckAxis(param.axis, tensors.data.shape().ndim());

Review comment:
       Could this variable be declared `const`?

##########
File path: src/operator/nn/mkldnn/mkldnn_softmax-inl.h
##########
@@ -0,0 +1,154 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file mkldnn_softmax-inl.h
+ * Naming convention:
+ *                  ________
+ *                 |Softmax|
+ *  data  -------->|  FWD  |---> out
+ *                 |_______|
+ *                 ________
+ *                |Softmax|<--- out
+ *  data_grad <---|  BWD  |
+ *                |_______|<--- out_grad
+ */
+
+#ifndef MXNET_OPERATOR_NN_MKLDNN_MKLDNN_SOFTMAX_INL_H_
+#define MXNET_OPERATOR_NN_MKLDNN_MKLDNN_SOFTMAX_INL_H_
+
+#if MXNET_USE_ONEDNN == 1
+#include <vector>
+
+#include "./mkldnn_base-inl.h"
+#include "./mkldnn_ops-inl.h"
+
+#include "../softmax-inl.h"
+
+namespace mxnet {
+namespace op {
+
+using softmax_fwd_t    = mkldnn::softmax_forward;
+using softmax_fwd_pd_t = mkldnn::softmax_forward::primitive_desc;
+
+using softmax_bwd_t    = mkldnn::softmax_backward;
+using softmax_bwd_pd_t = mkldnn::softmax_backward::primitive_desc;
+
+using linear_t    = mkldnn::eltwise_forward;
+using linear_pd_t = mkldnn::eltwise_forward::primitive_desc;
+
+class MKLDNNSoftmaxFwd {
+ public:
+  struct Tensors {
+    Tensors(const NDArray& data, const NDArray& out);
+
+    const NDArray& data;
+    const NDArray& out;
+  };
+
+  static MKLDNNSoftmaxFwd& GetCached(const SoftmaxParam& param,
+                                     const Tensors& tensors,
+                                     const bool is_train);
+
+  static softmax_fwd_pd_t GetSoftmaxFwdPd(const mkldnn::memory& input_mem,
+                                          const int axis,
+                                          const bool is_train);
+
+  static linear_pd_t GetTemperaturePd(const mkldnn::memory& input_mem, const float temperature);
+
+  MKLDNNSoftmaxFwd(const SoftmaxParam& param, const Tensors& tensors, const bool is_train);
+  void Execute(const Tensors& tensors) const;
+
+ private:
+  std::shared_ptr<softmax_fwd_pd_t> softmax_pd;
+  std::shared_ptr<softmax_fwd_t> softmax_fwd;
+  std::shared_ptr<linear_pd_t> temperature_pd;
+  std::shared_ptr<linear_t> temperature_fwd;
+};
+
+MKLDNNSoftmaxFwd::Tensors::Tensors(const NDArray& data, const NDArray& output)
+    : data(data), out(output) {}
+
+MKLDNNSoftmaxFwd::MKLDNNSoftmaxFwd(const SoftmaxParam& param,
+                                   const Tensors& tensors,
+                                   const bool is_train) {
+  float temperature = param.temperature.has_value() ? param.temperature.value() : 1.0f;
+  int axis          = CheckAxis(param.axis, tensors.data.shape().ndim());

Review comment:
       Shouldn't this function have a different name that describes its actual purpose?

##########
File path: src/operator/nn/mkldnn/mkldnn_softmax-inl.h
##########
@@ -0,0 +1,154 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file mkldnn_softmax-inl.h
+ * Naming convention:
+ *                  ________
+ *                 |Softmax|
+ *  data  -------->|  FWD  |---> out
+ *                 |_______|
+ *                 ________
+ *                |Softmax|<--- out
+ *  data_grad <---|  BWD  |
+ *                |_______|<--- out_grad
+ */
+
+#ifndef MXNET_OPERATOR_NN_MKLDNN_MKLDNN_SOFTMAX_INL_H_
+#define MXNET_OPERATOR_NN_MKLDNN_MKLDNN_SOFTMAX_INL_H_
+
+#if MXNET_USE_ONEDNN == 1
+#include <vector>
+
+#include "./mkldnn_base-inl.h"
+#include "./mkldnn_ops-inl.h"
+
+#include "../softmax-inl.h"
+
+namespace mxnet {
+namespace op {
+
+using softmax_fwd_t    = mkldnn::softmax_forward;
+using softmax_fwd_pd_t = mkldnn::softmax_forward::primitive_desc;
+
+using softmax_bwd_t    = mkldnn::softmax_backward;
+using softmax_bwd_pd_t = mkldnn::softmax_backward::primitive_desc;
+
+using linear_t    = mkldnn::eltwise_forward;
+using linear_pd_t = mkldnn::eltwise_forward::primitive_desc;
+
+class MKLDNNSoftmaxFwd {
+ public:
+  struct Tensors {
+    Tensors(const NDArray& data, const NDArray& out);
+
+    const NDArray& data;
+    const NDArray& out;
+  };
+
+  static MKLDNNSoftmaxFwd& GetCached(const SoftmaxParam& param,
+                                     const Tensors& tensors,
+                                     const bool is_train);
+
+  static softmax_fwd_pd_t GetSoftmaxFwdPd(const mkldnn::memory& input_mem,
+                                          const int axis,
+                                          const bool is_train);
+
+  static linear_pd_t GetTemperaturePd(const mkldnn::memory& input_mem, const float temperature);
+
+  MKLDNNSoftmaxFwd(const SoftmaxParam& param, const Tensors& tensors, const bool is_train);
+  void Execute(const Tensors& tensors) const;
+
+ private:
+  std::shared_ptr<softmax_fwd_pd_t> softmax_pd;
+  std::shared_ptr<softmax_fwd_t> softmax_fwd;
+  std::shared_ptr<linear_pd_t> temperature_pd;
+  std::shared_ptr<linear_t> temperature_fwd;
+};
+
+MKLDNNSoftmaxFwd::Tensors::Tensors(const NDArray& data, const NDArray& output)
+    : data(data), out(output) {}
+
+MKLDNNSoftmaxFwd::MKLDNNSoftmaxFwd(const SoftmaxParam& param,
+                                   const Tensors& tensors,
+                                   const bool is_train) {
+  float temperature = param.temperature.has_value() ? param.temperature.value() : 1.0f;
+  int axis          = CheckAxis(param.axis, tensors.data.shape().ndim());
+  auto input_mem    = tensors.data.GetMKLDNNData();
+
+  softmax_pd  = std::make_shared<softmax_fwd_pd_t>(GetSoftmaxFwdPd(*input_mem, axis, is_train));
+  softmax_fwd = std::make_shared<softmax_fwd_t>(*softmax_pd);
+
+  if (temperature != 1.0f) {
+    temperature_pd  = std::make_shared<linear_pd_t>(GetTemperaturePd(*input_mem, temperature));
+    temperature_fwd = std::make_shared<linear_t>(*temperature_pd);
+  }
+}
+
+class MKLDNNSoftmaxBwd {
+ public:
+  struct Tensors {
+    Tensors(const std::vector<NDArray>& inputs, const std::vector<NDArray>& outputs);
+    const NDArray& out_grad;
+    const NDArray& out;
+    const NDArray& data_grad;
+  };
+  static MKLDNNSoftmaxBwd& GetCached(const SoftmaxParam& param, const Tensors& tensors);
+
+  static softmax_bwd_pd_t GetSoftmaxBwdPd(const mkldnn::memory& out_grad_mem,
+                                          const mkldnn::memory& out_mem,
+                                          const int axis,
+                                          const softmax_fwd_pd_t& hint_fwd_pd);
+
+  MKLDNNSoftmaxBwd(const SoftmaxParam& param, const Tensors& tensors);
+  void Execute(const Tensors& tensors, const std::vector<OpReqType>& req) const;
+
+ private:
+  std::shared_ptr<softmax_bwd_pd_t> softmax_bwd_pd;
+  std::shared_ptr<softmax_bwd_t> softmax_bwd;
+  std::shared_ptr<linear_pd_t> temperature_pd;
+  std::shared_ptr<linear_t> temperature_fwd;
+};
+
+MKLDNNSoftmaxBwd::Tensors::Tensors(const std::vector<NDArray>& inputs,
+                                   const std::vector<NDArray>& outputs)
+    : out_grad(inputs[0]), out(inputs[1]), data_grad(outputs[0]) {}
+
+MKLDNNSoftmaxBwd::MKLDNNSoftmaxBwd(const SoftmaxParam& param, const Tensors& tensors) {
+  float temperature   = param.temperature.has_value() ? param.temperature.value() : 1.0f;

Review comment:
       Could this variable be declared `const`?




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@mxnet.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org