Posted to commits@mxnet.apache.org by zh...@apache.org on 2018/11/20 07:02:10 UTC
[incubator-mxnet] branch master updated: [MXNET-1215] Allow dynamic
shapes to exist in imperative mode (#13283)
This is an automated email from the ASF dual-hosted git repository.
zhasheng pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git
The following commit(s) were added to refs/heads/master by this push:
new 779bdc5 [MXNET-1215] Allow dynamic shapes to exist in imperative mode (#13283)
779bdc5 is described below
commit 779bdc5e7ee3abd6a2d23e8bd97d47fc08ae6bc5
Author: Junru Shao <ju...@gmail.com>
AuthorDate: Tue Nov 20 02:01:56 2018 -0500
[MXNET-1215] Allow dynamic shapes to exist in imperative mode (#13283)
* Add NDArray with lazy shape, Add boolean mask operator
* Fix lints
* Address comments and refactor forward pass
* Add backward pass for boolean index
* Fix tests....
* Add unittests
* Make lint happy
* Address comments
* Address comments, and complete docstring
* Update docstring
---
include/mxnet/ndarray.h | 36 ++++++++-
src/imperative/imperative.cc | 12 ++-
src/imperative/imperative_utils.h | 17 ++--
src/operator/contrib/boolean_mask-inl.h | 134 ++++++++++++++++++++++++++++++++
src/operator/contrib/boolean_mask.cc | 114 +++++++++++++++++++++++++++
tests/python/unittest/test_operator.py | 18 +++++
6 files changed, 320 insertions(+), 11 deletions(-)
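For orientation, a minimal sketch of what this change enables from the Python frontend, using the `boolean_mask` operator introduced below (values taken from its docstring example):

    import mxnet as mx

    data = mx.nd.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
    index = mx.nd.array([0, 1, 0])
    # The output shape depends on the values in `index`, not only on the
    # input shapes, so it cannot be inferred before the kernel runs.
    out = mx.nd.contrib.boolean_mask(data, index)
    print(out.shape)  # (1, 3): exactly one entry of `index` is non-zero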
diff --git a/include/mxnet/ndarray.h b/include/mxnet/ndarray.h
index e877d35..4ba13ca 100644
--- a/include/mxnet/ndarray.h
+++ b/include/mxnet/ndarray.h
@@ -103,7 +103,18 @@ class NDArray {
bool delay_alloc = true, int dtype = mshadow::default_type_flag,
std::vector<int> aux_types = {}, std::vector<TShape> aux_shapes = {},
TShape storage_shape = TShape(mshadow::Shape1(0)));
-
+ /*!
+ * \brief constructs a new dynamic NDArray whose shape is unknown,
+ * hence its storage is inherently allocated lazily
+ * \param ctx context of NDArray
+ * \param dtype data type of this ndarray
+ */
+ explicit NDArray(Context ctx, int dtype = mshadow::default_type_flag) {
+ ptr_ = std::make_shared<Chunk>(TShape(mshadow::Shape1(0)), ctx, true, dtype);
+ dtype_ = dtype;
+ storage_type_ = kDefaultStorage;
+ entry_ = {nullptr, 0, 0};
+ }
/*!
* \brief constructing a static NDArray that shares data with TBlob
* Use with caution: allocate ONLY ONE NDArray for each TBlob,
@@ -157,7 +168,20 @@ class NDArray {
: ptr_(std::make_shared<Chunk>(stype, data, aux_data, dev_id)), shape_(shape),
dtype_(data.type_flag_), storage_type_(stype), entry_({nullptr, 0, 0}) {
}
-
+ /*!
+ * \brief initialize the NDArray, assuming it is not assigned a meaningful shape before
+ * \param shape the shape of the NDArray
+ */
+ void Init(const TShape &shape) {
+ ptr_->Init(shape, this->dtype_);
+ this->shape_ = shape;
+ }
+ /*!
+ * \brief set the correct shape of NDArray directly from the storage_shape of its own chunk.
+ */
+ void SetShapeFromChunk() {
+ shape_ = ptr_->storage_shape;
+ }
/*
* This indicates whether an array is a view of another array (created by
* reshape or slice). If an array is a view and the data is stored in
@@ -960,7 +984,13 @@ class NDArray {
#endif
}
}
-
+ /*! \brief initialize the shape and dtype, assuming it is not initialized before. */
+ void Init(const TShape &shape, int dtype) {
+ auto size = shape.Size();
+ storage_shape = shape;
+ shandle.size = size * mshadow::mshadow_sizeof(dtype);
+ this->CheckAndAlloc();
+ }
inline void CheckAndAlloc(const TShape &shape, const std::vector<TShape> &aux_shapes,
int dtype) {
// calculate size, perform allocation
diff --git a/src/imperative/imperative.cc b/src/imperative/imperative.cc
index fcbc09c..a381b23 100644
--- a/src/imperative/imperative.cc
+++ b/src/imperative/imperative.cc
@@ -106,8 +106,16 @@ OpStatePtr Imperative::Invoke(
SetShapeType(ctx, attrs, inputs, outputs, &dispatch_mode);
std::vector<OpReqType> req;
SetWriteInplaceReq(inputs, outputs, &req);
-
- return InvokeOp(ctx, attrs, inputs, outputs, req, dispatch_mode);
+ OpStatePtr ret = InvokeOp(ctx, attrs, inputs, outputs, req, dispatch_mode);
+ // the following loop finds the correct output shapes when some shapes are dynamic
+ for (size_t i = 0; i < outputs.size(); i++) {
+ if (outputs[i]->shape().ndim() == 0) {
+ // the WaitToRead overhead here does not seem to be avoidable
+ outputs[i]->WaitToRead();
+ outputs[i]->SetShapeFromChunk();
+ }
+ }
+ return ret;
}
void Imperative::MarkVariables(
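The practical consequence of this loop: for operators whose output shape is only known after execution, the imperative call now blocks until the output is materialized, so the shape observed from the frontend is always concrete. A sketch of the observable behavior (same values as the example above):

    out = mx.nd.contrib.boolean_mask(data, index)
    # Invoke() has already called WaitToRead() on every output with unknown
    # shape, so by the time control returns here the shape is resolved.
    assert out.shape == (1, 3)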
diff --git a/src/imperative/imperative_utils.h b/src/imperative/imperative_utils.h
index 9c86843..4b0d131 100644
--- a/src/imperative/imperative_utils.h
+++ b/src/imperative/imperative_utils.h
@@ -117,11 +117,13 @@ inline void SetShapeType(const Context& ctx,
for (auto& i : outputs) {
out_shapes.push_back(i->shape());
}
- CHECK(infershape.count(attrs.op))
- << "Operator " << attrs.op->name << " is missing FInferShape attribute";
- CHECK(infershape[attrs.op](attrs, &in_shapes, &out_shapes));
- CHECK_EQ(out_shapes.size(), outputs.size());
-
+ bool is_dynamic_shape_existing = false;
+ if (!infershape.count(attrs.op)) {
+ is_dynamic_shape_existing = true;
+ } else {
+ CHECK(infershape[attrs.op](attrs, &in_shapes, &out_shapes));
+ CHECK_EQ(out_shapes.size(), outputs.size());
+ }
// infer type
std::vector<int>& in_types = ret->arg_types;
in_types.clear();
@@ -178,7 +180,10 @@ inline void SetShapeType(const Context& ctx,
for (size_t i = 0; i < outputs.size(); ++i) {
NDArrayStorageType storage_type = static_cast<NDArrayStorageType>(out_storage_types[i]);
if (outputs[i]->is_none()) {
- if (storage_type == kDefaultStorage) {
+ if (is_dynamic_shape_existing) {
+ // once there is a dynamic shape somewhere, we cannot pre-determine the shape.
+ *outputs[i] = NDArray(ctx, out_types[i]);
+ } else if (storage_type == kDefaultStorage) {
*outputs[i] = NDArray(out_shapes[i], ctx, true, out_types[i]);
} else {
*outputs[i] = NDArray(storage_type, out_shapes[i], ctx, true, out_types[i]);
diff --git a/src/operator/contrib/boolean_mask-inl.h b/src/operator/contrib/boolean_mask-inl.h
new file mode 100644
index 0000000..ac0681b
--- /dev/null
+++ b/src/operator/contrib/boolean_mask-inl.h
@@ -0,0 +1,134 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*!
+ * Copyright (c) 2018 by Contributors
+ * \file boolean_mask-inl.h
+*/
+
+#ifndef MXNET_OPERATOR_CONTRIB_BOOLEAN_MASK_INL_H_
+#define MXNET_OPERATOR_CONTRIB_BOOLEAN_MASK_INL_H_
+
+#include <dmlc/logging.h>
+#include <dmlc/parameter.h>
+#include <mxnet/operator.h>
+#include <mxnet/ndarray.h>
+#include <map>
+#include <vector>
+#include <string>
+#include <utility>
+#include <algorithm>
+#include "../operator_common.h"
+#include "../mxnet_op.h"
+#include "../tensor/init_op.h"
+#include "../mshadow_op.h"
+#include "../elemwise_op_common.h"
+
+namespace mxnet {
+namespace op {
+
+struct BooleanMaskParam : public dmlc::Parameter<BooleanMaskParam> {
+ int axis;
+ DMLC_DECLARE_PARAMETER(BooleanMaskParam) {
+ DMLC_DECLARE_FIELD(axis).set_default(0)
+ .describe("An integer that represents the axis in NDArray to mask from.");
+ }
+};
+
+template<typename xpu>
+inline void BooleanMaskForward(const nnvm::NodeAttrs& attrs,
+ const OpContext &ctx,
+ const std::vector<NDArray> &inputs,
+ const std::vector<OpReqType> &req,
+ const std::vector<NDArray> &outputs) {
+ // TODO(@junrushao1994): This implementation is a proof-of-concept,
+ // hence quite slow in practice. Performance should be improved in the future.
+ CHECK_EQ(inputs.size(), 2U);
+ CHECK_EQ(outputs.size(), 1U);
+ const BooleanMaskParam& param = nnvm::get<BooleanMaskParam>(attrs.parsed);
+ const int axis = param.axis;
+ const NDArray &data = inputs[0];
+ const NDArray &idx = inputs[1];
+ const NDArray &out = outputs[0];
+ CHECK_EQ(axis, 0) << "Not supported yet";
+ CHECK_EQ(data.shape()[axis], idx.shape()[0]);
+ CHECK_EQ(idx.shape().ndim(), 1U);
+ // count the number of 1s in `idx`, so that we know the output dimension
+ size_t valid_num = 0;
+ MSHADOW_TYPE_SWITCH(idx.dtype(), DType, {
+ DType* idx_dptr = idx.data().dptr<DType>();
+ int length = idx.shape()[0];
+ for (int i = 0; i < length; i++) {
+ if (idx_dptr[i]) {
+ ++valid_num;
+ }
+ }
+ });
+ // set the output shape forcefully
+ TShape s = data.shape();
+ s[axis] = valid_num;
+ const_cast<NDArray &>(out).Init(s);
+ // do the copy
+ MSHADOW_TYPE_SWITCH(idx.dtype(), DType, {
+ DType* idx_dptr = idx.data().dptr<DType>();
+ int length = idx.shape()[0];
+ mshadow::Stream<xpu> *stream = ctx.get_stream<xpu>();
+ for (int i = 0, j = 0; i < length; ++i) {
+ if (idx_dptr[i]) {
+ NDArray src = data.At(i);
+ NDArray dst = out.At(j++);
+ CHECK(src.shape() == dst.shape());
+ mxnet_op::copy(stream, dst.data(), src.data());
+ }
+ }
+ });
+}
+
+template<typename xpu>
+inline void BooleanMaskBackward(const nnvm::NodeAttrs& attrs,
+ const OpContext &ctx,
+ const std::vector<NDArray> &inputs,
+ const std::vector<OpReqType> &req,
+ const std::vector<NDArray> &outputs) {
+ CHECK_EQ(inputs.size(), 3U);
+ CHECK_EQ(outputs.size(), 2U);
+ // inputs: {ograd, data, idx}
+ // outputs: {igrad_data, igrad_idx}
+ const NDArray& ograd = inputs[0];
+ const NDArray& idx = inputs[2];
+ const NDArray& igrad_data = outputs[0];
+ MSHADOW_TYPE_SWITCH(idx.dtype(), DType, {
+ DType* idx_dptr = idx.data().dptr<DType>();
+ int length = idx.shape()[0];
+ mshadow::Stream<xpu> *stream = ctx.get_stream<xpu>();
+ Fill<false>(stream, igrad_data.data(), req[0], 0);
+ for (int i = 0, j = 0; i < length; ++i) {
+ if (idx_dptr[i]) {
+ NDArray src = ograd.At(j++);
+ NDArray dst = igrad_data.At(i);
+ CHECK(src.shape() == dst.shape());
+ mxnet_op::copy(stream, dst.data(), src.data());
+ }
+ }
+ });
+}
+
+} // namespace op
+} // namespace mxnet
+
+#endif // MXNET_OPERATOR_CONTRIB_BOOLEAN_MASK_INL_H_
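For reference, the forward and backward semantics of the kernels above can be sketched in NumPy (axis=0 only, matching the CHECKs in the code; an illustrative reference, not part of the commit):

    import numpy as np

    def boolean_mask_fwd_ref(data, index):
        # keep the rows of `data` where `index` is non-zero
        return data[index.astype(bool)]

    def boolean_mask_bwd_ref(ograd, data, index):
        # scatter the rows of `ograd` back to the positions selected by
        # `index`; unselected rows receive zero gradient
        igrad = np.zeros_like(data)
        igrad[index.astype(bool)] = ograd
        return igrad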
diff --git a/src/operator/contrib/boolean_mask.cc b/src/operator/contrib/boolean_mask.cc
new file mode 100644
index 0000000..2dcafb6
--- /dev/null
+++ b/src/operator/contrib/boolean_mask.cc
@@ -0,0 +1,114 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*!
+ * Copyright (c) 2018 by Contributors
+ * \file boolean_mask.cc
+*/
+
+#include "./boolean_mask-inl.h"
+
+namespace mxnet {
+namespace op {
+
+DMLC_REGISTER_PARAMETER(BooleanMaskParam);
+
+
+bool BooleanMaskType(const nnvm::NodeAttrs& attrs,
+ std::vector<int> *in_attrs,
+ std::vector<int> *out_attrs) {
+ CHECK_EQ(in_attrs->size(), 2);
+ CHECK_EQ(out_attrs->size(), 1);
+ TYPE_ASSIGN_CHECK(*out_attrs, 0, in_attrs->at(0));
+ TYPE_ASSIGN_CHECK(*in_attrs, 0, out_attrs->at(0));
+ return out_attrs->at(0) != -1;
+}
+
+bool BooleanMaskStorageType(const nnvm::NodeAttrs& attrs,
+ const int dev_mask,
+ DispatchMode* dispatch_mode,
+ std::vector<int> *in_attrs,
+ std::vector<int> *out_attrs) {
+ CHECK_EQ(in_attrs->size(), 2);
+ CHECK_EQ(out_attrs->size(), 1);
+ for (int &attr : *in_attrs) {
+ CHECK_EQ(attr, kDefaultStorage) << "Only default storage is supported";
+ }
+ for (int &attr : *out_attrs) {
+ attr = kDefaultStorage;
+ }
+ *dispatch_mode = DispatchMode::kFComputeEx;
+ return true;
+}
+
+bool BooleanMaskBackStorageType(const nnvm::NodeAttrs& attrs,
+ const int dev_mask,
+ DispatchMode* dispatch_mode,
+ std::vector<int> *in_attrs,
+ std::vector<int> *out_attrs) {
+ CHECK_EQ(in_attrs->size(), 3);
+ CHECK_EQ(out_attrs->size(), 2);
+ for (int &attr : *in_attrs) {
+ CHECK_EQ(attr, kDefaultStorage) << "Only default storage is supported";
+ }
+ for (int &attr : *out_attrs) {
+ attr = kDefaultStorage;
+ }
+ *dispatch_mode = DispatchMode::kFComputeEx;
+ return true;
+}
+
+NNVM_REGISTER_OP(_contrib_boolean_mask)
+.describe(R"code(
+Experimental CPU-only support for boolean masking.
+Given an n-d NDArray data and a 1-d NDArray index,
+the operator produces an n-d NDArray out whose shape cannot be pre-determined,
+holding the rows of data where the corresponding element in index is non-zero.
+
+>>> data = mx.nd.array([[1, 2, 3],[4, 5, 6],[7, 8, 9]])
+>>> index = mx.nd.array([0, 1, 0])
+>>> out = mx.nd.contrib.boolean_mask(data, index)
+>>> out
+
+[[4. 5. 6.]]
+<NDArray 1x3 @cpu(0)>
+
+)code" ADD_FILELINE)
+.set_attr_parser(ParamParser<BooleanMaskParam>)
+.set_num_inputs(2)
+.set_num_outputs(1)
+.set_attr<nnvm::FInferType>("FInferType", BooleanMaskType)
+.set_attr<FComputeEx>("FComputeEx<cpu>", BooleanMaskForward<cpu>)
+.set_attr<FInferStorageType>("FInferStorageType", BooleanMaskStorageType)
+.set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseIn{"_backward_contrib_boolean_mask"})
+.add_argument("data", "NDArray-or-Symbol", "Data")
+.add_argument("index", "NDArray-or-Symbol", "Mask")
+.add_arguments(BooleanMaskParam::__FIELDS__());
+
+NNVM_REGISTER_OP(_backward_contrib_boolean_mask)
+.set_num_inputs(3)
+.set_num_outputs(2)
+.set_attr<nnvm::TIsBackward>("TIsBackward", true)
+.set_attr<FInferStorageType>("FInferStorageType", BooleanMaskBackStorageType)
+.set_attr<FComputeEx>("FComputeEx<cpu>", BooleanMaskBackward<cpu>)
+.add_arguments(BooleanMaskParam::__FIELDS__());
+
+} // namespace op
+} // namespace mxnet
diff --git a/tests/python/unittest/test_operator.py b/tests/python/unittest/test_operator.py
index 1bf9ca0..c270197 100644
--- a/tests/python/unittest/test_operator.py
+++ b/tests/python/unittest/test_operator.py
@@ -4793,6 +4793,24 @@ def test_index_copy():
assert same(x.grad.asnumpy(), x_grad.asnumpy())
assert same(t.grad.asnumpy(), t_grad.asnumpy())
+
+@with_seed()
+def test_boolean_mask():
+ if default_context().device_type != 'cpu':
+ return
+ data = mx.nd.array([[1, 2, 3],[4, 5, 6],[7, 8, 9]])
+ index = mx.nd.array([0, 1, 0])
+ data.attach_grad()
+ with mx.autograd.record():
+ out = mx.nd.contrib.boolean_mask(data, index)
+ out.backward()
+ data.grad.wait_to_read()
+ expected = np.array([[4, 5, 6]])
+ expected_grad = np.array([[0, 0, 0], [1, 1, 1], [0, 0, 0]])
+ assert same(out.asnumpy(), expected)
+ assert same(data.grad.asnumpy(), expected_grad)
+
+
@with_seed()
def test_div_sqrt_dim():
data_tmp = np.random.normal(0, 1, (5, 10, 8))