You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mxnet.apache.org by jx...@apache.org on 2017/11/21 00:31:52 UTC

[incubator-mxnet] 07/08: image to_tensor (#8691)

This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch vision
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git

commit 303e8821d858f44da3b148e3535a94d3b59e1549
Author: Yizhi Liu <ja...@gmail.com>
AuthorDate: Thu Nov 16 23:01:37 2017 -0800

    image to_tensor (#8691)
---
 src/operator/image/image_common.h     |  7 +--
 src/operator/image/image_random-inl.h | 92 +++++++++++++++++++++++++++--------
 src/operator/image/image_random.cc    | 21 ++++++--
 src/operator/mxnet_op.h               |  3 +-
 4 files changed, 95 insertions(+), 28 deletions(-)

diff --git a/src/operator/image/image_common.h b/src/operator/image/image_common.h
index 7cf3f96..3b6b8e3 100644
--- a/src/operator/image/image_common.h
+++ b/src/operator/image/image_common.h
@@ -81,8 +81,9 @@ static cv::Mat mat_convert(TBlob input, int hight, int weight, int channel) {
   }
   return m;
 }
-} // namespace op
-} // namespace mxnet
+}  // namespace op
+}  // namespace mxnet
 
 
-#endif // MXNET_OPERATOR_IMAGE_IMAGE_COMMON_H_
\ No newline at end of file
+#endif  // MXNET_OPERATOR_IMAGE_IMAGE_COMMON_H_
+
diff --git a/src/operator/image/image_random-inl.h b/src/operator/image/image_random-inl.h
index 8a3acf6..c50ecb7 100644
--- a/src/operator/image/image_random-inl.h
+++ b/src/operator/image/image_random-inl.h
@@ -25,15 +25,14 @@
 #ifndef MXNET_OPERATOR_IMAGE_IMAGE_RANDOM_INL_H_
 #define MXNET_OPERATOR_IMAGE_IMAGE_RANDOM_INL_H_
 
-#include <vector>
 #include <mxnet/base.h>
+#include <vector>
 #include <opencv2/opencv.hpp>
 #include <opencv2/core/mat.hpp>
 #include "../mxnet_op.h"
 #include "image_common.h"
 #include "../../operator/operator_common.h"
 
-
 namespace mxnet {
 namespace op {
 
@@ -47,13 +46,66 @@ static void RandomFlip(const nnvm::NodeAttrs &attrs,
                        const std::vector<OpReqType> &req,
                        const std::vector<TBlob> &outputs) {
 }
+
+// Type inference for `to_tensor`: accepts uint8 (or a not-yet-known dtype) and emits float32.
+inline bool ToTensorType(const nnvm::NodeAttrs& attrs,
+                         std::vector<int> *in_attrs, std::vector<int> *out_attrs) {
+  CHECK_EQ(in_attrs->size(), 1U);
+  CHECK_EQ(out_attrs->size(), 1U);
+  const int in_type = (*in_attrs)[0];  // -1 marks a dtype that has not been inferred yet
+  CHECK(in_type == -1 || in_type == mshadow::kUint8) << "`to_tensor` only supports uint8 input";
+  TYPE_ASSIGN_CHECK(*out_attrs, 0, mshadow::kFloat32);
+  return in_type != -1;  // false tells the inference pass this node is still incomplete
+}
+
+inline bool ToTensorShape(const nnvm::NodeAttrs& attrs,  // shape rule: (H, W, C) -> (C, H, W)
+                          std::vector<TShape> *in_attrs, std::vector<TShape> *out_attrs) {
+  CHECK_EQ(in_attrs->size(), 1U);
+  CHECK_EQ(out_attrs->size(), 1U);
+  const TShape &shp = (*in_attrs)[0];
+  if (shp.ndim() == 0) return false;  // input shape not known yet: defer instead of aborting
+  CHECK_EQ(shp.ndim(), 3U) << "`to_tensor` only supports 3 dimensions";
+  TShape ret(3);
+  ret[0] = shp[2];  // channel axis moves to the front
+  ret[1] = shp[0];
+  ret[2] = shp[1];
+  SHAPE_ASSIGN_CHECK(*out_attrs, 0, ret);
+  return true;
+}
+
 template<typename xpu>
 static void ToTensor(const nnvm::NodeAttrs &attrs,
                      const OpContext &ctx,
                      const std::vector<TBlob> &inputs,
                      const std::vector<OpReqType> &req,
                      const std::vector<TBlob> &outputs) {
+  // Converts an (H, W, C) uint8 image into a (C, H, W) float32 tensor, dividing by 255.
+  typedef uint8_t SrcDType;
+  typedef float   DstDType;
+
+  CHECK_EQ(req[0], kWriteTo)
+    << "`to_tensor` does not support inplace";
+
+  const TBlob src = inputs[0];
+  const TBlob dst = outputs[0];
+  const int rows = src.shape_[0];
+  const int cols = src.shape_[1];
+  const int channels = src.shape_[2];
+
+  mshadow::Stream<xpu> *stream = ctx.get_stream<xpu>();
+  MXNET_ASSIGN_REQ_SWITCH(req[0], Req, {
+    auto src_hwc = src.get<xpu, 3, SrcDType>(stream);
+    auto dst_chw = dst.get<xpu, 3, DstDType>(stream);
+    for (int h = 0; h < rows; ++h) {
+      for (int w = 0; w < cols; ++w) {
+        for (int c = 0; c < channels; ++c) {
+          Assign(dst_chw[c][h][w], Req, DstDType(src_hwc[h][w][c] / 255.0));
+        }
+      }
+    }
+  });
 }
+
 template<typename xpu>
 static void Normalize(const nnvm::NodeAttrs &attrs,
                       const OpContext &ctx,
@@ -81,20 +133,20 @@ static void RandomBrightness(const nnvm::NodeAttrs &attrs,
   auto input = inputs[0];
   auto output = outputs[0];
   int channel = input.shape_[0];
-  int hight = input.shape_[1];
+  int height = input.shape_[1];
   int weight = input.shape_[2];
   Stream<xpu> *s = ctx.get_stream<xpu>();
   Random<xpu> *prnd = ctx.requested[kRandom].get_random<xpu, real_t>(s);
 
   const RandomBrightnessParam &param = nnvm::get<RandomBrightnessParam>(attrs.parsed);
-  float alpha_b = 1.0 + std::uniform_real_distribution<float>(-param.max_brightness, param.max_brightness)(prnd->GetRndEngine());
+  float alpha_b = 1.0 + std::uniform_real_distribution<float>(
+    -param.max_brightness, param.max_brightness)(prnd->GetRndEngine());
   MSHADOW_TYPE_SWITCH(outputs[0].type_flag_, DType, {
     MXNET_ASSIGN_REQ_SWITCH(req[0], Req, {
       mxnet_op::Kernel<mxnet_op::op_with_req<mshadow::op::mul, Req>, xpu>::Launch(
         s, inputs[0].Size(), outputs[0].dptr<DType>(), inputs[0].dptr<DType>(), DType(alpha_b));
     });
   });
-
 }
 
 struct RandomContrastParam : public dmlc::Parameter<RandomContrastParam> {
@@ -125,14 +177,15 @@ static void RandomContrast(const nnvm::NodeAttrs &attrs,
   auto input = inputs[0];
   auto output = outputs[0];
   int channel = input.shape_[0];
-  int hight = input.shape_[1];
+  int height = input.shape_[1];
   int weight = input.shape_[2];
   Stream<xpu> *s = ctx.get_stream<xpu>();
   Random<xpu> *prnd = ctx.requested[kRandom].get_random<xpu, real_t>(s);
 
 
   const RandomContrastParam &param = nnvm::get<RandomContrastParam>(attrs.parsed);
-  float alpha_c = 1.0 + std::uniform_real_distribution<float>(-param.max_contrast, param.max_contrast)(prnd->GetRndEngine());
+  float alpha_c = 1.0 + std::uniform_real_distribution<float>(
+    -param.max_contrast, param.max_contrast)(prnd->GetRndEngine());
 
   const float R2YF = 0.299f;
   const float G2YF = 0.587f;
@@ -143,22 +196,21 @@ static void RandomContrast(const nnvm::NodeAttrs &attrs,
     auto input_3d = input.get<xpu, 3, DType>(s);
     DType sum = (DType)0.0;
     for (int c = 0; c < channel; ++c) {
-      for (int h = 0; h < hight; ++h) {
+      for (int h = 0; h < height; ++h) {
         for (int w = 0; w < weight; ++w) {
           sum += input_3d[c][h][w] * coeffs0[c];
         }
       }
     }
-    float gray_mean = sum / (float)(hight * weight);
+    float gray_mean = sum / static_cast<float>(height * weight);
     float beta = (1 - alpha_c) * gray_mean;
 
     MXNET_ASSIGN_REQ_SWITCH(req[0], Req, {
       mxnet_op::Kernel<mxnet_op::op_with_req<mul_add, Req>, xpu>::Launch(
-        s, inputs[0].Size(), outputs[0].dptr<DType>(), inputs[0].dptr<DType>(), DType(alpha_c), DType(beta));
+        s, inputs[0].Size(), outputs[0].dptr<DType>(),
+        inputs[0].dptr<DType>(), DType(alpha_c), DType(beta));
     });
-
   });
-
 }
 
 struct RandomSaturationParam : public dmlc::Parameter<RandomSaturationParam> {
@@ -180,12 +232,13 @@ static void RandomSaturation(const nnvm::NodeAttrs &attrs,
   auto input = inputs[0];
   auto output = outputs[0];
   int channel = input.shape_[0];
-  int hight = input.shape_[1];
+  int height = input.shape_[1];
   int weight = input.shape_[2];
   Stream<xpu> *s = ctx.get_stream<xpu>();
   Random<xpu> *prnd = ctx.requested[kRandom].get_random<xpu, real_t>(s);
   const RandomSaturationParam &param = nnvm::get<RandomSaturationParam>(attrs.parsed);
-  float alpha_s = 1.0 + std::uniform_real_distribution<float>(-param.max_saturation, param.max_saturation)(prnd->GetRndEngine());
+  float alpha_s = 1.0 + std::uniform_real_distribution<float>(
+    -param.max_saturation, param.max_saturation)(prnd->GetRndEngine());
   float alpha_o = 1 - alpha_s;
   const float R2YF = 0.299f;
   const float G2YF = 0.587f;
@@ -202,20 +255,19 @@ static void RandomSaturation(const nnvm::NodeAttrs &attrs,
           Assign(output_3d, Req, input_3d)
           break;
         case 3:
-          for (int h = 0; h < hight; ++h) {
+          for (int h = 0; h < height; ++h) {
             for (int w = 0; w < weight; ++w) {
-              float gray = input_3d[0][h][w] * R2YF + input_3d[1][h][w] * G2YF + input_3d[2][h][w] * B2YF;
+              float gray =
+                input_3d[0][h][w] * R2YF + input_3d[1][h][w] * G2YF + input_3d[2][h][w] * B2YF;
               Assign(output_3d[0][h][w], Req, DType(gray * alpha_s + input_3d[0][h][w] * alpha_o))
             }
           }
           break;
         default:
           LOG(FATAL) << "not support channel" << channel;
-
       }
     });
   });
-
 }
 
 template<typename xpu>
@@ -245,7 +297,7 @@ static void RandomLighting(const nnvm::NodeAttrs &attrs,
 
 
 
-} // namespace op
-} // namespace mxnet
+}  // namespace op
+}  // namespace mxnet
 
 #endif  // MXNET_OPERATOR_IMAGE_IMAGE_RANDOM_INL_H_
diff --git a/src/operator/image/image_random.cc b/src/operator/image/image_random.cc
index 63f7904..26fa843 100644
--- a/src/operator/image/image_random.cc
+++ b/src/operator/image/image_random.cc
@@ -33,14 +33,27 @@
 namespace mxnet {
 namespace op {
 
+NNVM_REGISTER_OP(_image_to_tensor)
+.describe(R"code(Converts a uint8 image of shape (H, W, C) into a float32 tensor of
+shape (C, H, W), dividing every value by 255.
+
+)code" ADD_FILELINE)
+.set_num_inputs(1)
+.set_num_outputs(1)
+.set_attr<nnvm::FInferShape>("FInferShape", ToTensorShape)
+.set_attr<nnvm::FInferType>("FInferType", ToTensorType)
+.set_attr<FCompute>("FCompute<cpu>", ToTensor<cpu>)
+.set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseNone{ "_copy" })
+.add_argument("data", "NDArray-or-Symbol", "The input.");
+
 DMLC_REGISTER_PARAMETER(RandomBrightnessParam);
 NNVM_REGISTER_OP(_image_random_brightness)
 .describe(R"code()code" ADD_FILELINE)
 .set_num_inputs(1)
 .set_num_outputs(1)
 .set_attr_parser(ParamParser<RandomBrightnessParam>)
-.set_attr<FResourceRequest>("FResourceRequest", [](const NodeAttrs& attrs) { 
-  return std::vector<ResourceRequest>{ResourceRequest::kRandom}; 
+.set_attr<FResourceRequest>("FResourceRequest", [](const NodeAttrs& attrs) {
+  return std::vector<ResourceRequest>{ResourceRequest::kRandom};
 })
 .set_attr<nnvm::FInferShape>("FInferShape", ElemwiseShape<1, 1>)
 .set_attr<nnvm::FInferType>("FInferType", ElemwiseType<1, 1>)
@@ -81,5 +94,5 @@ NNVM_REGISTER_OP(_image_random_saturation)
 .add_argument("data", "NDArray-or-Symbol", "The input.")
 .add_arguments(RandomSaturationParam::__FIELDS__());
 
-}
-}
+}  // namespace op
+}  // namespace mxnet
diff --git a/src/operator/mxnet_op.h b/src/operator/mxnet_op.h
index f6d91dc..707d0fa 100644
--- a/src/operator/mxnet_op.h
+++ b/src/operator/mxnet_op.h
@@ -316,7 +316,8 @@ struct op_with_req {
 
   /*! \brief input is tensor and two scalar value */
   template<typename DType>
-  MSHADOW_XINLINE static void Map(int i, DType *out, const DType *in, const DType value_1, const DType value_2) {
+  MSHADOW_XINLINE static void Map(int i, DType *out, const DType *in,
+                                  const DType value_1, const DType value_2) {
     KERNEL_ASSIGN(out[i], req, OP::Map(in[i], value_1, value_2));
   }
 

-- 
To stop receiving notification emails like this one, please contact
"commits@mxnet.apache.org" <co...@mxnet.apache.org>.