Posted to commits@mxnet.apache.org by jx...@apache.org on 2018/01/22 20:43:31 UTC

[incubator-mxnet] branch master updated (be8134d -> ab0d1d5)

This is an automated email from the ASF dual-hosted git repository.

jxie pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git.


    from be8134d  Updated proposal to use of codeowners file (#9448)
     new 3aef4e8  [WIP]Image Augmenter (#8633)
     new 992123d  [WIP]]Vision (#8649)
     new 1997319  add stub (#8650)
     new a0cff26  [WIP][vision]fix (#8657)
     new e81f2dd  fix image_random compile (#8665)
     new 7999c43  Add Gluon data transform (#8672)
     new 2957a33  image to_tensor (#8691)
     new 6b06bf2  [Image OP] Normalize (#8731)
     new 434b016  fix (#8736)
     new 6cfd5b4  add test script
     new b6698b8  Revert "add test script"
     new 29f6055  Refactor image operators (#8761)
     new 68fc799  [Image] add random lighting (#8779)
     new a15a6e7  image flip op (#8759)
     new 9dda920  [WIP]hue (#8678)
     new fb199fc  Vision (#8856)
     new f74f6e7  fix (#8857)
     new b9569ee  add comments and sanity check (#8901)
     new 62ffb92  [Vision] add test cases for flip, normalize, to_tensor (#8919)
     new ab0d1d5  lint

The 20 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 docker/install/python.sh                           |   4 +-
 python/mxnet/base.py                               |   8 +-
 python/mxnet/gluon/data/dataset.py                 |  99 ++-
 .../mxnet/gluon/data/vision}/__init__.py           |   5 +-
 .../gluon/data/{vision.py => vision/datasets.py}   |   6 +-
 python/mxnet/gluon/data/vision/transforms.py       | 298 +++++++++
 python/mxnet/ndarray/__init__.py                   |   5 +-
 .../mxnet/{symbol/contrib.py => ndarray/image.py}  |   4 +-
 python/mxnet/symbol/__init__.py                    |   4 +-
 python/mxnet/symbol/{contrib.py => image.py}       |   4 +-
 src/operator/batch_norm_v1-inl.h                   |   2 +-
 src/operator/image/image_random-inl.h              | 713 +++++++++++++++++++++
 src/operator/image/image_random.cc                 | 132 ++++
 src/operator/mxnet_op.h                            |   7 +
 src/operator/random/multisample_op.h               |   2 +-
 src/operator/tensor/broadcast_reduce_op_index.cc   |   2 +-
 .../elemwise_binary_broadcast_op_extended.cc       |   2 +-
 .../tensor/elemwise_binary_broadcast_op_logic.cc   |   2 +-
 tests/python/unittest/test_gluon_data_vision.py    |  78 +++
 19 files changed, 1350 insertions(+), 27 deletions(-)
 copy {plugin/opencv => python/mxnet/gluon/data/vision}/__init__.py (91%)
 rename python/mxnet/gluon/data/{vision.py => vision/datasets.py} (99%)
 create mode 100644 python/mxnet/gluon/data/vision/transforms.py
 copy python/mxnet/{symbol/contrib.py => ndarray/image.py} (93%)
 copy python/mxnet/symbol/{contrib.py => image.py} (93%)
 create mode 100644 src/operator/image/image_random-inl.h
 create mode 100644 src/operator/image/image_random.cc
 create mode 100644 tests/python/unittest/test_gluon_data_vision.py
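
For context, this series adds a composable transforms API under mxnet.gluon.data.vision. A minimal usage sketch (the mean/std values are illustrative ImageNet statistics, not taken from this change; Compose, ToTensor, and Normalize are the blocks introduced below):

    import numpy as np
    from mxnet import nd
    from mxnet.gluon.data.vision import transforms

    # Compose chains transform blocks; ToTensor converts HWC uint8 to
    # CHW float32 scaled to [0, 1], Normalize then applies (x - mean) / std.
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
    ])

    img = nd.array(np.random.uniform(0, 255, (300, 300, 3)), dtype='uint8')
    out = transform(img)  # shape (3, 300, 300), dtype float32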

-- 
To stop receiving notification emails like this one, please contact
jxie@apache.org.

[incubator-mxnet] 07/20: image to_tensor (#8691)

Posted by jx...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git

commit 2957a33e113258d3d783452a69fcd197d3d91295
Author: Yizhi Liu <ja...@gmail.com>
AuthorDate: Thu Nov 16 23:01:37 2017 -0800

    image to_tensor (#8691)
---
 src/operator/image/image_common.h     |  7 +--
 src/operator/image/image_random-inl.h | 92 +++++++++++++++++++++++++++--------
 src/operator/image/image_random.cc    | 21 ++++++--
 src/operator/mxnet_op.h               |  3 +-
 4 files changed, 95 insertions(+), 28 deletions(-)

diff --git a/src/operator/image/image_common.h b/src/operator/image/image_common.h
index 7cf3f96..3b6b8e3 100644
--- a/src/operator/image/image_common.h
+++ b/src/operator/image/image_common.h
@@ -81,8 +81,9 @@ static cv::Mat mat_convert(TBlob input, int hight, int weight, int channel) {
   }
   return m;
 }
-} // namespace op
-} // namespace mxnet
+}  // namespace op
+}  // namespace mxnet
 
 
-#endif // MXNET_OPERATOR_IMAGE_IMAGE_COMMON_H_
\ No newline at end of file
+#endif  // MXNET_OPERATOR_IMAGE_IMAGE_COMMON_H_
+
diff --git a/src/operator/image/image_random-inl.h b/src/operator/image/image_random-inl.h
index 8a3acf6..c50ecb7 100644
--- a/src/operator/image/image_random-inl.h
+++ b/src/operator/image/image_random-inl.h
@@ -25,15 +25,14 @@
 #ifndef MXNET_OPERATOR_IMAGE_IMAGE_RANDOM_INL_H_
 #define MXNET_OPERATOR_IMAGE_IMAGE_RANDOM_INL_H_
 
-#include <vector>
 #include <mxnet/base.h>
+#include <vector>
 #include <opencv2/opencv.hpp>
 #include <opencv2/core/mat.hpp>
 #include "../mxnet_op.h"
 #include "image_common.h"
 #include "../../operator/operator_common.h"
 
-
 namespace mxnet {
 namespace op {
 
@@ -47,13 +46,66 @@ static void RandomFlip(const nnvm::NodeAttrs &attrs,
                        const std::vector<OpReqType> &req,
                        const std::vector<TBlob> &outputs) {
 }
+
+inline bool ToTensorType(const nnvm::NodeAttrs& attrs,
+                         std::vector<int> *in_attrs,
+                         std::vector<int> *out_attrs) {
+  CHECK_EQ(in_attrs->size(), 1U);
+  CHECK_EQ(out_attrs->size(), 1U);
+  CHECK_EQ((*in_attrs)[0], mshadow::kUint8)
+    << "`to_tensor` only supports uint8 input";
+  TYPE_ASSIGN_CHECK(*out_attrs, 0, mshadow::kFloat32);
+  return (*in_attrs)[0] != -1;
+}
+
+inline bool ToTensorShape(const nnvm::NodeAttrs& attrs,
+                          std::vector<TShape> *in_attrs,
+                          std::vector<TShape> *out_attrs) {
+  CHECK_EQ(in_attrs->size(), 1U);
+  CHECK_EQ(out_attrs->size(), 1U);
+  TShape &shp = (*in_attrs)[0];
+  CHECK_EQ(shp.ndim(), 3U) << "`to_tensor` only supports 3 dimensions";
+  TShape ret(3);
+  ret[0] = shp[2];
+  ret[1] = shp[0];
+  ret[2] = shp[1];
+  SHAPE_ASSIGN_CHECK(*out_attrs, 0, ret);
+  return true;
+}
+
 template<typename xpu>
 static void ToTensor(const nnvm::NodeAttrs &attrs,
                      const OpContext &ctx,
                      const std::vector<TBlob> &inputs,
                      const std::vector<OpReqType> &req,
                      const std::vector<TBlob> &outputs) {
+  auto input = inputs[0];
+  auto output = outputs[0];
+
+  int height = input.shape_[0];
+  int weight = input.shape_[1];
+  int channel = input.shape_[2];
+
+  typedef float   DstDType;
+  typedef uint8_t SrcDType;
+
+  CHECK_EQ(req[0], kWriteTo)
+    << "`to_tensor` does not support inplace";
+
+  mshadow::Stream<xpu> *s = ctx.get_stream<xpu>();
+  MXNET_ASSIGN_REQ_SWITCH(req[0], Req, {
+    auto input_3d =  input.get<xpu, 3, SrcDType>(s);
+    auto output_3d = output.get<xpu, 3, DstDType>(s);
+    for (int h = 0; h < height; ++h) {
+      for (int w = 0; w < weight; ++w) {
+        for (int c = 0; c < channel; ++c) {
+          Assign(output_3d[c][h][w], Req, DstDType(input_3d[h][w][c] / 255.0));
+        }
+      }
+    }
+  });
 }
+
 template<typename xpu>
 static void Normalize(const nnvm::NodeAttrs &attrs,
                       const OpContext &ctx,
@@ -81,20 +133,20 @@ static void RandomBrightness(const nnvm::NodeAttrs &attrs,
   auto input = inputs[0];
   auto output = outputs[0];
   int channel = input.shape_[0];
-  int hight = input.shape_[1];
+  int height = input.shape_[1];
   int weight = input.shape_[2];
   Stream<xpu> *s = ctx.get_stream<xpu>();
   Random<xpu> *prnd = ctx.requested[kRandom].get_random<xpu, real_t>(s);
 
   const RandomBrightnessParam &param = nnvm::get<RandomBrightnessParam>(attrs.parsed);
-  float alpha_b = 1.0 + std::uniform_real_distribution<float>(-param.max_brightness, param.max_brightness)(prnd->GetRndEngine());
+  float alpha_b = 1.0 + std::uniform_real_distribution<float>(
+    -param.max_brightness, param.max_brightness)(prnd->GetRndEngine());
   MSHADOW_TYPE_SWITCH(outputs[0].type_flag_, DType, {
     MXNET_ASSIGN_REQ_SWITCH(req[0], Req, {
       mxnet_op::Kernel<mxnet_op::op_with_req<mshadow::op::mul, Req>, xpu>::Launch(
         s, inputs[0].Size(), outputs[0].dptr<DType>(), inputs[0].dptr<DType>(), DType(alpha_b));
     });
   });
-
 }
 
 struct RandomContrastParam : public dmlc::Parameter<RandomContrastParam> {
@@ -125,14 +177,15 @@ static void RandomContrast(const nnvm::NodeAttrs &attrs,
   auto input = inputs[0];
   auto output = outputs[0];
   int channel = input.shape_[0];
-  int hight = input.shape_[1];
+  int height = input.shape_[1];
   int weight = input.shape_[2];
   Stream<xpu> *s = ctx.get_stream<xpu>();
   Random<xpu> *prnd = ctx.requested[kRandom].get_random<xpu, real_t>(s);
 
 
   const RandomContrastParam &param = nnvm::get<RandomContrastParam>(attrs.parsed);
-  float alpha_c = 1.0 + std::uniform_real_distribution<float>(-param.max_contrast, param.max_contrast)(prnd->GetRndEngine());
+  float alpha_c = 1.0 + std::uniform_real_distribution<float>(
+    -param.max_contrast, param.max_contrast)(prnd->GetRndEngine());
 
   const float R2YF = 0.299f;
   const float G2YF = 0.587f;
@@ -143,22 +196,21 @@ static void RandomContrast(const nnvm::NodeAttrs &attrs,
     auto input_3d = input.get<xpu, 3, DType>(s);
     DType sum = (DType)0.0;
     for (int c = 0; c < channel; ++c) {
-      for (int h = 0; h < hight; ++h) {
+      for (int h = 0; h < height; ++h) {
         for (int w = 0; w < weight; ++w) {
           sum += input_3d[c][h][w] * coeffs0[c];
         }
       }
     }
-    float gray_mean = sum / (float)(hight * weight);
+    float gray_mean = sum / static_cast<float>(height * weight);
     float beta = (1 - alpha_c) * gray_mean;
 
     MXNET_ASSIGN_REQ_SWITCH(req[0], Req, {
       mxnet_op::Kernel<mxnet_op::op_with_req<mul_add, Req>, xpu>::Launch(
-        s, inputs[0].Size(), outputs[0].dptr<DType>(), inputs[0].dptr<DType>(), DType(alpha_c), DType(beta));
+        s, inputs[0].Size(), outputs[0].dptr<DType>(),
+        inputs[0].dptr<DType>(), DType(alpha_c), DType(beta));
     });
-
   });
-
 }
 
 struct RandomSaturationParam : public dmlc::Parameter<RandomSaturationParam> {
@@ -180,12 +232,13 @@ static void RandomSaturation(const nnvm::NodeAttrs &attrs,
   auto input = inputs[0];
   auto output = outputs[0];
   int channel = input.shape_[0];
-  int hight = input.shape_[1];
+  int height = input.shape_[1];
   int weight = input.shape_[2];
   Stream<xpu> *s = ctx.get_stream<xpu>();
   Random<xpu> *prnd = ctx.requested[kRandom].get_random<xpu, real_t>(s);
   const RandomSaturationParam &param = nnvm::get<RandomSaturationParam>(attrs.parsed);
-  float alpha_s = 1.0 + std::uniform_real_distribution<float>(-param.max_saturation, param.max_saturation)(prnd->GetRndEngine());
+  float alpha_s = 1.0 + std::uniform_real_distribution<float>(
+    -param.max_saturation, param.max_saturation)(prnd->GetRndEngine());
   float alpha_o = 1 - alpha_s;
   const float R2YF = 0.299f;
   const float G2YF = 0.587f;
@@ -202,20 +255,19 @@ static void RandomSaturation(const nnvm::NodeAttrs &attrs,
           Assign(output_3d, Req, input_3d)
           break;
         case 3:
-          for (int h = 0; h < hight; ++h) {
+          for (int h = 0; h < height; ++h) {
             for (int w = 0; w < weight; ++w) {
-              float gray = input_3d[0][h][w] * R2YF + input_3d[1][h][w] * G2YF + input_3d[2][h][w] * B2YF;
+              float gray =
+                input_3d[0][h][w] * R2YF + input_3d[1][h][w] * G2YF + input_3d[2][h][w] * B2YF;
               Assign(output_3d[0][h][w], Req, DType(gray * alpha_s + input_3d[0][h][w] * alpha_o))
             }
           }
           break;
         default:
           LOG(FATAL) << "not support channel" << channel;
-
       }
     });
   });
-
 }
 
 template<typename xpu>
@@ -245,7 +297,7 @@ static void RandomLighting(const nnvm::NodeAttrs &attrs,
 
 
 
-} // namespace op
-} // namespace mxnet
+}  // namespace op
+}  // namespace mxnet
 
 #endif  // MXNET_OPERATOR_IMAGE_IMAGE_RANDOM_INL_H_
diff --git a/src/operator/image/image_random.cc b/src/operator/image/image_random.cc
index 63f7904..26fa843 100644
--- a/src/operator/image/image_random.cc
+++ b/src/operator/image/image_random.cc
@@ -33,14 +33,27 @@
 namespace mxnet {
 namespace op {
 
+NNVM_REGISTER_OP(_image_to_tensor)
+.describe(R"code()code" ADD_FILELINE)
+.set_num_inputs(1)
+.set_num_outputs(1)
+.set_attr<FResourceRequest>("FResourceRequest", [](const NodeAttrs& attrs) {
+  return std::vector<ResourceRequest>{ResourceRequest::kRandom};
+})
+.set_attr<nnvm::FInferShape>("FInferShape", ToTensorShape)
+.set_attr<nnvm::FInferType>("FInferType", ToTensorType)
+.set_attr<FCompute>("FCompute<cpu>", ToTensor<cpu>)
+.set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseNone{ "_copy" })
+.add_argument("data", "NDArray-or-Symbol", "The input.");
+
 DMLC_REGISTER_PARAMETER(RandomBrightnessParam);
 NNVM_REGISTER_OP(_image_random_brightness)
 .describe(R"code()code" ADD_FILELINE)
 .set_num_inputs(1)
 .set_num_outputs(1)
 .set_attr_parser(ParamParser<RandomBrightnessParam>)
-.set_attr<FResourceRequest>("FResourceRequest", [](const NodeAttrs& attrs) { 
-  return std::vector<ResourceRequest>{ResourceRequest::kRandom}; 
+.set_attr<FResourceRequest>("FResourceRequest", [](const NodeAttrs& attrs) {
+  return std::vector<ResourceRequest>{ResourceRequest::kRandom};
 })
 .set_attr<nnvm::FInferShape>("FInferShape", ElemwiseShape<1, 1>)
 .set_attr<nnvm::FInferType>("FInferType", ElemwiseType<1, 1>)
@@ -81,5 +94,5 @@ NNVM_REGISTER_OP(_image_random_saturation)
 .add_argument("data", "NDArray-or-Symbol", "The input.")
 .add_arguments(RandomSaturationParam::__FIELDS__());
 
-}
-}
+}  // namespace op
+}  // namespace mxnet
diff --git a/src/operator/mxnet_op.h b/src/operator/mxnet_op.h
index 43f3a55..5a36954 100644
--- a/src/operator/mxnet_op.h
+++ b/src/operator/mxnet_op.h
@@ -368,7 +368,8 @@ struct op_with_req {
 
   /*! \brief input is tensor and two scalar value */
   template<typename DType>
-  MSHADOW_XINLINE static void Map(int i, DType *out, const DType *in, const DType value_1, const DType value_2) {
+  MSHADOW_XINLINE static void Map(int i, DType *out, const DType *in,
+                                  const DType value_1, const DType value_2) {
     KERNEL_ASSIGN(out[i], req, OP::Map(in[i], value_1, value_2));
   }
 

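For reference, the ToTensor kernel added above is equivalent to this NumPy sketch: transpose HWC uint8 input to CHW and scale to [0, 1] (the same expression is later used as ground truth in the unit tests):

    import numpy as np

    def to_tensor_ref(img):
        # img: (H, W, C) uint8 -> (C, H, W) float32 in [0, 1]
        return np.transpose(img.astype(np.float32) / 255.0, (2, 0, 1))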

[incubator-mxnet] 13/20: [Image] add random lighting (#8779)

Posted by jx...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git

commit 68fc799ff775c74a2f29513c09a9a708e15de79f
Author: Xingjian Shi <xs...@ust.hk>
AuthorDate: Wed Nov 22 15:53:37 2017 -0800

    [Image] add random lighting (#8779)
    
    * add random lighting
    
    * fix
---
 python/mxnet/gluon/data/vision/transforms.py    | 19 +++++
 src/operator/image/image_random-inl.h           | 95 +++++++++++++++++++++++++
 src/operator/image/image_random.cc              | 43 +++++++++--
 tests/python/unittest/test_gluon_data_vision.py | 40 +++++++++++
 4 files changed, 191 insertions(+), 6 deletions(-)

diff --git a/python/mxnet/gluon/data/vision/transforms.py b/python/mxnet/gluon/data/vision/transforms.py
index e1deef6..931d644 100644
--- a/python/mxnet/gluon/data/vision/transforms.py
+++ b/python/mxnet/gluon/data/vision/transforms.py
@@ -21,6 +21,7 @@ from .. import dataset
 from ...block import Block, HybridBlock
 from ...nn import Sequential, HybridSequential
 from .... import ndarray, initializer
+from ....base import _Null
 
 
 class Compose(Sequential):
@@ -151,3 +152,21 @@ class RandomColorJitter(HybridBlock):
 
     def hybrid_forward(self, F, x):
         return F.image.random_color_jitter(x, *self._args)
+
+
+class AdjustLighting(HybridBlock):
+    def __init__(self, alpha_rgb=_Null, eigval=_Null, eigvec=_Null):
+        super(AdjustLighting, self).__init__()
+        self._args = (alpha_rgb, eigval, eigvec)
+
+    def hybrid_forward(self, F, x):
+        return F.image.adjust_lighting(x, *self._args)
+
+
+class RandomLighting(HybridBlock):
+    def __init__(self, alpha_std=_Null, eigval=_Null, eigvec=_Null):
+        super(RandomLighting, self).__init__()
+        self._args = (alpha_std, eigval, eigvec)
+
+    def hybrid_forward(self, F, x):
+        return F.image.random_lighting(x, *self._args)
\ No newline at end of file
diff --git a/src/operator/image/image_random-inl.h b/src/operator/image/image_random-inl.h
index f823c8c..ebbf60a 100644
--- a/src/operator/image/image_random-inl.h
+++ b/src/operator/image/image_random-inl.h
@@ -26,6 +26,7 @@
 #define MXNET_OPERATOR_IMAGE_IMAGE_RANDOM_INL_H_
 
 #include <mxnet/base.h>
+#include <algorithm>
 #include <vector>
 #include <opencv2/opencv.hpp>
 #include <opencv2/core/mat.hpp>
@@ -290,11 +291,105 @@ static void RandomColorJitter(const nnvm::NodeAttrs &attrs,
                               const std::vector<TBlob> &outputs) {
 }
 
+struct AdjustLightingParam : public dmlc::Parameter<AdjustLightingParam> {
+  nnvm::Tuple<float> alpha_rgb;
+  nnvm::Tuple<float> eigval;
+  nnvm::Tuple<float> eigvec;
+  DMLC_DECLARE_PARAMETER(AdjustLightingParam) {
+    DMLC_DECLARE_FIELD(alpha_rgb)
+    .set_default({0, 0, 0})
+    .describe("The lighting alphas for the R, G, B channels.");
+    DMLC_DECLARE_FIELD(eigval)
+    .describe("Eigen value.")
+    .set_default({ 55.46, 4.794, 1.148 });
+    DMLC_DECLARE_FIELD(eigvec)
+    .describe("Eigen vector.")
+    .set_default({ -0.5675,  0.7192,  0.4009,
+                   -0.5808, -0.0045, -0.8140,
+                   -0.5808, -0.0045, -0.8140 });
+  }
+};
+
+struct RandomLightingParam : public dmlc::Parameter<RandomLightingParam> {
+  float alpha_std;
+  nnvm::Tuple<float> eigval;
+  nnvm::Tuple<float> eigvec;
+  DMLC_DECLARE_PARAMETER(RandomLightingParam) {
+    DMLC_DECLARE_FIELD(alpha_std)
+    .set_default(0.05)
+    .describe("Level of the lighting noise.");
+    DMLC_DECLARE_FIELD(eigval)
+    .describe("Eigen value.")
+    .set_default({ 55.46, 4.794, 1.148 });
+    DMLC_DECLARE_FIELD(eigvec)
+    .describe("Eigen vector.")
+    .set_default({ -0.5675,  0.7192,  0.4009,
+                   -0.5808, -0.0045, -0.8140,
+                   -0.5808, -0.0045, -0.8140 });
+  }
+};
+
+void AdjustLightingImpl(uint8_t* dst, const uint8_t* src,
+                        float alpha_r, float alpha_g, float alpha_b,
+                        const nnvm::Tuple<float> eigval, const nnvm::Tuple<float> eigvec,
+                        int H, int W) {
+    alpha_r *= eigval[0];
+    alpha_g *= eigval[1];
+    alpha_b *= eigval[2];
+    float pca_r = alpha_r * eigvec[0] + alpha_g * eigvec[1] + alpha_b * eigvec[2];
+    float pca_g = alpha_r * eigvec[3] + alpha_g * eigvec[4] + alpha_b * eigvec[5];
+    float pca_b = alpha_r * eigvec[6] + alpha_g * eigvec[7] + alpha_b * eigvec[8];
+    for (int i = 0; i < H * W; i++) {
+        int base_ind = 3 * i;
+        float in_r = static_cast<float>(src[base_ind]);
+        float in_g = static_cast<float>(src[base_ind + 1]);
+        float in_b = static_cast<float>(src[base_ind + 2]);
+        dst[base_ind] = std::min(255, std::max(0, static_cast<int>(in_r + pca_r)));
+        dst[base_ind + 1] = std::min(255, std::max(0, static_cast<int>(in_g + pca_g)));
+        dst[base_ind + 2] = std::min(255, std::max(0, static_cast<int>(in_b + pca_b)));
+    }
+}
+
+static void AdjustLighting(const nnvm::NodeAttrs &attrs,
+                           const OpContext &ctx,
+                           const std::vector<TBlob> &inputs,
+                           const std::vector<OpReqType> &req,
+                           const std::vector<TBlob> &outputs) {
+    using namespace mshadow;
+    const AdjustLightingParam &param = nnvm::get<AdjustLightingParam>(attrs.parsed);
+    CHECK_EQ(param.eigval.ndim(), 3) << "There should be 3 numbers in the eigval.";
+    CHECK_EQ(param.eigvec.ndim(), 9) << "There should be 9 numbers in the eigvec.";
+    CHECK_EQ(inputs[0].ndim(), 3);
+    CHECK_EQ(inputs[0].size(2), 3);
+    int H = inputs[0].size(0);
+    int W = inputs[0].size(1);
+    AdjustLightingImpl(outputs[0].dptr<uint8_t>(), inputs[0].dptr<uint8_t>(),
+                       param.alpha_rgb[0], param.alpha_rgb[1], param.alpha_rgb[2],
+                       param.eigval, param.eigvec, H, W);
+}
+
 static void RandomLighting(const nnvm::NodeAttrs &attrs,
                            const OpContext &ctx,
                            const std::vector<TBlob> &inputs,
                            const std::vector<OpReqType> &req,
                            const std::vector<TBlob> &outputs) {
+    using namespace mshadow;
+    const RandomLightingParam &param = nnvm::get<RandomLightingParam>(attrs.parsed);
+    CHECK_EQ(param.eigval.ndim(), 3) << "There should be 3 numbers in the eigval.";
+    CHECK_EQ(param.eigvec.ndim(), 9) << "There should be 9 numbers in the eigvec.";
+    CHECK_EQ(inputs[0].ndim(), 3);
+    CHECK_EQ(inputs[0].size(2), 3);
+    int H = inputs[0].size(0);
+    int W = inputs[0].size(1);
+    Stream<cpu> *s = ctx.get_stream<cpu>();
+    Random<cpu> *prnd = ctx.requested[0].get_random<cpu, real_t>(s);
+    std::normal_distribution<float> dist(0, param.alpha_std);
+    float alpha_r = dist(prnd->GetRndEngine());
+    float alpha_g = dist(prnd->GetRndEngine());
+    float alpha_b = dist(prnd->GetRndEngine());
+    AdjustLightingImpl(outputs[0].dptr<uint8_t>(), inputs[0].dptr<uint8_t>(),
+                       alpha_r, alpha_g, alpha_b,
+                       param.eigval, param.eigvec, H, W);
 }
 
 
diff --git a/src/operator/image/image_random.cc b/src/operator/image/image_random.cc
index 7ff7328..5b47f50 100644
--- a/src/operator/image/image_random.cc
+++ b/src/operator/image/image_random.cc
@@ -35,9 +35,6 @@ NNVM_REGISTER_OP(_image_to_tensor)
 .describe(R"code()code" ADD_FILELINE)
 .set_num_inputs(1)
 .set_num_outputs(1)
-.set_attr<FResourceRequest>("FResourceRequest", [](const NodeAttrs& attrs) {
-  return std::vector<ResourceRequest>{ResourceRequest::kRandom};
-})
 .set_attr<nnvm::FInferShape>("FInferShape", ToTensorShape)
 .set_attr<nnvm::FInferType>("FInferType", ToTensorType)
 .set_attr<FCompute>("FCompute<cpu>", ToTensor)
@@ -51,9 +48,6 @@ NNVM_REGISTER_OP(_image_normalize)
 .set_num_inputs(1)
 .set_num_outputs(1)
 .set_attr_parser(ParamParser<NormalizeParam>)
-.set_attr<FResourceRequest>("FResourceRequest", [](const NodeAttrs& attrs) {
-  return std::vector<ResourceRequest>{ResourceRequest::kRandom};
-})
 .set_attr<nnvm::FInferShape>("FInferShape", ElemwiseShape<1, 1>)
 .set_attr<nnvm::FInferType>("FInferType", ElemwiseType<1, 1>)
 .set_attr<nnvm::FInplaceOption>("FInplaceOption",
@@ -126,5 +120,42 @@ NNVM_REGISTER_OP(_image_random_saturation)
 .add_argument("data", "NDArray-or-Symbol", "The input.")
 .add_arguments(RandomSaturationParam::__FIELDS__());
 
+DMLC_REGISTER_PARAMETER(AdjustLightingParam);
+NNVM_REGISTER_OP(_image_adjust_lighting)
+.describe(R"code(Adjust the lighting level of the input. Follow the AlexNet style.)code" ADD_FILELINE)
+.set_num_inputs(1)
+.set_num_outputs(1)
+.set_attr_parser(ParamParser<AdjustLightingParam>)
+.set_attr<nnvm::FInferShape>("FInferShape", ElemwiseShape<1, 1>)
+.set_attr<nnvm::FInferType>("FInferType", ElemwiseType<1, 1>)
+.set_attr<nnvm::FInplaceOption>("FInplaceOption",
+  [](const NodeAttrs& attrs){
+    return std::vector<std::pair<int, int> >{{0, 0}};
+  })
+.set_attr<FCompute>("FCompute<cpu>", AdjustLighting)
+.set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseNone{ "_copy" })
+.add_argument("data", "NDArray-or-Symbol", "The input.")
+.add_arguments(AdjustLightingParam::__FIELDS__());
+
+DMLC_REGISTER_PARAMETER(RandomLightingParam);
+NNVM_REGISTER_OP(_image_random_lighting)
+.describe(R"code(Randomly add PCA noise. Follow the AlexNet style.)code" ADD_FILELINE)
+.set_num_inputs(1)
+.set_num_outputs(1)
+.set_attr_parser(ParamParser<RandomLightingParam>)
+.set_attr<FResourceRequest>("FResourceRequest", [](const NodeAttrs& attrs) {
+  return std::vector<ResourceRequest>{ResourceRequest::kRandom};
+})
+.set_attr<nnvm::FInferShape>("FInferShape", ElemwiseShape<1, 1>)
+.set_attr<nnvm::FInferType>("FInferType", ElemwiseType<1, 1>)
+.set_attr<nnvm::FInplaceOption>("FInplaceOption",
+  [](const NodeAttrs& attrs){
+    return std::vector<std::pair<int, int> >{{0, 0}};
+  })
+.set_attr<FCompute>("FCompute<cpu>", RandomLighting)
+.set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseNone{ "_copy" })
+.add_argument("data", "NDArray-or-Symbol", "The input.")
+.add_arguments(RandomLightingParam::__FIELDS__());
+
 }  // namespace op
 }  // namespace mxnet
diff --git a/tests/python/unittest/test_gluon_data_vision.py b/tests/python/unittest/test_gluon_data_vision.py
new file mode 100644
index 0000000..0c9e5c1
--- /dev/null
+++ b/tests/python/unittest/test_gluon_data_vision.py
@@ -0,0 +1,40 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+from __future__ import print_function
+import mxnet as mx
+import mxnet.ndarray as nd
+import numpy as np
+from mxnet import gluon
+from mxnet.gluon.data.vision.transforms import AdjustLighting
+from mxnet.test_utils import assert_almost_equal
+
+def test_adjust_lighting():
+    data_in = np.random.uniform(0, 255, (300, 300, 3)).astype(dtype=np.uint8)
+    alpha_rgb = [0.05, 0.06, 0.07]
+    eigval = np.array([55.46, 4.794, 1.148])
+    eigvec = np.array([[-0.5675, 0.7192, 0.4009],
+                       [-0.5808, -0.0045, -0.8140],
+                       [-0.5808, -0.0045, -0.8140]])
+    f = AdjustLighting(alpha_rgb=alpha_rgb, eigval=eigval.ravel().tolist(), eigvec=eigvec.ravel().tolist())
+    out_nd = f(nd.array(data_in, dtype=np.uint8))
+    out_gt = np.clip(data_in.astype(np.float32)
+                     + np.dot(eigvec * alpha_rgb, eigval.reshape((3, 1))).reshape((1, 1, 3)), 0, 255).astype(np.uint8)
+    assert_almost_equal(out_nd.asnumpy(), out_gt)
+
+if __name__ == '__main__':
+    import nose
+    nose.runmodule()

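For reference, AdjustLightingImpl applies AlexNet-style PCA lighting noise: each RGB pixel is shifted by eigvec . (alpha * eigval) and clamped to [0, 255]. A NumPy sketch of the same math (matching the ground truth in the test above):

    import numpy as np

    def adjust_lighting_ref(img, alpha_rgb, eigval, eigvec):
        # img: (H, W, 3) uint8; alpha_rgb, eigval: (3,); eigvec: (3, 3)
        shift = eigvec.dot(np.asarray(alpha_rgb) * eigval)       # per-channel offset
        out = img.astype(np.float32) + shift.reshape((1, 1, 3))  # broadcast over pixels
        return np.clip(out, 0, 255).astype(np.uint8)

RandomLighting reuses the same kernel, drawing each alpha from a normal distribution with standard deviation alpha_std.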

[incubator-mxnet] 09/20: fix (#8736)

Posted by jx...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git

commit 434b0161c4abf1795e97b8bc80c03e335a6f0174
Author: Eric Junyuan Xie <pi...@users.noreply.github.com>
AuthorDate: Mon Nov 20 22:06:32 2017 -0800

    fix (#8736)
---
 python/mxnet/gluon/data/dataset.py           | 2 +-
 python/mxnet/gluon/data/vision/datasets.py   | 6 +++---
 python/mxnet/gluon/data/vision/transforms.py | 2 +-
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/python/mxnet/gluon/data/dataset.py b/python/mxnet/gluon/data/dataset.py
index 35d4c5c..f7ab395 100644
--- a/python/mxnet/gluon/data/dataset.py
+++ b/python/mxnet/gluon/data/dataset.py
@@ -18,7 +18,7 @@
 # coding: utf-8
 # pylint: disable=
 """Dataset container."""
-__all__ = ['Dataset', 'SimpleDataset', 'ArrayDataset', 'LabeledDataset',
+__all__ = ['Dataset', 'SimpleDataset', 'ArrayDataset',
            'RecordFileDataset']
 
 import os
diff --git a/python/mxnet/gluon/data/vision/datasets.py b/python/mxnet/gluon/data/vision/datasets.py
index cb40af5..4ddc2e3 100644
--- a/python/mxnet/gluon/data/vision/datasets.py
+++ b/python/mxnet/gluon/data/vision/datasets.py
@@ -28,9 +28,9 @@ import struct
 import warnings
 import numpy as np
 
-from . import dataset
-from ..utils import download, check_sha1
-from ... import nd, image, recordio
+from .. import dataset
+from ...utils import download, check_sha1
+from .... import nd, image, recordio
 
 apache_repo_url = 'https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/'
 
diff --git a/python/mxnet/gluon/data/vision/transforms.py b/python/mxnet/gluon/data/vision/transforms.py
index fa7c0f2..e1deef6 100644
--- a/python/mxnet/gluon/data/vision/transforms.py
+++ b/python/mxnet/gluon/data/vision/transforms.py
@@ -58,7 +58,7 @@ class ToTensor(HybridBlock):
         super(ToTensor, self).__init__()
 
     def hybrid_forward(self, F, x):
-        return F.cast(x, 'float32').transpose((2, 0, 1))
+        return F.image.to_tensor(x)
 
 
 class Normalize(HybridBlock):

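Note the behavioral change in ToTensor here: the old hybrid_forward only cast and transposed, while the _image_to_tensor kernel (#8691) also scales by 1/255. In NumPy terms (illustrative sketch only):

    import numpy as np

    img = np.random.uniform(0, 255, (2, 2, 3)).astype(np.uint8)
    old = np.transpose(img.astype(np.float32), (2, 0, 1))          # cast + transpose only
    new = np.transpose(img.astype(np.float32) / 255.0, (2, 0, 1))  # also scaled to [0, 1]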

[incubator-mxnet] 19/20: [Vision] add test cases for flip, normalize, to_tensor (#8919)

Posted by jx...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git

commit 62ffb9271f2237728a4dd75fe01f31782ed13e96
Author: Yizhi Liu <ja...@gmail.com>
AuthorDate: Tue Dec 12 13:28:41 2017 -0800

    [Vision] add test cases for flip, normalize, to_tensor (#8919)
    
    * [vision] ut for to_tensor, normalize, flip
    
    * [vision] fix flip
    
    * [vision] flip name
    
    * [vision] test non-random flip op
    
    * remove transform.FlipXXXX
---
 python/mxnet/gluon/data/vision/transforms.py    | 16 +++---
 src/operator/image/image_random-inl.h           | 68 +++++++++++++++++--------
 src/operator/image/image_random.cc              | 16 ++++--
 tests/python/unittest/test_gluon_data_vision.py | 41 ++++++++++-----
 4 files changed, 96 insertions(+), 45 deletions(-)

diff --git a/python/mxnet/gluon/data/vision/transforms.py b/python/mxnet/gluon/data/vision/transforms.py
index 8daf88e..38eb690 100644
--- a/python/mxnet/gluon/data/vision/transforms.py
+++ b/python/mxnet/gluon/data/vision/transforms.py
@@ -184,26 +184,26 @@ class Resize(Block):
         return image.imresize(x, *self._args)
 
 
-class RandomHorizontalFlip(HybridBlock):
-    """Randomly flip the input image horizontally with a probability
+class RandomFlipLeftRight(HybridBlock):
+    """Randomly flip the input image left to right with a probability
     of 0.5.
     """
     def __init__(self):
-        super(RandomHorizontalFlip, self).__init__()
+        super(RandomFlipLeftRight, self).__init__()
 
     def hybrid_forward(self, F, x):
-        return F.image.random_horizontal_flip(x)
+        return F.image.random_flip_left_right(x)
 
 
-class RandomVerticalFlip(HybridBlock):
-    """Randomly flip the input image vertically with a probability
+class RandomFlipTopBottom(HybridBlock):
+    """Randomly flip the input image top to bottom with a probability
     of 0.5.
     """
     def __init__(self):
-        super(RandomVerticalFlip, self).__init__()
+        super(RandomFlipTopBottom, self).__init__()
 
     def hybrid_forward(self, F, x):
-        return F.image.random_vertical_flip(x)
+        return F.image.random_flip_top_bottom(x)
 
 
 class RandomBrightness(HybridBlock):
diff --git a/src/operator/image/image_random-inl.h b/src/operator/image/image_random-inl.h
index cbc7f40..ec96149 100644
--- a/src/operator/image/image_random-inl.h
+++ b/src/operator/image/image_random-inl.h
@@ -85,14 +85,6 @@ void ToTensor(const nnvm::NodeAttrs &attrs,
   });
 }
 
-inline bool TensorShape(const nnvm::NodeAttrs& attrs,
-                       std::vector<TShape> *in_attrs,
-                       std::vector<TShape> *out_attrs) {
-  TShape& dshape = (*in_attrs)[0];
-  SHAPE_ASSIGN_CHECK(*out_attrs, 0, dshape);
-  return true;
-}
-
 struct NormalizeParam : public dmlc::Parameter<NormalizeParam> {
   nnvm::Tuple<float> mean;
   nnvm::Tuple<float> std;
@@ -179,16 +171,16 @@ inline bool ImageShape(const nnvm::NodeAttrs& attrs,
   return true;
 }
 
-template<typename DType>
-void FlipImpl(const TShape &shape, DType *src, DType *dst, int axis) {
+template<typename DType, int axis>
+void FlipImpl(const TShape &shape, DType *src, DType *dst) {
   int head = 1, mid = shape[axis], tail = 1;
   for (int i = 0; i < axis; ++i) head *= shape[i];
   for (int i = axis+1; i < shape.ndim(); ++i) tail *= shape[i];
 
   for (int i = 0; i < head; ++i) {
-    for (int j = 0; j < (mid >>2); ++j) {
-      int idx1 = (i*mid + j)*tail;
-      int idx2 = idx1 + (mid - (j<<2))*tail;
+    for (int j = 0; j < (mid >> 1); ++j) {
+      int idx1 = (i*mid + j) * tail;
+      int idx2 = idx1 + (mid-(j << 1)-1) * tail;
       for (int k = 0; k < tail; ++k, ++idx1, ++idx2) {
         DType tmp = src[idx1];
         dst[idx1] = src[idx2];
@@ -198,7 +190,31 @@ void FlipImpl(const TShape &shape, DType *src, DType *dst, int axis) {
   }
 }
 
-void RandomHorizontalFlip(
+void FlipLeftRight(const nnvm::NodeAttrs &attrs,
+                   const OpContext &ctx,
+                   const std::vector<TBlob> &inputs,
+                   const std::vector<OpReqType> &req,
+                   const std::vector<TBlob> &outputs) {
+  using namespace mshadow;
+  MSHADOW_TYPE_SWITCH(outputs[0].type_flag_, DType, {
+    FlipImpl<DType, 1>(inputs[0].shape_, inputs[0].dptr<DType>(),
+                       outputs[0].dptr<DType>());
+  });
+}
+
+void FlipTopBottom(const nnvm::NodeAttrs &attrs,
+                   const OpContext &ctx,
+                   const std::vector<TBlob> &inputs,
+                   const std::vector<OpReqType> &req,
+                   const std::vector<TBlob> &outputs) {
+  using namespace mshadow;
+  MSHADOW_TYPE_SWITCH(outputs[0].type_flag_, DType, {
+    FlipImpl<DType, 0>(inputs[0].shape_, inputs[0].dptr<DType>(),
+                       outputs[0].dptr<DType>());
+  });
+}
+
+void RandomFlipLeftRight(
     const nnvm::NodeAttrs &attrs,
     const OpContext &ctx,
     const std::vector<TBlob> &inputs,
@@ -207,14 +223,19 @@ void RandomHorizontalFlip(
   using namespace mshadow;
   Stream<cpu> *s = ctx.get_stream<cpu>();
   Random<cpu> *prnd = ctx.requested[0].get_random<cpu, float>(s);
-  if (std::bernoulli_distribution()(prnd->GetRndEngine())) return;
   MSHADOW_TYPE_SWITCH(outputs[0].type_flag_, DType, {
-    FlipImpl(inputs[0].shape_, inputs[0].dptr<DType>(),
-             outputs[0].dptr<DType>(), 1);
+    if (std::bernoulli_distribution()(prnd->GetRndEngine())) {
+      if (outputs[0].dptr_ != inputs[0].dptr_) {
+        std::memcpy(outputs[0].dptr_, inputs[0].dptr_, inputs[0].Size() * sizeof(DType));
+      }
+    } else {
+      FlipImpl<DType, 1>(inputs[0].shape_, inputs[0].dptr<DType>(),
+                         outputs[0].dptr<DType>());
+    }
   });
 }
 
-void RandomVerticalFlip(
+void RandomFlipTopBottom(
     const nnvm::NodeAttrs &attrs,
     const OpContext &ctx,
     const std::vector<TBlob> &inputs,
@@ -223,10 +244,15 @@ void RandomVerticalFlip(
   using namespace mshadow;
   Stream<cpu> *s = ctx.get_stream<cpu>();
   Random<cpu> *prnd = ctx.requested[0].get_random<cpu, float>(s);
-  if (std::bernoulli_distribution()(prnd->GetRndEngine())) return;
   MSHADOW_TYPE_SWITCH(outputs[0].type_flag_, DType, {
-    FlipImpl(inputs[0].shape_, inputs[0].dptr<DType>(),
-             outputs[0].dptr<DType>(), 0);
+    if (std::bernoulli_distribution()(prnd->GetRndEngine())) {
+      if (outputs[0].dptr_ != inputs[0].dptr_) {
+        std::memcpy(outputs[0].dptr_, inputs[0].dptr_, inputs[0].Size() * sizeof(DType));
+      }
+    } else {
+      FlipImpl<DType, 0>(inputs[0].shape_, inputs[0].dptr<DType>(),
+                         outputs[0].dptr<DType>());
+    }
   });
 }
 
diff --git a/src/operator/image/image_random.cc b/src/operator/image/image_random.cc
index 481dfce..26f520b 100644
--- a/src/operator/image/image_random.cc
+++ b/src/operator/image/image_random.cc
@@ -48,7 +48,6 @@ NNVM_REGISTER_OP(_image_to_tensor)
 .set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseNone{ "_copy" })
 .add_argument("data", "NDArray-or-Symbol", "The input.");
 
-
 NNVM_REGISTER_OP(_image_normalize)
 .describe(R"code()code" ADD_FILELINE)
 .set_num_inputs(1)
@@ -65,11 +64,21 @@ NNVM_REGISTER_OP(_image_normalize)
 .add_argument("data", "NDArray-or-Symbol", "The input.")
 .add_arguments(NormalizeParam::__FIELDS__());
 
+MXNET_REGISTER_IMAGE_AUG_OP(_image_flip_left_right)
+.describe(R"code()code" ADD_FILELINE)
+.set_attr<FCompute>("FCompute<cpu>", FlipLeftRight);
+
+MXNET_REGISTER_IMAGE_RND_AUG_OP(_image_random_flip_left_right)
+.describe(R"code()code" ADD_FILELINE)
+.set_attr<FCompute>("FCompute<cpu>", RandomFlipLeftRight);
 
-MXNET_REGISTER_IMAGE_RND_AUG_OP(_image_random_horizontal_flip)
+MXNET_REGISTER_IMAGE_AUG_OP(_image_flip_top_bottom)
 .describe(R"code()code" ADD_FILELINE)
-.set_attr<FCompute>("FCompute<cpu>", RandomHorizontalFlip);
+.set_attr<FCompute>("FCompute<cpu>", FlipTopBottom);
 
+MXNET_REGISTER_IMAGE_RND_AUG_OP(_image_random_flip_top_bottom)
+.describe(R"code()code" ADD_FILELINE)
+.set_attr<FCompute>("FCompute<cpu>", RandomFlipTopBottom);
 
 MXNET_REGISTER_IMAGE_RND_AUG_OP(_image_random_brightness)
 .describe(R"code()code" ADD_FILELINE)
@@ -77,7 +86,6 @@ MXNET_REGISTER_IMAGE_RND_AUG_OP(_image_random_brightness)
 .set_attr<FCompute>("FCompute<cpu>", RandomBrightness)
 .add_arguments(RandomEnhanceParam::__FIELDS__());
 
-
 MXNET_REGISTER_IMAGE_RND_AUG_OP(_image_random_contrast)
 .describe(R"code()code" ADD_FILELINE)
 .set_attr_parser(ParamParser<RandomEnhanceParam>)
diff --git a/tests/python/unittest/test_gluon_data_vision.py b/tests/python/unittest/test_gluon_data_vision.py
index 0c9e5c1..5e9ff87 100644
--- a/tests/python/unittest/test_gluon_data_vision.py
+++ b/tests/python/unittest/test_gluon_data_vision.py
@@ -18,22 +18,39 @@ from __future__ import print_function
 import mxnet as mx
 import mxnet.ndarray as nd
 import numpy as np
+from PIL import Image
 from mxnet import gluon
-from mxnet.gluon.data.vision.transforms import AdjustLighting
+from mxnet.gluon.data.vision import transforms
 from mxnet.test_utils import assert_almost_equal
+from mxnet.test_utils import almost_equal
 
-def test_adjust_lighting():
+def test_to_tensor():
     data_in = np.random.uniform(0, 255, (300, 300, 3)).astype(dtype=np.uint8)
-    alpha_rgb = [0.05, 0.06, 0.07]
-    eigval = np.array([55.46, 4.794, 1.148])
-    eigvec = np.array([[-0.5675, 0.7192, 0.4009],
-                       [-0.5808, -0.0045, -0.8140],
-                       [-0.5808, -0.0045, -0.8140]])
-    f = AdjustLighting(alpha_rgb=alpha_rgb, eigval=eigval.ravel().tolist(), eigvec=eigvec.ravel().tolist())
-    out_nd = f(nd.array(data_in, dtype=np.uint8))
-    out_gt = np.clip(data_in.astype(np.float32)
-                     + np.dot(eigvec * alpha_rgb, eigval.reshape((3, 1))).reshape((1, 1, 3)), 0, 255).astype(np.uint8)
-    assert_almost_equal(out_nd.asnumpy(), out_gt)
+    out_nd = transforms.ToTensor()(nd.array(data_in, dtype='uint8'))
+    assert_almost_equal(out_nd.asnumpy(), np.transpose(
+        data_in.astype(dtype=np.float32) / 255.0, (2, 0, 1)))
+
+def test_normalize():
+    data_in = np.random.uniform(0, 255, (300, 300, 3)).astype(dtype=np.uint8)
+    data_in = transforms.ToTensor()(nd.array(data_in, dtype='uint8'))
+    out_nd = transforms.Normalize(mean=(0, 1, 2), std=(3, 2, 1))(data_in)
+    data_expected = data_in.asnumpy()
+    data_expected[:][:][0] = data_expected[:][:][0] / 3.0
+    data_expected[:][:][1] = (data_expected[:][:][1] - 1.0) / 2.0
+    data_expected[:][:][2] = data_expected[:][:][2] - 2.0
+    assert_almost_equal(data_expected, out_nd.asnumpy())
+
+def test_flip_left_right():
+    data_in = np.random.uniform(0, 255, (300, 300, 3)).astype(dtype=np.uint8)
+    pil_img = Image.fromarray(data_in).transpose(Image.FLIP_LEFT_RIGHT)
+    data_trans = nd.image.flip_left_right(nd.array(data_in, dtype='uint8'))
+    assert_almost_equal(np.array(pil_img), data_trans.asnumpy())
+
+def test_flip_top_bottom():
+    data_in = np.random.uniform(0, 255, (300, 300, 3)).astype(dtype=np.uint8)
+    pil_img = Image.fromarray(data_in).transpose(Image.FLIP_TOP_BOTTOM)
+    data_trans = nd.image.flip_top_bottom(nd.array(data_in, dtype='uint8'))
+    assert_almost_equal(np.array(pil_img), data_trans.asnumpy())
 
 if __name__ == '__main__':
     import nose

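For reference, the corrected FlipImpl swaps element j with its mirror mid-1-j along the flipped axis: idx2 = idx1 + (mid - 2*j - 1) * tail is exactly the mirror of idx1, and iterating j < (mid >> 1) visits each pair once (the previous >> 2 / << 2 arithmetic only covered a quarter of the axis and pointed past the mirror). A NumPy sketch of the same index math:

    import numpy as np

    x = np.arange(24).reshape(2, 3, 4)
    axis, mid = 1, 3
    y = x.copy()
    for j in range(mid >> 1):        # visit each mirror pair once
        y[:, j] = x[:, mid - 1 - j]
        y[:, mid - 1 - j] = x[:, j]
    assert (y == np.flip(x, axis=axis)).all()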

[incubator-mxnet] 11/20: Revert "add test script"

Posted by jx...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git

commit b6698b8bac5ecc251f9e635bd2de239b438f4a06
Author: Xingjian Shi <xs...@ust.hk>
AuthorDate: Tue Nov 21 11:08:23 2017 -0800

    Revert "add test script"
    
    This reverts commit 23f68272e305103ad87d089e700ef715b13067c0.
---
 test_new_image_loader.py | 34 ----------------------------------
 1 file changed, 34 deletions(-)

diff --git a/test_new_image_loader.py b/test_new_image_loader.py
deleted file mode 100644
index 296869e..0000000
--- a/test_new_image_loader.py
+++ /dev/null
@@ -1,34 +0,0 @@
-import os
-os.environ['MXNET_CPU_WORKER_NTHREADS'] = '1'
-os.environ['OMP_NUM_THREADS'] = '1'
-import time
-import numpy as np
-import multiprocessing as mp
-import mxnet as mx
-from mxnet import gluon as gl
-from mxnet.gluon.data.vision import transforms
-
-if __name__ == '__main__':
-	M = 24
-	BS = 100
-
-	dataset = gl.data.vision.ImageFolderDataset('../256_ObjectCategories')
-	transform = transforms.Compose([transforms.ToTensor(),
-									transforms.RandomBrightness(1.0),
-									transforms.RandomContrast(1.0),
-									transforms.RandomSaturation(1.0),
-									transforms.Normalize([0, 0, 0], [1, 1, 1])])
-	dataset = dataset.transform_first(lambda x: transform(mx.image.center_crop(x, (224, 224))[0]))
-	data_loader = gl.data.DataLoader(dataset, BS, shuffle=True, num_workers=M)
-
-	N = len(dataset)
-
-	iterator = iter(data_loader)
-
-	tic = time.time()
-
-	for data, label in iterator:
-		data.wait_to_read()
-		print(data.shape)
-
-	print(N/(time.time() - tic))


[incubator-mxnet] 16/20: Vision (#8856)

Posted by jx...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git

commit fb199fc246153df0a54e3e94bae7b4ca330f33fa
Author: Eric Junyuan Xie <pi...@users.noreply.github.com>
AuthorDate: Tue Nov 28 17:14:26 2017 -0800

    Vision (#8856)
    
    * refactor
    
    * fix
    
    * fix
---
 src/operator/image/image_random-inl.h | 756 ++++++++++++++++++----------------
 src/operator/image/image_random.cc    |  84 ++--
 2 files changed, 455 insertions(+), 385 deletions(-)

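The refactor below folds the color ops into shared Adjust*Impl helpers with a saturate_cast that clamps uint8 results to [0, 255]. In NumPy terms, the brightness and contrast kernels it introduces reduce to the following (a sketch for uint8 images, grounded in AdjustBrightnessImpl and AdjustContrastImpl below; function names are illustrative):

    import numpy as np

    GRAY_COEF = np.array([0.299, 0.587, 0.114], dtype=np.float32)

    def adjust_brightness_ref(img, alpha):
        # out = saturate_cast(in * alpha)
        return np.clip(img.astype(np.float32) * alpha, 0, 255).astype(np.uint8)

    def adjust_contrast_ref(img, alpha):
        # blend toward the mean gray level: out = in * alpha + (1 - alpha) * gray_mean
        gray_mean = (img.astype(np.float32) * GRAY_COEF).sum(axis=-1).mean()
        beta = (1.0 - alpha) * gray_mean
        return np.clip(img.astype(np.float32) * alpha + beta, 0, 255).astype(np.uint8)
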
diff --git a/src/operator/image/image_random-inl.h b/src/operator/image/image_random-inl.h
index 3bee843..9d10a30 100644
--- a/src/operator/image/image_random-inl.h
+++ b/src/operator/image/image_random-inl.h
@@ -38,18 +38,19 @@
 
 namespace mxnet {
 namespace op {
+namespace image {
 
-inline bool CheckIsImage(const TBlob &image) {
-  CHECK_EQ(image.type_flag_, mshadow::kUint8) << "input type is not an image.";
-  CHECK_EQ(image.ndim(), 3) << "input dimension is not 3.";
-  CHECK(image.shape_[2] == 1 || image.shape_[2] == 3) << "image channel should be 1 or 3.";
-}
-
-static void RandomFlip(const nnvm::NodeAttrs &attrs,
-                       const OpContext &ctx,
-                       const std::vector<TBlob> &inputs,
-                       const std::vector<OpReqType> &req,
-                       const std::vector<TBlob> &outputs) {
+inline bool ToTensorShape(const nnvm::NodeAttrs& attrs,
+                          std::vector<TShape> *in_attrs,
+                          std::vector<TShape> *out_attrs) {
+  CHECK_EQ(in_attrs->size(), 1U);
+  CHECK_EQ(out_attrs->size(), 1U);
+  TShape &shp = (*in_attrs)[0];
+  if (!shp.ndim()) return false;
+  CHECK_EQ(shp.ndim(), 3)
+      << "Input image must have shape (height, width, channels), but got " << shp;
+  SHAPE_ASSIGN_CHECK(*out_attrs, 0, TShape({shp[2], shp[0], shp[1]}));
+  return true;
 }
 
 inline bool ToTensorType(const nnvm::NodeAttrs& attrs,
@@ -57,47 +58,39 @@ inline bool ToTensorType(const nnvm::NodeAttrs& attrs,
                          std::vector<int> *out_attrs) {
   CHECK_EQ(in_attrs->size(), 1U);
   CHECK_EQ(out_attrs->size(), 1U);
-  CHECK_EQ((*in_attrs)[0], mshadow::kUint8)
-    << "`to_tensor` only supports uint8 input";
   TYPE_ASSIGN_CHECK(*out_attrs, 0, mshadow::kFloat32);
   return (*in_attrs)[0] != -1;
 }
 
-inline bool ToTensorShape(const nnvm::NodeAttrs& attrs,
-                          std::vector<TShape> *in_attrs,
-                          std::vector<TShape> *out_attrs) {
-  CHECK_EQ(in_attrs->size(), 1U);
-  CHECK_EQ(out_attrs->size(), 1U);
-  TShape &shp = (*in_attrs)[0];
-  CHECK_EQ(shp.ndim(), 3U) << "`to_tensor` only supports 3 dimensions";
-  TShape ret(3);
-  ret[0] = shp[2];
-  ret[1] = shp[0];
-  ret[2] = shp[1];
-  SHAPE_ASSIGN_CHECK(*out_attrs, 0, ret);
-  return true;
-}
-
-static void ToTensor(const nnvm::NodeAttrs &attrs,
+void ToTensor(const nnvm::NodeAttrs &attrs,
                      const OpContext &ctx,
                      const std::vector<TBlob> &inputs,
                      const std::vector<OpReqType> &req,
                      const std::vector<TBlob> &outputs) {
   CHECK_EQ(req[0], kWriteTo)
     << "`to_tensor` does not support inplace";
-  CheckIsImage(inputs[0]);
 
   int length = inputs[0].shape_[0] * inputs[0].shape_[1];
   int channel = inputs[0].shape_[2];
 
-  float* output = outputs[0].dptr<float>();
-  uint8_t* input = inputs[0].dptr<uint8_t>();
+  MSHADOW_TYPE_SWITCH(inputs[0].type_flag_, DType, {
+    float* output = outputs[0].dptr<float>();
+    DType* input = inputs[0].dptr<DType>();
 
-  for (int l = 0; l < length; ++l) {
-    for (int c = 0; c < channel; ++c) {
-      output[c*length + l] = static_cast<float>(input[l*channel + c]) / 255.0f;
+    for (int l = 0; l < length; ++l) {
+      for (int c = 0; c < channel; ++c) {
+        output[c*length + l] = static_cast<float>(input[l*channel + c]) / 255.0f;
+      }
     }
-  }
+  });
+}
+
+inline bool TensorShape(const nnvm::NodeAttrs& attrs,
+                       std::vector<TShape> *in_attrs,
+                       std::vector<TShape> *out_attrs) {
+  TShape& dshape = (*in_attrs)[0];
+  SHAPE_ASSIGN_CHECK(*out_attrs, 0, dshape);
+  return true;
 }
 
 struct NormalizeParam : public dmlc::Parameter<NormalizeParam> {
@@ -117,20 +110,28 @@ inline bool NormalizeShape(const nnvm::NodeAttrs& attrs,
   const NormalizeParam &param = nnvm::get<NormalizeParam>(attrs.parsed);
   const auto& dshape = (*in_attrs)[0];
   if (!dshape.ndim()) return false;
-  CHECK_EQ(dshape.ndim(), 3)
-      << "Input must have 3 dimensions";
 
+  CHECK_EQ(dshape.ndim(), 3)
+      << "Input tensor must have shape (channels, height, width), but got "
+      << dshape;
   auto nchannels = dshape[0];
+  CHECK(nchannels == 3 || nchannels == 1)
+      << "The first dimension of input tensor must be the channel dimension with "
+      << "either 1 or 3 elements, but got input with shape " << dshape;
   CHECK(param.mean.ndim() == 1 || param.mean.ndim() == nchannels)
-      << "mean must have either 1 or " << nchannels << " elements";
+      << "Invalid mean for input with shape " << dshape
+      << ". mean must have either 1 or " << nchannels
+      << " elements, but got " << param.mean;
   CHECK(param.std.ndim() == 1 || param.std.ndim() == nchannels)
-      << "std must have either 1 or " << nchannels << " elements";
+      << "Invalid std for input with shape " << dshape
+      << ". std must have either 1 or " << nchannels
+      << " elements, but got " << param.std;
 
   SHAPE_ASSIGN_CHECK(*out_attrs, 0, dshape);
+  return true;
 }
 
-
-static void Normalize(const nnvm::NodeAttrs &attrs,
+void Normalize(const nnvm::NodeAttrs &attrs,
                       const OpContext &ctx,
                       const std::vector<TBlob> &inputs,
                       const std::vector<OpReqType> &req,
@@ -154,214 +155,241 @@ static void Normalize(const nnvm::NodeAttrs &attrs,
   });
 }
 
-struct FlipParam : public dmlc::Parameter<FlipParam> {
-  int axis;
-  DMLC_DECLARE_PARAMETER(FlipParam) {
-    DMLC_DECLARE_FIELD(axis)
-    .describe("0 or 1. 0 for horizontal flip, 1 for vertical flip.");
-  }
-};
+template<typename DType>
+inline DType saturate_cast(const float& src) {
+  return static_cast<DType>(src);
+}
 
-#define SWAP_IF_INPLACE(dst, dst_idx, src, src_idx) \
-  if (dst == src) {                                 \
-    std::swap(dst[dst_idx], src[src_idx]);          \
-  } else {                                          \
-    dst[dst_idx] = src[src_idx];                    \
-  }
+template<>
+inline uint8_t saturate_cast(const float& src) {
+  return std::min(std::max(src, 0.f), 255.f);
+}
+
+inline bool ImageShape(const nnvm::NodeAttrs& attrs,
+                       std::vector<TShape> *in_attrs,
+                       std::vector<TShape> *out_attrs) {
+  TShape& dshape = (*in_attrs)[0];
+  CHECK_EQ(dshape.ndim(), 3)
+      << "Input image must have shape (height, width, channels), but got " << dshape;
+  auto nchannels = dshape[dshape.ndim()-1];
+  CHECK(nchannels == 3 || nchannels == 1)
+      << "The last dimension of input image must be the channel dimension with "
+      << "either 1 or 3 elements, but got input with shape " << dshape;
+  SHAPE_ASSIGN_CHECK(*out_attrs, 0, dshape);
+  return true;
+}
 
 template<typename DType>
-static void FlipImpl(const TShape &shape, DType *src, DType *dst, int axis) {
-  const int height = shape[0];
-  const int width = shape[1];
-  const int nchannel = shape[2];
-
-  const int length = width * nchannel;
-  const int height_stride = (src == dst && axis == 1) ? (height >> 1) : height;
-  const int width_stride = (src == dst && axis == 0) ? (width >> 1) : width;
-
-  for (int h = 0; h < height_stride; ++h) {
-    const int h_dst = (axis == 0) ? h : (height - h);
-    for (int w = 0; w < width_stride; ++w) {
-      const int w_dst = (axis == 0) ? (width - w) : w;
-      const int idx_dst = h_dst * length + w_dst * nchannel;
-      const int idx_src = h * length + w * nchannel;
-      SWAP_IF_INPLACE(dst, idx_dst, src, idx_src);
-      if (nchannel > 1) {
-        SWAP_IF_INPLACE(dst, idx_dst + 1, src, idx_src + 1);
-        SWAP_IF_INPLACE(dst, idx_dst + 2, src, idx_src + 2);
+void FlipImpl(const TShape &shape, DType *src, DType *dst, int axis) {
+  int head = 1, mid = shape[axis], tail = 1;
+  for (int i = 0; i < axis; ++i) head *= shape[i];
+  for (int i = axis+1; i < shape.ndim(); ++i) tail *= shape[i];
+
+  for (int i = 0; i < head; ++i) {
+    for (int j = 0; j < (mid >>2); ++j) {
+      int idx1 = (i*mid + j)*tail;
+      int idx2 = idx1 + (mid - (j<<2))*tail;
+      for (int k = 0; k < tail; ++k, ++idx1, ++idx2) {
+        DType tmp = src[idx1];
+        dst[idx1] = src[idx2];
+        dst[idx2] = tmp;
       }
     }
   }
 }
 
-static void Flip(const nnvm::NodeAttrs &attrs,
-                  const OpContext &ctx,
-                  const std::vector<TBlob> &inputs,
-                  const std::vector<OpReqType> &req,
-                  const std::vector<TBlob> &outputs) {
-  const FlipParam &param = nnvm::get<FlipParam>(attrs.parsed);
-  CHECK(param.axis == 0 || param.axis == 1) << "flip axis must be 0 or 1.";
-  CheckIsImage(inputs[0]);
-  const TShape& ishape = inputs[0].shape_;
+void RandomHorizontalFlip(
+    const nnvm::NodeAttrs &attrs,
+    const OpContext &ctx,
+    const std::vector<TBlob> &inputs,
+    const std::vector<OpReqType> &req,
+    const std::vector<TBlob> &outputs) {
+  using namespace mshadow;
+  Stream<cpu> *s = ctx.get_stream<cpu>();
+  Random<cpu> *prnd = ctx.requested[0].get_random<cpu, float>(s);
+  if (std::bernoulli_distribution()(prnd->GetRndEngine())) return;
+  MSHADOW_TYPE_SWITCH(outputs[0].type_flag_, DType, {
+    FlipImpl(inputs[0].shape_, inputs[0].dptr<DType>(),
+             outputs[0].dptr<DType>(), 1);
+  });
+}
+
+void RandomVerticalFlip(
+    const nnvm::NodeAttrs &attrs,
+    const OpContext &ctx,
+    const std::vector<TBlob> &inputs,
+    const std::vector<OpReqType> &req,
+    const std::vector<TBlob> &outputs) {
+  using namespace mshadow;
+  Stream<cpu> *s = ctx.get_stream<cpu>();
+  Random<cpu> *prnd = ctx.requested[0].get_random<cpu, float>(s);
+  if (std::bernoulli_distribution()(prnd->GetRndEngine())) return;
   MSHADOW_TYPE_SWITCH(outputs[0].type_flag_, DType, {
-    FlipImpl(ishape, inputs[0].dptr<DType>(), outputs[0].dptr<DType>(), param.axis);
+    FlipImpl(inputs[0].shape_, inputs[0].dptr<DType>(),
+             outputs[0].dptr<DType>(), 0);
   });
 }
 
-struct RandomBrightnessParam : public dmlc::Parameter<RandomBrightnessParam> {
-  float max_brightness;
-  DMLC_DECLARE_PARAMETER(RandomBrightnessParam) {
-    DMLC_DECLARE_FIELD(max_brightness)
+struct RandomEnhanceParam : public dmlc::Parameter<RandomEnhanceParam> {
+  float min_factor;
+  float max_factor;
+  DMLC_DECLARE_PARAMETER(RandomEnhanceParam) {
+    DMLC_DECLARE_FIELD(min_factor)
+    .set_lower_bound(0.0)
+    .describe("Minimum factor.");
+    DMLC_DECLARE_FIELD(max_factor)
     .set_lower_bound(0.0)
-    .describe("Max Brightness.");
+    .describe("Maximum factor.");
   }
 };
 
-static void RandomBrightness(const nnvm::NodeAttrs &attrs,
-                             const OpContext &ctx,
-                             const std::vector<TBlob> &inputs,
-                             const std::vector<OpReqType> &req,
-                             const std::vector<TBlob> &outputs) {
+inline void AdjustBrightnessImpl(const float& alpha_b,
+                                 const OpContext &ctx,
+                                 const std::vector<TBlob> &inputs,
+                                 const std::vector<OpReqType> &req,
+                                 const std::vector<TBlob> &outputs) {
   using namespace mshadow;
-  const RandomBrightnessParam &param = nnvm::get<RandomBrightnessParam>(attrs.parsed);
-
   int length = inputs[0].Size();
 
-  uint8_t* output = outputs[0].dptr<uint8_t>();
-  uint8_t* input = inputs[0].dptr<uint8_t>();
+  MSHADOW_TYPE_SWITCH(outputs[0].type_flag_, DType, {
+    DType* output = outputs[0].dptr<DType>();
+    DType* input = inputs[0].dptr<DType>();
+    for (int l = 0; l < length; ++l) {
+      float val = static_cast<float>(input[l]) * alpha_b;
+      output[l] = saturate_cast<DType>(val);
+    }
+  });
+}
+
+void RandomBrightness(const nnvm::NodeAttrs &attrs,
+                      const OpContext &ctx,
+                      const std::vector<TBlob> &inputs,
+                      const std::vector<OpReqType> &req,
+                      const std::vector<TBlob> &outputs) {
+  using namespace mshadow;
+  const RandomEnhanceParam &param = nnvm::get<RandomEnhanceParam>(attrs.parsed);
+
 
   Stream<cpu> *s = ctx.get_stream<cpu>();
   Random<cpu> *prnd = ctx.requested[0].get_random<cpu, float>(s);
-  float alpha_b = 1.0 + std::uniform_real_distribution<float>(
-      -param.max_brightness, param.max_brightness)(prnd->GetRndEngine());
+  float alpha_b = std::uniform_real_distribution<float>(
+      param.min_factor, param.max_factor)(prnd->GetRndEngine());
 
-  for (int l = 0; l < length; ++l) {
-    float val = static_cast<float>(input[l]) * alpha_b;
-    val = std::min(std::max(val, 0.f), 255.f);
-    output[l] = static_cast<uint8_t>(val);
-  }
+  AdjustBrightnessImpl(alpha_b, ctx, inputs, req, outputs);
 }
 
+inline void AdjustContrastImpl(const float& alpha_c,
+                               const OpContext &ctx,
+                               const std::vector<TBlob> &inputs,
+                               const std::vector<OpReqType> &req,
+                               const std::vector<TBlob> &outputs) {
+  using namespace mshadow;
+  static const float coef[] = { 0.299f, 0.587f, 0.114f };
 
-struct RandomContrastParam : public dmlc::Parameter<RandomContrastParam> {
-  float max_contrast;
-  DMLC_DECLARE_PARAMETER(RandomContrastParam) {
-    DMLC_DECLARE_FIELD(max_contrast)
-    .set_lower_bound(0.0)
-    .describe("Max Contrast.");
-  }
-};
+  int length = inputs[0].shape_[0] * inputs[0].shape_[1];
+  int nchannels = inputs[0].shape_[2];
 
+  MSHADOW_TYPE_SWITCH(outputs[0].type_flag_, DType, {
+    DType* output = outputs[0].dptr<DType>();
+    DType* input = inputs[0].dptr<DType>();
+
+    float sum = 0.f;
+    if (nchannels > 1) {
+      for (int l = 0; l < length; ++l) {
+        for (int c = 0; c < 3; ++c) sum += input[l*3 + c] * coef[c];
+      }
+    } else {
+      for (int l = 0; l < length; ++l) sum += input[l];
+    }
+    float gray_mean = sum / static_cast<float>(length);
+    float beta = (1 - alpha_c) * gray_mean;
 
-static void RandomContrast(const nnvm::NodeAttrs &attrs,
+    for (int l = 0; l < length * nchannels; ++l) {
+      float val = input[l] * alpha_c + beta;
+      output[l] = saturate_cast<DType>(val);
+    }
+  });
+}
+
+inline void RandomContrast(const nnvm::NodeAttrs &attrs,
                            const OpContext &ctx,
                            const std::vector<TBlob> &inputs,
                            const std::vector<OpReqType> &req,
                            const std::vector<TBlob> &outputs) {
   using namespace mshadow;
+  const RandomEnhanceParam &param = nnvm::get<RandomEnhanceParam>(attrs.parsed);
+
+
+  Stream<cpu> *s = ctx.get_stream<cpu>();
+  Random<cpu> *prnd = ctx.requested[0].get_random<cpu, real_t>(s);
+  float alpha_c = std::uniform_real_distribution<float>(
+      param.min_factor, param.max_factor)(prnd->GetRndEngine());
+
+  AdjustContrastImpl(alpha_c, ctx, inputs, req, outputs);
+}
+
+inline void AdjustSaturationImpl(const float& alpha_s,
+                                 const OpContext &ctx,
+                                 const std::vector<TBlob> &inputs,
+                                 const std::vector<OpReqType> &req,
+                                 const std::vector<TBlob> &outputs) {
   static const float coef[] = { 0.299f, 0.587f, 0.114f };
-  const RandomContrastParam &param = nnvm::get<RandomContrastParam>(attrs.parsed);
 
   int length = inputs[0].shape_[0] * inputs[0].shape_[1];
   int nchannels = inputs[0].shape_[2];
 
-  uint8_t* output = outputs[0].dptr<uint8_t>();
-  uint8_t* input = inputs[0].dptr<uint8_t>();
+  float alpha_o = 1.f - alpha_s;
 
-  Stream<cpu> *s = ctx.get_stream<cpu>();
-  Random<cpu> *prnd = ctx.requested[0].get_random<cpu, real_t>(s);
-  float alpha_c = 1.0 + std::uniform_real_distribution<float>(
-    -param.max_contrast, param.max_contrast)(prnd->GetRndEngine());
+  MSHADOW_TYPE_SWITCH(outputs[0].type_flag_, DType, {
+    DType* output = outputs[0].dptr<DType>();
+    DType* input = inputs[0].dptr<DType>();
 
-  float sum = 0.f;
-  if (nchannels > 1) {
-    for (int l = 0; l < length; ++l) {
-      for (int c = 0; c < nchannels; ++c) sum += input[l*nchannels + c] * coef[c];
+    if (nchannels == 1) {
+      for (int l = 0; l < length; ++l) output[l] = input[l];
+      return;
     }
-  } else {
-    for (int l = 0; l < length; ++l) sum += input[l];
-  }
-  float gray_mean = sum / static_cast<float>(length);
-  float beta = (1 - alpha_c) * gray_mean;
 
-  for (int l = 0; l < length * nchannels; ++l) {
-    float val = input[l] * alpha_c + beta;
-    val = std::min(std::max(val, 0.f), 255.f);
-    output[l] = static_cast<uint8_t>(val);
-  }
+    for (int l = 0; l < length; ++l) {
+      float gray = 0.f;
+      for (int c = 0; c < 3; ++c) {
+        gray += input[l*3 + c] * coef[c];
+      }
+      gray *= alpha_o;
+      for (int c = 0; c < 3; ++c) {
+        float val = gray + input[l*3 + c] * alpha_s;
+        output[l*3 + c] = saturate_cast<DType>(val);
+      }
+    }
+  });
 }
 
-struct RandomSaturationParam : public dmlc::Parameter<RandomSaturationParam> {
-  float max_saturation;
-  DMLC_DECLARE_PARAMETER(RandomSaturationParam) {
-    DMLC_DECLARE_FIELD(max_saturation)
-    .set_default(0.0)
-    .describe("Max Saturation.");
-  }
-};
-
-static void RandomSaturation(const nnvm::NodeAttrs &attrs,
+inline void RandomSaturation(const nnvm::NodeAttrs &attrs,
                              const OpContext &ctx,
                              const std::vector<TBlob> &inputs,
                              const std::vector<OpReqType> &req,
                              const std::vector<TBlob> &outputs) {
   using namespace mshadow;
-  const RandomSaturationParam &param = nnvm::get<RandomSaturationParam>(attrs.parsed);
-  static const float coef[] = { 0.299f, 0.587f, 0.114f };
-
-  int length = inputs[0].shape_[0] * inputs[0].shape_[1];
-  int nchannels = inputs[0].shape_[2];
-
-  uint8_t* output = outputs[0].dptr<uint8_t>();
-  uint8_t* input = inputs[0].dptr<uint8_t>();
+  const RandomEnhanceParam &param = nnvm::get<RandomEnhanceParam>(attrs.parsed);
 
   Stream<cpu> *s = ctx.get_stream<cpu>();
   Random<cpu> *prnd = ctx.requested[0].get_random<cpu, real_t>(s);
-  float alpha_s = 1.f + std::uniform_real_distribution<float>(
-    -param.max_saturation, param.max_saturation)(prnd->GetRndEngine());
-  float alpha_o = 1.f - alpha_s;
-
-  if (nchannels == 1) {
-    for (int l = 0; l < length * nchannels; ++l) output[l] = input[l];
-    return;
-  }
+  float alpha_s = std::uniform_real_distribution<float>(
+      param.min_factor, param.max_factor)(prnd->GetRndEngine());
 
-  for (int l = 0; l < length; ++l) {
-    float gray = 0.f;
-    for (int c = 0; c < nchannels; ++c) {
-      gray = input[l*nchannels + c] * coef[c];
-    }
-    gray *= alpha_o;
-    for (int c = 0; c < nchannels; ++c) {
-      float val = gray + input[l*nchannels + c] * alpha_s;
-      val = std::min(std::max(val, 0.f), 255.f);
-      output[l*nchannels + c] = static_cast<uint8_t>(val);
-    }
-  }
+  AdjustSaturationImpl(alpha_s, ctx, inputs, req, outputs);
 }
 
-struct RandomHueParam : public dmlc::Parameter<RandomHueParam> {
-  float max_hue;
-  DMLC_DECLARE_PARAMETER(RandomHueParam) {
-    DMLC_DECLARE_FIELD(max_hue)
-    .set_default(0.0)
-    .describe("Max Hue.");
-  }
-};
-
-template <typename DType> static
-void RGB2HLSConvert(const DType src_r,
-                    const DType src_g,
-                    const DType src_b,
-                    DType *dst_h,
-                    DType *dst_l,
-                    DType *dst_s
-                   ) {
-  DType b = src_b, g = src_g, r = src_r;
-  DType h = 0.f, s = 0.f, l;
-  DType vmin;
-  DType vmax;
-  DType diff;
+void RGB2HLSConvert(const float& src_r,
+                    const float& src_g,
+                    const float& src_b,
+                    float *dst_h,
+                    float *dst_l,
+                    float *dst_s) {
+  float b = src_b / 255.f, g = src_g / 255.f, r = src_r / 255.f;
+  float h = 0.f, s = 0.f, l;
+  float vmin;
+  float vmax;
+  float diff;
 
   vmax = vmin = r;
   vmax = fmax(vmax, g);
@@ -372,7 +400,7 @@ void RGB2HLSConvert(const DType src_r,
   diff = vmax - vmin;
   l = (vmax + vmin) * 0.5f;
 
-  if (diff > std::numeric_limits<DType>::epsilon()) {
+  if (diff > std::numeric_limits<float>::epsilon()) {
     s = (l < 0.5f) * diff / (vmax + vmin);
     s += (l >= 0.5f) * diff / (2.0f - vmax - vmin);
 
@@ -389,23 +417,20 @@ void RGB2HLSConvert(const DType src_r,
   *dst_s = s;
 }
 
-
-static  int c_HlsSectorData[6][3] = {
-  { 1, 3, 0 },
-  { 1, 0, 2 },
-  { 3, 0, 1 },
-  { 0, 2, 1 },
-  { 0, 1, 3 },
-  { 2, 1, 0 }
-};
-
-template <typename DType>  static  void HLS2RGBConvert(const DType src_h,
-    const DType src_l,
-    const DType src_s,
-    DType *dst_r,
-    DType *dst_g,
-    DType *dst_b) {
-
+void HLS2RGBConvert(const float& src_h,
+                    const float& src_l,
+                    const float& src_s,
+                    float *dst_r,
+                    float *dst_g,
+                    float *dst_b) {
+  static const int c_HlsSectorData[6][3] = {
+    { 1, 3, 0 },
+    { 1, 0, 2 },
+    { 3, 0, 1 },
+    { 0, 2, 1 },
+    { 0, 1, 3 },
+    { 2, 1, 0 }
+  };
 
   float h = src_h, l = src_l, s = src_s;
   float b = l, g = l, r = l;
@@ -415,6 +440,8 @@ template <typename DType>  static  void HLS2RGBConvert(const DType src_h,
     p2 += (l > 0.5f) * (l + s - l * s);
     float p1 = 2 * l - p2;
 
+    h *= 1.f / 60.f;
+
     if (h < 0) {
       do { h += 6; } while (h < 0);
     } else if (h >= 6) {
@@ -436,177 +463,202 @@ template <typename DType>  static  void HLS2RGBConvert(const DType src_h,
     r = tab[c_HlsSectorData[sector][2]];
   }
 
-  *dst_b = b;
-  *dst_g = g;
-  *dst_r = r;
+  *dst_b = b * 255.f;
+  *dst_g = g * 255.f;
+  *dst_r = r * 255.f;
 }
 
-template<typename xpu, typename DType>
-static  void RandomHueKernal(const TBlob &input,
-                             const TBlob &output,
-                             Stream<xpu> *s,
-                             int hight,
-                             int weight,
-                             DType alpha) {
-  auto input_3d = input.get<xpu, 3, DType>(s);
-  auto output_3d = output.get<xpu, 3, DType>(s);
-  for (int h_index = 0; h_index < hight; ++h_index) {
-    for (int w_index = 0; w_index < weight; ++w_index) {
-      DType h;
-      DType l;
-      DType s;
-      RGB2HLSConvert(input_3d[0][h_index][w_index],
-                     input_3d[1][h_index][w_index],
-                     input_3d[2][h_index][w_index],
-                     &h, &l, &s);
-      h += alpha;
-      h = std::max(DType(0), std::min(DType(180), h));
-
-      HLS2RGBConvert(
-        h, l, s,
-        &output_3d[0][h_index][w_index],
-        &output_3d[1][h_index][w_index],
-        &output_3d[2][h_index][w_index]);
+void AdjustHueImpl(float alpha,
+                   const OpContext &ctx,
+                   const std::vector<TBlob> &inputs,
+                   const std::vector<OpReqType> &req,
+                   const std::vector<TBlob> &outputs) {
+  int length = inputs[0].shape_[0] * inputs[0].shape_[1];
+  if (inputs[0].shape_[2] == 1) return;
+
+  MSHADOW_TYPE_SWITCH(outputs[0].type_flag_, DType, {
+    DType* input = inputs[0].dptr<DType>();
+    DType* output = outputs[0].dptr<DType>();
+
+    for (int i = 0; i < length; ++i) {
+      float h, l, s;
+      float r = static_cast<float>(*(input++));
+      float g = static_cast<float>(*(input++));
+      float b = static_cast<float>(*(input++));
+      RGB2HLSConvert(r, g, b, &h, &l, &s);
+      h += alpha * 360.f;
+      HLS2RGBConvert(h, l, s, &r, &g, &b);
+      *(output++) = saturate_cast<DType>(r);
+      *(output++) = saturate_cast<DType>(g);
+      *(output++) = saturate_cast<DType>(b);
     }
-  }
+  });
 }
 
-template<typename xpu>
-static void RandomHue(const nnvm::NodeAttrs &attrs,
-                      const OpContext &ctx,
-                      const std::vector<TBlob> &inputs,
-                      const std::vector<OpReqType> &req,
-                      const std::vector<TBlob> &outputs) {
+void RandomHue(const nnvm::NodeAttrs &attrs,
+               const OpContext &ctx,
+               const std::vector<TBlob> &inputs,
+               const std::vector<OpReqType> &req,
+               const std::vector<TBlob> &outputs) {
   using namespace mshadow;
-  auto input = inputs[0];
-  auto output = outputs[0];
-  int channel = input.shape_[0];
-  int hight = input.shape_[1];
-  int weight = input.shape_[2];
-  Stream<xpu> *s = ctx.get_stream<xpu>();
-  Random<xpu> *prnd = ctx.requested[kRandom].get_random<xpu, real_t>(s);
-
-  const RandomHueParam &param = nnvm::get<RandomHueParam>(attrs.parsed);
-  float alpha =  std::uniform_real_distribution<float>(
-    -param.max_hue, param.max_hue)(prnd->GetRndEngine());
-  auto output_float = output.get<xpu, 3, float>(s);
-
-  MSHADOW_TYPE_SWITCH(input.type_flag_, DType, {
-    RandomHueKernal<xpu, DType>(input, output, s, hight, weight, alpha);
-  });
+  const RandomEnhanceParam &param = nnvm::get<RandomEnhanceParam>(attrs.parsed);
+
+  Stream<cpu> *s = ctx.get_stream<cpu>();
+  Random<cpu> *prnd = ctx.requested[0].get_random<cpu, real_t>(s);
+  float alpha = std::uniform_real_distribution<float>(
+      param.min_factor, param.max_factor)(prnd->GetRndEngine());
+
+  AdjustHueImpl(alpha, ctx, inputs, req, outputs);
 }
 
-static void RandomColorJitter(const nnvm::NodeAttrs &attrs,
-                              const OpContext &ctx,
-                              const std::vector<TBlob> &inputs,
-                              const std::vector<OpReqType> &req,
-                              const std::vector<TBlob> &outputs) {
+struct RandomColorJitterParam : public dmlc::Parameter<RandomColorJitterParam> {
+  float brightness;
+  float contrast;
+  float saturation;
+  float hue;
+  DMLC_DECLARE_PARAMETER(RandomColorJitterParam) {
+    DMLC_DECLARE_FIELD(brightness)
+    .describe("How much to jitter brightness.");
+    DMLC_DECLARE_FIELD(contrast)
+    .describe("How much to jitter contrast.");
+    DMLC_DECLARE_FIELD(saturation)
+    .describe("How much to jitter saturation.");
+    DMLC_DECLARE_FIELD(hue)
+    .describe("How much to jitter hue.");
+  }
+};
+
+void RandomColorJitter(const nnvm::NodeAttrs &attrs,
+                       const OpContext &ctx,
+                       const std::vector<TBlob> &inputs,
+                       const std::vector<OpReqType> &req,
+                       const std::vector<TBlob> &outputs) {
+  using namespace mshadow;
+  const RandomColorJitterParam &param = nnvm::get<RandomColorJitterParam>(attrs.parsed);
+  Stream<cpu> *s = ctx.get_stream<cpu>();
+  Random<cpu> *prnd = ctx.requested[0].get_random<cpu, real_t>(s);
+
+  int order[4] = {0, 1, 2, 3};
+  std::shuffle(order, order + 4, prnd->GetRndEngine());
+  bool flag = false;
+
+  for (int i = 0; i < 4; ++i) {
+    switch (order[i]) {
+      case 0:
+        if (param.brightness > 0) {
+          float alpha_b = 1.0 + std::uniform_real_distribution<float>(
+              -param.brightness, param.brightness)(prnd->GetRndEngine());
+          AdjustBrightnessImpl(alpha_b, ctx, flag ? outputs : inputs, req, outputs);
+          flag = true;
+        }
+        break;
+      case 1:
+        if (param.contrast > 0) {
+          float alpha_c = 1.0 + std::uniform_real_distribution<float>(
+              -param.contrast, param.contrast)(prnd->GetRndEngine());
+          AdjustContrastImpl(alpha_c, ctx, flag ? outputs : inputs, req, outputs);
+          flag = true;
+        }
+        break;
+      case 2:
+        if (param.saturation > 0) {
+          float alpha_s = 1.f + std::uniform_real_distribution<float>(
+              -param.saturation, param.saturation)(prnd->GetRndEngine());
+          AdjustSaturationImpl(alpha_s, ctx, flag ? outputs : inputs, req, outputs);
+          flag = true;
+        }
+        break;
+      case 3:
+        if (param.hue > 0) {
+          float alpha_h = std::uniform_real_distribution<float>(
+              -param.hue, param.hue)(prnd->GetRndEngine());
+          AdjustHueImpl(alpha_h, ctx, flag ? outputs : inputs, req, outputs);
+          flag = true;
+        }
+        break;
+    }
+  }
 }
 
 struct AdjustLightingParam : public dmlc::Parameter<AdjustLightingParam> {
-  nnvm::Tuple<float> alpha_rgb;
-  nnvm::Tuple<float> eigval;
-  nnvm::Tuple<float> eigvec;
+  nnvm::Tuple<float> alpha;
   DMLC_DECLARE_PARAMETER(AdjustLightingParam) {
-    DMLC_DECLARE_FIELD(alpha_rgb)
-    .set_default({0, 0, 0})
+    DMLC_DECLARE_FIELD(alpha)
     .describe("The lighting alphas for the R, G, B channels.");
-    DMLC_DECLARE_FIELD(eigval)
-    .describe("Eigen value.")
-    .set_default({ 55.46, 4.794, 1.148 });
-    DMLC_DECLARE_FIELD(eigvec)
-    .describe("Eigen vector.")
-    .set_default({ -0.5675,  0.7192,  0.4009,
-                   -0.5808, -0.0045, -0.8140,
-                   -0.5808, -0.0045, -0.8140 });
   }
 };
 
 struct RandomLightingParam : public dmlc::Parameter<RandomLightingParam> {
   float alpha_std;
-  nnvm::Tuple<float> eigval;
-  nnvm::Tuple<float> eigvec;
   DMLC_DECLARE_PARAMETER(RandomLightingParam) {
     DMLC_DECLARE_FIELD(alpha_std)
     .set_default(0.05)
     .describe("Level of the lighting noise.");
-    DMLC_DECLARE_FIELD(eigval)
-    .describe("Eigen value.")
-    .set_default({ 55.46, 4.794, 1.148 });
-    DMLC_DECLARE_FIELD(eigvec)
-    .describe("Eigen vector.")
-    .set_default({ -0.5675,  0.7192,  0.4009,
-                   -0.5808, -0.0045, -0.8140,
-                   -0.5808, -0.0045, -0.8140 });
   }
 };
 
-void AdjustLightingImpl(uint8_t* dst, const uint8_t* src,
-                        float alpha_r, float alpha_g, float alpha_b,
-                        const nnvm::Tuple<float> eigval, const nnvm::Tuple<float> eigvec,
-                        int H, int W) {
-    alpha_r *= eigval[0];
-    alpha_g *= eigval[1];
-    alpha_b *= eigval[2];
-    float pca_r = alpha_r * eigvec[0] + alpha_g * eigvec[1] + alpha_b * eigvec[2];
-    float pca_g = alpha_r * eigvec[3] + alpha_g * eigvec[4] + alpha_b * eigvec[5];
-    float pca_b = alpha_r * eigvec[6] + alpha_g * eigvec[7] + alpha_b * eigvec[8];
-    for (int i = 0; i < H * W; i++) {
-        int base_ind = 3 * i;
-        float in_r = static_cast<float>(src[base_ind]);
-        float in_g = static_cast<float>(src[base_ind + 1]);
-        float in_b = static_cast<float>(src[base_ind + 2]);
-        dst[base_ind] = std::min(255, std::max(0, static_cast<int>(in_r + pca_r)));
-        dst[base_ind + 1] = std::min(255, std::max(0, static_cast<int>(in_g + pca_g)));
-        dst[base_ind + 2] = std::min(255, std::max(0, static_cast<int>(in_b + pca_b)));
+void AdjustLightingImpl(const nnvm::Tuple<float>& alpha,
+                        const OpContext &ctx,
+                        const std::vector<TBlob> &inputs,
+                        const std::vector<OpReqType> &req,
+                        const std::vector<TBlob> &outputs) {
+  static const float eig[3][3] = {
+      { 55.46 * -0.5675, 4.794 * 0.7192,  1.148 * 0.4009 },
+      { 55.46 * -0.5808, 4.794 * -0.0045, 1.148 * -0.8140 },
+      { 55.46 * -0.5836, 4.794 * -0.6948, 1.148 * 0.4203 }
+    };
+
+  int length = inputs[0].shape_[0] * inputs[0].shape_[1];
+  int channels = inputs[0].shape_[2];
+  if (channels == 1) return;
+
+  float pca_r = eig[0][0] * alpha[0] + eig[0][1] * alpha[1] + eig[0][2] * alpha[2];
+  float pca_g = eig[1][0] * alpha[0] + eig[1][1] * alpha[1] + eig[1][2] * alpha[2];
+  float pca_b = eig[2][0] * alpha[0] + eig[2][1] * alpha[1] + eig[2][2] * alpha[2];
+
+  MSHADOW_TYPE_SWITCH(outputs[0].type_flag_, DType, {
+    DType* output = outputs[0].dptr<DType>();
+    DType* input = inputs[0].dptr<DType>();
+
+    for (int i = 0; i < length; i++) {
+      int base_ind = 3 * i;
+      float in_r = static_cast<float>(input[base_ind]);
+      float in_g = static_cast<float>(input[base_ind + 1]);
+      float in_b = static_cast<float>(input[base_ind + 2]);
+      output[base_ind] = saturate_cast<DType>(in_r + pca_r);
+      output[base_ind + 1] = saturate_cast<DType>(in_g + pca_g);
+      output[base_ind + 2] = saturate_cast<DType>(in_b + pca_b);
     }
+  });
 }
 
-static void AdjustLighting(const nnvm::NodeAttrs &attrs,
-                           const OpContext &ctx,
-                           const std::vector<TBlob> &inputs,
-                           const std::vector<OpReqType> &req,
-                           const std::vector<TBlob> &outputs) {
-    using namespace mshadow;
-    const AdjustLightingParam &param = nnvm::get<AdjustLightingParam>(attrs.parsed);
-    CHECK_EQ(param.eigval.ndim(), 3) << "There should be 3 numbers in the eigval.";
-    CHECK_EQ(param.eigvec.ndim(), 9) << "There should be 9 numbers in the eigvec.";
-    CHECK_EQ(inputs[0].ndim(), 3);
-    CHECK_EQ(inputs[0].size(2), 3);
-    int H = inputs[0].size(0);
-    int W = inputs[0].size(1);
-    AdjustLightingImpl(outputs[0].dptr<uint8_t>(), inputs[0].dptr<uint8_t>(),
-                       param.alpha_rgb[0], param.alpha_rgb[1], param.alpha_rgb[2],
-                       param.eigval, param.eigvec, H, W);
+void AdjustLighting(const nnvm::NodeAttrs &attrs,
+                    const OpContext &ctx,
+                    const std::vector<TBlob> &inputs,
+                    const std::vector<OpReqType> &req,
+                    const std::vector<TBlob> &outputs) {
+  using namespace mshadow;
+  const AdjustLightingParam &param = nnvm::get<AdjustLightingParam>(attrs.parsed);
+  AdjustLightingImpl(param.alpha, ctx, inputs, req, outputs);
 }
 
-static void RandomLighting(const nnvm::NodeAttrs &attrs,
-                           const OpContext &ctx,
-                           const std::vector<TBlob> &inputs,
-                           const std::vector<OpReqType> &req,
-                           const std::vector<TBlob> &outputs) {
-    using namespace mshadow;
-    const RandomLightingParam &param = nnvm::get<RandomLightingParam>(attrs.parsed);
-    CHECK_EQ(param.eigval.ndim(), 3) << "There should be 3 numbers in the eigval.";
-    CHECK_EQ(param.eigvec.ndim(), 9) << "There should be 9 numbers in the eigvec.";
-    CHECK_EQ(inputs[0].ndim(), 3);
-    CHECK_EQ(inputs[0].size(2), 3);
-    int H = inputs[0].size(0);
-    int W = inputs[0].size(1);
-    Stream<cpu> *s = ctx.get_stream<cpu>();
-    Random<cpu> *prnd = ctx.requested[0].get_random<cpu, real_t>(s);
-    std::normal_distribution<float> dist(0, param.alpha_std);
-    float alpha_r = dist(prnd->GetRndEngine());
-    float alpha_g = dist(prnd->GetRndEngine());
-    float alpha_b = dist(prnd->GetRndEngine());
-    AdjustLightingImpl(outputs[0].dptr<uint8_t>(), inputs[0].dptr<uint8_t>(),
-                       alpha_r, alpha_g, alpha_b,
-                       param.eigval, param.eigvec, H, W);
+void RandomLighting(const nnvm::NodeAttrs &attrs,
+                    const OpContext &ctx,
+                    const std::vector<TBlob> &inputs,
+                    const std::vector<OpReqType> &req,
+                    const std::vector<TBlob> &outputs) {
+  using namespace mshadow;
+  const RandomLightingParam &param = nnvm::get<RandomLightingParam>(attrs.parsed);
+  Stream<cpu> *s = ctx.get_stream<cpu>();
+  Random<cpu> *prnd = ctx.requested[0].get_random<cpu, float>(s);
+  std::normal_distribution<float> dist(0, param.alpha_std);
+  float alpha_r = dist(prnd->GetRndEngine());
+  float alpha_g = dist(prnd->GetRndEngine());
+  float alpha_b = dist(prnd->GetRndEngine());
+  AdjustLightingImpl({alpha_r, alpha_g, alpha_b}, ctx, inputs, req, outputs);
 }
 
-
-
-
+}  // namespace image
 }  // namespace op
 }  // namespace mxnet
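
A note on the jitter logic above: RandomColorJitter shuffles the four
sub-transforms and chains each enabled one onto the result of the
previous step, which is what the flag ? outputs : inputs switch
accomplishes. Below is a minimal NumPy sketch of that ordering scheme,
with hypothetical helper names and only the brightness and contrast
branches shown; an HWC float image in the 0-255 range is assumed.

    import random
    import numpy as np

    def adjust_brightness(img, alpha):
        # scale every pixel by alpha, then clamp to the uint8 range
        return np.clip(img * alpha, 0, 255)

    def adjust_contrast(img, alpha):
        # blend each pixel with the mean gray level, as the kernel does
        coef = np.array([0.299, 0.587, 0.114], dtype=np.float32)
        gray_mean = (img * coef).sum(axis=-1).mean()
        return np.clip(img * alpha + (1.0 - alpha) * gray_mean, 0, 255)

    def color_jitter(img, brightness, contrast):
        # shuffle the sub-transforms, then feed each enabled one the
        # output of the previous step
        steps = [(brightness, adjust_brightness),
                 (contrast, adjust_contrast)]
        random.shuffle(steps)
        out = img.astype(np.float32)
        for max_jitter, fn in steps:
            if max_jitter > 0:
                alpha = 1.0 + random.uniform(-max_jitter, max_jitter)
                out = fn(out, alpha)
        return out.astype(np.uint8)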
 
diff --git a/src/operator/image/image_random.cc b/src/operator/image/image_random.cc
index 29edeed..5a21bf8 100644
--- a/src/operator/image/image_random.cc
+++ b/src/operator/image/image_random.cc
@@ -30,6 +30,13 @@
 
 namespace mxnet {
 namespace op {
+namespace image {
+
+DMLC_REGISTER_PARAMETER(NormalizeParam);
+DMLC_REGISTER_PARAMETER(RandomEnhanceParam);
+DMLC_REGISTER_PARAMETER(AdjustLightingParam);
+DMLC_REGISTER_PARAMETER(RandomLightingParam);
+DMLC_REGISTER_PARAMETER(RandomColorJitterParam);
 
 NNVM_REGISTER_OP(_image_to_tensor)
 .describe(R"code()code" ADD_FILELINE)
@@ -42,13 +49,12 @@ NNVM_REGISTER_OP(_image_to_tensor)
 .add_argument("data", "NDArray-or-Symbol", "The input.");
 
 
-DMLC_REGISTER_PARAMETER(NormalizeParam);
 NNVM_REGISTER_OP(_image_normalize)
 .describe(R"code()code" ADD_FILELINE)
 .set_num_inputs(1)
 .set_num_outputs(1)
 .set_attr_parser(ParamParser<NormalizeParam>)
-.set_attr<nnvm::FInferShape>("FInferShape", ElemwiseShape<1, 1>)
+.set_attr<nnvm::FInferShape>("FInferShape", NormalizeShape)
 .set_attr<nnvm::FInferType>("FInferType", ElemwiseType<1, 1>)
 .set_attr<nnvm::FInplaceOption>("FInplaceOption",
   [](const NodeAttrs& attrs){
@@ -59,33 +65,31 @@ NNVM_REGISTER_OP(_image_normalize)
 .add_argument("data", "NDArray-or-Symbol", "The input.")
 .add_arguments(NormalizeParam::__FIELDS__());
 
-DMLC_REGISTER_PARAMETER(FlipParam);
-NNVM_REGISTER_OP(_image_flip)
+
+NNVM_REGISTER_OP(_image_random_horizontal_flip)
 .describe(R"code()code" ADD_FILELINE)
 .set_num_inputs(1)
 .set_num_outputs(1)
-.set_attr_parser(ParamParser<FlipParam>)
-.set_attr<nnvm::FInferShape>("FInferShape", ElemwiseShape<1, 1>)
+.set_attr<nnvm::FInferShape>("FInferShape", ImageShape)
 .set_attr<nnvm::FInferType>("FInferType", ElemwiseType<1, 1>)
 .set_attr<nnvm::FInplaceOption>("FInplaceOption",
-                                [](const NodeAttrs& attrs){
-                                  return std::vector<std::pair<int, int> >{{0, 0}};
-                                })
-.set_attr<FCompute>("FCompute<cpu>", Flip)
+  [](const NodeAttrs& attrs){
+    return std::vector<std::pair<int, int> >{{0, 0}};
+  })
+.set_attr<FCompute>("FCompute<cpu>", RandomHorizontalFlip)
 .set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseNone{ "_copy" })
-.add_argument("data", "NDArray-or-Symbol", "The input.")
-.add_arguments(FlipParam::__FIELDS__());
+.add_argument("data", "NDArray-or-Symbol", "The input.");
+
 
-DMLC_REGISTER_PARAMETER(RandomBrightnessParam);
 NNVM_REGISTER_OP(_image_random_brightness)
 .describe(R"code()code" ADD_FILELINE)
 .set_num_inputs(1)
 .set_num_outputs(1)
-.set_attr_parser(ParamParser<RandomBrightnessParam>)
+.set_attr_parser(ParamParser<RandomEnhanceParam>)
 .set_attr<FResourceRequest>("FResourceRequest", [](const NodeAttrs& attrs) {
   return std::vector<ResourceRequest>{ResourceRequest::kRandom};
 })
-.set_attr<nnvm::FInferShape>("FInferShape", ElemwiseShape<1, 1>)
+.set_attr<nnvm::FInferShape>("FInferShape", ImageShape)
 .set_attr<nnvm::FInferType>("FInferType", ElemwiseType<1, 1>)
 .set_attr<nnvm::FInplaceOption>("FInplaceOption",
   [](const NodeAttrs& attrs){
@@ -94,18 +98,18 @@ NNVM_REGISTER_OP(_image_random_brightness)
 .set_attr<FCompute>("FCompute<cpu>", RandomBrightness)
 .set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseNone{ "_copy" })
 .add_argument("data", "NDArray-or-Symbol", "The input.")
-.add_arguments(RandomBrightnessParam::__FIELDS__());
+.add_arguments(RandomEnhanceParam::__FIELDS__());
+
 
-DMLC_REGISTER_PARAMETER(RandomContrastParam);
 NNVM_REGISTER_OP(_image_random_contrast)
 .describe(R"code()code" ADD_FILELINE)
 .set_num_inputs(1)
 .set_num_outputs(1)
-.set_attr_parser(ParamParser<RandomContrastParam>)
+.set_attr_parser(ParamParser<RandomEnhanceParam>)
 .set_attr<FResourceRequest>("FResourceRequest", [](const NodeAttrs& attrs) {
   return std::vector<ResourceRequest>{ResourceRequest::kRandom};
 })
-.set_attr<nnvm::FInferShape>("FInferShape", ElemwiseShape<1, 1>)
+.set_attr<nnvm::FInferShape>("FInferShape", ImageShape)
 .set_attr<nnvm::FInferType>("FInferType", ElemwiseType<1, 1>)
 .set_attr<nnvm::FInplaceOption>("FInplaceOption",
   [](const NodeAttrs& attrs){
@@ -114,18 +118,18 @@ NNVM_REGISTER_OP(_image_random_contrast)
 .set_attr<FCompute>("FCompute<cpu>", RandomContrast)
 .set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseNone{ "_copy" })
 .add_argument("data", "NDArray-or-Symbol", "The input.")
-.add_arguments(RandomContrastParam::__FIELDS__());
+.add_arguments(RandomEnhanceParam::__FIELDS__());
+
 
-DMLC_REGISTER_PARAMETER(RandomSaturationParam);
 NNVM_REGISTER_OP(_image_random_saturation)
 .describe(R"code()code" ADD_FILELINE)
 .set_num_inputs(1)
 .set_num_outputs(1)
-.set_attr_parser(ParamParser<RandomSaturationParam>)
+.set_attr_parser(ParamParser<RandomEnhanceParam>)
 .set_attr<FResourceRequest>("FResourceRequest", [](const NodeAttrs& attrs) {
   return std::vector<ResourceRequest>{ResourceRequest::kRandom};
 })
-.set_attr<nnvm::FInferShape>("FInferShape", ElemwiseShape<1, 1>)
+.set_attr<nnvm::FInferShape>("FInferShape", ImageShape)
 .set_attr<nnvm::FInferType>("FInferType", ElemwiseType<1, 1>)
 .set_attr<nnvm::FInplaceOption>("FInplaceOption",
   [](const NodeAttrs& attrs){
@@ -134,31 +138,44 @@ NNVM_REGISTER_OP(_image_random_saturation)
 .set_attr<FCompute>("FCompute<cpu>", RandomSaturation)
 .set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseNone{ "_copy" })
 .add_argument("data", "NDArray-or-Symbol", "The input.")
-.add_arguments(RandomSaturationParam::__FIELDS__());
+.add_arguments(RandomEnhanceParam::__FIELDS__());
 
-DMLC_REGISTER_PARAMETER(RandomHueParam);
 NNVM_REGISTER_OP(_image_random_hue)
 .describe(R"code()code" ADD_FILELINE)
 .set_num_inputs(1)
 .set_num_outputs(1)
-.set_attr_parser(ParamParser<RandomHueParam>)
+.set_attr_parser(ParamParser<RandomEnhanceParam>)
 .set_attr<FResourceRequest>("FResourceRequest", [](const NodeAttrs& attrs) {
   return std::vector<ResourceRequest>{ResourceRequest::kRandom};
 })
-.set_attr<nnvm::FInferShape>("FInferShape", ElemwiseShape<1, 1>)
+.set_attr<nnvm::FInferShape>("FInferShape", ImageShape)
 .set_attr<nnvm::FInferType>("FInferType", ElemwiseType<1, 1>)
-.set_attr<FCompute>("FCompute<cpu>", RandomHue<cpu>)
+.set_attr<FCompute>("FCompute<cpu>", RandomHue)
 .set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseNone{ "_copy" })
 .add_argument("data", "NDArray-or-Symbol", "The input.")
-.add_arguments(RandomHueParam::__FIELDS__());
+.add_arguments(RandomEnhanceParam::__FIELDS__());
+
+NNVM_REGISTER_OP(_image_random_color_jitter)
+.describe(R"code()code" ADD_FILELINE)
+.set_num_inputs(1)
+.set_num_outputs(1)
+.set_attr_parser(ParamParser<RandomColorJitterParam>)
+.set_attr<FResourceRequest>("FResourceRequest", [](const NodeAttrs& attrs) {
+  return std::vector<ResourceRequest>{ResourceRequest::kRandom};
+})
+.set_attr<nnvm::FInferShape>("FInferShape", ImageShape)
+.set_attr<nnvm::FInferType>("FInferType", ElemwiseType<1, 1>)
+.set_attr<FCompute>("FCompute<cpu>", RandomColorJitter)
+.set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseNone{ "_copy" })
+.add_argument("data", "NDArray-or-Symbol", "The input.")
+.add_arguments(RandomColorJitterParam::__FIELDS__());
 
-DMLC_REGISTER_PARAMETER(AdjustLightingParam);
 NNVM_REGISTER_OP(_image_adjust_lighting)
 .describe(R"code(Adjust the lighting level of the input. Follow the AlexNet style.)code" ADD_FILELINE)
 .set_num_inputs(1)
 .set_num_outputs(1)
 .set_attr_parser(ParamParser<AdjustLightingParam>)
-.set_attr<nnvm::FInferShape>("FInferShape", ElemwiseShape<1, 1>)
+.set_attr<nnvm::FInferShape>("FInferShape", ImageShape)
 .set_attr<nnvm::FInferType>("FInferType", ElemwiseType<1, 1>)
 .set_attr<nnvm::FInplaceOption>("FInplaceOption",
   [](const NodeAttrs& attrs){
@@ -169,7 +186,7 @@ NNVM_REGISTER_OP(_image_adjust_lighting)
 .add_argument("data", "NDArray-or-Symbol", "The input.")
 .add_arguments(AdjustLightingParam::__FIELDS__());
 
-DMLC_REGISTER_PARAMETER(RandomLightingParam);
+
 NNVM_REGISTER_OP(_image_random_lighting)
 .describe(R"code(Randomly add PCA noise. Follow the AlexNet style.)code" ADD_FILELINE)
 .set_num_inputs(1)
@@ -178,7 +195,7 @@ NNVM_REGISTER_OP(_image_random_lighting)
 .set_attr<FResourceRequest>("FResourceRequest", [](const NodeAttrs& attrs) {
   return std::vector<ResourceRequest>{ResourceRequest::kRandom};
 })
-.set_attr<nnvm::FInferShape>("FInferShape", ElemwiseShape<1, 1>)
+.set_attr<nnvm::FInferShape>("FInferShape", ImageShape)
 .set_attr<nnvm::FInferType>("FInferType", ElemwiseType<1, 1>)
 .set_attr<nnvm::FInplaceOption>("FInplaceOption",
   [](const NodeAttrs& attrs){
@@ -189,5 +206,6 @@ NNVM_REGISTER_OP(_image_random_lighting)
 .add_argument("data", "NDArray-or-Symbol", "The input.")
 .add_arguments(RandomLightingParam::__FIELDS__());
 
+}  // namespace image
 }  // namespace op
 }  // namespace mxnet
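
Once registered under the _image_ prefix, these operators surface in
the Python frontend under the image namespace. A short usage sketch,
assuming the frontend exposes the parameters under the names declared
above (min_factor and max_factor) and an HWC uint8 input:

    import mxnet as mx

    img = mx.nd.random.uniform(0, 255, shape=(300, 300, 3))
    img = img.astype('uint8')

    # draws a factor uniformly from [min_factor, max_factor)
    bright = mx.nd.image.random_brightness(img, min_factor=0.5,
                                           max_factor=1.5)
    flipped = mx.nd.image.random_horizontal_flip(img)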

-- 
To stop receiving notification emails like this one, please contact
jxie@apache.org.

[incubator-mxnet] 06/20: Add Gluon data transform (#8672)

Posted by jx...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git

commit 7999c43eb5046e1f741806581a830de7ac8ae87d
Author: Eric Junyuan Xie <pi...@users.noreply.github.com>
AuthorDate: Wed Nov 15 15:16:51 2017 -0800

    Add Gluon data transform (#8672)
    
    * fix
    
    * fix
    
    * fix
---
 python/mxnet/gluon/data/dataset.py                 |  43 +++++-
 python/mxnet/gluon/data/vision/__init__.py         |  22 +++
 .../gluon/data/{vision.py => vision/datasets.py}   |   0
 python/mxnet/gluon/data/vision/transforms.py       | 153 +++++++++++++++++++++
 src/operator/image/image_aug_op.h                  |  70 ++++++++++
 src/operator/image/image_random.cc                 |   4 +-
 6 files changed, 289 insertions(+), 3 deletions(-)

diff --git a/python/mxnet/gluon/data/dataset.py b/python/mxnet/gluon/data/dataset.py
index 2c46f1e..35d4c5c 100644
--- a/python/mxnet/gluon/data/dataset.py
+++ b/python/mxnet/gluon/data/dataset.py
@@ -18,12 +18,14 @@
 # coding: utf-8
 # pylint: disable=
 """Dataset container."""
-__all__ = ['Dataset', 'ArrayDataset', 'RecordFileDataset']
+__all__ = ['Dataset', 'SimpleDataset', 'ArrayDataset', 'LabeledDataset',
+           'RecordFileDataset']
 
 import os
 
 from ... import recordio, ndarray
 
+
 class Dataset(object):
     """Abstract dataset class. All datasets should have this interface.
 
@@ -38,6 +40,45 @@ class Dataset(object):
     def __len__(self):
         raise NotImplementedError
 
+    def transform(self, fn, lazy=True):
+        trans = _LazyTransformDataset(self, fn)
+        if lazy:
+            return trans
+        return SimpleDataset([i for i in trans])
+
+    def transform_first(self, fn, lazy=True):
+        def base_fn(x, *args):
+            if args:
+                return (fn(x),) + args
+            return fn(x)
+        return self.transform(base_fn, lazy)
+
+
+class SimpleDataset(Dataset):
+    def __init__(self, data):
+        self._data = data
+
+    def __len__(self):
+        return len(self._data)
+
+    def __getitem__(self, idx):
+        return self._data[idx]
+
+
+class _LazyTransformDataset(Dataset):
+    def __init__(self, data, fn):
+        self._data = data
+        self._fn = fn
+
+    def __len__(self):
+        return len(self._data)
+
+    def __getitem__(self, idx):
+        item = self._data[idx]
+        if isinstance(item, tuple):
+            return self._fn(*item)
+        return self._fn(item)
+
 
 class ArrayDataset(Dataset):
     """A dataset of multiple arrays.
diff --git a/python/mxnet/gluon/data/vision/__init__.py b/python/mxnet/gluon/data/vision/__init__.py
new file mode 100644
index 0000000..8837984
--- /dev/null
+++ b/python/mxnet/gluon/data/vision/__init__.py
@@ -0,0 +1,22 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# coding: utf-8
+
+from .datasets import *
+
+from . import transforms
diff --git a/python/mxnet/gluon/data/vision.py b/python/mxnet/gluon/data/vision/datasets.py
similarity index 100%
rename from python/mxnet/gluon/data/vision.py
rename to python/mxnet/gluon/data/vision/datasets.py
diff --git a/python/mxnet/gluon/data/vision/transforms.py b/python/mxnet/gluon/data/vision/transforms.py
new file mode 100644
index 0000000..fa7c0f2
--- /dev/null
+++ b/python/mxnet/gluon/data/vision/transforms.py
@@ -0,0 +1,153 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# coding: utf-8
+
+from .. import dataset
+from ...block import Block, HybridBlock
+from ...nn import Sequential, HybridSequential
+from .... import ndarray, initializer
+
+
+class Compose(Sequential):
+    def __init__(self, transforms):
+        super(Compose, self).__init__()
+        transforms = list(transforms) + [None]
+        hybrid = []
+        for i in transforms:
+            if isinstance(i, HybridBlock):
+                hybrid.append(i)
+                continue
+            elif len(hybrid) == 1:
+                self.register_child(hybrid[0])
+            elif len(hybrid) > 1:
+                hblock = HybridSequential()
+                for j in hybrid:
+                    hblock.add(j)
+                self.register_child(hblock)
+            hybrid = []  # reset the run once it has been flushed
+            if i is not None:
+                self.register_child(i)
+        self.hybridize()
+
+
+class Cast(HybridBlock):
+    def __init__(self, dtype='float32'):
+        super(Cast, self).__init__()
+        self._dtype = dtype
+
+    def hybrid_forward(self, F, x):
+        return F.cast(x, self._dtype)
+
+
+class ToTensor(HybridBlock):
+    def __init__(self):
+        super(ToTensor, self).__init__()
+
+    def hybrid_forward(self, F, x):
+        return F.cast(x, 'float32').transpose((2, 0, 1))
+
+
+class Normalize(HybridBlock):
+    def __init__(self, mean, std):
+        super(Normalize, self).__init__()
+        self._mean = mean
+        self._std = std
+
+    def hybrid_forward(self, F, x):
+        return F.image.normalize(x, self._mean, self._std)
+
+
+class RandomResizedCrop(HybridBlock):
+    def __init__(self, size, area=(0.08, 1.0), ratio=(3.0/4.0, 4.0/3.0),
+                 interpolation=2):
+        super(RandomResizedCrop, self).__init__()
+        self._args = (size, area, ratio, interpolation)
+
+    def hybrid_forward(self, F, x):
+        return F.image.random_resized_crop(x, *self._args)
+
+
+class CenterCrop(HybridBlock):
+    def __init__(self, size):
+        super(CenterCrop, self).__init__()
+        self._size = size
+
+    def hybrid_forward(self, F, x):
+        return F.image.center_crop(x, self._size)
+
+
+class Resize(HybridBlock):
+    def __init__(self, size, interpolation=2):
+        super(Resize, self).__init__()
+        self._args = (size, interpolation)
+
+    def hybrid_forward(self, F, x):
+        return F.image.resize(x, *self._args)
+
+
+class RandomFlip(HybridBlock):
+    def __init__(self, axis=1):
+        super(RandomFlip, self).__init__()
+        self._axis = axis
+
+    def hybrid_forward(self, F, x):
+        return F.image.random_flip(x, self._axis)
+
+
+class RandomBrightness(HybridBlock):
+    def __init__(self, max_brightness):
+        super(RandomBrightness, self).__init__()
+        self._max_brightness = max_brightness
+
+    def hybrid_forward(self, F, x):
+        return F.image.random_brightness(x, self._max_brightness)
+
+
+class RandomContrast(HybridBlock):
+    def __init__(self, max_contrast):
+        super(RandomContrast, self).__init__()
+        self._max_contrast = max_contrast
+
+    def hybrid_forward(self, F, x):
+        return F.image.random_contrast(x, self._max_contrast)
+
+
+class RandomSaturation(HybridBlock):
+    def __init__(self, max_saturation):
+        super(RandomSaturation, self).__init__()
+        self._max_saturation = max_saturation
+
+    def hybrid_forward(self, F, x):
+        return F.image.random_saturation(x, self._max_saturation)
+
+
+class RandomHue(HybridBlock):
+    def __init__(self, max_hue):
+        super(RandomHue, self).__init__()
+        self._max_hue = max_hue
+
+    def hybrid_forward(self, F, x):
+        return F.image.random_hue(x, self._max_hue)
+
+
+class RandomColorJitter(HybridBlock):
+    def __init__(self, max_brightness=0, max_contrast=0, max_saturation=0, max_hue=0):
+        super(RandomColorJitter, self).__init__()
+        self._args = (max_brightness, max_contrast, max_saturation, max_hue)
+
+    def hybrid_forward(self, F, x):
+        return F.image.random_color_jitter(x, *self._args)
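
Compose groups consecutive HybridBlock transforms into a single
HybridSequential so that runs of hybridizable stages are compiled
together; any non-hybrid stage simply breaks the run. A minimal usage
sketch, assuming the image operators referenced by these blocks are
available in the backend and the input is an HWC uint8 image:

    import mxnet as mx
    from mxnet.gluon.data.vision import transforms

    augment = transforms.Compose([
        transforms.RandomBrightness(0.1),   # hybrid, grouped
        transforms.RandomContrast(0.1),     # hybrid, grouped
        transforms.ToTensor(),              # HWC uint8 -> CHW float32
        transforms.Normalize([0.5, 0.5, 0.5], [0.2, 0.2, 0.2]),
    ])

    img = mx.nd.random.uniform(0, 255, shape=(32, 32, 3))
    out = augment(img.astype('uint8'))      # (3, 32, 32) float32
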
diff --git a/src/operator/image/image_aug_op.h b/src/operator/image/image_aug_op.h
new file mode 100644
index 0000000..40315ec
--- /dev/null
+++ b/src/operator/image/image_aug_op.h
@@ -0,0 +1,70 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef MXNET_OPERATOR_IMAGE_IMAGE_AUG_OP_H_
+#define MXNET_OPERATOR_IMAGE_IMAGE_AUG_OP_H_
+
+#include <mxnet/operator_util.h>
+#include <vector>
+#include <utility>
+#include <algorithm>
+#include "../mshadow_op.h"
+#include "../elemwise_op_common.h"
+#include "../mxnet_op.h"
+
+namespace mxnet {
+namespace op {
+
+struct NormalizeParam : public dmlc::Parameter<NormalizeParam> {
+  nnvm::Tuple<float> mean, std;
+  DMLC_DECLARE_PARAMETER(NormalizeParam) {
+    DMLC_DECLARE_FIELD(mean).set_default(nnvm::Tuple<float>({0.f}))
+      .describe("Sequence of means for each channel.");
+    DMLC_DECLARE_FIELD(std).set_default(nnvm::Tuple<float>({1.f}))
+      .describe("Sequence of standard deviations for each channel.");
+  }
+};
+
+
+void NormalizeCompute(const nnvm::NodeAttrs& attrs,
+                      const OpContext& ctx,
+                      const std::vector<NDArray>& inputs,
+                      const std::vector<OpReqType>& req,
+                      const std::vector<NDArray>& outputs) {
+  using namespace mxnet_op;
+  const auto& params = dmlc::get<NormalizeParam>(attrs.parsed);
+  CHECK_NE(req[0], kAddTo);
+  MSHADOW_TYPE_SWITCH(inputs[0].type_flag_, DType, {
+    auto num_channel = inputs[0].shape_[0];
+    auto size = inputs[0].Size(1, inputs[0].ndim());
+    nnvm::Tuple<DType> mean(params.mean.begin(), params.mean.end());
+    nnvm::Tuple<DType> std(params.std.begin(), params.std.end());
+    DType* src = inputs[0].dptr<DType>();
+    DType* dst = outputs[0].dptr<DType>();
+    for (int i = 0; i < num_channel; ++i) {
+      for (int j = 0; j < size; ++j, ++dst, ++src) {
+        *dst = (*src - mean[i]) / std[i];
+      }
+    }
+  });
+}
+
+}  // namespace op
+}  // namespace mxnet
+#endif  // MXNET_OPERATOR_IMAGE_IMAGE_AUG_OP_H_
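
NormalizeCompute walks the CHW tensor channel by channel and applies
(x - mean[c]) / std[c] to each of the size elements in that channel.
The same computation, sketched with NumPy broadcasting over a CHW
array:

    import numpy as np

    def normalize(chw, mean, std):
        # reshape (C,) stats to (C, 1, 1) so they broadcast over H, W
        mean = np.asarray(mean, dtype=chw.dtype).reshape(-1, 1, 1)
        std = np.asarray(std, dtype=chw.dtype).reshape(-1, 1, 1)
        return (chw - mean) / std

    x = np.ones((3, 2, 2), dtype=np.float32)
    print(normalize(x, [0.5] * 3, [0.25] * 3))   # every entry is 2.0
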
diff --git a/src/operator/image/image_random.cc b/src/operator/image/image_random.cc
index 83abc17..63f7904 100644
--- a/src/operator/image/image_random.cc
+++ b/src/operator/image/image_random.cc
@@ -25,8 +25,8 @@
 
 #include <mxnet/base.h>
 #include "./image_random-inl.h"
-#include "../../operator/operator_common.h"
-#include "../../operator/elemwise_op_common.h"
+#include "../operator_common.h"
+#include "../elemwise_op_common.h"
 
 
 

-- 
To stop receiving notification emails like this one, please contact
jxie@apache.org.

[incubator-mxnet] 20/20: lint

Posted by jx...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git

commit ab0d1d5ea1bf00a27425ceb43f08a9820f4d6023
Author: Junyuan Xie <er...@gmail.com>
AuthorDate: Thu Jan 18 17:00:39 2018 -0800

    lint
---
 docker/install/python.sh                        |  4 ++--
 python/mxnet/gluon/data/vision/__init__.py      |  2 ++
 python/mxnet/gluon/data/vision/transforms.py    |  7 +++---
 python/mxnet/ndarray/__init__.py                |  3 ++-
 src/operator/image/image_random-inl.h           |  2 +-
 tests/python/unittest/test_gluon_data.py        | 21 -----------------
 tests/python/unittest/test_gluon_data_vision.py | 31 +++++++++++++++++++++----
 7 files changed, 37 insertions(+), 33 deletions(-)

diff --git a/docker/install/python.sh b/docker/install/python.sh
index 763f27b..ba71246 100755
--- a/docker/install/python.sh
+++ b/docker/install/python.sh
@@ -24,5 +24,5 @@ apt-get update && apt-get install -y python-dev python3-dev
 # the version of pip shipped with ubuntu may be too old, so install a recent version here
 cd /tmp && wget https://bootstrap.pypa.io/get-pip.py && python3 get-pip.py && python2 get-pip.py
 
-pip2 install nose pylint numpy nose-timer requests
-pip3 install nose pylint numpy nose-timer requests
+pip2 install nose pylint numpy nose-timer requests Pillow
+pip3 install nose pylint numpy nose-timer requests Pillow
diff --git a/python/mxnet/gluon/data/vision/__init__.py b/python/mxnet/gluon/data/vision/__init__.py
index 8837984..1c2b7ba 100644
--- a/python/mxnet/gluon/data/vision/__init__.py
+++ b/python/mxnet/gluon/data/vision/__init__.py
@@ -16,6 +16,8 @@
 # under the License.
 
 # coding: utf-8
+# pylint: disable=wildcard-import
+"""Vision utilities."""
 
 from .datasets import *
 
diff --git a/python/mxnet/gluon/data/vision/transforms.py b/python/mxnet/gluon/data/vision/transforms.py
index 38eb690..8a87f37 100644
--- a/python/mxnet/gluon/data/vision/transforms.py
+++ b/python/mxnet/gluon/data/vision/transforms.py
@@ -16,12 +16,13 @@
 # under the License.
 
 # coding: utf-8
+# pylint: disable= arguments-differ
+"Image transforms."
 
-from .. import dataset
 from ...block import Block, HybridBlock
 from ...nn import Sequential, HybridSequential
-from .... import ndarray, initializer, image
-from ....base import _Null, numeric_types
+from .... import image
+from ....base import numeric_types
 
 
 class Compose(Sequential):
diff --git a/python/mxnet/ndarray/__init__.py b/python/mxnet/ndarray/__init__.py
index 86a3a20..fc4a55d 100644
--- a/python/mxnet/ndarray/__init__.py
+++ b/python/mxnet/ndarray/__init__.py
@@ -31,4 +31,5 @@ from .utils import load, save, zeros, empty, array
 from .sparse import _ndarray_cls
 from .ndarray import _GRAD_REQ_MAP
 
-__all__ = op.__all__ + ndarray.__all__ + utils.__all__ + ['contrib', 'linalg', 'random', 'sparse', 'image']
+__all__ = op.__all__ + ndarray.__all__ + utils.__all__ + \
+          ['contrib', 'linalg', 'random', 'sparse', 'image']
diff --git a/src/operator/image/image_random-inl.h b/src/operator/image/image_random-inl.h
index ec96149..47beca1 100644
--- a/src/operator/image/image_random-inl.h
+++ b/src/operator/image/image_random-inl.h
@@ -175,7 +175,7 @@ template<typename DType, int axis>
 void FlipImpl(const TShape &shape, DType *src, DType *dst) {
   int head = 1, mid = shape[axis], tail = 1;
   for (int i = 0; i < axis; ++i) head *= shape[i];
-  for (int i = axis+1; i < shape.ndim(); ++i) tail *= shape[i];
+  for (uint32_t i = axis+1; i < shape.ndim(); ++i) tail *= shape[i];
 
   for (int i = 0; i < head; ++i) {
     for (int j = 0; j < (mid >> 1); ++j) {
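
FlipImpl views the tensor as a (head, mid, tail) triple: dimensions
before the flip axis flatten into head, the axis itself is mid, and
the remaining dimensions collapse into tail, so flipping reduces to
swapping mid-slices of length tail. A NumPy sketch of the same index
arithmetic (a hypothetical stand-in for the C++ loop):

    import numpy as np

    def flip_axis(arr, axis):
        shape = arr.shape
        head = int(np.prod(shape[:axis], dtype=np.int64))
        mid = shape[axis]
        tail = int(np.prod(shape[axis + 1:], dtype=np.int64))
        flat = arr.reshape(head, mid, tail).copy()
        for j in range(mid // 2):       # mirror pairs of mid-slices
            flat[:, [j, mid - 1 - j], :] = flat[:, [mid - 1 - j, j], :]
        return flat.reshape(shape)

    img = np.arange(24).reshape(2, 3, 4)
    assert (flip_axis(img, 1) == img[:, ::-1, :]).all()
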
diff --git a/tests/python/unittest/test_gluon_data.py b/tests/python/unittest/test_gluon_data.py
index c72ef7c..63c5d28 100644
--- a/tests/python/unittest/test_gluon_data.py
+++ b/tests/python/unittest/test_gluon_data.py
@@ -107,27 +107,6 @@ def test_multi_worker():
         assert (batch.asnumpy() == i).all()
 
 
-def test_transformer():
-    from mxnet.gluon.data.vision import transforms
-
-    transform = transforms.Compose([
-		transforms.Resize(300),
-		transforms.CenterCrop(256),
-		transforms.RandomResizedCrop(224),
-		transforms.RandomHorizontalFlip(),
-		transforms.RandomColorJitter(0.1, 0.1, 0.1, 0.1),
-		transforms.RandomBrightness(0.1),
-		transforms.RandomContrast(0.1),
-		transforms.RandomSaturation(0.1),
-		transforms.RandomHue(0.1),
-		transforms.RandomLighting(0.1),
-		transforms.ToTensor(),
-		transforms.Normalize([0, 0, 0], [1, 1, 1])])
-
-    transform(mx.nd.ones((245, 480, 3), dtype='uint8')).wait_to_read()
-
-
 if __name__ == '__main__':
-    test_transformer()
     import nose
     nose.runmodule()
diff --git a/tests/python/unittest/test_gluon_data_vision.py b/tests/python/unittest/test_gluon_data_vision.py
index 5e9ff87..52e9d4a 100644
--- a/tests/python/unittest/test_gluon_data_vision.py
+++ b/tests/python/unittest/test_gluon_data_vision.py
@@ -18,7 +18,6 @@ from __future__ import print_function
 import mxnet as mx
 import mxnet.ndarray as nd
 import numpy as np
-from PIL import Image
 from mxnet import gluon
 from mxnet.gluon.data.vision import transforms
 from mxnet.test_utils import assert_almost_equal
@@ -42,15 +41,37 @@ def test_normalize():
 
 def test_flip_left_right():
     data_in = np.random.uniform(0, 255, (300, 300, 3)).astype(dtype=np.uint8)
-    pil_img = Image.fromarray(data_in).transpose(Image.FLIP_LEFT_RIGHT)
+    flip_in = data_in[:, ::-1, :]
     data_trans = nd.image.flip_left_right(nd.array(data_in, dtype='uint8'))
-    assert_almost_equal(np.array(pil_img), data_trans.asnumpy())
+    assert_almost_equal(flip_in, data_trans.asnumpy())
 
 def test_flip_top_bottom():
     data_in = np.random.uniform(0, 255, (300, 300, 3)).astype(dtype=np.uint8)
-    pil_img = Image.fromarray(data_in).transpose(Image.FLIP_TOP_BOTTOM)
+    flip_in = data_in[::-1, :, :]
     data_trans = nd.image.flip_top_bottom(nd.array(data_in, dtype='uint8'))
-    assert_almost_equal(np.array(pil_img), data_trans.asnumpy())
+    assert_almost_equal(flip_in, data_trans.asnumpy())
+
+
+def test_transformer():
+    from mxnet.gluon.data.vision import transforms
+
+    transform = transforms.Compose([
+		transforms.Resize(300),
+		transforms.CenterCrop(256),
+		transforms.RandomResizedCrop(224),
+		transforms.RandomFlipLeftRight(),
+		transforms.RandomColorJitter(0.1, 0.1, 0.1, 0.1),
+		transforms.RandomBrightness(0.1),
+		transforms.RandomContrast(0.1),
+		transforms.RandomSaturation(0.1),
+		transforms.RandomHue(0.1),
+		transforms.RandomLighting(0.1),
+		transforms.ToTensor(),
+		transforms.Normalize([0, 0, 0], [1, 1, 1])])
+
+    transform(mx.nd.ones((245, 480, 3), dtype='uint8')).wait_to_read()
+
+
 
 if __name__ == '__main__':
     import nose

-- 
To stop receiving notification emails like this one, please contact
jxie@apache.org.

[incubator-mxnet] 08/20: [Image OP] Normalize (#8731)

Posted by jx...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git

commit 6b06bf28d446253fe408d57a5838aa25e9fc0eb8
Author: Yizhi Liu <ja...@gmail.com>
AuthorDate: Mon Nov 20 11:57:22 2017 -0800

    [Image OP] Normalize (#8731)
    
    * image normalize op
    
    * image normalize op param check
---
 src/operator/image/image_random-inl.h | 93 +++++++++++++++++++++++++++++++++++
 src/operator/image/image_random.cc    | 34 ++++++++++++-
 2 files changed, 125 insertions(+), 2 deletions(-)

diff --git a/src/operator/image/image_random-inl.h b/src/operator/image/image_random-inl.h
index c50ecb7..6f9cdc0 100644
--- a/src/operator/image/image_random-inl.h
+++ b/src/operator/image/image_random-inl.h
@@ -106,12 +106,105 @@ static void ToTensor(const nnvm::NodeAttrs &attrs,
   });
 }
 
+struct NormalizeParam : public dmlc::Parameter<NormalizeParam> {
+  nnvm::Tuple<float> mean;
+  nnvm::Tuple<float> std;
+  DMLC_DECLARE_PARAMETER(NormalizeParam) {
+    DMLC_DECLARE_FIELD(mean)
+    .describe("Sequence of mean for each channel.");
+    DMLC_DECLARE_FIELD(std)
+    .describe("Sequence of standard deviations for each channel.");
+  }
+};
+
+struct normalize {
+  template<typename DType>
+  MSHADOW_XINLINE static void Map(int i, DType *out, const DType *in,
+                                  const OpReqType req,
+                                  const int nchannel, const int size,
+                                  const float *mean, const float *std) {
+    int c = 0;
+    switch (nchannel) {
+      case 1:
+        break;
+      case 3:
+        if (i < size) {
+          c = 0;
+        } else if (i < (size << 1)) {
+          c = 1;
+        } else {
+          c = 2;
+        }
+        break;
+      default:
+        LOG(FATAL) << "Unsupported number of channels: " << nchannel;
+    }
+    float m = (mean ? mean[c] : 0);
+    KERNEL_ASSIGN(out[i], req, static_cast<DType>((in[i] - m) / std[c]));
+  }
+};
+
+static void NormalizeCheckParam(const nnvm::Tuple<float> &mean,
+                                const nnvm::Tuple<float> &std,
+                                const int nchannel) {
+  CHECK(mean.ndim() == 1 || mean.ndim() == 3)
+    << "Mean must be in dimension 1 or 3.";
+  CHECK(std.ndim() == 1 || std.ndim() == 3)
+    << "Standard deviations must be in dimension 1 or 3.";
+  CHECK(nchannel == 1 || nchannel == 3) << "Image channel must be 1 or 3.";
+  CHECK_EQ(mean.ndim(), nchannel)
+    << "Mean dimension does not agree with image channel.";
+  CHECK_EQ(std.ndim(), nchannel)
+    << "Standard deviations dimension does not agree with image channel.";
+  for (uint32_t c = 0; c < std.ndim(); ++c) {
+    CHECK(std[c] > 0) << "Invalid standard deviation " << std[c];
+  }
+}
+
 template<typename xpu>
 static void Normalize(const nnvm::NodeAttrs &attrs,
                       const OpContext &ctx,
                       const std::vector<TBlob> &inputs,
                       const std::vector<OpReqType> &req,
                       const std::vector<TBlob> &outputs) {
+  const NormalizeParam &param = nnvm::get<NormalizeParam>(attrs.parsed);
+  auto mean = param.mean;
+  auto std = param.std;
+
+  int nchannel = inputs[0].shape_[0];
+  NormalizeCheckParam(mean, std, nchannel);
+
+  int size = inputs[0].Size() / nchannel;
+  mshadow::Stream<xpu> *s = ctx.get_stream<xpu>();
+  MXNET_ASSIGN_REQ_SWITCH(req[0], Req, {
+    MSHADOW_TYPE_SWITCH(outputs[0].type_flag_, DType, {
+      mxnet_op::Kernel<normalize, xpu>::Launch(
+        s, inputs[0].Size(), outputs[0].dptr<DType>(), inputs[0].dptr<DType>(),
+        Req, nchannel, size, mean.begin(), std.begin());
+    });
+  });
+}
+
+template<typename xpu>
+static void NormalizeBackward(const nnvm::NodeAttrs &attrs,
+                              const OpContext &ctx,
+                              const std::vector<TBlob> &inputs,
+                              const std::vector<OpReqType> &req,
+                              const std::vector<TBlob> &outputs) {
+  const NormalizeParam &param = nnvm::get<NormalizeParam>(attrs.parsed);
+  int nchannel = inputs[0].shape_[0];
+
+  NormalizeCheckParam(param.mean, param.std, nchannel);
+
+  int size = inputs[0].Size() / nchannel;
+  mshadow::Stream<xpu> *s = ctx.get_stream<xpu>();
+  MXNET_ASSIGN_REQ_SWITCH(req[0], Req, {
+    MSHADOW_TYPE_SWITCH(outputs[0].type_flag_, DType, {
+      mxnet_op::Kernel<normalize, xpu>::Launch(
+        s, inputs[0].Size(), outputs[0].dptr<DType>(), inputs[0].dptr<DType>(),
+        Req, nchannel, size, nullptr, param.std.begin());
+      });
+  });
 }
 
 struct RandomBrightnessParam : public dmlc::Parameter<RandomBrightnessParam> {
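
The normalize kernel maps a flat index i to its channel by comparing
against size, the per-channel element count, and NormalizeBackward
reuses the same kernel with a null mean: since the derivative of
(x - m) / s with respect to x is 1 / s, the input gradient is just the
output gradient divided by the per-channel std. A quick sketch of both
pieces:

    import numpy as np

    def channel_of(i, size):
        # flat CHW index -> channel, same threshold test as the kernel
        if i < size:
            return 0
        if i < 2 * size:
            return 1
        return 2

    std = np.array([0.2, 0.25, 0.5], dtype=np.float32)
    grad_out = np.ones((3, 4, 4), dtype=np.float32)
    grad_in = grad_out / std.reshape(-1, 1, 1)   # backward: grad / std
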
diff --git a/src/operator/image/image_random.cc b/src/operator/image/image_random.cc
index 26fa843..e32a677 100644
--- a/src/operator/image/image_random.cc
+++ b/src/operator/image/image_random.cc
@@ -28,8 +28,6 @@
 #include "../operator_common.h"
 #include "../elemwise_op_common.h"
 
-
-
 namespace mxnet {
 namespace op {
 
@@ -46,6 +44,38 @@ NNVM_REGISTER_OP(_image_to_tensor)
 .set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseNone{ "_copy" })
 .add_argument("data", "NDArray-or-Symbol", "The input.");
 
+DMLC_REGISTER_PARAMETER(NormalizeParam);
+NNVM_REGISTER_OP(_image_normalize)
+.describe(R"code()code" ADD_FILELINE)
+.set_num_inputs(1)
+.set_num_outputs(1)
+.set_attr_parser(ParamParser<NormalizeParam>)
+.set_attr<FResourceRequest>("FResourceRequest", [](const NodeAttrs& attrs) {
+  return std::vector<ResourceRequest>{ResourceRequest::kRandom};
+})
+.set_attr<nnvm::FInferShape>("FInferShape", ElemwiseShape<1, 1>)
+.set_attr<nnvm::FInferType>("FInferType", ElemwiseType<1, 1>)
+.set_attr<nnvm::FInplaceOption>("FInplaceOption",
+[](const NodeAttrs& attrs){
+  return std::vector<std::pair<int, int> >{{0, 0}};
+})
+.set_attr<FCompute>("FCompute<cpu>", Normalize<cpu>)
+.set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseNone{ "_image_backward_normalize" })
+.add_argument("data", "NDArray-or-Symbol", "The input.")
+.add_arguments(NormalizeParam::__FIELDS__());
+
+NNVM_REGISTER_OP(_image_backward_normalize)
+.describe(R"code()code" ADD_FILELINE)
+.set_num_inputs(1)
+.set_num_outputs(1)
+.set_attr_parser(ParamParser<NormalizeParam>)
+.set_attr<nnvm::TIsBackward>("TIsBackward", true)
+.set_attr<nnvm::FInplaceOption>("FInplaceOption",
+[](const NodeAttrs& attrs){
+  return std::vector<std::pair<int, int> >{{0, 0}};
+})
+.set_attr<FCompute>("FCompute<cpu>", NormalizeBackward<cpu>);
+
 DMLC_REGISTER_PARAMETER(RandomBrightnessParam);
 NNVM_REGISTER_OP(_image_random_brightness)
 .describe(R"code()code" ADD_FILELINE)

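For reference, the arithmetic the `normalize` kernel is expected to carry out on a CHW image, together with the matching backward pass, can be sketched in plain C++ as below. This is a minimal scalar sketch assuming CHW layout and per-channel mean/std; `NormalizeSketch` is an illustrative name, not the actual kernel.

    // Sketch: out[c][i] = (in[c][i] - mean[c]) / std[c], with size = H * W
    // pixels per channel. The backward pass only rescales the gradient,
    // grad_in = grad_out / std[c], which is why the registration above can
    // reuse the same kernel with a null mean pointer.
    void NormalizeSketch(const float* in, float* out,
                         const float* mean, const float* std_dev,
                         int nchannel, int size) {
      for (int c = 0; c < nchannel; ++c) {
        const float shift = (mean != nullptr) ? mean[c] : 0.0f;
        const float scale = 1.0f / std_dev[c];
        for (int i = 0; i < size; ++i) {
          out[c * size + i] = (in[c * size + i] - shift) * scale;
        }
      }
    }
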
-- 
To stop receiving notification emails like this one, please contact
jxie@apache.org.

[incubator-mxnet] 14/20: image flip op (#8759)

Posted by jx...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git

commit a15a6e7c491912c84b588ae7aa606ae8d4f48df9
Author: Yizhi Liu <ja...@gmail.com>
AuthorDate: Sun Nov 26 17:20:55 2017 -0800

    image flip op (#8759)
    
    * image flip op
    
    * rm image_common.h
    
    * fix
    
    * lint code
    
    * flip optimize
---
 src/operator/image/image_random-inl.h | 66 +++++++++++++++++++++++++++++++++--
 src/operator/image/image_random.cc    | 16 +++++++++
 2 files changed, 79 insertions(+), 3 deletions(-)

diff --git a/src/operator/image/image_random-inl.h b/src/operator/image/image_random-inl.h
index ebbf60a..5c552b2 100644
--- a/src/operator/image/image_random-inl.h
+++ b/src/operator/image/image_random-inl.h
@@ -28,14 +28,20 @@
 #include <mxnet/base.h>
 #include <algorithm>
 #include <vector>
-#include <opencv2/opencv.hpp>
-#include <opencv2/core/mat.hpp>
+#include <algorithm>
+#include <utility>
 #include "../mxnet_op.h"
 #include "../operator_common.h"
 
 namespace mxnet {
 namespace op {
 
+inline void CheckIsImage(const TBlob &image) {
+  CHECK_EQ(image.type_flag_, mshadow::kUint8) << "input type is not an image.";
+  CHECK_EQ(image.ndim(), 3) << "input dimension is not 3.";
+  CHECK(image.shape_[2] == 1 || image.shape_[2] == 3) << "image channel should be 1 or 3.";
+}
+
 static void RandomFlip(const nnvm::NodeAttrs &attrs,
                        const OpContext &ctx,
                        const std::vector<TBlob> &inputs,
@@ -76,6 +82,7 @@ static void ToTensor(const nnvm::NodeAttrs &attrs,
                      const std::vector<TBlob> &outputs) {
   CHECK_EQ(req[0], kWriteTo)
     << "`to_tensor` does not support inplace";
+  CheckIsImage(inputs[0]);
 
   int length = inputs[0].shape_[0] * inputs[0].shape_[1];
   int channel = inputs[0].shape_[2];
@@ -101,7 +108,6 @@ struct NormalizeParam : public dmlc::Parameter<NormalizeParam> {
   }
 };
 
-
 inline bool NormalizeShape(const nnvm::NodeAttrs& attrs,
                           std::vector<TShape> *in_attrs,
                           std::vector<TShape> *out_attrs) {
@@ -145,6 +151,60 @@ static void Normalize(const nnvm::NodeAttrs &attrs,
   });
 }
 
+struct FlipParam : public dmlc::Parameter<FlipParam> {
+  int axis;
+  DMLC_DECLARE_PARAMETER(FlipParam) {
+    DMLC_DECLARE_FIELD(axis)
+    .describe("0 or 1. 0 for horizontal flip, 1 for vertical flip.");
+  }
+};
+
+#define SWAP_IF_INPLACE(dst, dst_idx, src, src_idx) \
+  do {                                              \
+    if ((dst) == (src)) {                           \
+      std::swap((dst)[dst_idx], (src)[src_idx]);    \
+    } else {                                        \
+      (dst)[dst_idx] = (src)[src_idx];              \
+    }                                               \
+  } while (0)
+
+template<typename DType>
+static void FlipImpl(const TShape &shape, DType *src, DType *dst, int axis) {
+  const int height = shape[0];
+  const int width = shape[1];
+  const int nchannel = shape[2];
+
+  const int length = width * nchannel;
+  const int height_stride = (src == dst && axis == 1) ? (height >> 1) : height;
+  const int width_stride = (src == dst && axis == 0) ? (width >> 1) : width;
+
+  for (int h = 0; h < height_stride; ++h) {
+    const int h_dst = (axis == 0) ? h : (height - h - 1);
+    for (int w = 0; w < width_stride; ++w) {
+      const int w_dst = (axis == 0) ? (width - w - 1) : w;
+      const int idx_dst = h_dst * length + w_dst * nchannel;
+      const int idx_src = h * length + w * nchannel;
+      SWAP_IF_INPLACE(dst, idx_dst, src, idx_src);
+      if (nchannel > 1) {
+        SWAP_IF_INPLACE(dst, idx_dst + 1, src, idx_src + 1);
+        SWAP_IF_INPLACE(dst, idx_dst + 2, src, idx_src + 2);
+      }
+    }
+  }
+}
+
+static void Flip(const nnvm::NodeAttrs &attrs,
+                  const OpContext &ctx,
+                  const std::vector<TBlob> &inputs,
+                  const std::vector<OpReqType> &req,
+                  const std::vector<TBlob> &outputs) {
+  const FlipParam &param = nnvm::get<FlipParam>(attrs.parsed);
+  CHECK(param.axis == 0 || param.axis == 1) << "flip axis must be 0 or 1.";
+  CheckIsImage(inputs[0]);
+  const TShape& ishape = inputs[0].shape_;
+  MSHADOW_TYPE_SWITCH(outputs[0].type_flag_, DType, {
+    FlipImpl(ishape, inputs[0].dptr<DType>(), outputs[0].dptr<DType>(), param.axis);
+  });
+}
+
 struct RandomBrightnessParam : public dmlc::Parameter<RandomBrightnessParam> {
   float max_brightness;
   DMLC_DECLARE_PARAMETER(RandomBrightnessParam) {
diff --git a/src/operator/image/image_random.cc b/src/operator/image/image_random.cc
index 5b47f50..4184382 100644
--- a/src/operator/image/image_random.cc
+++ b/src/operator/image/image_random.cc
@@ -59,6 +59,22 @@ NNVM_REGISTER_OP(_image_normalize)
 .add_argument("data", "NDArray-or-Symbol", "The input.")
 .add_arguments(NormalizeParam::__FIELDS__());
 
+DMLC_REGISTER_PARAMETER(FlipParam);
+NNVM_REGISTER_OP(_image_flip)
+.describe(R"code()code" ADD_FILELINE)
+.set_num_inputs(1)
+.set_num_outputs(1)
+.set_attr_parser(ParamParser<FlipParam>)
+.set_attr<nnvm::FInferShape>("FInferShape", ElemwiseShape<1, 1>)
+.set_attr<nnvm::FInferType>("FInferType", ElemwiseType<1, 1>)
+.set_attr<nnvm::FInplaceOption>("FInplaceOption",
+                                [](const NodeAttrs& attrs){
+                                  return std::vector<std::pair<int, int> >{{0, 0}};
+                                })
+.set_attr<FCompute>("FCompute<cpu>", Flip)
+.set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseNone{ "_copy" })
+.add_argument("data", "NDArray-or-Symbol", "The input.")
+.add_arguments(FlipParam::__FIELDS__());
 
 DMLC_REGISTER_PARAMETER(RandomBrightnessParam);
 NNVM_REGISTER_OP(_image_random_brightness)

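To make the indexing above concrete: axis 0 maps column w to column width - 1 - w, and the halved width_stride in the in-place case visits only the left half of each row and swaps. A minimal self-contained sketch of the same horizontal flip for an HWC uint8 image (illustrative function name, in-place only):

    #include <algorithm>
    #include <cstdint>

    // In-place horizontal flip of an HWC image: swap each pixel in the left
    // half of a row with its mirror in the right half. An odd middle column
    // is already in place and is skipped, exactly as FlipImpl does.
    void FlipHorizontalInplace(uint8_t* img, int height, int width, int nchannel) {
      for (int h = 0; h < height; ++h) {
        uint8_t* row = img + h * width * nchannel;
        for (int w = 0; w < width / 2; ++w) {
          for (int c = 0; c < nchannel; ++c) {
            std::swap(row[w * nchannel + c],
                      row[(width - 1 - w) * nchannel + c]);
          }
        }
      }
    }
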
-- 
To stop receiving notification emails like this one, please contact
jxie@apache.org.

[incubator-mxnet] 10/20: add test script

Posted by jx...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git

commit 6cfd5b46b9208d4e47482739732266c598e514df
Author: Xingjian Shi <xs...@ust.hk>
AuthorDate: Tue Nov 21 11:07:21 2017 -0800

    add test script
---
 test_new_image_loader.py | 34 ++++++++++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)

diff --git a/test_new_image_loader.py b/test_new_image_loader.py
new file mode 100644
index 0000000..296869e
--- /dev/null
+++ b/test_new_image_loader.py
@@ -0,0 +1,34 @@
+import os
+os.environ['MXNET_CPU_WORKER_NTHREADS'] = '1'
+os.environ['OMP_NUM_THREADS'] = '1'
+import time
+import numpy as np
+import multiprocessing as mp
+import mxnet as mx
+from mxnet import gluon as gl
+from mxnet.gluon.data.vision import transforms
+
+if __name__ == '__main__':
+    M = 24    # number of DataLoader worker processes
+    BS = 100  # batch size
+
+    dataset = gl.data.vision.ImageFolderDataset('../256_ObjectCategories')
+    transform = transforms.Compose([transforms.ToTensor(),
+                                    transforms.RandomBrightness(1.0),
+                                    transforms.RandomContrast(1.0),
+                                    transforms.RandomSaturation(1.0),
+                                    transforms.Normalize([0, 0, 0], [1, 1, 1])])
+    dataset = dataset.transform_first(lambda x: transform(mx.image.center_crop(x, (224, 224))[0]))
+    data_loader = gl.data.DataLoader(dataset, BS, shuffle=True, num_workers=M)
+
+    N = len(dataset)
+
+    iterator = iter(data_loader)
+
+    tic = time.time()
+
+    for data, label in iterator:
+        data.wait_to_read()
+        print(data.shape)
+
+    print(N / (time.time() - tic))  # throughput in images per second

-- 
To stop receiving notification emails like this one, please contact
jxie@apache.org.

[incubator-mxnet] 05/20: fix image_random compile (#8665)

Posted by jx...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git

commit e81f2dd187373457e8cead488a80756e869cea96
Author: Yizhi Liu <ja...@gmail.com>
AuthorDate: Wed Nov 15 10:40:34 2017 -0800

    fix image_random compile (#8665)
---
 src/operator/image/image_random-inl.h | 2 +-
 src/operator/image/image_random.cc    | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/operator/image/image_random-inl.h b/src/operator/image/image_random-inl.h
index fa1d6dc..8a3acf6 100644
--- a/src/operator/image/image_random-inl.h
+++ b/src/operator/image/image_random-inl.h
@@ -31,7 +31,7 @@
 #include <opencv2/core/mat.hpp>
 #include "../mxnet_op.h"
 #include "image_common.h"
-#include "operator/operator_common.h"
+#include "../../operator/operator_common.h"
 
 
 namespace mxnet {
diff --git a/src/operator/image/image_random.cc b/src/operator/image/image_random.cc
index 32648bb..83abc17 100644
--- a/src/operator/image/image_random.cc
+++ b/src/operator/image/image_random.cc
@@ -25,8 +25,8 @@
 
 #include <mxnet/base.h>
 #include "./image_random-inl.h"
-#include "operator/operator_common.h"
-#include "operator/elemwise_op_common.h"
+#include "../../operator/operator_common.h"
+#include "../../operator/elemwise_op_common.h"
 
 
 

-- 
To stop receiving notification emails like this one, please contact
jxie@apache.org.

[incubator-mxnet] 17/20: fix (#8857)

Posted by jx...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git

commit f74f6e7c6a91b4d0ad6bd4377e16480f943ff54e
Author: Eric Junyuan Xie <pi...@users.noreply.github.com>
AuthorDate: Tue Nov 28 17:28:58 2017 -0800

    fix (#8857)
    
    use macro for registration
---
 src/operator/image/image_random-inl.h |  22 +++++++
 src/operator/image/image_random.cc    | 111 ++++------------------------------
 2 files changed, 34 insertions(+), 99 deletions(-)

diff --git a/src/operator/image/image_random-inl.h b/src/operator/image/image_random-inl.h
index 9d10a30..cbc7f40 100644
--- a/src/operator/image/image_random-inl.h
+++ b/src/operator/image/image_random-inl.h
@@ -658,6 +658,28 @@ void RandomLighting(const nnvm::NodeAttrs &attrs,
   AdjustLightingImpl({alpha_r, alpha_g, alpha_b}, ctx, inputs, req, outputs);
 }
 
+
+#define MXNET_REGISTER_IMAGE_AUG_OP(name)                                   \
+  NNVM_REGISTER_OP(name)                                                    \
+  .set_num_inputs(1)                                                        \
+  .set_num_outputs(1)                                                       \
+  .set_attr<nnvm::FInplaceOption>("FInplaceOption",                         \
+    [](const NodeAttrs& attrs){                                             \
+      return std::vector<std::pair<int, int> >{{0, 0}};                     \
+    })                                                                      \
+  .set_attr<nnvm::FInferShape>("FInferShape", ImageShape)                   \
+  .set_attr<nnvm::FInferType>("FInferType", ElemwiseType<1, 1>)             \
+  .set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseNone{ "_copy" })   \
+  .add_argument("data", "NDArray-or-Symbol", "The input.")
+
+
+#define MXNET_REGISTER_IMAGE_RND_AUG_OP(name)                               \
+  MXNET_REGISTER_IMAGE_AUG_OP(name)                                         \
+  .set_attr<FResourceRequest>("FResourceRequest",                           \
+    [](const NodeAttrs& attrs) {                                            \
+      return std::vector<ResourceRequest>{ResourceRequest::kRandom};        \
+    })
+
 }  // namespace image
 }  // namespace op
 }  // namespace mxnet
diff --git a/src/operator/image/image_random.cc b/src/operator/image/image_random.cc
index 5a21bf8..481dfce 100644
--- a/src/operator/image/image_random.cc
+++ b/src/operator/image/image_random.cc
@@ -66,144 +66,57 @@ NNVM_REGISTER_OP(_image_normalize)
 .add_arguments(NormalizeParam::__FIELDS__());
 
 
-NNVM_REGISTER_OP(_image_random_horizontal_flip)
+MXNET_REGISTER_IMAGE_RND_AUG_OP(_image_random_horizontal_flip)
 .describe(R"code()code" ADD_FILELINE)
-.set_num_inputs(1)
-.set_num_outputs(1)
-.set_attr<nnvm::FInferShape>("FInferShape", ImageShape)
-.set_attr<nnvm::FInferType>("FInferType", ElemwiseType<1, 1>)
-.set_attr<nnvm::FInplaceOption>("FInplaceOption",
-  [](const NodeAttrs& attrs){
-    return std::vector<std::pair<int, int> >{{0, 0}};
-  })
-.set_attr<FCompute>("FCompute<cpu>", RandomHorizontalFlip)
-.set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseNone{ "_copy" })
-.add_argument("data", "NDArray-or-Symbol", "The input.");
+.set_attr<FCompute>("FCompute<cpu>", RandomHorizontalFlip);
 
 
-NNVM_REGISTER_OP(_image_random_brightness)
+MXNET_REGISTER_IMAGE_RND_AUG_OP(_image_random_brightness)
 .describe(R"code()code" ADD_FILELINE)
-.set_num_inputs(1)
-.set_num_outputs(1)
 .set_attr_parser(ParamParser<RandomEnhanceParam>)
-.set_attr<FResourceRequest>("FResourceRequest", [](const NodeAttrs& attrs) {
-  return std::vector<ResourceRequest>{ResourceRequest::kRandom};
-})
-.set_attr<nnvm::FInferShape>("FInferShape", ImageShape)
-.set_attr<nnvm::FInferType>("FInferType", ElemwiseType<1, 1>)
-.set_attr<nnvm::FInplaceOption>("FInplaceOption",
-  [](const NodeAttrs& attrs){
-    return std::vector<std::pair<int, int> >{{0, 0}};
-  })
 .set_attr<FCompute>("FCompute<cpu>", RandomBrightness)
-.set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseNone{ "_copy" })
-.add_argument("data", "NDArray-or-Symbol", "The input.")
 .add_arguments(RandomEnhanceParam::__FIELDS__());
 
 
-NNVM_REGISTER_OP(_image_random_contrast)
+MXNET_REGISTER_IMAGE_RND_AUG_OP(_image_random_contrast)
 .describe(R"code()code" ADD_FILELINE)
-.set_num_inputs(1)
-.set_num_outputs(1)
 .set_attr_parser(ParamParser<RandomEnhanceParam>)
-.set_attr<FResourceRequest>("FResourceRequest", [](const NodeAttrs& attrs) {
-  return std::vector<ResourceRequest>{ResourceRequest::kRandom};
-})
-.set_attr<nnvm::FInferShape>("FInferShape", ImageShape)
-.set_attr<nnvm::FInferType>("FInferType", ElemwiseType<1, 1>)
-.set_attr<nnvm::FInplaceOption>("FInplaceOption",
-  [](const NodeAttrs& attrs){
-    return std::vector<std::pair<int, int> >{{0, 0}};
-  })
 .set_attr<FCompute>("FCompute<cpu>", RandomContrast)
-.set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseNone{ "_copy" })
-.add_argument("data", "NDArray-or-Symbol", "The input.")
 .add_arguments(RandomEnhanceParam::__FIELDS__());
 
 
-NNVM_REGISTER_OP(_image_random_saturation)
+MXNET_REGISTER_IMAGE_RND_AUG_OP(_image_random_saturation)
 .describe(R"code()code" ADD_FILELINE)
-.set_num_inputs(1)
-.set_num_outputs(1)
 .set_attr_parser(ParamParser<RandomEnhanceParam>)
-.set_attr<FResourceRequest>("FResourceRequest", [](const NodeAttrs& attrs) {
-  return std::vector<ResourceRequest>{ResourceRequest::kRandom};
-})
-.set_attr<nnvm::FInferShape>("FInferShape", ImageShape)
-.set_attr<nnvm::FInferType>("FInferType", ElemwiseType<1, 1>)
-.set_attr<nnvm::FInplaceOption>("FInplaceOption",
-  [](const NodeAttrs& attrs){
-    return std::vector<std::pair<int, int> >{{0, 0}};
-  })
 .set_attr<FCompute>("FCompute<cpu>", RandomSaturation)
-.set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseNone{ "_copy" })
-.add_argument("data", "NDArray-or-Symbol", "The input.")
 .add_arguments(RandomEnhanceParam::__FIELDS__());
 
-NNVM_REGISTER_OP(_image_random_hue)
+
+MXNET_REGISTER_IMAGE_RND_AUG_OP(_image_random_hue)
 .describe(R"code()code" ADD_FILELINE)
-.set_num_inputs(1)
-.set_num_outputs(1)
 .set_attr_parser(ParamParser<RandomEnhanceParam>)
-.set_attr<FResourceRequest>("FResourceRequest", [](const NodeAttrs& attrs) {
-  return std::vector<ResourceRequest>{ResourceRequest::kRandom};
-})
-.set_attr<nnvm::FInferShape>("FInferShape", ImageShape)
-.set_attr<nnvm::FInferType>("FInferType", ElemwiseType<1, 1>)
 .set_attr<FCompute>("FCompute<cpu>", RandomHue)
-.set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseNone{ "_copy" })
-.add_argument("data", "NDArray-or-Symbol", "The input.")
 .add_arguments(RandomEnhanceParam::__FIELDS__());
 
-NNVM_REGISTER_OP(_image_random_color_jitter)
+
+MXNET_REGISTER_IMAGE_RND_AUG_OP(_image_random_color_jitter)
 .describe(R"code()code" ADD_FILELINE)
-.set_num_inputs(1)
-.set_num_outputs(1)
 .set_attr_parser(ParamParser<RandomColorJitterParam>)
-.set_attr<FResourceRequest>("FResourceRequest", [](const NodeAttrs& attrs) {
-  return std::vector<ResourceRequest>{ResourceRequest::kRandom};
-})
-.set_attr<nnvm::FInferShape>("FInferShape", ImageShape)
-.set_attr<nnvm::FInferType>("FInferType", ElemwiseType<1, 1>)
 .set_attr<FCompute>("FCompute<cpu>", RandomColorJitter)
-.set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseNone{ "_copy" })
-.add_argument("data", "NDArray-or-Symbol", "The input.")
 .add_arguments(RandomColorJitterParam::__FIELDS__());
 
-NNVM_REGISTER_OP(_image_adjust_lighting)
+
+MXNET_REGISTER_IMAGE_AUG_OP(_image_adjust_lighting)
 .describe(R"code(Adjust the lighting level of the input. Follow the AlexNet style.)code" ADD_FILELINE)
-.set_num_inputs(1)
-.set_num_outputs(1)
 .set_attr_parser(ParamParser<AdjustLightingParam>)
-.set_attr<nnvm::FInferShape>("FInferShape", ImageShape)
-.set_attr<nnvm::FInferType>("FInferType", ElemwiseType<1, 1>)
-.set_attr<nnvm::FInplaceOption>("FInplaceOption",
-  [](const NodeAttrs& attrs){
-    return std::vector<std::pair<int, int> >{{0, 0}};
-  })
 .set_attr<FCompute>("FCompute<cpu>", AdjustLighting)
-.set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseNone{ "_copy" })
-.add_argument("data", "NDArray-or-Symbol", "The input.")
 .add_arguments(AdjustLightingParam::__FIELDS__());
 
 
-NNVM_REGISTER_OP(_image_random_lighting)
+MXNET_REGISTER_IMAGE_RND_AUG_OP(_image_random_lighting)
 .describe(R"code(Randomly add PCA noise. Follow the AlexNet style.)code" ADD_FILELINE)
-.set_num_inputs(1)
-.set_num_outputs(1)
 .set_attr_parser(ParamParser<RandomLightingParam>)
-.set_attr<FResourceRequest>("FResourceRequest", [](const NodeAttrs& attrs) {
-  return std::vector<ResourceRequest>{ResourceRequest::kRandom};
-})
-.set_attr<nnvm::FInferShape>("FInferShape", ImageShape)
-.set_attr<nnvm::FInferType>("FInferType", ElemwiseType<1, 1>)
-.set_attr<nnvm::FInplaceOption>("FInplaceOption",
-  [](const NodeAttrs& attrs){
-    return std::vector<std::pair<int, int> >{{0, 0}};
-  })
 .set_attr<FCompute>("FCompute<cpu>", RandomLighting)
-.set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseNone{ "_copy" })
-.add_argument("data", "NDArray-or-Symbol", "The input.")
 .add_arguments(RandomLightingParam::__FIELDS__());
 
 }  // namespace image

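With the two macros in place, registering a further augmenter collapses to a handful of lines. A hypothetical example (the op name `_image_invert` and compute function `InvertImage` are illustrative only, not part of this change):

    // MXNET_REGISTER_IMAGE_AUG_OP supplies the single input/output,
    // in-place option, shape/type inference and identity gradient; only
    // the description and the compute function remain to be filled in.
    MXNET_REGISTER_IMAGE_AUG_OP(_image_invert)
    .describe(R"code(Invert the pixel values of the input image.)code" ADD_FILELINE)
    .set_attr<FCompute>("FCompute<cpu>", InvertImage);

Random augmenters use MXNET_REGISTER_IMAGE_RND_AUG_OP instead, which additionally requests the kRandom resource.
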
-- 
To stop receiving notification emails like this one, please contact
jxie@apache.org.

[incubator-mxnet] 15/20: [WIP]hue (#8678)

Posted by jx...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git

commit 9dda92009844da33c30e51b454aead0bb749a3ba
Author: Hu Shiwen <ya...@gmail.com>
AuthorDate: Tue Nov 28 08:57:51 2017 +0800

    [WIP]hue (#8678)
    
    * add hue
    
    * fix
    
    * up sub
    
    * fix lint
---
 src/operator/image/image_random-inl.h | 152 ++++++++++++++++++++++++++++++++++
 src/operator/image/image_random.cc    |  16 ++++
 2 files changed, 168 insertions(+)

diff --git a/src/operator/image/image_random-inl.h b/src/operator/image/image_random-inl.h
index 5c552b2..3bee843 100644
--- a/src/operator/image/image_random-inl.h
+++ b/src/operator/image/image_random-inl.h
@@ -25,9 +25,12 @@
 #ifndef MXNET_OPERATOR_IMAGE_IMAGE_RANDOM_INL_H_
 #define MXNET_OPERATOR_IMAGE_IMAGE_RANDOM_INL_H_
 
+
 #include <mxnet/base.h>
 #include <algorithm>
 #include <vector>
+#include <cmath>
+#include <limits>
 #include <algorithm>
 #include <utility>
 #include "../mxnet_op.h"
@@ -337,11 +340,160 @@ static void RandomSaturation(const nnvm::NodeAttrs &attrs,
   }
 }
 
+struct RandomHueParam : public dmlc::Parameter<RandomHueParam> {
+  float max_hue;
+  DMLC_DECLARE_PARAMETER(RandomHueParam) {
+    DMLC_DECLARE_FIELD(max_hue)
+    .set_default(0.0)
+    .describe("Max Hue.");
+  }
+};
+
+template <typename DType> static
+void RGB2HLSConvert(const DType src_r,
+                    const DType src_g,
+                    const DType src_b,
+                    DType *dst_h,
+                    DType *dst_l,
+                    DType *dst_s) {
+  DType b = src_b, g = src_g, r = src_r;
+  DType h = 0.f, s = 0.f, l;
+  DType vmin;
+  DType vmax;
+  DType diff;
+
+  vmax = vmin = r;
+  vmax = fmax(vmax, g);
+  vmax = fmax(vmax, b);
+  vmin = fmin(vmin, g);
+  vmin = fmin(vmin, b);
+
+  diff = vmax - vmin;
+  l = (vmax + vmin) * 0.5f;
+
+  if (diff > std::numeric_limits<DType>::epsilon()) {
+    s = (l < 0.5f) * diff / (vmax + vmin);
+    s += (l >= 0.5f) * diff / (2.0f - vmax - vmin);
+
+    diff = 60.f / diff;
+
+    h = (vmax == r) * (g - b) * diff;
+    h += (vmax != r && vmax == g) * ((b - r) * diff + 120.f);
+    h += (vmax != r && vmax != g) * ((r - g) * diff + 240.f);
+    h += (h < 0.f) * 360.f;
+  }
+
+  *dst_h = h;
+  *dst_l = l;
+  *dst_s = s;
+}
+
+
+static const int c_HlsSectorData[6][3] = {
+  { 1, 3, 0 },
+  { 1, 0, 2 },
+  { 3, 0, 1 },
+  { 0, 2, 1 },
+  { 0, 1, 3 },
+  { 2, 1, 0 }
+};
+
+template <typename DType> static
+void HLS2RGBConvert(const DType src_h,
+                    const DType src_l,
+                    const DType src_s,
+                    DType *dst_r,
+                    DType *dst_g,
+                    DType *dst_b) {
+  float h = src_h, l = src_l, s = src_s;
+  float b = l, g = l, r = l;
+
+  if (s != 0) {
+    float p2 = (l <= 0.5f) * l * (1 + s);
+    p2 += (l > 0.5f) * (l + s - l * s);
+    float p1 = 2 * l - p2;
+
+    if (h < 0) {
+      do { h += 6; } while (h < 0);
+    } else if (h >= 6) {
+      do { h -= 6; } while (h >= 6);
+    }
+
+    int sector = static_cast<int>(h);
+
+    h -= sector;
+
+    float tab[4];
+    tab[0] = p2;
+    tab[1] = p1;
+    tab[2] = p1 + (p2 - p1) * (1 - h);
+    tab[3] = p1 + (p2 - p1) * h;
+
+    b = tab[c_HlsSectorData[sector][0]];
+    g = tab[c_HlsSectorData[sector][1]];
+    r = tab[c_HlsSectorData[sector][2]];
+  }
+
+  *dst_b = b;
+  *dst_g = g;
+  *dst_r = r;
+}
+
+template<typename xpu, typename DType>
+static  void RandomHueKernel(const TBlob &input,
+                             const TBlob &output,
+                             Stream<xpu> *s,
+                             int hight,
+                             int weight,
+                             DType alpha) {
+  auto input_3d = input.get<xpu, 3, DType>(s);
+  auto output_3d = output.get<xpu, 3, DType>(s);
+  for (int h_index = 0; h_index < hight; ++h_index) {
+    for (int w_index = 0; w_index < weight; ++w_index) {
+      DType hue;
+      DType lum;
+      DType sat;
+      RGB2HLSConvert(input_3d[0][h_index][w_index],
+                     input_3d[1][h_index][w_index],
+                     input_3d[2][h_index][w_index],
+                     &hue, &lum, &sat);
+      hue += alpha;
+      hue = std::max(DType(0), std::min(DType(180), hue));
+
+      HLS2RGBConvert(
+        hue, lum, sat,
+        &output_3d[0][h_index][w_index],
+        &output_3d[1][h_index][w_index],
+        &output_3d[2][h_index][w_index]);
+    }
+  }
+}
+
+template<typename xpu>
 static void RandomHue(const nnvm::NodeAttrs &attrs,
                       const OpContext &ctx,
                       const std::vector<TBlob> &inputs,
                       const std::vector<OpReqType> &req,
                       const std::vector<TBlob> &outputs) {
+  using namespace mshadow;
+  auto input = inputs[0];
+  auto output = outputs[0];
+  int channel = input.shape_[0];
+  int hight = input.shape_[1];
+  int weight = input.shape_[2];
+  Stream<xpu> *s = ctx.get_stream<xpu>();
+  Random<xpu> *prnd = ctx.requested[kRandom].get_random<xpu, real_t>(s);
+
+  const RandomHueParam &param = nnvm::get<RandomHueParam>(attrs.parsed);
+  float alpha = std::uniform_real_distribution<float>(
+    -param.max_hue, param.max_hue)(prnd->GetRndEngine());
+
+  MSHADOW_TYPE_SWITCH(input.type_flag_, DType, {
+    RandomHueKernel<xpu, DType>(input, output, s, hight, weight, alpha);
+  });
 }
 
 static void RandomColorJitter(const nnvm::NodeAttrs &attrs,
diff --git a/src/operator/image/image_random.cc b/src/operator/image/image_random.cc
index 4184382..29edeed 100644
--- a/src/operator/image/image_random.cc
+++ b/src/operator/image/image_random.cc
@@ -136,6 +136,22 @@ NNVM_REGISTER_OP(_image_random_saturation)
 .add_argument("data", "NDArray-or-Symbol", "The input.")
 .add_arguments(RandomSaturationParam::__FIELDS__());
 
+DMLC_REGISTER_PARAMETER(RandomHueParam);
+NNVM_REGISTER_OP(_image_random_hue)
+.describe(R"code()code" ADD_FILELINE)
+.set_num_inputs(1)
+.set_num_outputs(1)
+.set_attr_parser(ParamParser<RandomHueParam>)
+.set_attr<FResourceRequest>("FResourceRequest", [](const NodeAttrs& attrs) {
+  return std::vector<ResourceRequest>{ResourceRequest::kRandom};
+})
+.set_attr<nnvm::FInferShape>("FInferShape", ElemwiseShape<1, 1>)
+.set_attr<nnvm::FInferType>("FInferType", ElemwiseType<1, 1>)
+.set_attr<FCompute>("FCompute<cpu>", RandomHue<cpu>)
+.set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseNone{ "_copy" })
+.add_argument("data", "NDArray-or-Symbol", "The input.")
+.add_arguments(RandomHueParam::__FIELDS__());
+
 DMLC_REGISTER_PARAMETER(AdjustLightingParam);
 NNVM_REGISTER_OP(_image_adjust_lighting)
 .describe(R"code(Adjust the lighting level of the input. Follow the AlexNet style.)code" ADD_FILELINE)

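Note that RGB2HLSConvert above returns hue in degrees ([0, 360)) while HLS2RGBConvert consumes it in 60-degree sector units ([0, 6)), so the kernel's clamp to [0, 180] appears to straddle two conventions. For orientation, a self-contained sketch of a hue shift that keeps one unit (degrees) end to end might look as follows, assuming float RGB in [0, 1]; `ShiftHue` is illustrative only:

    #include <algorithm>
    #include <cmath>

    struct RGB { float r, g, b; };

    // Illustrative sketch: shift hue by delta_deg degrees, wrapping the hue
    // instead of clamping it, with RGB values in [0, 1].
    RGB ShiftHue(RGB in, float delta_deg) {
      const float vmax = std::max({in.r, in.g, in.b});
      const float vmin = std::min({in.r, in.g, in.b});
      const float l = 0.5f * (vmax + vmin);
      const float diff = vmax - vmin;
      if (diff <= 0.f) return in;  // achromatic pixel: hue is undefined
      const float s = (l < 0.5f) ? diff / (vmax + vmin)
                                 : diff / (2.f - vmax - vmin);
      float h;
      if (vmax == in.r)      h = 60.f * (in.g - in.b) / diff;
      else if (vmax == in.g) h = 60.f * (in.b - in.r) / diff + 120.f;
      else                   h = 60.f * (in.r - in.g) / diff + 240.f;
      h = std::fmod(h + delta_deg, 360.f);
      if (h < 0.f) h += 360.f;
      const float q = (l <= 0.5f) ? l * (1.f + s) : l + s - l * s;
      const float p = 2.f * l - q;
      auto chan = [&](float hk) {  // hk in degrees, any range
        hk = std::fmod(hk, 360.f);
        if (hk < 0.f) hk += 360.f;
        if (hk < 60.f)  return p + (q - p) * hk / 60.f;
        if (hk < 180.f) return q;
        if (hk < 240.f) return p + (q - p) * (240.f - hk) / 60.f;
        return p;
      };
      return {chan(h + 120.f), chan(h), chan(h - 120.f)};
    }
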
-- 
To stop receiving notification emails like this one, please contact
jxie@apache.org.

[incubator-mxnet] 01/20: [WIP]Image Augmenter (#8633)

Posted by jx...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git

commit 3aef4e85bcf2bab46e304fd32088ec6da2f9f0f7
Author: Hu Shiwen <ya...@gmail.com>
AuthorDate: Tue Nov 14 08:32:47 2017 +0800

    [WIP]Image Augmenter (#8633)
    
    * add file
    
    * add random_brightness
    add python mx.sym/nd.image
    
    * fix lint
    
    * add image/image_common.h
    
    * add RandomContrast
    
    * change name
    
    * fix
---
 python/mxnet/base.py                               |  8 +-
 python/mxnet/ndarray/__init__.py                   |  4 +-
 .../mxnet/{symbol/__init__.py => ndarray/image.py} | 15 ++--
 python/mxnet/symbol/__init__.py                    |  4 +-
 python/mxnet/symbol/{__init__.py => image.py}      | 15 ++--
 src/operator/batch_norm_v1-inl.h                   |  2 +-
 src/operator/image/image_common.h                  | 88 +++++++++++++++++++
 src/operator/image/image_random-inl.h              | 99 ++++++++++++++++++++++
 src/operator/image/image_random.cc                 | 50 +++++++++++
 src/operator/random/multisample_op.h               |  2 +-
 src/operator/tensor/broadcast_reduce_op_index.cc   |  2 +-
 .../elemwise_binary_broadcast_op_extended.cc       |  2 +-
 .../tensor/elemwise_binary_broadcast_op_logic.cc   |  2 +-
 13 files changed, 261 insertions(+), 32 deletions(-)

diff --git a/python/mxnet/base.py b/python/mxnet/base.py
index fbdf15f..7bdfb5b 100644
--- a/python/mxnet/base.py
+++ b/python/mxnet/base.py
@@ -449,7 +449,7 @@ def _as_list(obj):
         return [obj]
 
 
-_OP_NAME_PREFIX_LIST = ['_contrib_', '_linalg_', '_sparse_']
+_OP_NAME_PREFIX_LIST = ['_contrib_', '_linalg_', '_sparse_', '_image_']
 
 
 def _get_op_name_prefix(op_name):
@@ -503,10 +503,11 @@ def _init_op_module(root_namespace, module_name, make_op_func):
         hdl = OpHandle()
         check_call(_LIB.NNGetOpHandle(c_str(name), ctypes.byref(hdl)))
         op_name_prefix = _get_op_name_prefix(name)
+        module_name_local = module_name
         if len(op_name_prefix) > 0:
             func_name = name[len(op_name_prefix):]
             cur_module = submodule_dict[op_name_prefix]
-            module_name = "%s.%s.%s" % (root_namespace, module_name, op_name_prefix[1:-1])
+            module_name_local = "%s.%s.%s" % (root_namespace, module_name, op_name_prefix[1:-1])
         elif name.startswith('_'):
             func_name = name
             cur_module = module_internal
@@ -515,10 +516,11 @@ def _init_op_module(root_namespace, module_name, make_op_func):
             cur_module = module_op
 
         function = make_op_func(hdl, name, func_name)
-        function.__module__ = module_name
+        function.__module__ = module_name_local
         setattr(cur_module, function.__name__, function)
         cur_module.__all__.append(function.__name__)
 
         if op_name_prefix == '_contrib_':
             hdl = OpHandle()
             check_call(_LIB.NNGetOpHandle(c_str(name), ctypes.byref(hdl)))
diff --git a/python/mxnet/ndarray/__init__.py b/python/mxnet/ndarray/__init__.py
index 586dc9e..86a3a20 100644
--- a/python/mxnet/ndarray/__init__.py
+++ b/python/mxnet/ndarray/__init__.py
@@ -17,7 +17,7 @@
 
 """NDArray API of MXNet."""
 
-from . import _internal, contrib, linalg, op, random, sparse, utils
+from . import _internal, contrib, linalg, op, random, sparse, utils, image
 # pylint: disable=wildcard-import, redefined-builtin
 try:
     from .gen_op import * # pylint: disable=unused-wildcard-import
@@ -31,4 +31,4 @@ from .utils import load, save, zeros, empty, array
 from .sparse import _ndarray_cls
 from .ndarray import _GRAD_REQ_MAP
 
-__all__ = op.__all__ + ndarray.__all__ + utils.__all__ + ['contrib', 'linalg', 'random', 'sparse']
+__all__ = op.__all__ + ndarray.__all__ + utils.__all__ + ['contrib', 'linalg', 'random', 'sparse', 'image']
diff --git a/python/mxnet/symbol/__init__.py b/python/mxnet/ndarray/image.py
similarity index 67%
copy from python/mxnet/symbol/__init__.py
copy to python/mxnet/ndarray/image.py
index a07025e..0afab24 100644
--- a/python/mxnet/symbol/__init__.py
+++ b/python/mxnet/ndarray/image.py
@@ -15,17 +15,12 @@
 # specific language governing permissions and limitations
 # under the License.
 
-"""Symbol API of MXNet."""
-
-from . import _internal, contrib, linalg, op, random, sparse
-# pylint: disable=wildcard-import, redefined-builtin
+# coding: utf-8
+# pylint: disable=wildcard-import, unused-wildcard-import
+"""Image NDArray API of MXNet."""
 try:
-    from .gen_op import * # pylint: disable=unused-wildcard-import
+    from .gen_image import *
 except ImportError:
     pass
-from . import register
-from .op import *
-from .symbol import *
-# pylint: enable=wildcard-import
 
-__all__ = op.__all__ + symbol.__all__ + ['contrib', 'linalg', 'random', 'sparse']
+__all__ = []
diff --git a/python/mxnet/symbol/__init__.py b/python/mxnet/symbol/__init__.py
index a07025e..a10b64e 100644
--- a/python/mxnet/symbol/__init__.py
+++ b/python/mxnet/symbol/__init__.py
@@ -17,7 +17,7 @@
 
 """Symbol API of MXNet."""
 
-from . import _internal, contrib, linalg, op, random, sparse
+from . import _internal, contrib, linalg, op, random, sparse, image
 # pylint: disable=wildcard-import, redefined-builtin
 try:
     from .gen_op import * # pylint: disable=unused-wildcard-import
@@ -28,4 +28,4 @@ from .op import *
 from .symbol import *
 # pylint: enable=wildcard-import
 
-__all__ = op.__all__ + symbol.__all__ + ['contrib', 'linalg', 'random', 'sparse']
+__all__ = op.__all__ + symbol.__all__ + ['contrib', 'linalg', 'random', 'sparse', 'image']
diff --git a/python/mxnet/symbol/__init__.py b/python/mxnet/symbol/image.py
similarity index 67%
copy from python/mxnet/symbol/__init__.py
copy to python/mxnet/symbol/image.py
index a07025e..7624bcc 100644
--- a/python/mxnet/symbol/__init__.py
+++ b/python/mxnet/symbol/image.py
@@ -15,17 +15,12 @@
 # specific language governing permissions and limitations
 # under the License.
 
-"""Symbol API of MXNet."""
-
-from . import _internal, contrib, linalg, op, random, sparse
-# pylint: disable=wildcard-import, redefined-builtin
+# coding: utf-8
+# pylint: disable=wildcard-import, unused-wildcard-import
+"""Image Symbol API of MXNet."""
 try:
-    from .gen_op import * # pylint: disable=unused-wildcard-import
+    from .gen_image import *
 except ImportError:
     pass
-from . import register
-from .op import *
-from .symbol import *
-# pylint: enable=wildcard-import
 
-__all__ = op.__all__ + symbol.__all__ + ['contrib', 'linalg', 'random', 'sparse']
+__all__ = []
diff --git a/src/operator/batch_norm_v1-inl.h b/src/operator/batch_norm_v1-inl.h
index 329d66d..1e04845 100644
--- a/src/operator/batch_norm_v1-inl.h
+++ b/src/operator/batch_norm_v1-inl.h
@@ -19,7 +19,7 @@
 
 /*!
  * Copyright (c) 2015 by Contributors
- * \file batch_norm-inl_v1.h
+ * \file batch_norm_v1-inl.h
  * \brief
  * \author Bing Xu
 */
diff --git a/src/operator/image/image_common.h b/src/operator/image/image_common.h
new file mode 100644
index 0000000..7cf3f96
--- /dev/null
+++ b/src/operator/image/image_common.h
@@ -0,0 +1,88 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements.  See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership.  The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License.  You may obtain a copy of the License at
+*
+*   http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an
+* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+* KIND, either express or implied.  See the License for the
+* specific language governing permissions and limitations
+* under the License.
+*/
+
+/*!
+* \file image_common.h
+* \brief
+* \author
+*/
+#ifndef MXNET_OPERATOR_IMAGE_IMAGE_COMMON_H_
+#define MXNET_OPERATOR_IMAGE_IMAGE_COMMON_H_
+
+#include <mxnet/base.h>
+#include <opencv2/opencv.hpp>
+
+namespace mxnet {
+namespace op {
+
+/**
+* @brief convert TBlob to cv::Mat
+* @param input @see TBlob
+* @param hight
+* @param weight
+* @param channel
+* @return
+*/
+static cv::Mat mat_convert(TBlob input, int hight, int weight, int channel) {
+  cv::Mat m;
+  switch (input.type_flag_) {
+    case mshadow::kFloat32: {
+      typedef float DType;
+      m = cv::Mat(hight, weight, CV_MAKETYPE(CV_32F, channel), input.dptr<DType>());
+    }
+    break;
+    case mshadow::kFloat64: {
+      typedef double DType;
+      m = cv::Mat(hight, weight, CV_MAKETYPE(CV_64F, channel), input.dptr<DType>());
+    }
+    break;
+    case mshadow::kFloat16: {
+      typedef mshadow::half::half_t DType;
+      LOG(FATAL) << "not support type enum " << input.type_flag_;
+    }
+    break;
+    case mshadow::kUint8: {
+      typedef uint8_t DType;
+      m = cv::Mat(hight, weight, CV_MAKETYPE(CV_8U, channel), input.dptr<DType>());
+    }
+    break;
+    case mshadow::kInt8: {
+      typedef int8_t DType;
+      m = cv::Mat(hight, weight, CV_MAKETYPE(CV_8S, channel), input.dptr<DType>());
+    }
+    break;
+    case mshadow::kInt32: {
+      typedef int32_t DType;
+      m = cv::Mat(hight, weight, CV_MAKETYPE(CV_32S, channel), input.dptr<DType>());
+    }
+    break;
+    case mshadow::kInt64: {
+      typedef int64_t DType;
+      LOG(FATAL) << "not support type enum " << input.type_flag_;
+    }
+    break;
+    default:
+      LOG(FATAL) << "Unknown type enum " << input.type_flag_;
+  }
+  return m;
+}
+} // namespace op
+} // namespace mxnet
+
+
+#endif // MXNET_OPERATOR_IMAGE_IMAGE_COMMON_H_
\ No newline at end of file
diff --git a/src/operator/image/image_random-inl.h b/src/operator/image/image_random-inl.h
new file mode 100644
index 0000000..027d587
--- /dev/null
+++ b/src/operator/image/image_random-inl.h
@@ -0,0 +1,99 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements.  See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership.  The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License.  You may obtain a copy of the License at
+*
+*   http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an
+* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+* KIND, either express or implied.  See the License for the
+* specific language governing permissions and limitations
+* under the License.
+*/
+
+/*!
+* \file image_random-inl.h
+* \brief
+* \author
+*/
+#ifndef MXNET_OPERATOR_IMAGE_IMAGE_RANDOM_INL_H_
+#define MXNET_OPERATOR_IMAGE_IMAGE_RANDOM_INL_H_
+
+#include <vector>
+#include <mxnet/base.h>
+#include <opencv2/opencv.hpp>
+#include <opencv2/core/mat.hpp>
+#include "mxnet/op_attr_types.h"
+#include "image_common.h"
+
+
+namespace mxnet {
+namespace op {
+struct RandomBrightnessParam : public dmlc::Parameter<RandomBrightnessParam> {
+  float max_brightness;
+  DMLC_DECLARE_PARAMETER(RandomBrightnessParam) {
+    DMLC_DECLARE_FIELD(max_brightness)
+    .set_default(0.0)
+    .describe("Max Contrast.");
+  }
+};
+
+
+template<typename xpu>
+static void RandomBrightness(const nnvm::NodeAttrs &attrs,
+                             const OpContext &ctx,
+                             const std::vector<TBlob> &inputs,
+                             const std::vector<OpReqType> &req,
+                             const std::vector<TBlob> &outputs) {
+  auto input = inputs[0];
+  auto output = outputs[0];
+  int hight = input.shape_[0];
+  int weight = input.shape_[1];
+  int channel = input.shape_[2];
+
+  auto input_mat = mat_convert(input, hight, weight, channel);
+  auto output_mat = mat_convert(output, hight, weight, channel);
+  //input_mat.convertTo(output_mat, -1, 1/255.0, 0);
+  std::default_random_engine generator;
+  const RandomBrightnessParam &param = nnvm::get<RandomBrightnessParam>(attrs.parsed);
+  float alpha_b = 1.0 + std::uniform_real_distribution<float>(-param.max_brightness, param.max_brightness)(generator);
+  input_mat.convertTo(output_mat, -1, alpha_b, 0);
+}
+
+
+template<typename xpu>
+static void RandomContrast(const nnvm::NodeAttrs &attrs,
+  const OpContext &ctx,
+  const std::vector<TBlob> &inputs,
+  const std::vector<OpReqType> &req,
+  const std::vector<TBlob> &outputs) {
+  auto input = inputs[0];
+  auto output = outputs[0];
+  int hight = input.shape_[0];
+  int weight = input.shape_[1];
+  int channel = input.shape_[2];
+
+  auto input_mat = mat_convert(input, hight, weight, channel);
+  auto output_mat = mat_convert(output, hight, weight, channel);
+  //input_mat.convertTo(output_mat, -1, 1/255.0, 0);
+  std::default_random_engine generator;
+  const RandomBrightnessParam &param = nnvm::get<RandomBrightnessParam>(attrs.parsed);
+  float alpha_c = 1.0 + std::uniform_real_distribution<float>(-param.max_brightness, param.max_brightness)(generator);
+  cv::Mat temp_;
+  cv::cvtColor(input_mat, temp_,  CV_RGB2GRAY);
+  float gray_mean = cv::mean(temp_)[0];
+  input_mat.convertTo(output_mat, -1, alpha_c, (1 - alpha_c) * gray_mean);
+
+}
+
+
+} // namespace op
+} // namespace mxnet
+
+#endif  // MXNET_OPERATOR_IMAGE_IMAGE_RANDOM_INL_H_
diff --git a/src/operator/image/image_random.cc b/src/operator/image/image_random.cc
new file mode 100644
index 0000000..3777e43
--- /dev/null
+++ b/src/operator/image/image_random.cc
@@ -0,0 +1,50 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements.  See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership.  The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License.  You may obtain a copy of the License at
+*
+*   http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an
+* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+* KIND, either express or implied.  See the License for the
+* specific language governing permissions and limitations
+* under the License.
+*/
+
+/*!
+* \file image_random.cc
+* \brief
+* \author
+*/
+
+#include <mxnet/base.h>
+#include "./image_random-inl.h"
+#include "operator/operator_common.h"
+#include "operator/elemwise_op_common.h"
+
+
+
+namespace mxnet {
+namespace op {
+
+DMLC_REGISTER_PARAMETER(RandomBrightnessParam);
+NNVM_REGISTER_OP(_image_random_brightness)
+.describe(R"code()code" ADD_FILELINE)
+.set_num_inputs(1)
+.set_num_outputs(1)
+.set_attr_parser(ParamParser<RandomBrightnessParam>)
+.set_attr<nnvm::FInferShape>("FInferShape", ElemwiseShape<1, 1>)
+.set_attr<nnvm::FInferType>("FInferType", ElemwiseType<1, 1>)
+.set_attr<FCompute>("FCompute<cpu>", RandomBrightness<cpu>)
+.set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseNone{ "_copy" })
+.add_argument("data", "NDArray-or-Symbol", "The input.")
+.add_arguments(RandomBrightnessParam::__FIELDS__());
+
+}
+}
diff --git a/src/operator/random/multisample_op.h b/src/operator/random/multisample_op.h
index e93e453..360c100 100644
--- a/src/operator/random/multisample_op.h
+++ b/src/operator/random/multisample_op.h
@@ -19,7 +19,7 @@
 
 /*!
  * Copyright (c) 2017 by Contributors
- * \file sampling_op.h
+ * \file multisample_op.h
  * \brief Function definitions of operators for sampling from multiple distributions
  */
 #ifndef MXNET_OPERATOR_RANDOM_MULTISAMPLE_OP_H_
diff --git a/src/operator/tensor/broadcast_reduce_op_index.cc b/src/operator/tensor/broadcast_reduce_op_index.cc
index dc07e67..6fd90df 100644
--- a/src/operator/tensor/broadcast_reduce_op_index.cc
+++ b/src/operator/tensor/broadcast_reduce_op_index.cc
@@ -19,7 +19,7 @@
 
 /*!
  *  Copyright (c) 2016 by Contributors
- * \file broadcast_reduce_op.cc
+ * \file broadcast_reduce_op_index.cc
  * \brief CPU Implementation of broadcast and reduce functions.
  */
 #include "./broadcast_reduce_op.h"
diff --git a/src/operator/tensor/elemwise_binary_broadcast_op_extended.cc b/src/operator/tensor/elemwise_binary_broadcast_op_extended.cc
index fe7ad76..8fc3c48 100644
--- a/src/operator/tensor/elemwise_binary_broadcast_op_extended.cc
+++ b/src/operator/tensor/elemwise_binary_broadcast_op_extended.cc
@@ -19,7 +19,7 @@
 
 /*!
  *  Copyright (c) 2016 by Contributors
- * \file elemwise_binary_scalar_op.cc
+ * \file elemwise_binary_broadcast_op_extended.cc
  * \brief CPU Implementation of unary function.
  */
 #include "./elemwise_unary_op.h"
diff --git a/src/operator/tensor/elemwise_binary_broadcast_op_logic.cc b/src/operator/tensor/elemwise_binary_broadcast_op_logic.cc
index 6d74f2d..31f34bb 100644
--- a/src/operator/tensor/elemwise_binary_broadcast_op_logic.cc
+++ b/src/operator/tensor/elemwise_binary_broadcast_op_logic.cc
@@ -19,7 +19,7 @@
 
 /*!
  *  Copyright (c) 2016 by Contributors
- * \file elemwise_binary_scalar_op.cc
+ * \file elemwise_binary_broadcast_op_logic.cc
  * \brief CPU Implementation of unary function.
  */
 #include "./elemwise_unary_op.h"

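The mat_convert helper wraps a TBlob's memory in a cv::Mat header without copying, so OpenCV routines write straight into MXNet's buffer. A hedged usage sketch (function name illustrative; assumes an HWC image blob as elsewhere in this file):

    // Illustrative only: wrap an HWC TBlob in a cv::Mat header and let
    // OpenCV scale it in place. mat_convert copies no pixel data; the
    // cv::Mat aliases the TBlob's memory, so convertTo writes to the blob.
    void BrightenInPlace(const TBlob &blob, float alpha) {
      const int hight   = blob.shape_[0];  // spelled as in mat_convert's signature
      const int weight  = blob.shape_[1];
      const int channel = blob.shape_[2];
      cv::Mat m = mat_convert(blob, hight, weight, channel);
      m.convertTo(m, -1, alpha, 0);  // out = alpha * in + 0, keep dtype (-1)
    }
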
-- 
To stop receiving notification emails like this one, please contact
jxie@apache.org.

[incubator-mxnet] 02/20: [WIP]]Vision (#8649)

Posted by jx...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git

commit 992123d3dfa760df838d88401e860c39471e67cf
Author: Hu Shiwen <ya...@gmail.com>
AuthorDate: Tue Nov 14 21:11:53 2017 +0800

    [WIP]]Vision (#8649)
    
    * not use opencv
---
 src/operator/image/image_random-inl.h | 93 ++++++++++++++++++++++++-----------
 src/operator/image/image_random.cc    |  3 ++
 2 files changed, 68 insertions(+), 28 deletions(-)

diff --git a/src/operator/image/image_random-inl.h b/src/operator/image/image_random-inl.h
index 027d587..9506607 100644
--- a/src/operator/image/image_random-inl.h
+++ b/src/operator/image/image_random-inl.h
@@ -29,7 +29,7 @@
 #include <mxnet/base.h>
 #include <opencv2/opencv.hpp>
 #include <opencv2/core/mat.hpp>
-#include "mxnet/op_attr_types.h"
+#include "../mxnet_op.h"
 #include "image_common.h"
 
 
@@ -44,6 +44,7 @@ struct RandomBrightnessParam : public dmlc::Parameter<RandomBrightnessParam> {
   }
 };
 
+enum ImageRandomResource { kRandom };
 
 template<typename xpu>
 static void RandomBrightness(const nnvm::NodeAttrs &attrs,
@@ -51,44 +52,80 @@ static void RandomBrightness(const nnvm::NodeAttrs &attrs,
                              const std::vector<TBlob> &inputs,
                              const std::vector<OpReqType> &req,
                              const std::vector<TBlob> &outputs) {
+  using namespace mshadow;
   auto input = inputs[0];
   auto output = outputs[0];
-  int hight = input.shape_[0];
-  int weight = input.shape_[1];
-  int channel = input.shape_[2];
-
-  auto input_mat = mat_convert(input, hight, weight, channel);
-  auto output_mat = mat_convert(output, hight, weight, channel);
-  //input_mat.convertTo(output_mat, -1, 1/255.0, 0);
-  std::default_random_engine generator;
+  int channel = input.shape_[0];
+  int hight = input.shape_[1];
+  int weight = input.shape_[2];
+  Stream<xpu> *s = ctx.get_stream<xpu>();
+  Random<xpu> *prnd = ctx.requested[kRandom].get_random<xpu, real_t>(s);
+
   const RandomBrightnessParam &param = nnvm::get<RandomBrightnessParam>(attrs.parsed);
-  float alpha_b = 1.0 + std::uniform_real_distribution<float>(-param.max_brightness, param.max_brightness)(generator);
-  output_mat.convertTo(output_mat, -1, alpha_b, 0);
+  float alpha_b = 1.0 + std::uniform_real_distribution<float>(-param.max_brightness, param.max_brightness)(prnd->GetRndEngine());
+  MSHADOW_TYPE_SWITCH(outputs[0].type_flag_, DType, {
+    MXNET_ASSIGN_REQ_SWITCH(req[0], Req,{
+      mxnet_op::Kernel<mxnet_op::op_with_req<mshadow::op::mul, Req>, xpu>::Launch(
+        s, inputs[0].Size(), outputs[0].dptr<DType>(), inputs[0].dptr<DType>(), DType(alpha_b));
+    });
+  });
+
 }
 
 
+/*! \brief mul_add operator */
+struct mul_add {
+  /*! \brief map a, b, c to result using defined operation */
+  template<typename DType>
+  MSHADOW_XINLINE static DType Map(DType a, DType b, DType c) {
+    return a * b + c;
+  }
+};
+
+
 template<typename xpu>
 static void RandomContrast(const nnvm::NodeAttrs &attrs,
-  const OpContext &ctx,
-  const std::vector<TBlob> &inputs,
-  const std::vector<OpReqType> &req,
-  const std::vector<TBlob> &outputs) {
+                           const OpContext &ctx,
+                           const std::vector<TBlob> &inputs,
+                           const std::vector<OpReqType> &req,
+                           const std::vector<TBlob> &outputs) {
+  using namespace mshadow;
   auto input = inputs[0];
   auto output = outputs[0];
-  int hight = input.shape_[0];
-  int weight = input.shape_[1];
-  int channel = input.shape_[2];
-
-  auto input_mat = mat_convert(input, hight, weight, channel);
-  auto output_mat = mat_convert(output, hight, weight, channel);
-  //input_mat.convertTo(output_mat, -1, 1/255.0, 0);
-  std::default_random_engine generator;
+  int channel = input.shape_[0];
+  int hight = input.shape_[1];
+  int weight = input.shape_[2];
+  Stream<xpu> *s = ctx.get_stream<xpu>();
+  Random<xpu> *prnd = ctx.requested[kRandom].get_random<xpu, real_t>(s);
+
+
   const RandomBrightnessParam &param = nnvm::get<RandomBrightnessParam>(attrs.parsed);
-  float alpha_c = 1.0 + std::uniform_real_distribution<float>(-param.max_brightness, param.max_brightness)(generator);
-  cv::Mat temp_;
-  cv::cvtColor(input_mat, temp_,  CV_RGB2GRAY);
-  float gray_mean = cv::mean(temp_)[0];
-  input_mat.convertTo(output_mat, -1, alpha_c, (1 - alpha_c) * gray_mean);
+  float alpha_c = 1.0 + std::uniform_real_distribution<float>(-param.max_brightness, param.max_brightness)(prnd->GetRndEngine());
+
+  const float R2YF = 0.299f;
+  const float G2YF = 0.587f;
+  const float B2YF = 0.114f;
+  static const float coeffs0[] = { R2YF, G2YF, B2YF };
+
+  MSHADOW_TYPE_SWITCH(outputs[0].type_flag_, DType, {
+    auto input_3d = input.FlatTo3D<xpu, DType>(s);
+    DType sum = (DType)0.0;
+    for (int c = 0; c < channel; ++c) {
+      for (int h = 0; h < hight; ++h) {
+        for (int w = 0; w < weight; ++w) {
+          sum += input_3d[c][h][w] * coeffs0[c];
+        }
+      }
+    }
+    float gray_mean = sum / (float)(hight * weight);
+    float beta = (1 - alpha_c) * gray_mean;
+
+    MXNET_ASSIGN_REQ_SWITCH(req[0], Req, {
+      mxnet_op::Kernel<mxnet_op::op_with_req<mul_add, Req>, xpu>::Launch(
+        s, inputs[0].Size(), outputs[0].dptr<DType>(), inputs[0].dptr<DType>(), DType(alpha_c), DType(beta));
+    });
+
+  });
 
 }
 
diff --git a/src/operator/image/image_random.cc b/src/operator/image/image_random.cc
index 3777e43..ade43d8 100644
--- a/src/operator/image/image_random.cc
+++ b/src/operator/image/image_random.cc
@@ -39,6 +39,9 @@ NNVM_REGISTER_OP(_image_random_brightness)
 .set_num_inputs(1)
 .set_num_outputs(1)
 .set_attr_parser(ParamParser<RandomBrightnessParam>)
+.set_attr<FResourceRequest>("FResourceRequest", [](const NodeAttrs& attrs) { 
+  return std::vector<ResourceRequest>{ResourceRequest::kRandom}; 
+})
 .set_attr<nnvm::FInferShape>("FInferShape", ElemwiseShape<1, 1>)
 .set_attr<nnvm::FInferType>("FInferType", ElemwiseType<1, 1>)
 .set_attr<FCompute>("FCompute<cpu>", RandomBrightness<cpu>)

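The contrast path first reduces the image to its luminance mean (0.299 R + 0.587 G + 0.114 B, averaged over all pixels) and then blends every pixel toward that mean via the mul_add kernel: out = alpha * in + (1 - alpha) * gray_mean. A minimal scalar sketch of that formula, assuming a 3-channel CHW float image (illustrative name):

    // Contrast adjustment as above: alpha > 1 increases contrast,
    // alpha < 1 decreases it, alpha = 1 is the identity.
    void AdjustContrastSketch(const float* in, float* out,
                              int height, int width, float alpha) {
      static const float kCoeffs[3] = {0.299f, 0.587f, 0.114f};  // R2YF/G2YF/B2YF
      const int size = height * width;
      float sum = 0.f;
      for (int c = 0; c < 3; ++c)
        for (int i = 0; i < size; ++i)
          sum += in[c * size + i] * kCoeffs[c];
      const float gray_mean = sum / size;
      const float beta = (1.f - alpha) * gray_mean;
      for (int i = 0; i < 3 * size; ++i)
        out[i] = in[i] * alpha + beta;  // exactly mul_add: a * b + c
    }
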
-- 
To stop receiving notification emails like this one, please contact
jxie@apache.org.

[incubator-mxnet] 04/20: [WIP][vision]fix (#8657)

Posted by jx...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git

commit a0cff26cae37971ec0e1d5c8ac3c2847eaf757af
Author: Hu Shiwen <ya...@gmail.com>
AuthorDate: Wed Nov 15 12:16:23 2017 +0800

    [WIP][vision]fix (#8657)
    
    * fix
    
    * lint
---
 src/operator/image/image_random-inl.h | 118 ++++++++++++++++++++++++----------
 src/operator/image/image_random.cc    |  32 +++++++++
 src/operator/mxnet_op.h               |   6 ++
 3 files changed, 122 insertions(+), 34 deletions(-)

diff --git a/src/operator/image/image_random-inl.h b/src/operator/image/image_random-inl.h
index 1281a71..fa1d6dc 100644
--- a/src/operator/image/image_random-inl.h
+++ b/src/operator/image/image_random-inl.h
@@ -31,6 +31,7 @@
 #include <opencv2/core/mat.hpp>
 #include "../mxnet_op.h"
 #include "image_common.h"
+#include "operator/operator_common.h"
 
 
 namespace mxnet {
@@ -41,32 +42,32 @@ enum ImageRandomResource { kRandom };
 
 template<typename xpu>
 static void RandomFlip(const nnvm::NodeAttrs &attrs,
-  const OpContext &ctx,
-  const std::vector<TBlob> &inputs,
-  const std::vector<OpReqType> &req,
-  const std::vector<TBlob> &outputs) {
+                       const OpContext &ctx,
+                       const std::vector<TBlob> &inputs,
+                       const std::vector<OpReqType> &req,
+                       const std::vector<TBlob> &outputs) {
 }
 template<typename xpu>
 static void ToTensor(const nnvm::NodeAttrs &attrs,
-  const OpContext &ctx,
-  const std::vector<TBlob> &inputs,
-  const std::vector<OpReqType> &req,
-  const std::vector<TBlob> &outputs) {
+                     const OpContext &ctx,
+                     const std::vector<TBlob> &inputs,
+                     const std::vector<OpReqType> &req,
+                     const std::vector<TBlob> &outputs) {
 }
 template<typename xpu>
 static void Normalize(const nnvm::NodeAttrs &attrs,
-  const OpContext &ctx,
-  const std::vector<TBlob> &inputs,
-  const std::vector<OpReqType> &req,
-  const std::vector<TBlob> &outputs) {
+                      const OpContext &ctx,
+                      const std::vector<TBlob> &inputs,
+                      const std::vector<OpReqType> &req,
+                      const std::vector<TBlob> &outputs) {
 }
 
 struct RandomBrightnessParam : public dmlc::Parameter<RandomBrightnessParam> {
   float max_brightness;
   DMLC_DECLARE_PARAMETER(RandomBrightnessParam) {
     DMLC_DECLARE_FIELD(max_brightness)
-      .set_default(0.0)
-      .describe("Max Brightness.");
+    .set_default(0.0)
+    .describe("Max Brightness.");
   }
 };
 
@@ -88,7 +89,7 @@ static void RandomBrightness(const nnvm::NodeAttrs &attrs,
   const RandomBrightnessParam &param = nnvm::get<RandomBrightnessParam>(attrs.parsed);
   float alpha_b = 1.0 + std::uniform_real_distribution<float>(-param.max_brightness, param.max_brightness)(prnd->GetRndEngine());
   MSHADOW_TYPE_SWITCH(outputs[0].type_flag_, DType, {
-    MXNET_ASSIGN_REQ_SWITCH(req[0], Req,{
+    MXNET_ASSIGN_REQ_SWITCH(req[0], Req, {
       mxnet_op::Kernel<mxnet_op::op_with_req<mshadow::op::mul, Req>, xpu>::Launch(
         s, inputs[0].Size(), outputs[0].dptr<DType>(), inputs[0].dptr<DType>(), DType(alpha_b));
     });
@@ -100,8 +101,8 @@ struct RandomContrastParam : public dmlc::Parameter<RandomContrastParam> {
   float max_contrast;
   DMLC_DECLARE_PARAMETER(RandomContrastParam) {
     DMLC_DECLARE_FIELD(max_contrast)
-      .set_default(0.0)
-      .describe("Max Contrast.");
+    .set_default(0.0)
+    .describe("Max Contrast.");
   }
 };
 
@@ -139,7 +140,7 @@ static void RandomContrast(const nnvm::NodeAttrs &attrs,
   static const float coeffs0[] = { R2YF, G2YF, B2YF };
 
   MSHADOW_TYPE_SWITCH(outputs[0].type_flag_, DType, {
-    auto input_3d = input.FlatTo3D<xpu, DType>(s);
+    auto input_3d = input.get<xpu, 3, DType>(s);
     DType sum = (DType)0.0;
     for (int c = 0; c < channel; ++c) {
       for (int h = 0; h < hight; ++h) {
@@ -160,36 +161,85 @@ static void RandomContrast(const nnvm::NodeAttrs &attrs,
 
 }
 
+struct RandomSaturationParam : public dmlc::Parameter<RandomSaturationParam> {
+  float max_saturation;
+  DMLC_DECLARE_PARAMETER(RandomSaturationParam) {
+    DMLC_DECLARE_FIELD(max_saturation)
+    .set_default(0.0)
+    .describe("Max Saturation.");
+  }
+};
+
 template<typename xpu>
 static void RandomSaturation(const nnvm::NodeAttrs &attrs,
-  const OpContext &ctx,
-  const std::vector<TBlob> &inputs,
-  const std::vector<OpReqType> &req,
-  const std::vector<TBlob> &outputs) {
+                             const OpContext &ctx,
+                             const std::vector<TBlob> &inputs,
+                             const std::vector<OpReqType> &req,
+                             const std::vector<TBlob> &outputs) {
+  using namespace mshadow;
+  auto input = inputs[0];
+  auto output = outputs[0];
+  int channel = input.shape_[0];
+  int hight = input.shape_[1];
+  int weight = input.shape_[2];
+  Stream<xpu> *s = ctx.get_stream<xpu>();
+  Random<xpu> *prnd = ctx.requested[kRandom].get_random<xpu, real_t>(s);
+  const RandomSaturationParam &param = nnvm::get<RandomSaturationParam>(attrs.parsed);
+  float alpha_s = 1.0 + std::uniform_real_distribution<float>(-param.max_saturation, param.max_saturation)(prnd->GetRndEngine());
+  float alpha_o = 1 - alpha_s;
+  const float R2YF = 0.299f;
+  const float G2YF = 0.587f;
+  const float B2YF = 0.114f;
+  static const float coeffs0[] = { R2YF, G2YF, B2YF };
+
+
+  MSHADOW_TYPE_SWITCH(outputs[0].type_flag_, DType, {
+    MXNET_ASSIGN_REQ_SWITCH(req[0], Req, {
+      auto input_3d =  input.get<xpu, 3, DType>(s);
+      auto output_3d = output.get<xpu, 3, DType>(s);
+      switch (channel) {
+        case 1:
+          Assign(output_3d, Req, input_3d)
+          break;
+        case 3:
+          for (int h = 0; h < hight; ++h) {
+            for (int w = 0; w < weight; ++w) {
+              float gray = input_3d[0][h][w] * R2YF + input_3d[1][h][w] * G2YF + input_3d[2][h][w] * B2YF;
+              Assign(output_3d[0][h][w], Req, DType(input_3d[0][h][w] * alpha_s + gray * alpha_o))
+              Assign(output_3d[1][h][w], Req, DType(input_3d[1][h][w] * alpha_s + gray * alpha_o))
+              Assign(output_3d[2][h][w], Req, DType(input_3d[2][h][w] * alpha_s + gray * alpha_o))
+            }
+          }
+          break;
+        default:
+          LOG(FATAL) << "unsupported number of channels: " << channel;
+
+      }
+    });
+  });
+
 }
 
 template<typename xpu>
 static void RandomHue(const nnvm::NodeAttrs &attrs,
-  const OpContext &ctx,
-  const std::vector<TBlob> &inputs,
-  const std::vector<OpReqType> &req,
-  const std::vector<TBlob> &outputs) {
+                      const OpContext &ctx,
+                      const std::vector<TBlob> &inputs,
+                      const std::vector<OpReqType> &req,
+                      const std::vector<TBlob> &outputs) {
 }
 
 template<typename xpu>
 static void RandomColorJitter(const nnvm::NodeAttrs &attrs,
-  const OpContext &ctx,
-  const std::vector<TBlob> &inputs,
-  const std::vector<OpReqType> &req,
-  const std::vector<TBlob> &outputs) {
+                              const OpContext &ctx,
+                              const std::vector<TBlob> &inputs,
+                              const std::vector<OpReqType> &req,
+                              const std::vector<TBlob> &outputs) {
 }
 
 template<typename xpu>
 static void RandomLighting(const nnvm::NodeAttrs &attrs,
-  const OpContext &ctx,
-  const std::vector<TBlob> &inputs,
-  const std::vector<OpReqType> &req,
-  const std::vector<TBlob> &outputs) {
+                           const OpContext &ctx,
+                           const std::vector<TBlob> &inputs,
+                           const std::vector<OpReqType> &req,
+                           const std::vector<TBlob> &outputs) {
 }
 
 
diff --git a/src/operator/image/image_random.cc b/src/operator/image/image_random.cc
index ade43d8..32648bb 100644
--- a/src/operator/image/image_random.cc
+++ b/src/operator/image/image_random.cc
@@ -49,5 +49,37 @@ NNVM_REGISTER_OP(_image_random_brightness)
 .add_argument("data", "NDArray-or-Symbol", "The input.")
 .add_arguments(RandomBrightnessParam::__FIELDS__());
 
+DMLC_REGISTER_PARAMETER(RandomContrastParam);
+NNVM_REGISTER_OP(_image_random_contrast)
+.describe(R"code()code" ADD_FILELINE)
+.set_num_inputs(1)
+.set_num_outputs(1)
+.set_attr_parser(ParamParser<RandomContrastParam>)
+.set_attr<FResourceRequest>("FResourceRequest", [](const NodeAttrs& attrs) {
+  return std::vector<ResourceRequest>{ResourceRequest::kRandom};
+})
+.set_attr<nnvm::FInferShape>("FInferShape", ElemwiseShape<1, 1>)
+.set_attr<nnvm::FInferType>("FInferType", ElemwiseType<1, 1>)
+.set_attr<FCompute>("FCompute<cpu>", RandomContrast<cpu>)
+.set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseNone{ "_copy" })
+.add_argument("data", "NDArray-or-Symbol", "The input.")
+.add_arguments(RandomContrastParam::__FIELDS__());
+
+DMLC_REGISTER_PARAMETER(RandomSaturationParam);
+NNVM_REGISTER_OP(_image_random_saturation)
+.describe(R"code()code" ADD_FILELINE)
+.set_num_inputs(1)
+.set_num_outputs(1)
+.set_attr_parser(ParamParser<RandomSaturationParam>)
+.set_attr<FResourceRequest>("FResourceRequest", [](const NodeAttrs& attrs) {
+  return std::vector<ResourceRequest>{ResourceRequest::kRandom};
+})
+.set_attr<nnvm::FInferShape>("FInferShape", ElemwiseShape<1, 1>)
+.set_attr<nnvm::FInferType>("FInferType", ElemwiseType<1, 1>)
+.set_attr<FCompute>("FCompute<cpu>", RandomSaturation<cpu>)
+.set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseNone{ "_copy" })
+.add_argument("data", "NDArray-or-Symbol", "The input.")
+.add_arguments(RandomSaturationParam::__FIELDS__());
+
 }
 }
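
Once this lands, the two new ops should be callable from Python; a minimal smoke
test, assuming the usual front-end mangling of _image_random_contrast into an
mx.nd.image namespace (the exact binding is outside this diff):

    import mxnet as mx

    img = mx.nd.random.uniform(0, 255, shape=(3, 100, 100))  # CHW, float
    out = mx.nd.image.random_contrast(img, max_contrast=0.5)
    out = mx.nd.image.random_saturation(out, max_saturation=0.5)
    print(out.shape)  # (3, 100, 100)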
diff --git a/src/operator/mxnet_op.h b/src/operator/mxnet_op.h
index 081e40a..43f3a55 100644
--- a/src/operator/mxnet_op.h
+++ b/src/operator/mxnet_op.h
@@ -366,6 +366,12 @@ struct op_with_req {
     KERNEL_ASSIGN(out[i], req, OP::Map(in[i], value));
   }
 
+  /*! \brief input is tensor and two scalar value */
+  template<typename DType>
+  MSHADOW_XINLINE static void Map(int i, DType *out, const DType *in, const DType value_1, const DType value_2) {
+    KERNEL_ASSIGN(out[i], req, OP::Map(in[i], value_1, value_2));
+  }
+
   /*! \brief No inputs (ie fill to constant value) */
   template<typename DType>
   MSHADOW_XINLINE static void Map(int i, DType *out) {

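The new overload lets one kernel launch carry two scalar operands, which is what
a fused multiply-add such as the mul_add functor in image_random-inl.h
(a * b + c) needs, e.g. the contrast blend out[i] = in[i] * alpha + beta. In
Python terms (illustrative only):

    def op_with_req_map(op, out, inp, v1, v2):
        # Elementwise: out[i] = op(in[i], v1, v2)
        for i in range(len(inp)):
            out[i] = op(inp[i], v1, v2)

    mul_add = lambda a, b, c: a * b + c  # mirrors the C++ mul_add functor
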
-- 
To stop receiving notification emails like this one, please contact
jxie@apache.org.

[incubator-mxnet] 18/20: add comments and sanity check (#8901)

Posted by jx...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git

commit b9569ee7175c21faca6bf82a5d31a5b4a829c03f
Author: Eric Junyuan Xie <pi...@users.noreply.github.com>
AuthorDate: Thu Nov 30 11:37:24 2017 -0800

    add comments and sanity check (#8901)
---
 python/mxnet/gluon/data/dataset.py           |  56 ++++++-
 python/mxnet/gluon/data/vision/transforms.py | 229 +++++++++++++++++++++------
 tests/python/unittest/test_gluon_data.py     |  21 +++
 3 files changed, 251 insertions(+), 55 deletions(-)

diff --git a/python/mxnet/gluon/data/dataset.py b/python/mxnet/gluon/data/dataset.py
index f7ab395..4b97e43 100644
--- a/python/mxnet/gluon/data/dataset.py
+++ b/python/mxnet/gluon/data/dataset.py
@@ -41,12 +41,53 @@ class Dataset(object):
         raise NotImplementedError
 
     def transform(self, fn, lazy=True):
+        """Returns a new dataset with each sample transformed by the
+        transformer function `fn`.
+
+        Parameters
+        ----------
+        fn : callable
+            A transformer function that takes a sample as input and
+            returns the transformed sample.
+        lazy : bool, default True
+            If False, transforms all samples at once. Otherwise,
+            transforms each sample on demand. Note that if `fn`
+            is stochastic, you must set lazy to True or you will
+            get the same result on all epochs.
+
+        Returns
+        -------
+        Dataset
+            The transformed dataset.
+        """
         trans = _LazyTransformDataset(self, fn)
         if lazy:
             return trans
         return SimpleDataset([i for i in trans])
 
     def transform_first(self, fn, lazy=True):
+        """Returns a new dataset with the first element of each sample
+        transformed by the transformer function `fn`.
+
+        This is useful, for example, when you only want to transform data
+        while keeping label as is.
+
+        Parameters
+        ----------
+        fn : callable
+            A transformer function that takes the first element of a sample
+            as input and returns the transformed element.
+        lazy : bool, default True
+            If False, transforms all samples at once. Otherwise,
+            transforms each sample on demand. Note that if `fn`
+            is stochastic, you must set lazy to True or you will
+            get the same result on all epochs.
+
+        Returns
+        -------
+        Dataset
+            The transformed dataset.
+        """
         def base_fn(x, *args):
             if args:
                 return (fn(x),) + args
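
A quick illustration of the two entry points documented above, on toy data
(SimpleDataset is documented just below; tuple samples arrive unpacked, which is
what base_fn(x, *args) relies on):

    from mxnet.gluon.data import SimpleDataset

    ds = SimpleDataset([(1, 'a'), (2, 'b')])
    both = ds.transform(lambda x, y: (x * 2, y))   # sees the whole sample
    first = ds.transform_first(lambda x: x * 2)    # data only, label kept
    print(both[0], first[0])                       # (2, 'a') (2, 'a')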
@@ -55,6 +96,13 @@ class Dataset(object):
 
 
 class SimpleDataset(Dataset):
+    """Simple Dataset wrapper for lists and arrays.
+
+    Parameters
+    ----------
+    data : dataset-like object
+        Any object that implements `len()` and `[]`.
+    """
     def __init__(self, data):
         self._data = data
 
@@ -66,6 +114,7 @@ class SimpleDataset(Dataset):
 
 
 class _LazyTransformDataset(Dataset):
+    """Lazily transformed dataset."""
     def __init__(self, data, fn):
         self._data = data
         self._fn = fn
@@ -81,13 +130,14 @@ class _LazyTransformDataset(Dataset):
 
 
 class ArrayDataset(Dataset):
-    """A dataset of multiple arrays.
+    """A dataset that combines multiple dataset-like objects, e.g.
+    Datasets, lists, arrays, etc.
 
-    The i-th sample is `(x1[i], x2[i], ...)`.
+    The i-th sample is defined as `(x1[i], x2[i], ...)`.
 
     Parameters
     ----------
-    *args : one or more arrays
+    *args : one or more dataset-like objects
         The data arrays.
     """
     def __init__(self, *args):
diff --git a/python/mxnet/gluon/data/vision/transforms.py b/python/mxnet/gluon/data/vision/transforms.py
index 931d644..8daf88e 100644
--- a/python/mxnet/gluon/data/vision/transforms.py
+++ b/python/mxnet/gluon/data/vision/transforms.py
@@ -20,11 +20,18 @@
 from .. import dataset
 from ...block import Block, HybridBlock
 from ...nn import Sequential, HybridSequential
-from .... import ndarray, initializer
-from ....base import _Null
+from .... import ndarray, initializer, image
+from ....base import _Null, numeric_types
 
 
 class Compose(Sequential):
+    """Sequentially composes multiple transforms.
+
+    Parameters
+    ----------
+    transforms : list of transform Blocks.
+        The list of transforms to be composed.
+    """
     def __init__(self, transforms):
         super(Compose, self).__init__()
         transforms.append(None)
@@ -34,18 +41,25 @@ class Compose(Sequential):
                 hybrid.append(i)
                 continue
             elif len(hybrid) == 1:
-                self.register_child(hybrid[0])
+                self.add(hybrid[0])
             elif len(hybrid) > 1:
                 hblock = HybridSequential()
                 for j in hybrid:
                     hblock.add(j)
-                self.register_child(hblock)
+                self.add(hblock)
             if i is not None:
-                self.register_child(i)
+                self.add(i)
         self.hybridize()
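
The loop above groups consecutive HybridBlocks into one HybridSequential before
hybridizing, so a mixed pipeline still gets compiled where possible. A small
sketch with transforms defined later in this file:

    from mxnet.gluon.data.vision import transforms

    aug = transforms.Compose([
        transforms.Resize(256),      # plain Block: breaks the hybrid chain
        transforms.ToTensor(),       # HybridBlock -+ fused into a single
        transforms.Normalize(0, 1),  # HybridBlock -+ HybridSequential
    ])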
 
 
 class Cast(HybridBlock):
+    """Cast input to a specific data type
+
+    Parameters
+    ----------
+    dtype : str, default 'float32'
+        The target data type, in string or `numpy.dtype`.
+    """
     def __init__(self, dtype='float32'):
         super(Cast, self).__init__()
         self._dtype = dtype
@@ -55,6 +69,12 @@ class Cast(HybridBlock):
 
 
 class ToTensor(HybridBlock):
+    """Converts an image NDArray to a tensor NDArray.
+
+    Converts an image NDArray of shape (H x W x C) in the range
+    [0, 255] to a float32 tensor NDArray of shape (C x H x W) in
+    the range [0, 1).
+    """
     def __init__(self):
         super(ToTensor, self).__init__()
 
@@ -63,6 +83,23 @@ class ToTensor(HybridBlock):
 
 
 class Normalize(HybridBlock):
+    """Normalize a tensor of shape (C x H x W) with mean and
+    standard deviation.
+
+    Given mean `(m1, ..., mn)` and std `(s1, ..., sn)` for `n` channels,
+    this transform normalizes each channel of the input tensor with::
+
+        output[i] = (input[i] - mi) / si
+
+    If mean or std is scalar, the same value will be applied to all channels.
+
+    Parameters
+    ----------
+    mean : float or tuple of floats
+        The mean values.
+    std : float or tuple of floats
+        The standard deviation values.
+    """
     def __init__(self, mean, std):
         super(Normalize, self).__init__()
         self._mean = mean
@@ -72,101 +109,189 @@ class Normalize(HybridBlock):
         return F.image.normalize(x, self._mean, self._std)
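
Numerically, ToTensor followed by Normalize maps a uint8 pixel p in channel i to
((p / 255) - mean[i]) / std[i]; with mean 0.5 and std 0.25, p = 191 becomes
roughly ((191 / 255) - 0.5) / 0.25 = 0.996. A numpy sketch of the pair
(illustrative, not the actual kernels):

    import numpy as np

    def to_tensor(img):
        # (H, W, C) uint8 in [0, 255] -> (C, H, W) float32 in [0, 1)
        return img.transpose(2, 0, 1).astype(np.float32) / 255.0

    def normalize(tensor, mean, std):
        # Per-channel (x - mean[i]) / std[i]; scalars broadcast to all channels.
        mean = np.asarray(mean, dtype=np.float32).reshape(-1, 1, 1)
        std = np.asarray(std, dtype=np.float32).reshape(-1, 1, 1)
        return (tensor - mean) / std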
 
 
-class RandomResizedCrop(HybridBlock):
-    def __init__(self, size, area=(0.08, 1.0), ratio=(3.0/4.0, 4.0/3.0),
+class RandomResizedCrop(Block):
+    """Crop the input image with random scale and aspect ratio.
+
+    Makes a crop of the original image with random size (default: 0.08
+    to 1.0 of the original image size) and random aspect ratio (default:
+    3/4 to 4/3), then resize it to the specified size.
+
+    Parameters
+    ----------
+    size : int or tuple of (W, H)
+        Size of the final output.
+    scale : tuple of two floats
+        If scale is `(min_area, max_area)`, the cropped image's area will
+        range from min_area to max_area of the original image's area.
+    ratio : tuple of two floats
+        Range of aspect ratio of the cropped image before resizing.
+    interpolation : int
+        Interpolation method for resizing. By default uses bilinear
+        interpolation. See OpenCV's resize function for available choices.
+    """
+    def __init__(self, size, scale=(0.08, 1.0), ratio=(3.0/4.0, 4.0/3.0),
                  interpolation=2):
         super(RandomResizedCrop, self).__init__()
-        self._args = (size, area, ratio, interpolation)
-
-    def hybrid_forward(self, F, x):
-        return F.image.random_resized_crop(x, *self._args)
-
-
-class CenterCrop(HybridBlock):
-    def __init__(self, size):
+        if isinstance(size, numeric_types):
+            size = (size, size)
+        self._args = (size, scale[0], ratio, interpolation)
+
+    def forward(self, x):
+        return image.random_size_crop(x, *self._args)[0]
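
Note that only scale[0] is forwarded, matching image.random_size_crop's scalar
min_area argument at this point, so the upper bound is implicitly 1.0. Typical
use on an HWC uint8 image:

    import mxnet as mx
    from mxnet.gluon.data.vision import transforms

    img = mx.nd.zeros((480, 640, 3), dtype='uint8')
    crop = transforms.RandomResizedCrop(224, scale=(0.08, 1.0))(img)
    print(crop.shape)  # (224, 224, 3)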
+
+
+class CenterCrop(Block):
+    """Crops the image `src` to the given `size` by trimming on all four
+    sides and preserving the center of the image. Upsamples if `src` is
+    smaller than `size`.
+
+    Parameters
+    ----------
+    size : int or tuple of (W, H)
+        Size of output image.
+    interpolation : int
+        Interpolation method for resizing. By default uses bilinear
+        interpolation. See OpenCV's resize function for available choices.
+    """
+    def __init__(self, size, interpolation=2):
         super(CenterCrop, self).__init__()
-        self._size = size
+        if isinstance(size, numeric_types):
+            size = (size, size)
+        self._args = (size, interpolation)
+
+    def forward(self, x):
+        return image.center_crop(x, *self._args)[0]
 
-    def hybrid_forward(self, F, x):
-        return F.image.center_crop(x, size)
 
+class Resize(Block):
+    """Resize an image to the given size.
 
-class Resize(HybridBlock):
+    Parameters
+    ----------
+    size : int or tuple of (W, H)
+        Size of output image.
+    interpolation : int
+        Interpolation method for resizing. By default uses bilinear
+        interpolation. See OpenCV's resize function for available choices.
+    """
     def __init__(self, size, interpolation=2):
         super(Resize, self).__init__()
-        self._args = (size, interpolation)
+        if isinstance(size, numeric_types):
+            size = (size, size)
+        self._args = tuple(size) + (interpolation,)
+
+    def forward(self, x):
+        return image.imresize(x, *self._args)
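
Since size here is (W, H) while NDArray images are laid out (H, W, C), a quick
shape check is worth doing:

    import mxnet as mx
    from mxnet.gluon.data.vision import transforms

    img = mx.nd.zeros((300, 500, 3), dtype='uint8')  # H=300, W=500
    out = transforms.Resize((320, 240))(img)         # size=(W=320, H=240)
    print(out.shape)                                 # (240, 320, 3)
    print(transforms.CenterCrop(224)(out).shape)     # (224, 224, 3)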
+
+
+class RandomHorizontalFlip(HybridBlock):
+    """Randomly flip the input image horizontally with a probability
+    of 0.5.
+    """
+    def __init__(self):
+        super(RandomHorizontalFlip, self).__init__()
 
     def hybrid_forward(self, F, x):
-        return F.image.resize(x, *self._args)
+        return F.image.random_horizontal_flip(x)
 
 
-class RandomFlip(HybridBlock):
-    def __init__(self, axis=1):
-        super(RandomFlip, self).__init__()
-        self._axis = axis
+class RandomVerticalFlip(HybridBlock):
+    """Randomly flip the input image vertically with a probability
+    of 0.5.
+    """
+    def __init__(self):
+        super(RandomVerticalFlip, self).__init__()
 
     def hybrid_forward(self, F, x):
-        return F.image.random_flip(x, self._axis)
+        return F.image.random_vertical_flip(x)
 
 
 class RandomBrightness(HybridBlock):
-    def __init__(self, max_brightness):
+    """Randomly jitters image brightness with a factor
+    chosen from `[max(0, 1 - brightness), 1 + brightness]`.
+    """
+    def __init__(self, brightness):
         super(RandomBrightness, self).__init__()
-        self._max_brightness = max_brightness
+        self._args = (max(0, 1-brightness), 1+brightness)
 
     def hybrid_forward(self, F, x):
-        return F.image.random_brightness(x, self._max_brightness)
+        return F.image.random_brightness(x, *self._args)
 
 
 class RandomContrast(HybridBlock):
-    def __init__(self, max_contrast):
+    """Randomly jitters image contrast with a factor
+    chosen from `[max(0, 1 - contrast), 1 + contrast]`.
+    """
+    def __init__(self, contrast):
         super(RandomContrast, self).__init__()
-        self._max_contrast = max_contrast
+        self._args = (max(0, 1-contrast), 1+contrast)
 
     def hybrid_forward(self, F, x):
-        return F.image.random_contrast(x, self._max_contrast)
+        return F.image.random_contrast(x, *self._args)
 
 
 class RandomSaturation(HybridBlock):
-    def __init__(self, max_saturation):
+    """Randomly jitters image saturation with a factor
+    chosen from `[max(0, 1 - saturation), 1 + saturation]`.
+    """
+    def __init__(self, saturation):
         super(RandomSaturation, self).__init__()
-        self._max_saturation = max_saturation
+        self._args = (max(0, 1-saturation), 1+saturation)
 
     def hybrid_forward(self, F, x):
-        return F.image.random_saturation(x, self._max_saturation)
+        return F.image.random_saturation(x, *self._args)
 
 
 class RandomHue(HybridBlock):
-    def __init__(self, max_hue):
+    """Randomly jitters image hue with a factor
+    chosen from `[max(0, 1 - hue), 1 + hue]`.
+    """
+    def __init__(self, hue):
         super(RandomHue, self).__init__()
-        self._max_hue = max_hue
+        self._args = (max(0, 1-hue), 1+hue)
 
     def hybrid_forward(self, F, x):
-        return F.image.random_hue(x, self._max_hue)
+        return F.image.random_hue(x, *self._args)
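
All four jitter blocks share one convention: the scalar argument x is turned
into the uniform sampling range [max(0, 1 - x), 1 + x], and the sampled factor
is applied by the backend op. Sketch of the sampling (illustrative):

    import random

    def jitter_range(x):
        return max(0.0, 1.0 - x), 1.0 + x

    lo, hi = jitter_range(0.4)       # (0.6, 1.4)
    factor = random.uniform(lo, hi)  # e.g. brightness scales the image by this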
 
 
 class RandomColorJitter(HybridBlock):
-    def __init__(self, max_brightness=0, max_contrast=0, max_saturation=0, max_hue=0):
+    """Randomly jitters the brightness, contrast, saturation, and hue
+    of an image.
+
+    Parameters
+    ----------
+    brightness : float
+        How much to jitter brightness. brightness factor is randomly
+        chosen from `[max(0, 1 - brightness), 1 + brightness]`.
+    contrast : float
+        How much to jitter contrast. contrast factor is randomly
+        chosen from `[max(0, 1 - contrast), 1 + contrast]`.
+    saturation : float
+        How much to jitter saturation. saturation factor is randomly
+        chosen from `[max(0, 1 - saturation), 1 + saturation]`.
+    hue : float
+        How much to jitter hue. hue factor is randomly
+        chosen from `[max(0, 1 - hue), 1 + hue]`.
+    """
+    def __init__(self, brightness=0, contrast=0, saturation=0, hue=0):
         super(RandomColorJitter, self).__init__()
-        self._args = (max_brightness, max_contrast, max_saturation, max_hue)
+        self._args = (brightness, contrast, saturation, hue)
 
     def hybrid_forward(self, F, x):
         return F.image.random_color_jitter(x, *self._args)
 
 
-class AdjustLighting(HybridBlock):
-    def __init__(self, alpha_rgb=_Null, eigval=_Null, eigvec=_Null):
-        super(AdjustLighting, self).__init__()
-        self._args = (alpha_rgb, eigval, eigvec)
-
-    def hybrid_forward(self, F, x):
-        return F.image.adjust_lighting(x, *self._args)
-
-
 class RandomLighting(HybridBlock):
-    def __init__(self, alpha_std=_Null, eigval=_Null, eigvec=_Null):
+    """Add AlexNet-style PCA-based noise to an image.
+
+    Parameters
+    ----------
+    alpha : float
+        Noise level; scales the PCA-based noise added to the image.
+    """
+    def __init__(self, alpha):
         super(RandomLighting, self).__init__()
-        self._args = (alpha_std, eigval, eigvec)
+        self._alpha = alpha
 
     def hybrid_forward(self, F, x):
-        return F.image.random_lighting(x, *self._args)
\ No newline at end of file
+        return F.image.random_lighting(x, self._alpha)
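
A numpy sketch of AlexNet-style PCA lighting, using the commonly published
ImageNet eigenvalues and eigenvectors (constants are illustrative and not part
of this patch):

    import numpy as np

    EIGVAL = np.array([55.46, 4.794, 1.148])
    EIGVEC = np.array([[-0.5675,  0.7192,  0.4009],
                       [-0.5808, -0.0045, -0.8140],
                       [-0.5836, -0.6948,  0.4203]])

    def random_lighting(img, alpha_std, rng=np.random):
        # img: float array of shape (H, W, 3) in [0, 255]
        alpha = rng.normal(0.0, alpha_std, size=3)
        rgb_shift = EIGVEC.dot(alpha * EIGVAL)  # one offset per channel
        return img + rgb_shift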
diff --git a/tests/python/unittest/test_gluon_data.py b/tests/python/unittest/test_gluon_data.py
index 63c5d28..c72ef7c 100644
--- a/tests/python/unittest/test_gluon_data.py
+++ b/tests/python/unittest/test_gluon_data.py
@@ -107,6 +107,27 @@ def test_multi_worker():
         assert (batch.asnumpy() == i).all()
 
 
+def test_transformer():
+    from mxnet.gluon.data.vision import transforms
+
+    transform = transforms.Compose([
+		transforms.Resize(300),
+		transforms.CenterCrop(256),
+		transforms.RandomResizedCrop(224),
+		transforms.RandomHorizontalFlip(),
+		transforms.RandomColorJitter(0.1, 0.1, 0.1, 0.1),
+		transforms.RandomBrightness(0.1),
+		transforms.RandomContrast(0.1),
+		transforms.RandomSaturation(0.1),
+		transforms.RandomHue(0.1),
+		transforms.RandomLighting(0.1),
+		transforms.ToTensor(),
+		transforms.Normalize([0, 0, 0], [1, 1, 1])])
+
+    transform(mx.nd.ones((245, 480, 3), dtype='uint8')).wait_to_read()
+
+
 if __name__ == '__main__':
+    test_transformer()
     import nose
     nose.runmodule()

-- 
To stop receiving notification emails like this one, please contact
jxie@apache.org.

[incubator-mxnet] 03/20: add stub (#8650)

Posted by jx...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git

commit 19973190944ce181f25c87f17dddc147b8c69162
Author: Hu Shiwen <ya...@gmail.com>
AuthorDate: Tue Nov 14 21:28:34 2017 +0800

    add stub (#8650)
    
    * add stub
---
 src/operator/image/image_random-inl.h | 79 +++++++++++++++++++++++++++++++----
 1 file changed, 72 insertions(+), 7 deletions(-)

diff --git a/src/operator/image/image_random-inl.h b/src/operator/image/image_random-inl.h
index 9506607..1281a71 100644
--- a/src/operator/image/image_random-inl.h
+++ b/src/operator/image/image_random-inl.h
@@ -35,17 +35,41 @@
 
 namespace mxnet {
 namespace op {
+
+
+enum ImageRandomResource { kRandom };
+
+template<typename xpu>
+static void RandomFlip(const nnvm::NodeAttrs &attrs,
+  const OpContext &ctx,
+  const std::vector<TBlob> &inputs,
+  const std::vector<OpReqType> &req,
+  const std::vector<TBlob> &outputs) {
+}
+template<typename xpu>
+static void ToTensor(const nnvm::NodeAttrs &attrs,
+  const OpContext &ctx,
+  const std::vector<TBlob> &inputs,
+  const std::vector<OpReqType> &req,
+  const std::vector<TBlob> &outputs) {
+}
+template<typename xpu>
+static void Normalize(const nnvm::NodeAttrs &attrs,
+  const OpContext &ctx,
+  const std::vector<TBlob> &inputs,
+  const std::vector<OpReqType> &req,
+  const std::vector<TBlob> &outputs) {
+}
+
 struct RandomBrightnessParam : public dmlc::Parameter<RandomBrightnessParam> {
   float max_brightness;
   DMLC_DECLARE_PARAMETER(RandomBrightnessParam) {
     DMLC_DECLARE_FIELD(max_brightness)
-    .set_default(0.0)
-    .describe("Max Contrast.");
+      .set_default(0.0)
+      .describe("Max Brightness.");
   }
 };
 
-enum ImageRandomResource { kRandom };
-
 template<typename xpu>
 static void RandomBrightness(const nnvm::NodeAttrs &attrs,
                              const OpContext &ctx,
@@ -72,6 +96,14 @@ static void RandomBrightness(const nnvm::NodeAttrs &attrs,
 
 }
 
+struct RandomContrastParam : public dmlc::Parameter<RandomContrastParam> {
+  float max_contrast;
+  DMLC_DECLARE_PARAMETER(RandomContrastParam) {
+    DMLC_DECLARE_FIELD(max_contrast)
+      .set_default(0.0)
+      .describe("Max Contrast.");
+  }
+};
 
 /*! \brief mul_add operator */
 struct mul_add {
@@ -82,7 +114,6 @@ struct mul_add {
   }
 };
 
-
 template<typename xpu>
 static void RandomContrast(const nnvm::NodeAttrs &attrs,
                            const OpContext &ctx,
@@ -99,8 +130,8 @@ static void RandomContrast(const nnvm::NodeAttrs &attrs,
   Random<xpu> *prnd = ctx.requested[kRandom].get_random<xpu, real_t>(s);
 
 
-  const RandomBrightnessParam &param = nnvm::get<RandomBrightnessParam>(attrs.parsed);
-  float alpha_c = 1.0 + std::uniform_real_distribution<float>(-param.max_brightness, param.max_brightness)(prnd->GetRndEngine());
+  const RandomContrastParam &param = nnvm::get<RandomContrastParam>(attrs.parsed);
+  float alpha_c = 1.0 + std::uniform_real_distribution<float>(-param.max_contrast, param.max_contrast)(prnd->GetRndEngine());
 
   const float R2YF = 0.299f;
   const float G2YF = 0.587f;
@@ -129,6 +160,40 @@ static void RandomContrast(const nnvm::NodeAttrs &attrs,
 
 }
 
+template<typename xpu>
+static void RandomSaturation(const nnvm::NodeAttrs &attrs,
+  const OpContext &ctx,
+  const std::vector<TBlob> &inputs,
+  const std::vector<OpReqType> &req,
+  const std::vector<TBlob> &outputs) {
+}
+
+template<typename xpu>
+static void RandomHue(const nnvm::NodeAttrs &attrs,
+  const OpContext &ctx,
+  const std::vector<TBlob> &inputs,
+  const std::vector<OpReqType> &req,
+  const std::vector<TBlob> &outputs) {
+}
+
+template<typename xpu>
+static void RandomColorJitter(const nnvm::NodeAttrs &attrs,
+  const OpContext &ctx,
+  const std::vector<TBlob> &inputs,
+  const std::vector<OpReqType> &req,
+  const std::vector<TBlob> &outputs) {
+}
+
+template<typename xpu>
+static void RandomLighting(const nnvm::NodeAttrs &attrs,
+  const OpContext &ctx,
+  const std::vector<TBlob> &inputs,
+  const std::vector<OpReqType> &req,
+  const std::vector<TBlob> &outputs) {
+}
+
+
+
 
 } // namespace op
 } // namespace mxnet

-- 
To stop receiving notification emails like this one, please contact
jxie@apache.org.

[incubator-mxnet] 12/20: Refactor image operators (#8761)

Posted by jx...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git

commit 29f6055579431062c8f5da6acc59a4dccab5a00d
Author: Eric Junyuan Xie <pi...@users.noreply.github.com>
AuthorDate: Tue Nov 21 17:43:21 2017 -0800

    Refactor image operators (#8761)
    
    * fix
    
    * fix
    
    * fix
    
    * fix
    
    * refactor
    
    * fix
---
 src/operator/image/image_aug_op.h     |  70 --------
 src/operator/image/image_common.h     |  89 ----------
 src/operator/image/image_random-inl.h | 314 ++++++++++++----------------------
 src/operator/image/image_random.cc    |  42 ++---
 4 files changed, 134 insertions(+), 381 deletions(-)

diff --git a/src/operator/image/image_aug_op.h b/src/operator/image/image_aug_op.h
deleted file mode 100644
index 40315ec..0000000
--- a/src/operator/image/image_aug_op.h
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-#ifndef MXNET_OPERATOR_IMAGE_IMAGE_AUG_OP_H_
-#define MXNET_OPERATOR_IMAGE_IMAGE_AUG_OP_H_
-
-#include <mxnet/operator_util.h>
-#include <vector>
-#include <utility>
-#include <algorithm>
-#include "../mshadow_op.h"
-#include "../elemwise_op_common.h"
-#include "../mxnet_op.h"
-
-namespace mxnet {
-namespace op {
-
-struct NormalizeParam : public dmlc::Parameter<NormalizeParam> {
-  nnvm::Tuple<float> mean, std;
-  DMLC_DECLARE_PARAMETER(NormalizeParam) {
-    DMLC_DECLARE_FIELD(mean).set_default(nnvm::Tuple<float>({0.f}))
-      .describe("");
-    DMLC_DECLARE_FIELD(std).set_default(nnvm::Tuple<float>({1.f}))
-      .describe("");
-  }
-};
-
-
-void NormalizeCompute(const nnvm::NodeAttrs& attrs,
-                      const OpContext& ctx,
-                      const std::vector<NDArray>& inputs,
-                      const std::vector<OpReqType>& req,
-                      const std::vector<NDArray>& outputs) {
-  using namespace mxnet_op;
-  const auto& params = dmlc::get<NormalizeParam>(attrs.parsed);
-  CHECK_NE(req[0], kAddTo);
-  MSHADOW_TYPE_SWITCH(inputs[0].type_flag_, DType, {
-    auto num_channel = inputs[0].shape_[0];
-    auto size = inputs[0].Size(1, inputs[0].ndim());
-    nnvm::Tuple<DType> mean(params.mean.begin(), params.mean.end());
-    nnvm::Tuple<DType> std(params.std.begin(), params.std.end());
-    DType* src = inputs[0].dptr<DType>();
-    DType* dst = outputs[0].dptr<DType>();
-    for (int i = 0; i < num_channel; ++i) {
-      for (int j = 0; j < size; ++j, ++out, ++src) {
-        *out = (*src - mean[i]) / std[i];
-      }
-    }
-  });
-}
-
-}  // namespace op
-}  // namespace mxnet
-#endif  // MXNET_OPERATOR_IMAGE_IMAGE_AUG_OP_H_
diff --git a/src/operator/image/image_common.h b/src/operator/image/image_common.h
deleted file mode 100644
index 3b6b8e3..0000000
--- a/src/operator/image/image_common.h
+++ /dev/null
@@ -1,89 +0,0 @@
-/*
-* Licensed to the Apache Software Foundation (ASF) under one
-* or more contributor license agreements.  See the NOTICE file
-* distributed with this work for additional information
-* regarding copyright ownership.  The ASF licenses this file
-* to you under the Apache License, Version 2.0 (the
-* "License"); you may not use this file except in compliance
-* with the License.  You may obtain a copy of the License at
-*
-*   http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing,
-* software distributed under the License is distributed on an
-* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-* KIND, either express or implied.  See the License for the
-* specific language governing permissions and limitations
-* under the License.
-*/
-
-/*!
-* \file image_common.h
-* \brief
-* \author
-*/
-#ifndef MXNET_OPERATOR_IMAGE_IMAGE_COMMON_H_
-#define MXNET_OPERATOR_IMAGE_IMAGE_COMMON_H_
-
-#include <mxnet/base.h>
-
-namespace mxnet {
-namespace op {
-
-/**
-* @brief convert TBlob to cv::Mat
-* @param input @see TBlob
-* @param hight
-* @param weight
-* @param channel
-* @return
-*/
-static cv::Mat mat_convert(TBlob input, int hight, int weight, int channel) {
-  cv::Mat m;
-  switch (input.type_flag_) {
-    case mshadow::kFloat32: {
-      typedef float DType;
-      m = cv::Mat(hight, weight, CV_MAKETYPE(CV_32F, channel), input.dptr<DType>());
-    }
-    break;
-    case mshadow::kFloat64: {
-      typedef double DType;
-      m = cv::Mat(hight, weight, CV_MAKETYPE(CV_64F, channel), input.dptr<DType>());
-    }
-    break;
-    case mshadow::kFloat16: {
-      typedef mshadow::half::half_t DType;
-      LOG(FATAL) << "not support type enum " << input.type_flag_;
-    }
-    break;
-    case mshadow::kUint8: {
-      typedef uint8_t DType;
-      m = cv::Mat(hight, weight, CV_MAKETYPE(CV_8U, channel), input.dptr<DType>());
-    }
-    break;
-    case mshadow::kInt8: {
-      typedef int8_t DType;
-      m = cv::Mat(hight, weight, CV_MAKETYPE(CV_8S, channel), input.dptr<DType>());
-    }
-    break;
-    case mshadow::kInt32: {
-      typedef int32_t DType;
-      m = cv::Mat(hight, weight, CV_MAKETYPE(CV_32S, channel), input.dptr<DType>());
-    }
-    break;
-    case mshadow::kInt64: {
-      typedef int64_t DType;
-      LOG(FATAL) << "not support type enum " << input.type_flag_;
-    }
-    break;
-    default:
-      LOG(FATAL) << "Unknown type enum " << input.type_flag_;
-  }
-  return m;
-}
-}  // namespace op
-}  // namespace mxnet
-
-
-#endif  // MXNET_OPERATOR_IMAGE_IMAGE_COMMON_H_
-
diff --git a/src/operator/image/image_random-inl.h b/src/operator/image/image_random-inl.h
index 6f9cdc0..f823c8c 100644
--- a/src/operator/image/image_random-inl.h
+++ b/src/operator/image/image_random-inl.h
@@ -30,16 +30,11 @@
 #include <opencv2/opencv.hpp>
 #include <opencv2/core/mat.hpp>
 #include "../mxnet_op.h"
-#include "image_common.h"
-#include "../../operator/operator_common.h"
+#include "../operator_common.h"
 
 namespace mxnet {
 namespace op {
 
-
-enum ImageRandomResource { kRandom };
-
-template<typename xpu>
 static void RandomFlip(const nnvm::NodeAttrs &attrs,
                        const OpContext &ctx,
                        const std::vector<TBlob> &inputs,
@@ -73,37 +68,25 @@ inline bool ToTensorShape(const nnvm::NodeAttrs& attrs,
   return true;
 }
 
-template<typename xpu>
 static void ToTensor(const nnvm::NodeAttrs &attrs,
                      const OpContext &ctx,
                      const std::vector<TBlob> &inputs,
                      const std::vector<OpReqType> &req,
                      const std::vector<TBlob> &outputs) {
-  auto input = inputs[0];
-  auto output = outputs[0];
-
-  int height = input.shape_[0];
-  int weight = input.shape_[1];
-  int channel = input.shape_[2];
-
-  typedef float   DstDType;
-  typedef uint8_t SrcDType;
-
   CHECK_EQ(req[0], kWriteTo)
     << "`to_tensor` does not support inplace";
 
-  mshadow::Stream<xpu> *s = ctx.get_stream<xpu>();
-  MXNET_ASSIGN_REQ_SWITCH(req[0], Req, {
-    auto input_3d =  input.get<xpu, 3, SrcDType>(s);
-    auto output_3d = output.get<xpu, 3, DstDType>(s);
-    for (int h = 0; h < height; ++h) {
-      for (int w = 0; w < weight; ++w) {
-        for (int c = 0; c < channel; ++c) {
-          Assign(output_3d[c][h][w], Req, DstDType(input_3d[h][w][c] / 255.0));
-        }
-      }
+  int length = inputs[0].shape_[0] * inputs[0].shape_[1];
+  int channel = inputs[0].shape_[2];
+
+  float* output = outputs[0].dptr<float>();
+  uint8_t* input = inputs[0].dptr<uint8_t>();
+
+  for (int l = 0; l < length; ++l) {
+    for (int c = 0; c < channel; ++c) {
+      output[c*length + l] = static_cast<float>(input[l*channel + c]) / 255.0f;
     }
-  });
+  }
 }
 
 struct NormalizeParam : public dmlc::Parameter<NormalizeParam> {
@@ -117,93 +100,48 @@ struct NormalizeParam : public dmlc::Parameter<NormalizeParam> {
   }
 };
 
-struct normalize {
-  template<typename DType>
-  MSHADOW_XINLINE static void Map(int i, DType *out, const DType *in,
-                                  const OpReqType req,
-                                  const int nchannel, const int size,
-                                  const float *mean, const float *std) {
-    int c = 0;
-    switch (nchannel) {
-      case 1:
-        break;
-      case 3:
-        if (i < size) {
-          c = 0;
-        } else if (i < (size << 1)) {
-          c = 1;
-        } else {
-          c = 2;
-        }
-        break;
-      default:
-        LOG(FATAL) << "not support channel" << nchannel;
-    }
-    float m = (mean ? mean[c] : 0);
-    KERNEL_ASSIGN(out[i], req, static_cast<DType>((in[i] - m) / std[c]));
-  }
-};
 
-static void NormalizeCheckParam(const nnvm::Tuple<float> &mean,
-                                const nnvm::Tuple<float> &std,
-                                const int nchannel) {
-  CHECK(mean.ndim() == 1 || mean.ndim() == 3)
-    << "Mean must be in dimension 1 or 3.";
-  CHECK(std.ndim() == 1 || std.ndim() == 3)
-    << "Standard deviations must be in dimension 1 or 3.";
-  CHECK(nchannel == 1 || nchannel == 3) << "Image channel must be 1 or 3.";
-  CHECK_EQ(mean.ndim(), nchannel)
-    << "Mean dimension does not agree with image channel.";
-  CHECK_EQ(std.ndim(), nchannel)
-    << "Standard deviations dimension does not agree with image channel.";
-  for (uint32_t c = 0; c < std.ndim(); ++c) {
-    CHECK(std[c] > 0) << "Invalid standard deviation " << std[c];
-  }
+inline bool NormalizeShape(const nnvm::NodeAttrs& attrs,
+                          std::vector<TShape> *in_attrs,
+                          std::vector<TShape> *out_attrs) {
+  const NormalizeParam &param = nnvm::get<NormalizeParam>(attrs.parsed);
+  const auto& dshape = (*in_attrs)[0];
+  if (!dshape.ndim()) return false;
+  CHECK_EQ(dshape.ndim(), 3)
+      << "Input must have 3 dimensions";
+
+  auto nchannels = dshape[0];
+  CHECK(param.mean.ndim() == 1 || param.mean.ndim() == nchannels)
+      << "mean must have either 1 or " << nchannels << " elements";
+  CHECK(param.std.ndim() == 1 || param.std.ndim() == nchannels)
+      << "std must have either 1 or " << nchannels << " elements";
+
+  SHAPE_ASSIGN_CHECK(*out_attrs, 0, dshape);
+  return true;
 }
 
-template<typename xpu>
+
 static void Normalize(const nnvm::NodeAttrs &attrs,
                       const OpContext &ctx,
                       const std::vector<TBlob> &inputs,
                       const std::vector<OpReqType> &req,
                       const std::vector<TBlob> &outputs) {
   const NormalizeParam &param = nnvm::get<NormalizeParam>(attrs.parsed);
-  auto mean = param.mean;
-  auto std = param.std;
-
-  int nchannel = inputs[0].shape_[0];
-  NormalizeCheckParam(mean, std, nchannel);
-
-  int size = inputs[0].Size() / nchannel;
-  mshadow::Stream<xpu> *s = ctx.get_stream<xpu>();
-  MXNET_ASSIGN_REQ_SWITCH(req[0], Req, {
-    MSHADOW_TYPE_SWITCH(outputs[0].type_flag_, DType, {
-      mxnet_op::Kernel<normalize, xpu>::Launch(
-        s, inputs[0].Size(), outputs[0].dptr<DType>(), inputs[0].dptr<DType>(),
-        Req, nchannel, size, mean.begin(), std.begin());
-    });
-  });
-}
 
-template<typename xpu>
-static void NormalizeBackward(const nnvm::NodeAttrs &attrs,
-                              const OpContext &ctx,
-                              const std::vector<TBlob> &inputs,
-                              const std::vector<OpReqType> &req,
-                              const std::vector<TBlob> &outputs) {
-  const NormalizeParam &param = nnvm::get<NormalizeParam>(attrs.parsed);
-  int nchannel = inputs[0].shape_[0];
-
-  NormalizeCheckParam(param.mean, param.std, nchannel);
-
-  int size = inputs[0].Size() / nchannel;
-  mshadow::Stream<xpu> *s = ctx.get_stream<xpu>();
-  MXNET_ASSIGN_REQ_SWITCH(req[0], Req, {
-    MSHADOW_TYPE_SWITCH(outputs[0].type_flag_, DType, {
-      mxnet_op::Kernel<normalize, xpu>::Launch(
-        s, inputs[0].Size(), outputs[0].dptr<DType>(), inputs[0].dptr<DType>(),
-        Req, nchannel, size, nullptr, param.std.begin());
-      });
+  int nchannels = inputs[0].shape_[0];
+  int length = inputs[0].shape_[1] * inputs[0].shape_[2];
+
+  MSHADOW_TYPE_SWITCH(outputs[0].type_flag_, DType, {
+    DType* input = inputs[0].dptr<DType>();
+    DType* output = outputs[0].dptr<DType>();
+
+    for (int i = 0; i < nchannels; ++i) {
+      DType mean = param.mean[param.mean.ndim() > 1 ? i : 0];
+      DType std = param.std[param.std.ndim() > 1 ? i : 0];
+      for (int j = 0; j < length; ++j) {
+        output[i*length + j] = (input[i*length + j] - mean) / std;
+      }
+    }
   });
 }
 
@@ -211,99 +148,83 @@ struct RandomBrightnessParam : public dmlc::Parameter<RandomBrightnessParam> {
   float max_brightness;
   DMLC_DECLARE_PARAMETER(RandomBrightnessParam) {
     DMLC_DECLARE_FIELD(max_brightness)
-    .set_default(0.0)
+    .set_lower_bound(0.0)
     .describe("Max Brightness.");
   }
 };
 
-template<typename xpu>
 static void RandomBrightness(const nnvm::NodeAttrs &attrs,
                              const OpContext &ctx,
                              const std::vector<TBlob> &inputs,
                              const std::vector<OpReqType> &req,
                              const std::vector<TBlob> &outputs) {
   using namespace mshadow;
-  auto input = inputs[0];
-  auto output = outputs[0];
-  int channel = input.shape_[0];
-  int height = input.shape_[1];
-  int weight = input.shape_[2];
-  Stream<xpu> *s = ctx.get_stream<xpu>();
-  Random<xpu> *prnd = ctx.requested[kRandom].get_random<xpu, real_t>(s);
-
   const RandomBrightnessParam &param = nnvm::get<RandomBrightnessParam>(attrs.parsed);
+
+  int length = inputs[0].Size();
+
+  uint8_t* output = outputs[0].dptr<uint8_t>();
+  uint8_t* input = inputs[0].dptr<uint8_t>();
+
+  Stream<cpu> *s = ctx.get_stream<cpu>();
+  Random<cpu> *prnd = ctx.requested[0].get_random<cpu, float>(s);
   float alpha_b = 1.0 + std::uniform_real_distribution<float>(
-    -param.max_brightness, param.max_brightness)(prnd->GetRndEngine());
-  MSHADOW_TYPE_SWITCH(outputs[0].type_flag_, DType, {
-    MXNET_ASSIGN_REQ_SWITCH(req[0], Req, {
-      mxnet_op::Kernel<mxnet_op::op_with_req<mshadow::op::mul, Req>, xpu>::Launch(
-        s, inputs[0].Size(), outputs[0].dptr<DType>(), inputs[0].dptr<DType>(), DType(alpha_b));
-    });
-  });
+      -param.max_brightness, param.max_brightness)(prnd->GetRndEngine());
+
+  for (int l = 0; l < length; ++l) {
+    float val = static_cast<float>(input[l]) * alpha_b;
+    val = std::min(std::max(val, 0.f), 255.f);
+    output[l] = static_cast<uint8_t>(val);
+  }
 }
 
+
 struct RandomContrastParam : public dmlc::Parameter<RandomContrastParam> {
   float max_contrast;
   DMLC_DECLARE_PARAMETER(RandomContrastParam) {
     DMLC_DECLARE_FIELD(max_contrast)
-    .set_default(0.0)
+    .set_lower_bound(0.0)
     .describe("Max Contrast.");
   }
 };
 
-/*! \brief mul_add operator */
-struct mul_add {
-  /*! \brief map a, b, c to result using defined operation */
-  template<typename DType>
-  MSHADOW_XINLINE static DType Map(DType a, DType b, DType c) {
-    return a * b + c;
-  }
-};
 
-template<typename xpu>
 static void RandomContrast(const nnvm::NodeAttrs &attrs,
                            const OpContext &ctx,
                            const std::vector<TBlob> &inputs,
                            const std::vector<OpReqType> &req,
                            const std::vector<TBlob> &outputs) {
   using namespace mshadow;
-  auto input = inputs[0];
-  auto output = outputs[0];
-  int channel = input.shape_[0];
-  int height = input.shape_[1];
-  int weight = input.shape_[2];
-  Stream<xpu> *s = ctx.get_stream<xpu>();
-  Random<xpu> *prnd = ctx.requested[kRandom].get_random<xpu, real_t>(s);
+  static const float coef[] = { 0.299f, 0.587f, 0.114f };
+  const RandomContrastParam &param = nnvm::get<RandomContrastParam>(attrs.parsed);
 
+  int length = inputs[0].shape_[0] * inputs[0].shape_[1];
+  int nchannels = inputs[0].shape_[2];
 
-  const RandomContrastParam &param = nnvm::get<RandomContrastParam>(attrs.parsed);
+  uint8_t* output = outputs[0].dptr<uint8_t>();
+  uint8_t* input = inputs[0].dptr<uint8_t>();
+
+  Stream<cpu> *s = ctx.get_stream<cpu>();
+  Random<cpu> *prnd = ctx.requested[0].get_random<cpu, real_t>(s);
   float alpha_c = 1.0 + std::uniform_real_distribution<float>(
     -param.max_contrast, param.max_contrast)(prnd->GetRndEngine());
 
-  const float R2YF = 0.299f;
-  const float G2YF = 0.587f;
-  const float B2YF = 0.114f;
-  static const float coeffs0[] = { R2YF, G2YF, B2YF };
-
-  MSHADOW_TYPE_SWITCH(outputs[0].type_flag_, DType, {
-    auto input_3d = input.get<xpu, 3, DType>(s);
-    DType sum = (DType)0.0;
-    for (int c = 0; c < channel; ++c) {
-      for (int h = 0; h < height; ++h) {
-        for (int w = 0; w < weight; ++w) {
-          sum += input_3d[c][h][w] * coeffs0[c];
-        }
-      }
+  float sum = 0.f;
+  if (nchannels > 1) {
+    for (int l = 0; l < length; ++l) {
+      for (int c = 0; c < nchannels; ++c) sum += input[l*nchannels + c] * coef[c];
     }
-    float gray_mean = sum / static_cast<float>(height * weight);
-    float beta = (1 - alpha_c) * gray_mean;
-
-    MXNET_ASSIGN_REQ_SWITCH(req[0], Req, {
-      mxnet_op::Kernel<mxnet_op::op_with_req<mul_add, Req>, xpu>::Launch(
-        s, inputs[0].Size(), outputs[0].dptr<DType>(),
-        inputs[0].dptr<DType>(), DType(alpha_c), DType(beta));
-    });
-  });
+  } else {
+    for (int l = 0; l < length; ++l) sum += input[l];
+  }
+  float gray_mean = sum / static_cast<float>(length);
+  float beta = (1 - alpha_c) * gray_mean;
+
+  for (int l = 0; l < length * nchannels; ++l) {
+    float val = input[l] * alpha_c + beta;
+    val = std::min(std::max(val, 0.f), 255.f);
+    output[l] = static_cast<uint8_t>(val);
+  }
 }
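
The rewritten kernel blends every pixel toward the image's mean luma,
out = alpha_c * in + (1 - alpha_c) * gray_mean, then clamps to [0, 255]. An
equivalent numpy sketch (illustrative):

    import numpy as np

    def jitter_contrast(img, max_contrast, rng=np.random):
        # img: uint8 array of shape (H, W, 3)
        alpha = 1.0 + rng.uniform(-max_contrast, max_contrast)
        gray_mean = (img * np.array([0.299, 0.587, 0.114])).sum(axis=-1).mean()
        out = img * alpha + (1.0 - alpha) * gray_mean
        return np.clip(out, 0, 255).astype(np.uint8)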
 
 struct RandomSaturationParam : public dmlc::Parameter<RandomSaturationParam> {
@@ -315,55 +236,46 @@ struct RandomSaturationParam : public dmlc::Parameter<RandomSaturationParam> {
   }
 };
 
-template<typename xpu>
 static void RandomSaturation(const nnvm::NodeAttrs &attrs,
                              const OpContext &ctx,
                              const std::vector<TBlob> &inputs,
                              const std::vector<OpReqType> &req,
                              const std::vector<TBlob> &outputs) {
   using namespace mshadow;
-  auto input = inputs[0];
-  auto output = outputs[0];
-  int channel = input.shape_[0];
-  int height = input.shape_[1];
-  int weight = input.shape_[2];
-  Stream<xpu> *s = ctx.get_stream<xpu>();
-  Random<xpu> *prnd = ctx.requested[kRandom].get_random<xpu, real_t>(s);
   const RandomSaturationParam &param = nnvm::get<RandomSaturationParam>(attrs.parsed);
-  float alpha_s = 1.0 + std::uniform_real_distribution<float>(
+  static const float coef[] = { 0.299f, 0.587f, 0.114f };
+
+  int length = inputs[0].shape_[0] * inputs[0].shape_[1];
+  int nchannels = inputs[0].shape_[2];
+
+  uint8_t* output = outputs[0].dptr<uint8_t>();
+  uint8_t* input = inputs[0].dptr<uint8_t>();
+
+  Stream<cpu> *s = ctx.get_stream<cpu>();
+  Random<cpu> *prnd = ctx.requested[0].get_random<cpu, real_t>(s);
+  float alpha_s = 1.f + std::uniform_real_distribution<float>(
     -param.max_saturation, param.max_saturation)(prnd->GetRndEngine());
-  float alpha_o = 1 - alpha_s;
-  const float R2YF = 0.299f;
-  const float G2YF = 0.587f;
-  const float B2YF = 0.114f;
-  static const float coeffs0[] = { R2YF, G2YF, B2YF };
+  float alpha_o = 1.f - alpha_s;
 
+  if (nchannels == 1) {
+    for (int l = 0; l < length * nchannels; ++l) output[l] = input[l];
+    return;
+  }
 
-  MSHADOW_TYPE_SWITCH(outputs[0].type_flag_, DType, {
-    MXNET_ASSIGN_REQ_SWITCH(req[0], Req, {
-      auto input_3d =  input.get<xpu, 3, DType>(s);
-      auto output_3d = output.get<xpu, 3, DType>(s);
-      switch (channel) {
-        case 1:
-          Assign(output_3d, Req, input_3d)
-          break;
-        case 3:
-          for (int h = 0; h < height; ++h) {
-            for (int w = 0; w < weight; ++w) {
-              float gray =
-                input_3d[0][h][w] * R2YF + input_3d[1][h][w] * G2YF + input_3d[2][h][w] * B2YF;
-              Assign(output_3d[0][h][w], Req, DType(gray * alpha_s + input_3d[0][h][w] * alpha_o))
-            }
-          }
-          break;
-        default:
-          LOG(FATAL) << "not support channel" << channel;
-      }
-    });
-  });
+  for (int l = 0; l < length; ++l) {
+    float gray = 0.f;
+    for (int c = 0; c < nchannels; ++c) {
+      gray += input[l*nchannels + c] * coef[c];
+    }
+    gray *= alpha_o;
+    for (int c = 0; c < nchannels; ++c) {
+      float val = gray + input[l*nchannels + c] * alpha_s;
+      val = std::min(std::max(val, 0.f), 255.f);
+      output[l*nchannels + c] = static_cast<uint8_t>(val);
+    }
+  }
 }
 
-template<typename xpu>
 static void RandomHue(const nnvm::NodeAttrs &attrs,
                       const OpContext &ctx,
                       const std::vector<TBlob> &inputs,
@@ -371,7 +283,6 @@ static void RandomHue(const nnvm::NodeAttrs &attrs,
                       const std::vector<TBlob> &outputs) {
 }
 
-template<typename xpu>
 static void RandomColorJitter(const nnvm::NodeAttrs &attrs,
                               const OpContext &ctx,
                               const std::vector<TBlob> &inputs,
@@ -379,7 +290,6 @@ static void RandomColorJitter(const nnvm::NodeAttrs &attrs,
                               const std::vector<TBlob> &outputs) {
 }
 
-template<typename xpu>
 static void RandomLighting(const nnvm::NodeAttrs &attrs,
                            const OpContext &ctx,
                            const std::vector<TBlob> &inputs,
diff --git a/src/operator/image/image_random.cc b/src/operator/image/image_random.cc
index e32a677..7ff7328 100644
--- a/src/operator/image/image_random.cc
+++ b/src/operator/image/image_random.cc
@@ -40,10 +40,11 @@ NNVM_REGISTER_OP(_image_to_tensor)
 })
 .set_attr<nnvm::FInferShape>("FInferShape", ToTensorShape)
 .set_attr<nnvm::FInferType>("FInferType", ToTensorType)
-.set_attr<FCompute>("FCompute<cpu>", ToTensor<cpu>)
+.set_attr<FCompute>("FCompute<cpu>", ToTensor)
 .set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseNone{ "_copy" })
 .add_argument("data", "NDArray-or-Symbol", "The input.");
 
+
 DMLC_REGISTER_PARAMETER(NormalizeParam);
 NNVM_REGISTER_OP(_image_normalize)
 .describe(R"code()code" ADD_FILELINE)
@@ -56,25 +57,14 @@ NNVM_REGISTER_OP(_image_normalize)
 .set_attr<nnvm::FInferShape>("FInferShape", ElemwiseShape<1, 1>)
 .set_attr<nnvm::FInferType>("FInferType", ElemwiseType<1, 1>)
 .set_attr<nnvm::FInplaceOption>("FInplaceOption",
-[](const NodeAttrs& attrs){
-  return std::vector<std::pair<int, int> >{{0, 0}};
-})
-.set_attr<FCompute>("FCompute<cpu>", Normalize<cpu>)
-.set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseNone{ "_image_backward_normalize" })
+  [](const NodeAttrs& attrs){
+    return std::vector<std::pair<int, int> >{{0, 0}};
+  })
+.set_attr<FCompute>("FCompute<cpu>", Normalize)
+.set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseNone{ "_copy" })
 .add_argument("data", "NDArray-or-Symbol", "The input.")
 .add_arguments(NormalizeParam::__FIELDS__());
 
-NNVM_REGISTER_OP(_image_backward_normalize)
-.describe(R"code()code" ADD_FILELINE)
-.set_num_inputs(1)
-.set_num_outputs(1)
-.set_attr_parser(ParamParser<NormalizeParam>)
-.set_attr<nnvm::TIsBackward>("TIsBackward", true)
-.set_attr<nnvm::FInplaceOption>("FInplaceOption",
-[](const NodeAttrs& attrs){
-  return std::vector<std::pair<int, int> >{{0, 0}};
-})
-.set_attr<FCompute>("FCompute<cpu>", NormalizeBackward<cpu>);
 
 DMLC_REGISTER_PARAMETER(RandomBrightnessParam);
 NNVM_REGISTER_OP(_image_random_brightness)
@@ -87,7 +77,11 @@ NNVM_REGISTER_OP(_image_random_brightness)
 })
 .set_attr<nnvm::FInferShape>("FInferShape", ElemwiseShape<1, 1>)
 .set_attr<nnvm::FInferType>("FInferType", ElemwiseType<1, 1>)
-.set_attr<FCompute>("FCompute<cpu>", RandomBrightness<cpu>)
+.set_attr<nnvm::FInplaceOption>("FInplaceOption",
+  [](const NodeAttrs& attrs){
+    return std::vector<std::pair<int, int> >{{0, 0}};
+  })
+.set_attr<FCompute>("FCompute<cpu>", RandomBrightness)
 .set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseNone{ "_copy" })
 .add_argument("data", "NDArray-or-Symbol", "The input.")
 .add_arguments(RandomBrightnessParam::__FIELDS__());
@@ -103,7 +97,11 @@ NNVM_REGISTER_OP(_image_random_contrast)
 })
 .set_attr<nnvm::FInferShape>("FInferShape", ElemwiseShape<1, 1>)
 .set_attr<nnvm::FInferType>("FInferType", ElemwiseType<1, 1>)
-.set_attr<FCompute>("FCompute<cpu>", RandomContrast<cpu>)
+.set_attr<nnvm::FInplaceOption>("FInplaceOption",
+  [](const NodeAttrs& attrs){
+    return std::vector<std::pair<int, int> >{{0, 0}};
+  })
+.set_attr<FCompute>("FCompute<cpu>", RandomContrast)
 .set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseNone{ "_copy" })
 .add_argument("data", "NDArray-or-Symbol", "The input.")
 .add_arguments(RandomContrastParam::__FIELDS__());
@@ -119,7 +117,11 @@ NNVM_REGISTER_OP(_image_random_saturation)
 })
 .set_attr<nnvm::FInferShape>("FInferShape", ElemwiseShape<1, 1>)
 .set_attr<nnvm::FInferType>("FInferType", ElemwiseType<1, 1>)
-.set_attr<FCompute>("FCompute<cpu>", RandomSaturation<cpu>)
+.set_attr<nnvm::FInplaceOption>("FInplaceOption",
+  [](const NodeAttrs& attrs){
+    return std::vector<std::pair<int, int> >{{0, 0}};
+  })
+.set_attr<FCompute>("FCompute<cpu>", RandomSaturation)
 .set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseNone{ "_copy" })
 .add_argument("data", "NDArray-or-Symbol", "The input.")
 .add_arguments(RandomSaturationParam::__FIELDS__());

-- 
To stop receiving notification emails like this one, please contact
jxie@apache.org.