Posted to commits@mxnet.apache.org by ta...@apache.org on 2019/03/06 01:42:27 UTC

[incubator-mxnet] branch master updated: Enhance gpu quantization (#14094)

This is an automated email from the ASF dual-hosted git repository.

taolv pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git


The following commit(s) were added to refs/heads/master by this push:
     new 49d7fc6  Enhance gpu quantization (#14094)
49d7fc6 is described below

commit 49d7fc63bd8ebe3e01a1c47f8d3b529028f3bb6b
Author: rajeshii <46...@users.noreply.github.com>
AuthorDate: Wed Mar 6 09:42:11 2019 +0800

    Enhance gpu quantization (#14094)
    
    * enhance gpu quantization
    
    * fix test and improve error message
    
    * add check srctype to quantized_conv.cu
    
    * improve infer type
    
    * fix lint
    
    * add dtype check in quantize
    
    * revert check in python level and quantized_conv
    
    * Revert "add dtype check in quantize"
    
    This reverts commit ab6866811346e12dadb679fe325e86badbe93c15.
    
    * add dtype check in quantize
    
    * fix quantize test case
---
 src/operator/quantization/quantize-inl.h       |  4 ++++
 src/operator/quantization/quantize_v2-inl.h    |  4 ++++
 tests/python/quantization/test_quantization.py | 10 ++++++++++
 tests/python/unittest/test_operator.py         |  6 +++---
 4 files changed, 21 insertions(+), 3 deletions(-)

diff --git a/src/operator/quantization/quantize-inl.h b/src/operator/quantization/quantize-inl.h
index 747dead..1ad0016 100644
--- a/src/operator/quantization/quantize-inl.h
+++ b/src/operator/quantization/quantize-inl.h
@@ -95,6 +95,10 @@ void QuantizeCompute(const nnvm::NodeAttrs& attrs,
 
   const QuantizeParam& param = nnvm::get<QuantizeParam>(attrs.parsed);
   if (param.out_type == mshadow::kUint8) {
+    if (std::is_same<xpu, gpu>::value) {
+      LOG(FATAL) << "Currently, uint8 quantization is only supported on CPU; "
+                    "please switch to a CPU context or use the int8 data type on GPU.";
+    }
     Kernel<quantize_unsigned, xpu>::Launch(s, outputs[0].Size(),
       outputs[0].dptr<uint8_t>(), outputs[1].dptr<float>(), outputs[2].dptr<float>(),
       inputs[0].dptr<float>(), inputs[1].dptr<float>(), inputs[2].dptr<float>(),
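
In effect, requesting uint8 output on a GPU context now fails fast with a clear
message instead of proceeding down an unsupported path. A minimal sketch of the
resulting user-facing behavior, assuming an MXNet build with GPU support (the
try/except and waitall are illustrative; LOG(FATAL) surfaces in Python as an
MXNetError when the asynchronous op actually executes):

    import mxnet as mx

    ctx = mx.gpu(0)
    a = mx.nd.array([[0.1392, 0.5928], [0.6027, 0.8579]], ctx=ctx)
    min0 = mx.nd.array([0.0], ctx=ctx)
    max0 = mx.nd.array([1.0], ctx=ctx)

    try:
        # uint8 on GPU now hits the LOG(FATAL) check above
        qa, qmin, qmax = mx.nd.contrib.quantize(a, min0, max0, out_type='uint8')
        mx.nd.waitall()  # force execution so the error surfaces
    except mx.base.MXNetError as e:
        print(e)

    # int8 remains supported on GPU
    qa, qmin, qmax = mx.nd.contrib.quantize(a, min0, max0, out_type='int8')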
diff --git a/src/operator/quantization/quantize_v2-inl.h b/src/operator/quantization/quantize_v2-inl.h
index e3c4119..02ace6c 100644
--- a/src/operator/quantization/quantize_v2-inl.h
+++ b/src/operator/quantization/quantize_v2-inl.h
@@ -137,6 +137,10 @@ void QuantizeV2Compute(const nnvm::NodeAttrs &attrs, const OpContext &ctx,
   Stream<xpu> *s = ctx.get_stream<xpu>();
   const QuantizeV2Param &param = nnvm::get<QuantizeV2Param>(attrs.parsed);
   auto out_type = GetOutputType(param);
+  if (out_type == mshadow::kUint8 && std::is_same<xpu, gpu>::value) {
+    LOG(FATAL) << "Currently, uint8 quantization is only supported on CPU; "
+                  "please switch to a CPU context or use the int8 data type on GPU.";
+  }
 
   if (inputs[0].type_flag_ == mshadow::kUint8 || inputs[0].type_flag_ == mshadow::kInt8) {
     if (param.min_calib_range.has_value() && param.max_calib_range.has_value()) {
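
The same guard covers the v2 quantize operator. Note that because
QuantizeV2Compute is templated on xpu, std::is_same<xpu, gpu>::value folds to a
compile-time constant, so the branch costs nothing in the CPU instantiation. A
sketch of the v2 call from Python, assuming the calibrated path where
min_calib_range/max_calib_range are the optional float arguments:

    import mxnet as mx

    data = mx.nd.uniform(shape=(2, 2), ctx=mx.gpu(0))
    # out_type='uint8' would raise the same MXNetError on GPU; int8 works:
    q, qmin, qmax = mx.nd.contrib.quantize_v2(data, out_type='int8',
                                              min_calib_range=0.0,
                                              max_calib_range=1.0)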
diff --git a/tests/python/quantization/test_quantization.py b/tests/python/quantization/test_quantization.py
index 3ff4b69..d8c7f08 100644
--- a/tests/python/quantization/test_quantization.py
+++ b/tests/python/quantization/test_quantization.py
@@ -450,6 +450,16 @@ def get_fp32_sym_with_multiple_outputs(length=1):
 @with_seed()
 def test_quantize_model():
     def check_quantize_model(qdtype):
+        if is_test_for_native_cpu():
+            print('skipped testing quantize_model for native cpu since it is not supported yet')
+            return
+        elif qdtype == 'int8' and is_test_for_mkldnn():
+            print('skipped testing quantize_model for mkldnn cpu int8 since it is not supported yet')
+            return
+        elif qdtype == 'uint8' and is_test_for_gpu():
+            print('skipped testing quantize_model for gpu uint8 since it is not supported yet')
+            return
+
         def check_params(params, qparams, qsym=None):
             if qsym is None:
                 assert len(params) == len(qparams)
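
For reference, the is_test_for_* helpers used in the skip logic above dispatch
on the context the test suite is running under; the GPU variant is roughly of
this shape (a sketch, not the verbatim definition from test_quantization.py):

    import mxnet as mx

    def is_test_for_gpu():
        # the quantization tests run once per configured context
        return mx.current_context().device_type == 'gpu'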
diff --git a/tests/python/unittest/test_operator.py b/tests/python/unittest/test_operator.py
index 0ac530c..6bb8150 100644
--- a/tests/python/unittest/test_operator.py
+++ b/tests/python/unittest/test_operator.py
@@ -4894,11 +4894,11 @@ def test_quantization_op():
     min0 = mx.nd.array([0.0])
     max0 = mx.nd.array([1.0])
     a  = mx.nd.array([[0.1392, 0.5928], [0.6027, 0.8579]])
-    qa, min1, max1 = mx.nd.contrib.quantize(a, min0, max0, out_type='uint8')
+    qa, min1, max1 = mx.nd.contrib.quantize(a, min0, max0, out_type='int8')
     a_ = mx.nd.contrib.dequantize(qa, min1, max1, out_type='float32')
 
-    qa_real = mx.nd.array([[35, 151], [154, 219]])
-    a_real  = mx.nd.array([[0.13725491, 0.59215689], [0.60392159, 0.8588236]])
+    qa_real = mx.nd.array([[18, 75], [77, 109]])
+    a_real  = mx.nd.array([[0.14173228, 0.5905512], [0.6062992, 0.8582677]])
 
     assert same(qa.asnumpy(), qa_real.asnumpy())
     assert same(a_.asnumpy(),  a_real.asnumpy())
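
The expected constants change because the test now quantizes to int8 rather
than uint8, which this commit forbids on GPU. With the calibration range
[0, 1], the int8 scale is 127 / max(|min0|, |max0|) = 127, and the new values
follow directly (a NumPy sketch of the arithmetic, rounding to nearest):

    import numpy as np

    a = np.array([[0.1392, 0.5928], [0.6027, 0.8579]])
    qa = np.round(a * 127).astype(np.int8)   # [[ 18,  75], [ 77, 109]]
    a_ = qa.astype(np.float32) / 127         # [[0.14173228, 0.5905512 ],
                                             #  [0.6062992 , 0.8582677 ]]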