You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tvm.apache.org by GitBox <gi...@apache.org> on 2021/10/06 21:01:15 UTC
[GitHub] [tvm] AndrewZhaoLuo commented on a change in pull request #8952: [TVM] Add importer for ONNX QLinearMatMul op

AndrewZhaoLuo commented on a change in pull request #8952:
URL: https://github.com/apache/tvm/pull/8952#discussion_r723667441



##########
File path: python/tvm/relay/frontend/onnx.py
##########
@@ -3506,6 +3506,154 @@ def _impl_v10(cls, inputs, attr, params):
         return _qnn.op.quantize(out, c_scale, c_zero_point, out_dtype=dtype)
 
 
+class QLinearMatMul(OnnxOpConverter):
+    """
+    Operator converter for QLinearMatMul from Microsoft onnxruntime contrib opset.
+
+    Limitations:
+    - Only supports 2D input tensors.
+    - Not guaranteed to meet the integer-overflow behavior stipulated in the
+      ONNX documentation for this operator.
+    """
+
+    @classmethod
+    def _impl_v10(cls, inputs, attr, params):
+
+        # We can't necessarily anticipate which TVM backends will require certain
+        # operands to be simple Relay Constant nodes.
+        #
+        # There's no guarantee that the specific, numerical value of an input
+        # will be available at graph-compilation time. So we'll make a
+        # best-effort attempt to obtain simple-Constant form here, but our code
+        # below needs to allow for the possibility that it wasn't achieved.
+        def try_convert_to_Constant(x, dtype_override=None):
+            if isinstance(x, _expr.Var) and x.name_hint in params:
+                return _op.const(params[x.name_hint].numpy(), dtype)
+
+            rank = len(infer_shape(x))
+            if rank == 0:
+                x_scalar = x

Review comment:
       this line is not needed

##########
File path: python/tvm/relay/frontend/onnx.py
##########
@@ -3506,6 +3506,155 @@ def _impl_v10(cls, inputs, attr, params):
         return _qnn.op.quantize(out, c_scale, c_zero_point, out_dtype=dtype)
 
 
+class QLinearMatMul(OnnxOpConverter):
+    """
+    Operator converter for QLinearMatMul from Microsoft onnxruntime contrib opset.
+
+    Limitations:
+    - Only supports 2D input tensors.
+    - Not guaranteed to meet the integer-overflow behavior stipulated in the
+      ONNX documentation for this operator.
+    """
+
+    @classmethod
+    def _impl_v10(cls, inputs, attr, params):
+
+        # This function has two goals, both of which are to satisfy the input requirements
+        # various Relay ops used below:
+        #
+        # (1) If a values that's conceptually a scalar is represented as a tensor,
+        #     squeeze it down to just a scalar. This will always be possible for values
+        #     meeting the shape requirements.
+        #
+        # (2) When possible, simplify an expression down to a simple Relay Const node.
+        def try_convert_to_Constant(x, dtype_override=None):

Review comment:
       nit: try_convert_to_constant

##########
File path: tests/python/frontend/onnx/test_forward.py
##########
@@ -4915,7 +4915,19 @@ def verify_eyelike(indata):
     "test_nllloss_NCd1d2d3_sum_weight_high_ii_expanded",
     "test_nllloss_NCd1d2d3d4d5_mean_weight_expanded",
     "test_nllloss_NCd1d2d3d4d5_none_no_weight_expanded",
-    "test_qlinearmatmul_2D",
+    # This nllloss test is flaky and sometimes gives NaNs

Review comment:
       The nll and pow tests should not be in this list

##########
File path: python/tvm/relay/frontend/onnx.py
##########
@@ -3506,6 +3506,155 @@ def _impl_v10(cls, inputs, attr, params):
         return _qnn.op.quantize(out, c_scale, c_zero_point, out_dtype=dtype)
 
 
+class QLinearMatMul(OnnxOpConverter):
+    """
+    Operator converter for QLinearMatMul from Microsoft onnxruntime contrib opset.
+
+    Limitations:
+    - Only supports 2D input tensors.
+    - Not guaranteed to meet the integer-overflow behavior stipulated in the
+      ONNX documentation for this operator.
+    """
+
+    @classmethod
+    def _impl_v10(cls, inputs, attr, params):
+
+        # This function has two goals, both of which are to satisfy the input requirements
+        # various Relay ops used below:
+        #
+        # (1) If a values that's conceptually a scalar is represented as a tensor,
+        #     squeeze it down to just a scalar. This will always be possible for values
+        #     meeting the shape requirements.
+        #
+        # (2) When possible, simplify an expression down to a simple Relay Const node.
+        def try_convert_to_Constant(x, dtype_override=None):

Review comment:
       this seems very useful for multiple QNN op methods. I would move it to `common` 

##########
File path: python/tvm/relay/frontend/onnx.py
##########
@@ -3506,6 +3506,155 @@ def _impl_v10(cls, inputs, attr, params):
         return _qnn.op.quantize(out, c_scale, c_zero_point, out_dtype=dtype)
 
 
+class QLinearMatMul(OnnxOpConverter):
+    """
+    Operator converter for QLinearMatMul from Microsoft onnxruntime contrib opset.
+
+    Limitations:
+    - Only supports 2D input tensors.
+    - Not guaranteed to meet the integer-overflow behavior stipulated in the
+      ONNX documentation for this operator.
+    """
+
+    @classmethod
+    def _impl_v10(cls, inputs, attr, params):
+
+        # This function has two goals, both of which are to satisfy the input requirements
+        # various Relay ops used below:
+        #
+        # (1) If a values that's conceptually a scalar is represented as a tensor,
+        #     squeeze it down to just a scalar. This will always be possible for values
+        #     meeting the shape requirements.
+        #
+        # (2) When possible, simplify an expression down to a simple Relay Const node.
+        def try_convert_to_Constant(x, dtype_override=None):
+            if isinstance(x, _expr.Var) and x.name_hint in params:
+                return _op.const(params[x.name_hint].numpy(), dtype)
+
+            rank = len(infer_shape(x))
+            if rank == 0:
+                x_scalar = x
+                return x
+            elif rank == 1:
+                x_scalar = _op.squeeze(x, [0])

Review comment:
       constants can be non-scalars in general too so we probably don't need these rank handling cases

##########
File path: tests/python/frontend/onnx/test_forward.py
##########
@@ -4941,7 +4941,6 @@ def verify_eyelike(indata):
     "test_mvn",
     # This test fails llvm with a lowering error:
     "test_nllloss_NCd1d2d3_none_no_weight_negative_ii_expanded",
-    "test_qlinearmatmul_2D",
     "test_qlinearmatmul_3D",

Review comment:
       How much work for 3d?

##########
File path: python/tvm/relay/frontend/onnx.py
##########
@@ -3506,6 +3506,155 @@ def _impl_v10(cls, inputs, attr, params):
         return _qnn.op.quantize(out, c_scale, c_zero_point, out_dtype=dtype)
 
 
+class QLinearMatMul(OnnxOpConverter):
+    """
+    Operator converter for QLinearMatMul from Microsoft onnxruntime contrib opset.
+
+    Limitations:
+    - Only supports 2D input tensors.
+    - Not guaranteed to meet the integer-overflow behavior stipulated in the
+      ONNX documentation for this operator.
+    """
+
+    @classmethod
+    def _impl_v10(cls, inputs, attr, params):
+
+        # This function has two goals, both of which are to satisfy the input requirements
+        # various Relay ops used below:
+        #
+        # (1) If a values that's conceptually a scalar is represented as a tensor,
+        #     squeeze it down to just a scalar. This will always be possible for values
+        #     meeting the shape requirements.
+        #
+        # (2) When possible, simplify an expression down to a simple Relay Const node.
+        def try_convert_to_Constant(x, dtype_override=None):
+            if isinstance(x, _expr.Var) and x.name_hint in params:
+                return _op.const(params[x.name_hint].numpy(), dtype)
+
+            rank = len(infer_shape(x))
+            if rank == 0:
+                x_scalar = x
+                return x
+            elif rank == 1:
+                x_scalar = _op.squeeze(x, [0])
+            else:
+                assert false, "op parameter '{}' must be scalar".format(x.name_hint)
+
+            if dtype_override is not None:
+                return fold_constant(_op.cast(x_scalar, dtype_override))
+            else:
+                return fold_constant(x_scalar)
+
+        # Unpack the inputs and obtain some type info...
+        a, a_scale, a_zp, b, b_scale, b_zp, y_scale, y_zp = inputs
+
+        a_type = infer_type(a).checked_type  # 'T1' in ONNX doc for this op
+        a_scale_type = infer_type(a_scale).checked_type
+        a_zp_type = infer_type(a_zp).checked_type
+
+        b_type = infer_type(b).checked_type  # 'T2' in ONNX doc for this op
+        b_scale_type = infer_type(b_scale).checked_type
+        b_zp_type = infer_type(b_zp).checked_type
+
+        y_scale_type = infer_type(y_scale).checked_type
+        y_zp_type = infer_type(y_zp).checked_type  # 'T3' in ONNX doc for this op
+
+        a_shape = infer_shape(a)
+        b_shape = infer_shape(b)
+
+        # Verify type assumptions, based on the ONNX doc for this op...
+        assert a_type.dtype in ["int8", "uint8"]
+        assert a_scale_type.dtype == "float32"
+        assert a_zp_type.dtype == a_type.dtype
+
+        assert b_type.dtype in ["int8", "uint8"]
+        assert b_scale_type.dtype == "float32"
+        assert b_zp_type.dtype == b_type.dtype
+
+        assert y_scale_type.dtype == "float32"
+        assert y_zp_type.dtype in ["int8", "uint8"]
+
+        # TODO: relax this limitation in a future version of this importer.
+        a_rank = len(a_shape)
+        b_rank = len(b_shape)
+        assert (a_rank == 2) and (b_rank == 2), (
+            "QLinearMatMul importer currently requires both 'a' and 'b' tensors to be 2D, but"
+            " rank(a)={}, rank(b)={}".format(a_rank, b_rank)
+        )
+
+        # _qnn.op.dense requires the zero-point values to have dtype int32.
+        a_scale_scalar = try_convert_to_Constant(a_scale)
+        a_zp_scalar = try_convert_to_Constant(a_zp, "int32")
+
+        b_scale_scalar = try_convert_to_Constant(b_scale)
+        b_zp_scalar = try_convert_to_Constant(b_zp, "int32")
+
+        y_scale_scalar = try_convert_to_Constant(y_scale)
+        y_zp_scalar = try_convert_to_Constant(y_zp, "int32")
+
+        # TODO: Confirm that we're using 'num_hidden_units' correctly / as intended with
+        # the '_qnn.op.dense' instance below.
+        num_hidden_units = infer_shape(b)[-1]
+
+        # - Specify the matmul result dtype as int32, so that hopefully the matmul will use
+        #   a 32-bit accumulator as seems to be required by the ONNX op's documentation.
+        #
+        # TL;DR:
+        # The ONNX documentation for this op is clear about acceptable overflow
+        # behavior during the matmul operation:
+        #   - The scalar multiplication ops MAY NOT overflow.
+        #   - The scalar addition ops, which sum the results of the scalar multiplication,
+        #     MAY overflow, but if they do so, it must behave as one would expect during
+        #     32-bit integer-addition overflow.
+        # As of this writing, Relay's qnn.op.dense operator doesn't expose a way for us to

Review comment:
       Probably make this a TODO comment




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@tvm.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org