Posted to discuss-archive@tvm.apache.org by Max Sponner via TVM Discuss <no...@discuss.tvm.ai> on 2020/08/20 13:24:55 UTC

[TVM Discuss] [Questions] Post-Processing Ops and Quantization


Hi, 

I am currently exploring Relay with the BYOC infrastructure and realized that pooling, ReLU, and a number of other supporting operations are still executed in float32 after quantization.

As my target accelerator supports pooling, ReLU, and other activations only in the int8 range, I want to quantize all operations. Is there a way to enforce that?
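
For completeness, this is roughly how I listed the calls that still produce float32 results (a minimal sketch; `qmod` stands for the quantized module shown further down):

    import tvm
    from tvm import relay

    def list_float32_calls(mod):
        """Collect the names of operators whose result is still float32."""
        mod = relay.transform.InferType()(mod)
        float_ops = set()

        def visit(node):
            # Only look at calls to primitive operators with a tensor result.
            if isinstance(node, relay.Call) and isinstance(node.op, tvm.ir.Op):
                ty = node.checked_type
                if isinstance(ty, relay.TensorType) and ty.dtype == "float32":
                    float_ops.add(node.op.name)

        relay.analysis.post_order_visit(mod["main"], visit)
        return sorted(float_ops)

    print(list_float32_calls(qmod))
    # e.g. ['layout_transform', 'multiply', 'nn.bias_add', 'nn.max_pool2d', 'nn.relu', ...]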

original relay module:

    def @main(%input_1: Tensor[(1, 28, 28, 1), float32], %conv1/kernel:0: Tensor[(8, 1, 5, 5), float32], %conv1/bias:0: Tensor[(8), float32], %conv2/kernel:0: Tensor[(16, 8, 5, 5), float32], %conv2/bias:0: Tensor[(16), float32], %layer3/kernel:0: Tensor[(256, 10), float32], %layer3/bias:0: Tensor[(10), float32]) -> Tensor[(1, 10), float32] {
      %0 = transpose(%input_1, axes=[0, 3, 1, 2]) /* ty=Tensor[(1, 1, 28, 28), float32] */;
      %1 = layout_transform(%0, src_layout="NCHW", dst_layout="NHWC") /* ty=Tensor[(1, 28, 28, 1), float32] */;
      %2 = layout_transform(%conv1/kernel:0, src_layout="OIHW", dst_layout="OHWI") /* ty=Tensor[(8, 5, 5, 1), float32] */;
      %3 = nn.conv2d(%1, %2, padding=[2, 2, 2, 2], kernel_size=[5, 5], data_layout="NHWC", kernel_layout="OHWI") /* ty=Tensor[(1, 28, 28, 8), float32] */;
      %4 = layout_transform(%3, src_layout="NHWC", dst_layout="NCHW") /* ty=Tensor[(1, 8, 28, 28), float32] */;
      %5 = nn.bias_add(%4, %conv1/bias:0) /* ty=Tensor[(1, 8, 28, 28), float32] */;
      %6 = nn.relu(%5) /* ty=Tensor[(1, 8, 28, 28), float32] */;
      %7 = nn.max_pool2d(%6, pool_size=[2, 2], strides=[2, 2], padding=[0, 0, 0, 0]) /* ty=Tensor[(1, 8, 14, 14), float32] */;
      %8 = layout_transform(%7, src_layout="NCHW", dst_layout="NHWC") /* ty=Tensor[(1, 14, 14, 8), float32] */;
      %9 = layout_transform(%conv2/kernel:0, src_layout="OIHW", dst_layout="OHWI") /* ty=Tensor[(16, 5, 5, 8), float32] */;
      %10 = nn.conv2d(%8, %9, padding=[2, 2, 2, 2], kernel_size=[5, 5], data_layout="NHWC", kernel_layout="OHWI") /* ty=Tensor[(1, 14, 14, 16), float32] */;
      %11 = layout_transform(%10, src_layout="NHWC", dst_layout="NCHW") /* ty=Tensor[(1, 16, 14, 14), float32] */;
      %12 = nn.bias_add(%11, %conv2/bias:0) /* ty=Tensor[(1, 16, 14, 14), float32] */;
      %13 = nn.relu(%12) /* ty=Tensor[(1, 16, 14, 14), float32] */;
      %14 = nn.max_pool2d(%13, pool_size=[3, 3], strides=[3, 3], padding=[0, 0, 0, 0]) /* ty=Tensor[(1, 16, 4, 4), float32] */;
      %15 = transpose(%14, axes=[0, 2, 3, 1]) /* ty=Tensor[(1, 4, 4, 16), float32] */;
      %16 = nn.batch_flatten(%15) /* ty=Tensor[(1, 256), float32] */;
      %17 = transpose(%layer3/kernel:0, axes=[1, 0]) /* ty=Tensor[(10, 256), float32] */;
      %18 = nn.dense(%16, %17, units=None) /* ty=Tensor[(1, 10), float32] */;
      add(%18, %layer3/bias:0) /* ty=Tensor[(1, 10), float32] */
    }

quantized module:

    def @main(%input_1: Tensor[(1, 28, 28, 1), float32]) -> Tensor[(1, 10), float32] {
      %0 = transpose(%input_1, axes=[0, 3, 1, 2]) /* ty=Tensor[(1, 1, 28, 28), float32] */;
      %1 = layout_transform(%0, src_layout="NCHW", dst_layout="NHWC") /* ty=Tensor[(1, 28, 28, 1), float32] */;
      %2 = multiply(%1, 16f /* ty=float32 */) /* ty=Tensor[(1, 28, 28, 1), float32] */;
      %3 = round(%2) /* ty=Tensor[(1, 28, 28, 1), float32] */;
      %4 = clip(%3, a_min=-127f, a_max=127f) /* ty=Tensor[(1, 28, 28, 1), float32] */;
      %5 = cast(%4, dtype="int8") /* ty=Tensor[(1, 28, 28, 1), int8] */;
      %6 = nn.conv2d(%5, meta[relay.Constant][0] /* ty=Tensor[(8, 5, 5, 1), int8] */ /* ty=Tensor[(8, 5, 5, 1), int8] */, padding=[2, 2, 2, 2], kernel_size=[5, 5], data_layout="NHWC", kernel_layout="OHWI", out_dtype="int32") /* ty=Tensor[(1, 28, 28, 8), int32] */;
      %7 = add(%6, 64 /* ty=int32 */) /* ty=Tensor[(1, 28, 28, 8), int32] */;
      %8 = right_shift(%7, 7 /* ty=int32 */) /* ty=Tensor[(1, 28, 28, 8), int32] */;
      %9 = clip(%8, a_min=-127f, a_max=127f) /* ty=Tensor[(1, 28, 28, 8), int32] */;
      %10 = cast(%9, dtype="int8") /* ty=Tensor[(1, 28, 28, 8), int8] */;
      %11 = annotation.stop_fusion(%10) /* ty=Tensor[(1, 28, 28, 8), int8] */;
      %12 = cast(%11, dtype="float32") /* ty=Tensor[(1, 28, 28, 8), float32] */;
      %13 = multiply(%12, 0.0625f /* ty=float32 */) /* ty=Tensor[(1, 28, 28, 8), float32] */;
      %14 = layout_transform(%13, src_layout="NHWC", dst_layout="NCHW") /* ty=Tensor[(1, 8, 28, 28), float32] */;
      %15 = nn.bias_add(%14, meta[relay.Constant][1] /* ty=Tensor[(8), float32] */ /* ty=Tensor[(8), float32] */) /* ty=Tensor[(1, 8, 28, 28), float32] */;
      %16 = nn.relu(%15) /* ty=Tensor[(1, 8, 28, 28), float32] */;
      %17 = nn.max_pool2d(%16, pool_size=[2, 2], strides=[2, 2], padding=[0, 0, 0, 0]) /* ty=Tensor[(1, 8, 14, 14), float32] */;
      %18 = layout_transform(%17, src_layout="NCHW", dst_layout="NHWC") /* ty=Tensor[(1, 14, 14, 8), float32] */;
      %19 = multiply(%18, 16f /* ty=float32 */) /* ty=Tensor[(1, 14, 14, 8), float32] */;
      %20 = round(%19) /* ty=Tensor[(1, 14, 14, 8), float32] */;
      %21 = clip(%20, a_min=-127f, a_max=127f) /* ty=Tensor[(1, 14, 14, 8), float32] */;
      %22 = cast(%21, dtype="int8") /* ty=Tensor[(1, 14, 14, 8), int8] */;
      %23 = nn.conv2d(%22, meta[relay.Constant][2] /* ty=Tensor[(16, 5, 5, 8), int8] */ /* ty=Tensor[(16, 5, 5, 8), int8] */, padding=[2, 2, 2, 2], kernel_size=[5, 5], data_layout="NHWC", kernel_layout="OHWI", out_dtype="int32") /* ty=Tensor[(1, 14, 14, 16), int32] */;
      %24 = add(%23, 64 /* ty=int32 */) /* ty=Tensor[(1, 14, 14, 16), int32] */;
      %25 = right_shift(%24, 7 /* ty=int32 */) /* ty=Tensor[(1, 14, 14, 16), int32] */;
      %26 = clip(%25, a_min=-127f, a_max=127f) /* ty=Tensor[(1, 14, 14, 16), int32] */;
      %27 = cast(%26, dtype="int8") /* ty=Tensor[(1, 14, 14, 16), int8] */;
      %28 = annotation.stop_fusion(%27) /* ty=Tensor[(1, 14, 14, 16), int8] */;
      %29 = cast(%28, dtype="float32") /* ty=Tensor[(1, 14, 14, 16), float32] */;
      %30 = multiply(%29, 0.0625f /* ty=float32 */) /* ty=Tensor[(1, 14, 14, 16), float32] */;
      %31 = layout_transform(%30, src_layout="NHWC", dst_layout="NCHW") /* ty=Tensor[(1, 16, 14, 14), float32] */;
      %32 = nn.bias_add(%31, meta[relay.Constant][3] /* ty=Tensor[(16), float32] */ /* ty=Tensor[(16), float32] */) /* ty=Tensor[(1, 16, 14, 14), float32] */;
      %33 = nn.relu(%32) /* ty=Tensor[(1, 16, 14, 14), float32] */;
      %34 = nn.max_pool2d(%33, pool_size=[3, 3], strides=[3, 3], padding=[0, 0, 0, 0]) /* ty=Tensor[(1, 16, 4, 4), float32] */;
      %35 = transpose(%34, axes=[0, 2, 3, 1]) /* ty=Tensor[(1, 4, 4, 16), float32] */;
      %36 = nn.batch_flatten(%35) /* ty=Tensor[(1, 256), float32] */;
      %37 = multiply(%36, 16f /* ty=float32 */) /* ty=Tensor[(1, 256), float32] */;
      %38 = round(%37) /* ty=Tensor[(1, 256), float32] */;
      %39 = clip(%38, a_min=-127f, a_max=127f) /* ty=Tensor[(1, 256), float32] */;
      %40 = cast(%39, dtype="int8") /* ty=Tensor[(1, 256), int8] */;
      %41 = nn.dense(%40, meta[relay.Constant][4] /* ty=Tensor[(10, 256), int8] */ /* ty=Tensor[(10, 256), int8] */, units=None, out_dtype="int32") /* ty=Tensor[(1, 10), int32] */;
      %42 = add(%41, meta[relay.Constant][5] /* ty=Tensor[(10), int32] */ /* ty=Tensor[(10), int32] */) /* ty=Tensor[(1, 10), int32] */;
      %43 = cast(%42, dtype="float32") /* ty=Tensor[(1, 10), float32] */;
      multiply(%43, 0.000488281f /* ty=float32 */) /* ty=Tensor[(1, 10), float32] */
    }
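
If I read the calibration right, the constants above follow directly from the configuration below: with nbit_activation=8 and global_scale=8, the quantization scale is 2^7 / 8 = 16, so activations are quantized with multiply(..., 16f), round, and clip, and dequantized with 1/16 = 0.0625f. The final multiply by 0.000488281f = 1/2048 = 1/(16 * 128) presumably folds the activation scale together with a power-of-two weight scale.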

quantization configuration:

`qconfig(nbit_input=8, nbit_weight=8, nbit_activation=8, calibrate_mode=global_scale, global_scale=8, weight_scale=power2, skip_conv_layers==(nullptr), do_simulation==0, round_for_shift==1, debug_enabled_ops==(nullptr), rounding==UPWARD, partition_conversions==disabled)`
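
For reference, the quantized module above was produced with roughly the following (a minimal sketch; `mod` and `params` are the imported Relay module and its parameters):

    from tvm import relay

    # Matches the qconfig dump above; everything not listed keeps its default.
    with relay.quantize.qconfig(nbit_input=8,
                                nbit_weight=8,
                                nbit_activation=8,
                                calibrate_mode="global_scale",
                                global_scale=8.0,
                                weight_scale="power2",
                                round_for_shift=True,
                                rounding="UPWARD"):
        qmod = relay.quantize.quantize(mod, params=params)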




