You are viewing a plain text version of this content. The canonical link for it is here.
Posted to discuss-archive@tvm.apache.org by Max Sponner via TVM Discuss <no...@discuss.tvm.ai> on 2020/08/20 13:24:55 UTC
[TVM Discuss] [Questions] Post-Processing Ops and Quantization
Hi,
I am currently exploring Relay with the BYOC infrastructure and realized that pooling, ReLU, and a number of other supporting operations are still executed in float32.
As my target accelerator supports pooling, ReLU, and activations only in the int8 range, I want to quantize all operations. Am I able to enforce that?
original relay module:
def @main(%input_1: Tensor[(1, 28, 28, 1), float32], %conv1/kernel:0: Tensor[(8, 1, 5, 5), float32], %conv1/bias:0: Tensor[(8), float32], %conv2/kernel:0: Tensor[(16, 8, 5, 5), float32], %conv2/bias:0: Tensor[(16), float32], %layer3/kernel:0: Tensor[(256, 10), float32], %layer3/bias:0: Tensor[(10), float32]) -> Tensor[(1, 10), float32] {
%0 = transpose(%input_1, axes=[0, 3, 1, 2]) /* ty=Tensor[(1, 1, 28, 28), float32] */;
%1 = layout_transform(%0, src_layout="NCHW", dst_layout="NHWC") /* ty=Tensor[(1, 28, 28, 1), float32] */;
%2 = layout_transform(%conv1/kernel:0, src_layout="OIHW", dst_layout="OHWI") /* ty=Tensor[(8, 5, 5, 1), float32] */;
%3 = nn.conv2d(%1, %2, padding=[2, 2, 2, 2], kernel_size=[5, 5], data_layout="NHWC", kernel_layout="OHWI") /* ty=Tensor[(1, 28, 28, 8), float32] */;
%4 = layout_transform(%3, src_layout="NHWC", dst_layout="NCHW") /* ty=Tensor[(1, 8, 28, 28), float32] */;
%5 = nn.bias_add(%4, %conv1/bias:0) /* ty=Tensor[(1, 8, 28, 28), float32] */;
%6 = nn.relu(%5) /* ty=Tensor[(1, 8, 28, 28), float32] */;
%7 = nn.max_pool2d(%6, pool_size=[2, 2], strides=[2, 2], padding=[0, 0, 0, 0]) /* ty=Tensor[(1, 8, 14, 14), float32] */;
%8 = layout_transform(%7, src_layout="NCHW", dst_layout="NHWC") /* ty=Tensor[(1, 14, 14, 8), float32] */;
%9 = layout_transform(%conv2/kernel:0, src_layout="OIHW", dst_layout="OHWI") /* ty=Tensor[(16, 5, 5, 8), float32] */;
%10 = nn.conv2d(%8, %9, padding=[2, 2, 2, 2], kernel_size=[5, 5], data_layout="NHWC", kernel_layout="OHWI") /* ty=Tensor[(1, 14, 14, 16), float32] */;
%11 = layout_transform(%10, src_layout="NHWC", dst_layout="NCHW") /* ty=Tensor[(1, 16, 14, 14), float32] */;
%12 = nn.bias_add(%11, %conv2/bias:0) /* ty=Tensor[(1, 16, 14, 14), float32] */;
%13 = nn.relu(%12) /* ty=Tensor[(1, 16, 14, 14), float32] */;
%14 = nn.max_pool2d(%13, pool_size=[3, 3], strides=[3, 3], padding=[0, 0, 0, 0]) /* ty=Tensor[(1, 16, 4, 4), float32] */;
%15 = transpose(%14, axes=[0, 2, 3, 1]) /* ty=Tensor[(1, 4, 4, 16), float32] */;
%16 = nn.batch_flatten(%15) /* ty=Tensor[(1, 256), float32] */;
%17 = transpose(%layer3/kernel:0, axes=[1, 0]) /* ty=Tensor[(10, 256), float32] */;
%18 = nn.dense(%16, %17, units=None) /* ty=Tensor[(1, 10), float32] */;
add(%18, %layer3/bias:0) /* ty=Tensor[(1, 10), float32] */
}
quantized module:
def @main(%input_1: Tensor[(1, 28, 28, 1), float32]) -> Tensor[(1, 10), float32] {
%0 = transpose(%input_1, axes=[0, 3, 1, 2]) /* ty=Tensor[(1, 1, 28, 28), float32] */;
%1 = layout_transform(%0, src_layout="NCHW", dst_layout="NHWC") /* ty=Tensor[(1, 28, 28, 1), float32] */;
%2 = multiply(%1, 16f /* ty=float32 */) /* ty=Tensor[(1, 28, 28, 1), float32] */;
%3 = round(%2) /* ty=Tensor[(1, 28, 28, 1), float32] */;
%4 = clip(%3, a_min=-127f, a_max=127f) /* ty=Tensor[(1, 28, 28, 1), float32] */;
%5 = cast(%4, dtype="int8") /* ty=Tensor[(1, 28, 28, 1), int8] */;
%6 = nn.conv2d(%5, meta[relay.Constant][0] /* ty=Tensor[(8, 5, 5, 1), int8] */ /* ty=Tensor[(8, 5, 5, 1), int8] */, padding=[2, 2, 2, 2], kernel_size=[5, 5], data_layout="NHWC", kernel_layout="OHWI", out_dtype="int32") /* ty=Tensor[(1, 28, 28, 8), int32] */;
%7 = add(%6, 64 /* ty=int32 */) /* ty=Tensor[(1, 28, 28, 8), int32] */;
%8 = right_shift(%7, 7 /* ty=int32 */) /* ty=Tensor[(1, 28, 28, 8), int32] */;
%9 = clip(%8, a_min=-127f, a_max=127f) /* ty=Tensor[(1, 28, 28, 8), int32] */;
%10 = cast(%9, dtype="int8") /* ty=Tensor[(1, 28, 28, 8), int8] */;
%11 = annotation.stop_fusion(%10) /* ty=Tensor[(1, 28, 28, 8), int8] */;
%12 = cast(%11, dtype="float32") /* ty=Tensor[(1, 28, 28, 8), float32] */;
%13 = multiply(%12, 0.0625f /* ty=float32 */) /* ty=Tensor[(1, 28, 28, 8), float32] */;
%14 = layout_transform(%13, src_layout="NHWC", dst_layout="NCHW") /* ty=Tensor[(1, 8, 28, 28), float32] */;
%15 = nn.bias_add(%14, meta[relay.Constant][1] /* ty=Tensor[(8), float32] */ /* ty=Tensor[(8), float32] */) /* ty=Tensor[(1, 8, 28, 28), float32] */;
%16 = nn.relu(%15) /* ty=Tensor[(1, 8, 28, 28), float32] */;
%17 = nn.max_pool2d(%16, pool_size=[2, 2], strides=[2, 2], padding=[0, 0, 0, 0]) /* ty=Tensor[(1, 8, 14, 14), float32] */;
%18 = layout_transform(%17, src_layout="NCHW", dst_layout="NHWC") /* ty=Tensor[(1, 14, 14, 8), float32] */;
%19 = multiply(%18, 16f /* ty=float32 */) /* ty=Tensor[(1, 14, 14, 8), float32] */;
%20 = round(%19) /* ty=Tensor[(1, 14, 14, 8), float32] */;
%21 = clip(%20, a_min=-127f, a_max=127f) /* ty=Tensor[(1, 14, 14, 8), float32] */;
%22 = cast(%21, dtype="int8") /* ty=Tensor[(1, 14, 14, 8), int8] */;
%23 = nn.conv2d(%22, meta[relay.Constant][2] /* ty=Tensor[(16, 5, 5, 8), int8] */ /* ty=Tensor[(16, 5, 5, 8), int8] */, padding=[2, 2, 2, 2], kernel_size=[5, 5], data_layout="NHWC", kernel_layout="OHWI", out_dtype="int32") /* ty=Tensor[(1, 14, 14, 16), int32] */;
%24 = add(%23, 64 /* ty=int32 */) /* ty=Tensor[(1, 14, 14, 16), int32] */;
%25 = right_shift(%24, 7 /* ty=int32 */) /* ty=Tensor[(1, 14, 14, 16), int32] */;
%26 = clip(%25, a_min=-127f, a_max=127f) /* ty=Tensor[(1, 14, 14, 16), int32] */;
%27 = cast(%26, dtype="int8") /* ty=Tensor[(1, 14, 14, 16), int8] */;
%28 = annotation.stop_fusion(%27) /* ty=Tensor[(1, 14, 14, 16), int8] */;
%29 = cast(%28, dtype="float32") /* ty=Tensor[(1, 14, 14, 16), float32] */;
%30 = multiply(%29, 0.0625f /* ty=float32 */) /* ty=Tensor[(1, 14, 14, 16), float32] */;
%31 = layout_transform(%30, src_layout="NHWC", dst_layout="NCHW") /* ty=Tensor[(1, 16, 14, 14), float32] */;
%32 = nn.bias_add(%31, meta[relay.Constant][3] /* ty=Tensor[(16), float32] */ /* ty=Tensor[(16), float32] */) /* ty=Tensor[(1, 16, 14, 14), float32] */;
%33 = nn.relu(%32) /* ty=Tensor[(1, 16, 14, 14), float32] */;
%34 = nn.max_pool2d(%33, pool_size=[3, 3], strides=[3, 3], padding=[0, 0, 0, 0]) /* ty=Tensor[(1, 16, 4, 4), float32] */;
%35 = transpose(%34, axes=[0, 2, 3, 1]) /* ty=Tensor[(1, 4, 4, 16), float32] */;
%36 = nn.batch_flatten(%35) /* ty=Tensor[(1, 256), float32] */;
%37 = multiply(%36, 16f /* ty=float32 */) /* ty=Tensor[(1, 256), float32] */;
%38 = round(%37) /* ty=Tensor[(1, 256), float32] */;
%39 = clip(%38, a_min=-127f, a_max=127f) /* ty=Tensor[(1, 256), float32] */;
%40 = cast(%39, dtype="int8") /* ty=Tensor[(1, 256), int8] */;
%41 = nn.dense(%40, meta[relay.Constant][4] /* ty=Tensor[(10, 256), int8] */ /* ty=Tensor[(10, 256), int8] */, units=None, out_dtype="int32") /* ty=Tensor[(1, 10), int32] */;
%42 = add(%41, meta[relay.Constant][5] /* ty=Tensor[(10), int32] */ /* ty=Tensor[(10), int32] */) /* ty=Tensor[(1, 10), int32] */;
%43 = cast(%42, dtype="float32") /* ty=Tensor[(1, 10), float32] */;
multiply(%43, 0.000488281f /* ty=float32 */) /* ty=Tensor[(1, 10), float32] */
}
quantization configuration:
`qconfig(nbit_input=8, nbit_weight=8, nbit_activation=8, calibrate_mode=global_scale, global_scale=8, weight_scale=power2, skip_conv_layers==(nullptr), do_simulation==0, round_for_shift==1, debug_enabled_ops==(nullptr), rounding==UPWARD, partition_conversions==disabled)`
---
[Visit Topic](https://discuss.tvm.ai/t/post-processing-ops-and-quantization/7677/1) to respond.
You are receiving this because you enabled mailing list mode.
To unsubscribe from these emails, [click here](https://discuss.tvm.ai/email/unsubscribe/e86c3763fa6627629152988bf7f5a858306800de150186a065713491aae3a62e).