Posted to commits@tvm.apache.org by gu...@apache.org on 2023/01/22 06:29:56 UTC

[tvm] branch acc-tests-1 updated (837f117a93 -> ae2976d829)

This is an automated email from the ASF dual-hosted git repository.

guberti pushed a change to branch acc-tests-1
in repository https://gitbox.apache.org/repos/asf/tvm.git


    from 837f117a93 Functioning personal requantization!
     new f1f90f00b5 Another requantization
     new ae2976d829 Saturation

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails. (No revisions
are listed as "add"; those would be ones already present in the
repository that were only added to this reference.)


Summary of changes:
 vww/modified.c | 135 ++++++++++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 119 insertions(+), 16 deletions(-)


[tvm] 02/02: Saturation

Posted by gu...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

guberti pushed a commit to branch acc-tests-1
in repository https://gitbox.apache.org/repos/asf/tvm.git

commit ae2976d829ea50a3a985cafb4f9994336eb05f55
Author: Gavin Uberti <ga...@gmail.com>
AuthorDate: Sat Jan 21 22:29:48 2023 -0800

    Saturation
---
 vww/modified.c | 124 +++++++++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 117 insertions(+), 7 deletions(-)

diff --git a/vww/modified.c b/vww/modified.c
index b35a22dd4e..ad6272ac97 100644
--- a/vww/modified.c
+++ b/vww/modified.c
@@ -30932,6 +30932,120 @@ TVM_DLL int32_t tvmgen_default_fused_nn_conv2d_add_cast_multiply_add_right_shift
   return 0;
 }
 
+static const float __attribute__((section(".rodata.tvm"), aligned(16))) scale_3360883[8] = {
+    0x1.4e5b46p-124, 0x1.a1c562p-123, 0x1.52203ap-125, 0x1.2221a4p-121, 0x1.adb328p-123, 0x1.bd606ep-110, 0x1.58c1ccp-121, 0x1.afdb16p-122
+};
+
+static const int32_t __attribute__((section(".rodata.tvm"), aligned(16))) bias_11590737[8] = {
+    +0x000034d6, +0x000021dd, +0x00004223, +0x00001f3f, +0x0000240d, -0x0000098b, -0x000004fc, -0x000020ce
+};
+
+static const int16_t __attribute__((section(".rodata.tvm"), aligned(16))) kernel_21585151[72] = {
+    -0x004a, -0x0005, -0x004d, +0x006c, +0x007f, +0x0028, +0x002a, +0x002c,
+    -0x005b, -0x0071, +0x0006, -0x0007, +0x004e, +0x002c, -0x001b, +0x007f,
+    +0x0042, -0x0068, -0x0047, -0x001e, -0x000b, +0x0010, +0x004b, -0x0019,
+    +0x007f, +0x0046, -0x0001, -0x001b, -0x0012, -0x0003, +0x0011, -0x0001,
+    -0x0003, -0x0001, +0x007f, -0x002e, -0x0028, -0x0028, +0x0019, +0x0034,
+    +0x0021, -0x003e, +0x0032, +0x007f, -0x0048, -0x0039, +0x0039, +0x0031,
+    +0x0017, -0x007f, +0x0018, +0x0011, +0x0032, -0x0052, +0x0051, +0x0001,
+    +0x002f, -0x0067, -0x007f, +0x000d, +0x0022, -0x0048, +0x004b, +0x0029,
+    +0x0031, -0x001a, -0x000e, -0x007f, +0x0026, -0x0049, -0x003f, +0x003c
+};
+
+__attribute__((always_inline)) static inline int32_t tensordot_opt_x2_int16_w48_3x3_000_1_8(
+    int32_t *output, int32_t *tensor, int32_t *kernel, int32_t *bias, int32_t *scale
+) {
+  int32_t sum_0 = *bias, sum_1 = *bias;
+
+  int32_t tensor__y00_x00__y00_x01 = tensor[0];
+  int32_t tensor__y00_x02__y00_x03 = tensor[1];
+  int32_t tensor__y01_x00__y01_x01 = tensor[24];
+  int32_t tensor__y01_x02__y01_x03 = tensor[25];
+  int32_t tensor__y02_x00__y02_x01 = tensor[48];
+  int32_t tensor__y02_x02__y02_x03 = tensor[49];
+
+  int32_t kernel__y00_x00__y00_x01 = kernel[0];
+  int32_t kernel__y00_x02__y01_x00 = kernel[1];
+  int32_t kernel__y01_x01__y01_x02 = kernel[2];
+  int32_t kernel__y02_x00__y02_x01 = kernel[3];
+  int32_t kernel__y02_x02__unknown = kernel[4];
+
+  sum_0 = __smlad(tensor__y00_x00__y00_x01, kernel__y00_x00__y00_x01, sum_0);
+  sum_0 = __smlabb(tensor__y00_x02__y00_x03, kernel__y00_x02__y01_x00, sum_0);
+  sum_0 = __smlabt(tensor__y01_x00__y01_x01, kernel__y00_x02__y01_x00, sum_0);
+  sum_0 = __smlatb(tensor__y01_x00__y01_x01, kernel__y01_x01__y01_x02, sum_0);
+  sum_0 = __smlabt(tensor__y01_x02__y01_x03, kernel__y01_x01__y01_x02, sum_0);
+  sum_0 = __smlad(tensor__y02_x00__y02_x01, kernel__y02_x00__y02_x01, sum_0);
+  sum_0 = __smlabb(tensor__y02_x02__y02_x03, kernel__y02_x02__unknown, sum_0);
+  sum_1 = __smlatb(tensor__y00_x00__y00_x01, kernel__y00_x00__y00_x01, sum_1);
+  sum_1 = __smlabt(tensor__y00_x02__y00_x03, kernel__y00_x00__y00_x01, sum_1);
+  sum_1 = __smlatb(tensor__y00_x02__y00_x03, kernel__y00_x02__y01_x00, sum_1);
+  sum_1 = __smlatt(tensor__y01_x00__y01_x01, kernel__y00_x02__y01_x00, sum_1);
+  sum_1 = __smlad(tensor__y01_x02__y01_x03, kernel__y01_x01__y01_x02, sum_1);
+  sum_1 = __smlatb(tensor__y02_x00__y02_x01, kernel__y02_x00__y02_x01, sum_1);
+  sum_1 = __smlabt(tensor__y02_x02__y02_x03, kernel__y02_x00__y02_x01, sum_1);
+  sum_1 = __smlatb(tensor__y02_x02__y02_x03, kernel__y02_x02__unknown, sum_1);
+
+  int32_t scale_val = *scale;
+  int32_t requant_0 = (sum_0 * (int64_t) scale_val) >> 32;
+  requant_0 = (requant_0 + 1) >> 1;
+  requant_0 = __ssat(requant_0 + -128, 8);
+  int32_t requant_1 = (sum_1 * (int64_t) scale_val) >> 32;
+  requant_1 = (requant_1 + 1) >> 1;
+  requant_1 = __ssat(requant_1 + -128, 8);
+
+  ((int16_t*) output)[0] = (int16_t) requant_0;
+  ((int16_t*) output)[8] = (int16_t) requant_1;
+  return 0;
+}
+
+__attribute__((always_inline)) static inline int32_t tensordot_opt_x2_int16_w48_3x3_010_1_8(
+    int32_t *output, int32_t *tensor, int32_t *kernel, int32_t *bias, int32_t *scale
+) {
+  int32_t sum_0 = *bias, sum_1 = *bias;
+
+  int32_t tensor__y00_x00__y00_x01 = tensor[0];
+  int32_t tensor__y00_x02__y00_x03 = tensor[1];
+  int32_t tensor__y01_x00__y01_x01 = tensor[24];
+  int32_t tensor__y01_x02__y01_x03 = tensor[25];
+  int32_t tensor__y02_x00__y02_x01 = tensor[48];
+  int32_t tensor__y02_x02__y02_x03 = tensor[49];
+
+  int32_t kernel__unknown__y00_x00 = kernel[0];
+  int32_t kernel__y00_x01__y00_x02 = kernel[1];
+  int32_t kernel__y01_x00__y01_x01 = kernel[2];
+  int32_t kernel__y01_x02__y02_x00 = kernel[3];
+  int32_t kernel__y02_x01__y02_x02 = kernel[4];
+
+  sum_0 = __smlabt(tensor__y00_x00__y00_x01, kernel__unknown__y00_x00, sum_0);
+  sum_0 = __smlatb(tensor__y00_x00__y00_x01, kernel__y00_x01__y00_x02, sum_0);
+  sum_0 = __smlabt(tensor__y00_x02__y00_x03, kernel__y00_x01__y00_x02, sum_0);
+  sum_0 = __smlad(tensor__y01_x00__y01_x01, kernel__y01_x00__y01_x01, sum_0);
+  sum_0 = __smlabb(tensor__y01_x02__y01_x03, kernel__y01_x02__y02_x00, sum_0);
+  sum_0 = __smlabt(tensor__y02_x00__y02_x01, kernel__y01_x02__y02_x00, sum_0);
+  sum_0 = __smlatb(tensor__y02_x00__y02_x01, kernel__y02_x01__y02_x02, sum_0);
+  sum_0 = __smlabt(tensor__y02_x02__y02_x03, kernel__y02_x01__y02_x02, sum_0);
+  sum_1 = __smlatt(tensor__y00_x00__y00_x01, kernel__unknown__y00_x00, sum_1);
+  sum_1 = __smlad(tensor__y00_x02__y00_x03, kernel__y00_x01__y00_x02, sum_1);
+  sum_1 = __smlatb(tensor__y01_x00__y01_x01, kernel__y01_x00__y01_x01, sum_1);
+  sum_1 = __smlabt(tensor__y01_x02__y01_x03, kernel__y01_x00__y01_x01, sum_1);
+  sum_1 = __smlatb(tensor__y01_x02__y01_x03, kernel__y01_x02__y02_x00, sum_1);
+  sum_1 = __smlatt(tensor__y02_x00__y02_x01, kernel__y01_x02__y02_x00, sum_1);
+  sum_1 = __smlad(tensor__y02_x02__y02_x03, kernel__y02_x01__y02_x02, sum_1);
+
+  int32_t scale_val = *scale;
+  int32_t requant_0 = (sum_0 * (int64_t) scale_val) >> 32;
+  requant_0 = (requant_0 + 1) >> 1;
+  requant_0 = __ssat(requant_0 + -128, 8);
+  int32_t requant_1 = (sum_1 * (int64_t) scale_val) >> 32;
+  requant_1 = (requant_1 + 1) >> 1;
+  requant_1 = __ssat(requant_1 + -128, 8);
+
+  ((int16_t*) output)[0] = (int16_t) requant_0;
+  ((int16_t*) output)[8] = (int16_t) requant_1;
+  return 0;
+}
+
 #ifdef __cplusplus
 extern "C"
 #endif
@@ -30991,13 +31105,9 @@ TVM_DLL int32_t tvmgen_default_fused_nn_conv2d_add_cast_multiply_add_right_shift
     for (int32_t ax2_1 = 0; ax2_1 < 48; ++ax2_1) {
       for (int32_t ax3_1 = 0; ax3_1 < 8; ++ax3_1) {
         int32_t cse_var_5 = (((ax1_1 * 384) + (ax2_1 * 8)) + ax3_1);
-        int32_t __1 = ((int32_t)((((((int64_t)((int32_t*)depthwise_conv2d)[cse_var_5]) + ((int64_t)((int32_t*)fused_nn_conv2d_constant_6)[ax3_1])) * ((int64_t*)fused_nn_conv2d_add_cast_constant_7)[ax3_1]) + ((int64_t*)fused_nn_conv2d_add_cast_multiply_constant_8)[ax3_1]) >> ((int64_t*)fused_nn_conv2d_add_cast_multiply_add_constant_9)[ax3_1])) - 128;
-        int32_t __2 = (__1) < (127) ? (__1) : (127);
-        int8_t __3 = (int8_t)((__2) > (-128) ? (__2) : (-128));
-        int8_t __4 = (int8_t)127;
-        int8_t __5 = (__3) < (__4) ? (__3) : (__4);
-        int8_t __6 = (int8_t)-128;
-        ((int16_t*)T_subtract)[cse_var_5] = (((int16_t)((__5) > (__6) ? (__5) : (__6))) - (int16_t)-128);
+        int32_t __1 = ((int32_t)((((((int64_t)((int32_t*)depthwise_conv2d)[cse_var_5]) + ((int64_t)((int32_t*)fused_nn_conv2d_constant_6)[ax3_1])) * ((int64_t*)fused_nn_conv2d_add_cast_constant_7)[ax3_1]) + ((int64_t*)fused_nn_conv2d_add_cast_multiply_constant_8)[ax3_1]) >> ((int64_t*)fused_nn_conv2d_add_cast_multiply_add_constant_9)[ax3_1]));
+        int32_t requant_0 = __ssat(__1 - 128, 8);
+        ((int16_t*)T_subtract)[cse_var_5] = (((int16_t) requant_0) - (int16_t)-128);
       }
     }
   }
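
Note for readers without the Arm ACLE headers: the __smlad, __smlabb,
__smlabt, __smlatb, and __smlatt calls in the tensordot functions above
are single-instruction 16-bit multiply-accumulates. Below is a minimal
portable sketch of their semantics, illustrative only and not part of
the commit; the *_ref helper names are hypothetical.

#include <stdint.h>

/* Each intrinsic treats its two int32_t operands as packed pairs of
 * int16_t halfwords and accumulates the selected 16x16 products.
 * B = bottom (low) halfword, T = top (high) halfword. */
static inline int16_t lo16(int32_t w) { return (int16_t)(w & 0xFFFF); }
static inline int16_t hi16(int32_t w) { return (int16_t)((uint32_t)w >> 16); }

/* SMLAD: acc + bottom*bottom + top*top (dual multiply-accumulate) */
static inline int32_t smlad_ref(int32_t a, int32_t b, int32_t acc) {
  return acc + lo16(a) * lo16(b) + hi16(a) * hi16(b);
}

/* SMLABB/SMLABT/SMLATB/SMLATT: one 16x16 product, selected by which
 * halfword (B or T) is taken from each operand. */
static inline int32_t smlabb_ref(int32_t a, int32_t b, int32_t acc) { return acc + lo16(a) * lo16(b); }
static inline int32_t smlabt_ref(int32_t a, int32_t b, int32_t acc) { return acc + lo16(a) * hi16(b); }
static inline int32_t smlatb_ref(int32_t a, int32_t b, int32_t acc) { return acc + hi16(a) * lo16(b); }
static inline int32_t smlatt_ref(int32_t a, int32_t b, int32_t acc) { return acc + hi16(a) * hi16(b); }

This is also why the int16_t kernel array above is read through int32_t
pointers: each 32-bit load carries two adjacent 16-bit values, and the
__y00_x01-style suffixes in the variable names record which halfword
holds which tensor or kernel element.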


[tvm] 01/02: Another requantization

Posted by gu...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

guberti pushed a commit to branch acc-tests-1
in repository https://gitbox.apache.org/repos/asf/tvm.git

commit f1f90f00b5ec4b3e6bb3618b4efc51a1f70d1731
Author: Gavin Uberti <ga...@gmail.com>
AuthorDate: Sat Jan 21 21:15:11 2023 -0800

    Another requantization
---
 vww/modified.c | 11 ++---------
 1 file changed, 2 insertions(+), 9 deletions(-)

diff --git a/vww/modified.c b/vww/modified.c
index 327ea0ab19..b35a22dd4e 100644
--- a/vww/modified.c
+++ b/vww/modified.c
@@ -30855,15 +30855,8 @@ TVM_DLL int32_t tvmgen_default_fused_nn_conv2d_add_cast_multiply_add_right_shift
           ((int16_t*)T_subtract)[cse_var_137] = (((int16_t) requant_0) - (int16_t)-128);
 
           int32_t sum_1 = ((int32_t*)conv)[(cse_var_136 + 1)] + ((int32_t*)bias_8070086)[cse_var_138];
-          int32_t __7 = (
-                            (int32_t)(
-                                (
-                                    (
-                                        (sum_1) *
-                                        ((int64_t*)fused_nn_conv2d_add_cast_constant_2)[cse_var_138]
-                                    ) + ((int64_t*)fused_nn_conv2d_add_cast_multiply_constant_3)[cse_var_138]
-                                ) >> ((int64_t*)fused_nn_conv2d_add_cast_multiply_add_constant_4)[cse_var_138])
-                            );
+          int32_t __7 = (sum_1 * (int64_t) scale_val) >> 32;
+          __7 = (__7 + 1) >> 1;
           int32_t requant_1 = __ssat(__7 - 128, 8);
           ((int16_t*)T_subtract)[(cse_var_137 + 1)] = (((int16_t)requant_1) - (int16_t)-128);
         }
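
Both commits converge on the same requantization idiom: multiply the
32-bit accumulator by a per-channel fixed-point scale, keep the high
word of the 64-bit product, apply a rounding shift, then offset by the
output zero point and saturate to the int8 range. Below is a standalone
sketch of that arithmetic, illustrative only; ssat8 is a hypothetical
stand-in for the __ssat(x, 8) intrinsic, and the -128 zero point
matches the code above.

#include <stdint.h>

/* Clamp to the signed 8-bit range, as __ssat(x, 8) does. */
static inline int32_t ssat8(int32_t x) {
  return x < -128 ? -128 : (x > 127 ? 127 : x);
}

static inline int16_t requantize(int32_t sum, int32_t scale) {
  int32_t r = (int32_t)(((int64_t)sum * scale) >> 32); /* high word of 64-bit product */
  r = (r + 1) >> 1;               /* rounding shift: ~round(sum * scale / 2^33) */
  return (int16_t)ssat8(r - 128); /* apply zero point, clamp to [-128, 127] */
}

Compared with the removed per-channel 64-bit multiply/add/shift chain,
this folds the scaling into one high-word multiply plus a single
rounding shift; commit 02/02 applies the same simplification to the
min/max clamp chain, collapsing it into a single __ssat call.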