You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kyuubi.apache.org by ul...@apache.org on 2021/10/18 06:29:52 UTC

[incubator-kyuubi] branch master updated: [KYUUBI #1245] Improve paddingTo8Byte perf

This is an automated email from the ASF dual-hosted git repository.

ulyssesyou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-kyuubi.git


The following commit(s) were added to refs/heads/master by this push:
     new 7626973  [KYUUBI #1245] Improve paddingTo8Byte perf
7626973 is described below

commit 7626973e15646c13da1d81a360e31921c344c916
Author: ulysses-you <ul...@gmail.com>
AuthorDate: Mon Oct 18 14:29:39 2021 +0800

    [KYUUBI #1245] Improve paddingTo8Byte perf
    
    <!--
    Thanks for sending a pull request!
    
    Here are some tips for you:
      1. If this is your first time, please read our contributor guidelines: https://kyuubi.readthedocs.io/en/latest/community/contributions.html
      2. If the PR is related to an issue in https://github.com/apache/incubator-kyuubi/issues, add '[KYUUBI #XXXX]' in your PR title, e.g., '[KYUUBI #XXXX] Your PR title ...'.
      3. If the PR is unfinished, add '[WIP]' in your PR title, e.g., '[WIP][KYUUBI #XXXX] Your PR title ...'.
    -->
    
    ### _Why are the changes needed?_
    <!--
    Please clarify why the changes are needed. For instance,
      1. If you add a feature, you can talk about the use case of it.
      2. If you fix a bug, you can clarify why it is a bug.
    -->
    Add a new benchmark for `paddingTo8Byte` method
    
    Before this PR:
    ```
    Java HotSpot(TM) 64-Bit Server VM 1.8.0_271-b09 on Mac OS X 10.16
    Intel(R) Core(TM) i7-4770HQ CPU @ 2.20GHz
    10000000 iterations paddingTo8Byte benchmark:  Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
    ----------------------------------------------------------------------------------------------------------------------------
    2 length benchmark                                     2112           2180          78          4.7         211.2       1.0X
    16 length benchmark                                     454            459           6         22.0          45.4       4.6X
    ```
    
    After this PR:
    ```
    Java HotSpot(TM) 64-Bit Server VM 1.8.0_271-b09 on Mac OS X 10.16
    Intel(R) Core(TM) i7-4770HQ CPU @ 2.20GHz
    10000000 iterations paddingTo8Byte benchmark:  Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
    ----------------------------------------------------------------------------------------------------------------------------
    2 length benchmark                                      167            170           3         59.9          16.7       1.0X
    16 length benchmark                                     162            164           3         61.7          16.2       1.0X
    ```
    
    ### _How was this patch tested?_
    - [ ] Add some test cases that check the changes thoroughly including negative and positive cases if possible
    
    - [ ] Add screenshots for manual tests if appropriate
    
    - [x] [Run test](https://kyuubi.readthedocs.io/en/latest/develop_tools/testing.html#running-tests) locally before making a pull request
    
    Closes #1245 from ulysses-you/improve-zorder-perf.
    
    Closes #1245
    
    487c6900 [ulysses-you] improve paddingTo8Byte perf
    
    Authored-by: ulysses-you <ul...@gmail.com>
    Signed-off-by: ulysses-you <ul...@apache.org>
---
 .../benchmarks/ZorderCoreBenchmark-results.txt     |  7 ++
 .../kyuubi/sql/zorder/ZorderBytesUtils.scala       | 32 ++++------
 .../org/apache/spark/sql/ZorderCoreBenchmark.scala | 74 ++++++++++++++--------
 3 files changed, 67 insertions(+), 46 deletions(-)

diff --git a/dev/kyuubi-extension-spark-common/benchmarks/ZorderCoreBenchmark-results.txt b/dev/kyuubi-extension-spark-common/benchmarks/ZorderCoreBenchmark-results.txt
index 29cb206..642ff20 100644
--- a/dev/kyuubi-extension-spark-common/benchmarks/ZorderCoreBenchmark-results.txt
+++ b/dev/kyuubi-extension-spark-common/benchmarks/ZorderCoreBenchmark-results.txt
@@ -9,3 +9,10 @@ Intel(R) Core(TM) i7-4770HQ CPU @ 2.20GHz
 3 long columns benchmark                           3029           3133         111          0.3        3028.7       0.5X
 4 long columns benchmark                           3789           3848          89          0.3        3789.0       0.4X
 
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_271-b09 on Mac OS X 10.16
+Intel(R) Core(TM) i7-4770HQ CPU @ 2.20GHz
+10000000 iterations paddingTo8Byte benchmark:  Best Time(ms)   Avg Time(ms)   Stdev(ms)    Rate(M/s)   Per Row(ns)   Relative
+----------------------------------------------------------------------------------------------------------------------------
+2 length benchmark                                      167            170           3         59.9          16.7       1.0X
+16 length benchmark                                     162            164           3         61.7          16.2       1.0X
+
diff --git a/dev/kyuubi-extension-spark-common/src/main/scala/org/apache/kyuubi/sql/zorder/ZorderBytesUtils.scala b/dev/kyuubi-extension-spark-common/src/main/scala/org/apache/kyuubi/sql/zorder/ZorderBytesUtils.scala
index fd27036..18151b2 100644
--- a/dev/kyuubi-extension-spark-common/src/main/scala/org/apache/kyuubi/sql/zorder/ZorderBytesUtils.scala
+++ b/dev/kyuubi-extension-spark-common/src/main/scala/org/apache/kyuubi/sql/zorder/ZorderBytesUtils.scala
@@ -152,28 +152,18 @@ object ZorderBytesUtils {
   }
 
   def paddingTo8Byte(a: Array[Byte]): Array[Byte] = {
-    if (a.length == 8) {
-      return a
-    }
-    if (a.length > 8) {
-      val result = new Array[Byte](8);
-      a.copyToArray(result)
-      return result
+    val len = a.length
+    if (len == 8) {
+      a
+    } else if (len > 8) {
+      val result = new Array[Byte](8)
+      System.arraycopy(a, 0, result, 0, 8)
+      result
+    } else {
+      val result = new Array[Byte](8)
+      System.arraycopy(a, 0, result, 8 - len, len)
+      result
     }
-    val paddingSize = 8 - a.length;
-    val emptyArray = Array.ofDim[Byte](paddingSize)
-    arrayConcat(emptyArray, a)
-  }
-
-  def arrayConcat(bytes: Array[Byte]*): Array[Byte] = {
-    val length = bytes.foldLeft(0)(_ + _.length)
-    val result = new Array[Byte](length)
-    var pos = 0
-    bytes.foreach(arr => {
-      arr.copyToArray(result, pos)
-      pos += arr.length
-    })
-    result
   }
 
   def defaultValue(dataType: DataType): Array[Byte] = toByte {
diff --git a/dev/kyuubi-extension-spark-common/src/test/scala/org/apache/spark/sql/ZorderCoreBenchmark.scala b/dev/kyuubi-extension-spark-common/src/test/scala/org/apache/spark/sql/ZorderCoreBenchmark.scala
index 66d6eee..78c9c91 100644
--- a/dev/kyuubi-extension-spark-common/src/test/scala/org/apache/spark/sql/ZorderCoreBenchmark.scala
+++ b/dev/kyuubi-extension-spark-common/src/test/scala/org/apache/spark/sql/ZorderCoreBenchmark.scala
@@ -35,6 +35,7 @@ import org.apache.kyuubi.sql.zorder.ZorderBytesUtils
  * }}}
  */
 class ZorderCoreBenchmark extends KyuubiSparkSQLExtensionTest with KyuubiBenchmarkBase {
+  private val runBenchmark = sys.env.contains("RUN_BENCHMARK")
   private val numRows = 1 * 1000 * 1000
 
   private def randomIntByteArray(numColumns: Int): Seq[Array[Array[Byte]]] = {
@@ -54,38 +55,61 @@ class ZorderCoreBenchmark extends KyuubiSparkSQLExtensionTest with KyuubiBenchma
     }
   }
 
-  test("zorder core benchmark") {
-    if (sys.env.contains("RUN_BENCHMARK")) {
-      withHeader {
-        val benchmark = new Benchmark(
-          s"$numRows rows zorder core benchmark", numRows, output = output)
-        benchmark.addCase("2 int columns benchmark", 3) { _ =>
-          randomIntByteArray(2).foreach(ZorderBytesUtils.interleaveMultiByteArray)
-        }
+  private def interleaveMultiByteArrayBenchmark(): Unit = {
+    val benchmark = new Benchmark(
+      s"$numRows rows zorder core benchmark", numRows, output = output)
+    benchmark.addCase("2 int columns benchmark", 3) { _ =>
+      randomIntByteArray(2).foreach(ZorderBytesUtils.interleaveMultiByteArray)
+    }
+
+    benchmark.addCase("3 int columns benchmark", 3) { _ =>
+      randomIntByteArray(3).foreach(ZorderBytesUtils.interleaveMultiByteArray)
+    }
+
+    benchmark.addCase("4 int columns benchmark", 3) { _ =>
+      randomIntByteArray(4).foreach(ZorderBytesUtils.interleaveMultiByteArray)
+    }
+
+
+    benchmark.addCase("2 long columns benchmark", 3) { _ =>
+      randomLongByteArray(2).foreach(ZorderBytesUtils.interleaveMultiByteArray)
+    }
 
-        benchmark.addCase("3 int columns benchmark", 3) { _ =>
-          randomIntByteArray(3).foreach(ZorderBytesUtils.interleaveMultiByteArray)
-        }
+    benchmark.addCase("3 long columns benchmark", 3) { _ =>
+      randomLongByteArray(3).foreach(ZorderBytesUtils.interleaveMultiByteArray)
+    }
 
-        benchmark.addCase("4 int columns benchmark", 3) { _ =>
-          randomIntByteArray(4).foreach(ZorderBytesUtils.interleaveMultiByteArray)
-        }
+    benchmark.addCase("4 long columns benchmark", 3) { _ =>
+      randomLongByteArray(4).foreach(ZorderBytesUtils.interleaveMultiByteArray)
+    }
 
+    benchmark.run()
+  }
 
-        benchmark.addCase("2 long columns benchmark", 3) { _ =>
-          randomLongByteArray(2).foreach(ZorderBytesUtils.interleaveMultiByteArray)
-        }
+  private def paddingTo8ByteBenchmark() {
+    val iterations = 10 * 1000 * 1000
 
-        benchmark.addCase("3 long columns benchmark", 3) { _ =>
-          randomLongByteArray(3).foreach(ZorderBytesUtils.interleaveMultiByteArray)
-        }
+    val b2 = Array('a'.toByte, 'b'.toByte)
+    val benchmark = new Benchmark(
+      s"$iterations iterations paddingTo8Byte benchmark", iterations, output = output)
+    benchmark.addCase("2 length benchmark", 3) { _ =>
+      (1 to iterations).foreach(_ => ZorderBytesUtils.paddingTo8Byte(b2))
+    }
 
-        benchmark.addCase("4 long columns benchmark", 3) { _ =>
-          randomLongByteArray(4).foreach(ZorderBytesUtils.interleaveMultiByteArray)
-        }
+    val b16 = Array.tabulate(16) { i => i.toByte }
+    benchmark.addCase("16 length benchmark", 3) { _ =>
+      (1 to iterations).foreach(_ => ZorderBytesUtils.paddingTo8Byte(b16))
+    }
+
+    benchmark.run()
+  }
+
+  test("zorder core benchmark") {
+    assume(runBenchmark)
 
-        benchmark.run()
-      }
+    withHeader {
+      interleaveMultiByteArrayBenchmark()
+      paddingTo8ByteBenchmark()
     }
   }