You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by we...@apache.org on 2018/12/14 02:51:18 UTC
[spark] branch master updated: [SPARK-26337][SQL][TEST] Add benchmark for LongToUnsafeRowMap

This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 93139af  [SPARK-26337][SQL][TEST] Add benchmark for LongToUnsafeRowMap
93139af is described below

commit 93139afb072d14870fb4eab01cb11df28eb0f8dd
Author: Liang-Chi Hsieh <vi...@gmail.com>
AuthorDate: Fri Dec 14 10:50:48 2018 +0800

    [SPARK-26337][SQL][TEST] Add benchmark for LongToUnsafeRowMap
    
    ## What changes were proposed in this pull request?
    
    SPARK-26155 reports a performance regression observed on TPC-DS. Because `LongToUnsafeRowMap` is the root cause of that regression, it is better to add a dedicated benchmark for it.
    
    The benchmark makes it easier to show the performance difference between different metrics implementations in `LongToUnsafeRowMap`.
    
    ## How was this patch tested?
    
    Manually ran the added benchmark.
    
    Closes #23284 from viirya/SPARK-26337.
    
    Authored-by: Liang-Chi Hsieh <vi...@gmail.com>
    Signed-off-by: Wenchen Fan <we...@databricks.com>
---
 .../HashedRelationMetricsBenchmark-results.txt     | 11 +++
 .../benchmark/HashedRelationMetricsBenchmark.scala | 84 ++++++++++++++++++++++
 2 files changed, 95 insertions(+)

diff --git a/sql/core/benchmarks/HashedRelationMetricsBenchmark-results.txt b/sql/core/benchmarks/HashedRelationMetricsBenchmark-results.txt
new file mode 100644
index 0000000..338244a
--- /dev/null
+++ b/sql/core/benchmarks/HashedRelationMetricsBenchmark-results.txt
@@ -0,0 +1,11 @@
+================================================================================================
+LongToUnsafeRowMap metrics
+================================================================================================
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_181-b13 on Mac OS X 10.13.6
+Intel(R) Core(TM) i7-7700HQ CPU @ 2.80GHz
+LongToUnsafeRowMap metrics:              Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------
+LongToUnsafeRowMap                             234 /  315          2.1         467.3       1.0X
+
+
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/HashedRelationMetricsBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/HashedRelationMetricsBenchmark.scala
new file mode 100644
index 0000000..bdf753d
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/HashedRelationMetricsBenchmark.scala
@@ -0,0 +1,84 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.benchmark
+
+import org.apache.spark.SparkConf
+import org.apache.spark.benchmark.Benchmark
+import org.apache.spark.internal.config.MEMORY_OFFHEAP_ENABLED
+import org.apache.spark.memory.{StaticMemoryManager, TaskMemoryManager}
+import org.apache.spark.sql.catalyst.InternalRow
+import org.apache.spark.sql.catalyst.expressions.{BoundReference, UnsafeProjection}
+import org.apache.spark.sql.execution.joins.LongToUnsafeRowMap
+import org.apache.spark.sql.types.LongType
+
+/**
+ * Benchmark to measure the performance of HashedRelation metrics.
+ * To run this benchmark:
+ * {{{
+ *   1. without sbt: bin/spark-submit --class <this class> <spark sql test jar>
+ *   2. build/sbt "sql/test:runMain <this class>"
+ *   3. generate result: SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "sql/test:runMain <this class>"
+ *      Results will be written to "benchmarks/HashedRelationMetricsBenchmark-results.txt".
+ * }}}
+ */
+object HashedRelationMetricsBenchmark extends SqlBasedBenchmark {
+
+  /**
+   * Times one case: fill a `LongToUnsafeRowMap` with keys `0 until numRows`, then look up every
+   * key from 101 concurrent threads. The map is always freed, even if the case body throws.
+   */
+  def benchmarkLongToUnsafeRowMapMetrics(numRows: Int): Unit = {
+    runBenchmark("LongToUnsafeRowMap metrics") {
+      val benchmark = new Benchmark("LongToUnsafeRowMap metrics", numRows, output = output)
+      benchmark.addCase("LongToUnsafeRowMap") { _ =>
+        val sparkConf = new SparkConf().set(MEMORY_OFFHEAP_ENABLED.key, "false")
+        val memoryManager = new StaticMemoryManager(sparkConf, Long.MaxValue, Long.MaxValue, 1)
+        val taskMemoryManager = new TaskMemoryManager(memoryManager, 0)
+        val unsafeProj = UnsafeProjection.create(Seq(BoundReference(0, LongType, false)))
+        val keys = Range.Long(0, numRows, 1)
+        val map = new LongToUnsafeRowMap(taskMemoryManager, 1)
+        try {
+          keys.foreach(k => map.append(k, unsafeProj(InternalRow(k))))
+          map.optimize()
+          // `0 to 100` is inclusive, so 101 reader threads share the map concurrently.
+          val threads = (0 to 100).map { _ =>
+            val thread = new Thread {
+              override def run(): Unit = {
+                val row = unsafeProj(InternalRow(0L)).copy()
+                keys.foreach { k =>
+                  assert(map.getValue(k, row) eq row)
+                  assert(row.getLong(0) == k)
+                }
+              }
+            }
+            thread.start()
+            thread
+          }
+          threads.foreach(_.join())
+        } finally {
+          map.free()
+        }
+      }
+      benchmark.run()
+    }
+  }
+
+  override def runBenchmarkSuite(mainArgs: Array[String]): Unit = {
+    benchmarkLongToUnsafeRowMapMetrics(500000)
+  }
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org