You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by we...@apache.org on 2018/12/14 02:51:18 UTC
[spark] branch master updated: [SPARK-26337][SQL][TEST] Add
benchmark for LongToUnsafeRowMap
This is an automated email from the ASF dual-hosted git repository.
wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 93139af [SPARK-26337][SQL][TEST] Add benchmark for LongToUnsafeRowMap
93139af is described below
commit 93139afb072d14870fb4eab01cb11df28eb0f8dd
Author: Liang-Chi Hsieh <vi...@gmail.com>
AuthorDate: Fri Dec 14 10:50:48 2018 +0800
[SPARK-26337][SQL][TEST] Add benchmark for LongToUnsafeRowMap
## What changes were proposed in this pull request?
Regarding the performance issue of SPARK-26155, it reports the issue on TPC-DS. I think it is better to add a benchmark for `LongToUnsafeRowMap` which is the root cause of performance regression.
It can be easier to show performance difference between different metric implementations in `LongToUnsafeRowMap`.
## How was this patch tested?
Manually run added benchmark.
Closes #23284 from viirya/SPARK-26337.
Authored-by: Liang-Chi Hsieh <vi...@gmail.com>
Signed-off-by: Wenchen Fan <we...@databricks.com>
---
.../HashedRelationMetricsBenchmark-results.txt | 11 +++
.../benchmark/HashedRelationMetricsBenchmark.scala | 84 ++++++++++++++++++++++
2 files changed, 95 insertions(+)
diff --git a/sql/core/benchmarks/HashedRelationMetricsBenchmark-results.txt b/sql/core/benchmarks/HashedRelationMetricsBenchmark-results.txt
new file mode 100644
index 0000000..338244a
--- /dev/null
+++ b/sql/core/benchmarks/HashedRelationMetricsBenchmark-results.txt
@@ -0,0 +1,11 @@
+================================================================================================
+LongToUnsafeRowMap metrics
+================================================================================================
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_181-b13 on Mac OS X 10.13.6
+Intel(R) Core(TM) i7-7700HQ CPU @ 2.80GHz
+LongToUnsafeRowMap metrics: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
+------------------------------------------------------------------------------------------------
+LongToUnsafeRowMap 234 / 315 2.1 467.3 1.0X
+
+
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/HashedRelationMetricsBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/HashedRelationMetricsBenchmark.scala
new file mode 100644
index 0000000..bdf753d
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/HashedRelationMetricsBenchmark.scala
@@ -0,0 +1,84 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.benchmark
+
+import org.apache.spark.SparkConf
+import org.apache.spark.benchmark.Benchmark
+import org.apache.spark.internal.config.MEMORY_OFFHEAP_ENABLED
+import org.apache.spark.memory.{StaticMemoryManager, TaskMemoryManager}
+import org.apache.spark.sql.catalyst.InternalRow
+import org.apache.spark.sql.catalyst.expressions.{BoundReference, UnsafeProjection}
+import org.apache.spark.sql.execution.joins.LongToUnsafeRowMap
+import org.apache.spark.sql.types.LongType
+
+/**
+ * Benchmark to measure metrics performance at HashedRelation.
+ * To run this benchmark:
+ * {{{
+ * 1. without sbt: bin/spark-submit --class <this class> <spark sql test jar>
+ * 2. build/sbt "sql/test:runMain <this class>"
+ * 3. generate result: SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "sql/test:runMain <this class>"
+ * Results will be written to "benchmarks/HashedRelationMetricsBenchmark-results.txt".
+ * }}}
+ */
+object HashedRelationMetricsBenchmark extends SqlBasedBenchmark {
+
+ def benchmarkLongToUnsafeRowMapMetrics(numRows: Int): Unit = {
+ runBenchmark("LongToUnsafeRowMap metrics") {
+ val benchmark = new Benchmark("LongToUnsafeRowMap metrics", numRows, output = output)
+ benchmark.addCase("LongToUnsafeRowMap") { iter =>
+ val taskMemoryManager = new TaskMemoryManager(
+ new StaticMemoryManager(
+ new SparkConf().set(MEMORY_OFFHEAP_ENABLED.key, "false"),
+ Long.MaxValue,
+ Long.MaxValue,
+ 1),
+ 0)
+ val unsafeProj = UnsafeProjection.create(Seq(BoundReference(0, LongType, false)))
+
+ val keys = Range.Long(0, numRows, 1)
+ val map = new LongToUnsafeRowMap(taskMemoryManager, 1)
+ keys.foreach { k =>
+ map.append(k, unsafeProj(InternalRow(k)))
+ }
+ map.optimize()
+
+ val threads = (0 to 100).map { _ =>
+ val thread = new Thread {
+ override def run: Unit = {
+ val row = unsafeProj(InternalRow(0L)).copy()
+ keys.foreach { k =>
+ assert(map.getValue(k, row) eq row)
+ assert(row.getLong(0) == k)
+ }
+ }
+ }
+ thread.start()
+ thread
+ }
+ threads.map(_.join())
+ map.free()
+ }
+ benchmark.run()
+ }
+ }
+
+ override def runBenchmarkSuite(mainArgs: Array[String]): Unit = {
+ benchmarkLongToUnsafeRowMapMetrics(500000)
+ }
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org