You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ki...@apache.org on 2018/09/21 06:06:18 UTC
spark git commit: [SPARK-25487][SQL][TEST] Refactor PrimitiveArrayBenchmark
Repository: spark
Updated Branches:
refs/heads/master 596af211a -> 1f4ca6f5c
[SPARK-25487][SQL][TEST] Refactor PrimitiveArrayBenchmark
## What changes were proposed in this pull request?
Refactor PrimitiveArrayBenchmark to use a main method and write the output to a separate file.
Run the command below to generate benchmark results:
```
SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "sql/test:runMain org.apache.spark.sql.execution.benchmark.PrimitiveArrayBenchmark"
```
## How was this patch tested?
Manual tests.
Closes #22497 from seancxmao/SPARK-25487.
Authored-by: seancxmao <se...@gmail.com>
Signed-off-by: Kazuaki Ishizaki <is...@jp.ibm.com>
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/1f4ca6f5
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/1f4ca6f5
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/1f4ca6f5
Branch: refs/heads/master
Commit: 1f4ca6f5c52560585ea977bddc69243a29bf67f2
Parents: 596af21
Author: seancxmao <se...@gmail.com>
Authored: Fri Sep 21 15:04:47 2018 +0900
Committer: Kazuaki Ishizaki <is...@jp.ibm.com>
Committed: Fri Sep 21 15:04:47 2018 +0900
----------------------------------------------------------------------
.../PrimitiveArrayBenchmark-results.txt | 13 ++++++
.../benchmark/PrimitiveArrayBenchmark.scala | 47 +++++++++-----------
2 files changed, 35 insertions(+), 25 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/1f4ca6f5/sql/core/benchmarks/PrimitiveArrayBenchmark-results.txt
----------------------------------------------------------------------
diff --git a/sql/core/benchmarks/PrimitiveArrayBenchmark-results.txt b/sql/core/benchmarks/PrimitiveArrayBenchmark-results.txt
new file mode 100644
index 0000000..b06b5c0
--- /dev/null
+++ b/sql/core/benchmarks/PrimitiveArrayBenchmark-results.txt
@@ -0,0 +1,13 @@
+================================================================================================
+Write primitive arrays in dataset
+================================================================================================
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_131-b11 on Mac OS X 10.13.6
+Intel(R) Core(TM) i7-6820HQ CPU @ 2.70GHz
+
+Write an array in Dataset: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
+------------------------------------------------------------------------------------------------
+Int 437 / 529 19.2 52.1 1.0X
+Double 638 / 670 13.1 76.1 0.7X
+
+
http://git-wip-us.apache.org/repos/asf/spark/blob/1f4ca6f5/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/PrimitiveArrayBenchmark.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/PrimitiveArrayBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/PrimitiveArrayBenchmark.scala
index e7c8f27..7f467d1 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/PrimitiveArrayBenchmark.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/PrimitiveArrayBenchmark.scala
@@ -17,21 +17,30 @@
package org.apache.spark.sql.execution.benchmark
-import scala.concurrent.duration._
-
-import org.apache.spark.SparkConf
-import org.apache.spark.sql.catalyst.util._
-import org.apache.spark.util.Benchmark
+import org.apache.spark.sql.SparkSession
+import org.apache.spark.util.{Benchmark, BenchmarkBase => FileBenchmarkBase}
/**
- * Benchmark [[PrimitiveArray]] for DataFrame and Dataset program using primitive array
- * To run this:
- * 1. replace ignore(...) with test(...)
- * 2. build/sbt "sql/test-only *benchmark.PrimitiveArrayBenchmark"
- *
- * Benchmarks in this file are skipped in normal builds.
+ * Benchmark primitive arrays via DataFrame and Dataset program using primitive arrays
+ * To run this benchmark:
+ * 1. without sbt: bin/spark-submit --class <this class> <spark sql test jar>
+ * 2. build/sbt "sql/test:runMain <this class>"
+ * 3. generate result: SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "sql/test:runMain <this class>"
+ * Results will be written to "benchmarks/PrimitiveArrayBenchmark-results.txt".
*/
-class PrimitiveArrayBenchmark extends BenchmarkBase {
+object PrimitiveArrayBenchmark extends FileBenchmarkBase {
+ lazy val sparkSession = SparkSession.builder
+ .master("local[1]")
+ .appName("microbenchmark")
+ .config("spark.sql.shuffle.partitions", 1)
+ .config("spark.sql.autoBroadcastJoinThreshold", 1)
+ .getOrCreate()
+
+ override def benchmark(): Unit = {
+ runBenchmark("Write primitive arrays in dataset") {
+ writeDatasetArray(4)
+ }
+ }
def writeDatasetArray(iters: Int): Unit = {
import sparkSession.implicits._
@@ -62,21 +71,9 @@ class PrimitiveArrayBenchmark extends BenchmarkBase {
}
}
- val benchmark = new Benchmark("Write an array in Dataset", count * iters)
+ val benchmark = new Benchmark("Write an array in Dataset", count * iters, output = output)
benchmark.addCase("Int ")(intArray)
benchmark.addCase("Double")(doubleArray)
benchmark.run
- /*
- OpenJDK 64-Bit Server VM 1.8.0_91-b14 on Linux 4.4.11-200.fc22.x86_64
- Intel Xeon E3-12xx v2 (Ivy Bridge)
- Write an array in Dataset: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative
- ------------------------------------------------------------------------------------------------
- Int 352 / 401 23.8 42.0 1.0X
- Double 821 / 885 10.2 97.9 0.4X
- */
- }
-
- ignore("Write an array in Dataset") {
- writeDatasetArray(4)
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org