You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@orc.apache.org by do...@apache.org on 2021/08/07 16:04:33 UTC
[orc] branch branch-1.7 updated: ORC-913: Support data/format/compress options in Spark benchmark (#825)
This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch branch-1.7
in repository https://gitbox.apache.org/repos/asf/orc.git
The following commit(s) were added to refs/heads/branch-1.7 by this push:
new f15062a ORC-913: Support data/format/compress options in Spark benchmark (#825)
f15062a is described below
commit f15062acf360c39c16c3f36c068d72a65a2ac43e
Author: William Hyun <wi...@apache.org>
AuthorDate: Sat Aug 7 09:03:40 2021 -0700
ORC-913: Support data/format/compress options in Spark benchmark (#825)
### What changes were proposed in this pull request?
This PR aims to support the data/format/compress options in the Spark benchmark.
### Why are the changes needed?
This makes the Spark benchmark more controllable.
These options do not appear in the `-h` help output; however, they still work.
### How was this patch tested?
Manually, by running:
```
java -jar spark/target/orc-benchmarks-spark-1.8.0-SNAPSHOT.jar spark data -d sales -c snappy -f orc
```
(cherry picked from commit 4f089c29a4d7fa09f711d5979628f3ab6459117c)
Signed-off-by: Dongjoon Hyun <do...@apache.org>
---
.../org/apache/orc/bench/core/convert/GenerateVariants.java | 2 +-
.../src/java/org/apache/orc/bench/spark/SparkBenchmark.java | 12 +++++++++++-
2 files changed, 12 insertions(+), 2 deletions(-)
diff --git a/java/bench/core/src/java/org/apache/orc/bench/core/convert/GenerateVariants.java b/java/bench/core/src/java/org/apache/orc/bench/core/convert/GenerateVariants.java
index f4c9bc6..d203bfd 100644
--- a/java/bench/core/src/java/org/apache/orc/bench/core/convert/GenerateVariants.java
+++ b/java/bench/core/src/java/org/apache/orc/bench/core/convert/GenerateVariants.java
@@ -217,7 +217,7 @@ public class GenerateVariants implements OrcBenchmark {
}
}
- static CommandLine parseCommandLine(String[] args) throws ParseException {
+ public static CommandLine parseCommandLine(String[] args) throws ParseException {
Options options = new Options()
.addOption("h", "help", false, "Provide help")
.addOption("c", "compress", true, "List of compression")
diff --git a/java/bench/spark/src/java/org/apache/orc/bench/spark/SparkBenchmark.java b/java/bench/spark/src/java/org/apache/orc/bench/spark/SparkBenchmark.java
index f01c140..f9c817d 100644
--- a/java/bench/spark/src/java/org/apache/orc/bench/spark/SparkBenchmark.java
+++ b/java/bench/spark/src/java/org/apache/orc/bench/spark/SparkBenchmark.java
@@ -19,6 +19,7 @@
package org.apache.orc.bench.spark;
import com.google.auto.service.AutoService;
+import org.apache.commons.cli.CommandLine;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
@@ -27,6 +28,7 @@ import org.apache.orc.TypeDescription;
import org.apache.orc.bench.core.OrcBenchmark;
import org.apache.orc.bench.core.IOCounters;
import org.apache.orc.bench.core.Utilities;
+import org.apache.orc.bench.core.convert.GenerateVariants;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.avro.AvroFileFormat;
import org.apache.spark.sql.catalyst.InternalRow;
@@ -52,6 +54,7 @@ import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.infra.Blackhole;
import org.openjdk.jmh.runner.Runner;
+import org.openjdk.jmh.runner.options.OptionsBuilder;
import scala.Function1;
import java.io.IOException;
@@ -87,7 +90,14 @@ public class SparkBenchmark implements OrcBenchmark {
@Override
public void run(String[] args) throws Exception {
- new Runner(Utilities.parseOptions(args, this.getClass())).run();
+ CommandLine cmds = GenerateVariants.parseCommandLine(args);
+ new Runner(new OptionsBuilder()
+ .parent(Utilities.parseOptions(args, this.getClass()))
+ .param("compression", cmds.getOptionValue("compress", "none,gz,snappy").split(","))
+ .param("dataset", cmds.getOptionValue("data", "taxi,sales,github").split(","))
+ .param("format", cmds.getOptionValue("format", "orc,parquet,json").split(","))
+ .build()
+ ).run();
}
@State(Scope.Thread)