You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@beam.apache.org by ec...@apache.org on 2022/05/10 13:17:06 UTC
[beam] branch master updated: [BEAM-12918] Add PostCommit_Java_Tpcds_Spark job
This is an automated email from the ASF dual-hosted git repository.
echauchot pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git
The following commit(s) were added to refs/heads/master by this push:
new c654733b45e [BEAM-12918] Add PostCommit_Java_Tpcds_Spark job
new 5c21fbccec5 Merge pull request #15679 from aromanenko-dev/BEAM-12918-tpcds-jenkins
c654733b45e is described below
commit c654733b45e44ee88af3498133623aa056de6e73
Author: Alexey Romanenko <ar...@gmail.com>
AuthorDate: Thu Oct 7 18:28:12 2021 +0200
[BEAM-12918] Add PostCommit_Java_Tpcds_Spark job
---
.../jenkins/job_PostCommit_Java_Tpcds_Spark.groovy | 188 +++++++++++++++++++++
.../beam/sdk/tpcds/TpcdsParametersReader.java | 3 +-
2 files changed, 190 insertions(+), 1 deletion(-)
diff --git a/.test-infra/jenkins/job_PostCommit_Java_Tpcds_Spark.groovy b/.test-infra/jenkins/job_PostCommit_Java_Tpcds_Spark.groovy
new file mode 100644
index 00000000000..89d2169cd8b
--- /dev/null
+++ b/.test-infra/jenkins/job_PostCommit_Java_Tpcds_Spark.groovy
@@ -0,0 +1,188 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import CommonJobProperties as commonJobProperties
+import NoPhraseTriggeringPostCommitBuilder
+import PhraseTriggeringPostCommitBuilder
+import InfluxDBCredentialsHelper
+
+// Runs the Tpcds benchmark suite four times: on the Spark 2 and Spark 3 modules, each with SparkRunner and with SparkStructuredStreamingRunner.
+NoPhraseTriggeringPostCommitBuilder.postCommitJob('beam_PostCommit_Java_Tpcds_Spark',
+    'Spark Runner Tpcds Tests', this) {
+  description('Runs the Tpcds suite on the Spark runner.')
+
+  // Set common parameters (NOTE(review): 240 is presumably the timeout in minutes and 'beam-perf' the node label; confirm in CommonJobProperties).
+  commonJobProperties.setTopLevelMainJobProperties(delegate, 'master', 240, true, 'beam-perf')
+  InfluxDBCredentialsHelper.useCredentials(delegate)
+
+  // Gradle goals for this job: every step echoes a banner and runs the same task; only -Ptpcds.runner and --runner differ.
+  steps {
+    shell('echo "*** RUN TPCDS IN BATCH MODE USING SPARK 2 RUNNER ***"')
+    gradle {
+      rootBuildScriptDir(commonJobProperties.checkoutDir)
+      tasks(':sdks:java:testing:tpcds:run')
+      commonJobProperties.setGradleSwitches(delegate)
+      switches('-Ptpcds.runner=":runners:spark:2"' +
+          ' -Ptpcds.args="' +
+          [
+            '--dataSize=1GB',
+            '--sourceType=PARQUET',
+            '--dataDirectory=gs://beam-tpcds/datasets/parquet/partitioned',
+            '--resultsDirectory=gs://beam-tpcds/results/',
+            '--tpcParallel=1',
+            '--runner=SparkRunner',
+            '--queries=3'
+          ].join(' '))
+    }
+    shell('echo "*** RUN TPCDS IN BATCH MODE USING SPARK 3 RUNNER ***"')
+    gradle {
+      rootBuildScriptDir(commonJobProperties.checkoutDir)
+      tasks(':sdks:java:testing:tpcds:run')
+      commonJobProperties.setGradleSwitches(delegate)
+      switches('-Ptpcds.runner=":runners:spark:3"' +
+          ' -Ptpcds.args="' +
+          [
+            '--dataSize=1GB',
+            '--sourceType=PARQUET',
+            '--dataDirectory=gs://beam-tpcds/datasets/parquet/partitioned',
+            '--resultsDirectory=gs://beam-tpcds/results/',
+            '--tpcParallel=1',
+            '--runner=SparkRunner',
+            '--queries=3'
+          ].join(' '))
+    }
+
+    shell('echo "*** RUN TPCDS IN BATCH MODE USING SPARK 2 STRUCTURED STREAMING RUNNER ***"')
+    gradle {
+      rootBuildScriptDir(commonJobProperties.checkoutDir)
+      tasks(':sdks:java:testing:tpcds:run')
+      commonJobProperties.setGradleSwitches(delegate)
+      switches('-Ptpcds.runner=":runners:spark:2"' +
+          ' -Ptpcds.args="' +
+          [
+            '--dataSize=1GB',
+            '--sourceType=PARQUET',
+            '--dataDirectory=gs://beam-tpcds/datasets/parquet/partitioned',
+            '--resultsDirectory=gs://beam-tpcds/results/',
+            '--tpcParallel=1',
+            '--runner=SparkStructuredStreamingRunner', // must match the banner; SparkRunner here would only repeat the Spark 2 batch step
+            '--queries=3'
+          ].join(' '))
+    }
+    shell('echo "*** RUN TPCDS IN BATCH MODE USING SPARK 3 STRUCTURED STREAMING RUNNER ***"')
+    gradle {
+      rootBuildScriptDir(commonJobProperties.checkoutDir)
+      tasks(':sdks:java:testing:tpcds:run')
+      commonJobProperties.setGradleSwitches(delegate)
+      switches('-Ptpcds.runner=":runners:spark:3"' +
+          ' -Ptpcds.args="' +
+          [
+            '--dataSize=1GB',
+            '--sourceType=PARQUET',
+            '--dataDirectory=gs://beam-tpcds/datasets/parquet/partitioned',
+            '--resultsDirectory=gs://beam-tpcds/results/',
+            '--tpcParallel=1',
+            '--runner=SparkStructuredStreamingRunner', // must match the banner; SparkRunner here would only repeat the Spark 3 batch step
+            '--queries=3'
+          ].join(' '))
+    }
+  }
+}
+
+PhraseTriggeringPostCommitBuilder.postCommitJob('beam_PostCommit_Java_Tpcds_Spark',
+    'Run Spark Runner Tpcds Tests', 'Spark Runner Tpcds Tests', this) {
+  // On-demand pull-request variant (NOTE(review): 'Run Spark Runner Tpcds Tests' is presumably the trigger phrase; confirm in PhraseTriggeringPostCommitBuilder).
+  description('Runs the Tpcds suite on the Spark runner against a Pull Request, on demand.')
+
+  // Set common parameters (NOTE(review): 240 is presumably the timeout in minutes and 'beam-perf' the node label; confirm in CommonJobProperties).
+  commonJobProperties.setTopLevelMainJobProperties(delegate, 'master', 240, true, 'beam-perf')
+  InfluxDBCredentialsHelper.useCredentials(delegate)
+
+  // Gradle goals for this job: every step echoes a banner and runs the same task; only -Ptpcds.runner and --runner differ.
+  steps {
+    shell('echo "*** RUN TPCDS IN BATCH MODE USING SPARK 2 RUNNER ***"')
+    gradle {
+      rootBuildScriptDir(commonJobProperties.checkoutDir)
+      tasks(':sdks:java:testing:tpcds:run')
+      commonJobProperties.setGradleSwitches(delegate)
+      switches('-Ptpcds.runner=":runners:spark:2"' +
+          ' -Ptpcds.args="' +
+          [
+            '--dataSize=1GB',
+            '--sourceType=PARQUET',
+            '--dataDirectory=gs://beam-tpcds/datasets/parquet/partitioned',
+            '--resultsDirectory=gs://beam-tpcds/results/',
+            '--tpcParallel=1',
+            '--runner=SparkRunner',
+            '--queries=3'
+          ].join(' '))
+    }
+    shell('echo "*** RUN TPCDS IN BATCH MODE USING SPARK 3 RUNNER ***"')
+    gradle {
+      rootBuildScriptDir(commonJobProperties.checkoutDir)
+      tasks(':sdks:java:testing:tpcds:run')
+      commonJobProperties.setGradleSwitches(delegate)
+      switches('-Ptpcds.runner=":runners:spark:3"' +
+          ' -Ptpcds.args="' +
+          [
+            '--dataSize=1GB',
+            '--sourceType=PARQUET',
+            '--dataDirectory=gs://beam-tpcds/datasets/parquet/partitioned',
+            '--resultsDirectory=gs://beam-tpcds/results/',
+            '--tpcParallel=1',
+            '--runner=SparkRunner',
+            '--queries=3'
+          ].join(' '))
+    }
+
+    shell('echo "*** RUN TPCDS IN BATCH MODE USING SPARK 2 STRUCTURED STREAMING RUNNER ***"')
+    gradle {
+      rootBuildScriptDir(commonJobProperties.checkoutDir)
+      tasks(':sdks:java:testing:tpcds:run')
+      commonJobProperties.setGradleSwitches(delegate)
+      switches('-Ptpcds.runner=":runners:spark:2"' +
+          ' -Ptpcds.args="' +
+          [
+            '--dataSize=1GB',
+            '--sourceType=PARQUET',
+            '--dataDirectory=gs://beam-tpcds/datasets/parquet/partitioned',
+            '--resultsDirectory=gs://beam-tpcds/results/',
+            '--tpcParallel=1',
+            '--runner=SparkStructuredStreamingRunner', // must match the banner; SparkRunner here would only repeat the Spark 2 batch step
+            '--queries=3'
+          ].join(' '))
+    }
+    shell('echo "*** RUN TPCDS IN BATCH MODE USING SPARK 3 STRUCTURED STREAMING RUNNER ***"')
+    gradle {
+      rootBuildScriptDir(commonJobProperties.checkoutDir)
+      tasks(':sdks:java:testing:tpcds:run')
+      commonJobProperties.setGradleSwitches(delegate)
+      switches('-Ptpcds.runner=":runners:spark:3"' +
+          ' -Ptpcds.args="' +
+          [
+            '--dataSize=1GB',
+            '--sourceType=PARQUET',
+            '--dataDirectory=gs://beam-tpcds/datasets/parquet/partitioned',
+            '--resultsDirectory=gs://beam-tpcds/results/',
+            '--tpcParallel=1',
+            '--runner=SparkStructuredStreamingRunner', // must match the banner; SparkRunner here would only repeat the Spark 3 batch step
+            '--queries=3'
+          ].join(' '))
+    }
+  }
+}
diff --git a/sdks/java/testing/tpcds/src/main/java/org/apache/beam/sdk/tpcds/TpcdsParametersReader.java b/sdks/java/testing/tpcds/src/main/java/org/apache/beam/sdk/tpcds/TpcdsParametersReader.java
index 4a2ed544c96..09f6376344f 100644
--- a/sdks/java/testing/tpcds/src/main/java/org/apache/beam/sdk/tpcds/TpcdsParametersReader.java
+++ b/sdks/java/testing/tpcds/src/main/java/org/apache/beam/sdk/tpcds/TpcdsParametersReader.java
@@ -30,7 +30,8 @@ public class TpcdsParametersReader {
/** The data sizes that have been supported. */
private static final Set<String> supportedDataSizes =
- Stream.of("1G", "10G", "100G").collect(Collectors.toCollection(HashSet::new));
+ Stream.of("1G", "1GB", "10G", "10GB", "100G", "100GB")
+ .collect(Collectors.toCollection(HashSet::new));
private static final String QUERY_PREFIX = "query";