You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@beam.apache.org by ec...@apache.org on 2022/05/10 13:17:06 UTC

[beam] branch master updated: [BEAM-12918] Add PostCommit_Java_Tpcds_Spark job

This is an automated email from the ASF dual-hosted git repository.

echauchot pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
     new c654733b45e [BEAM-12918] Add PostCommit_Java_Tpcds_Spark job
     new 5c21fbccec5 Merge pull request #15679 from aromanenko-dev/BEAM-12918-tpcds-jenkins
c654733b45e is described below

commit c654733b45e44ee88af3498133623aa056de6e73
Author: Alexey Romanenko <ar...@gmail.com>
AuthorDate: Thu Oct 7 18:28:12 2021 +0200

    [BEAM-12918] Add PostCommit_Java_Tpcds_Spark job
---
 .../jenkins/job_PostCommit_Java_Tpcds_Spark.groovy | 188 +++++++++++++++++++++
 .../beam/sdk/tpcds/TpcdsParametersReader.java      |   3 +-
 2 files changed, 190 insertions(+), 1 deletion(-)

diff --git a/.test-infra/jenkins/job_PostCommit_Java_Tpcds_Spark.groovy b/.test-infra/jenkins/job_PostCommit_Java_Tpcds_Spark.groovy
new file mode 100644
index 00000000000..89d2169cd8b
--- /dev/null
+++ b/.test-infra/jenkins/job_PostCommit_Java_Tpcds_Spark.groovy
@@ -0,0 +1,188 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import CommonJobProperties as commonJobProperties
+import NoPhraseTriggeringPostCommitBuilder
+import PhraseTriggeringPostCommitBuilder
+import InfluxDBCredentialsHelper
+
+// Post-commit job: runs the Tpcds benchmark (query 3, 1GB Parquet data) on the Spark 2 and Spark 3 runners, classic and Structured Streaming.
+NoPhraseTriggeringPostCommitBuilder.postCommitJob('beam_PostCommit_Java_Tpcds_Spark',
+    'Spark Runner Tpcds Tests', this) {
+      description('Runs the Tpcds suite on the Spark runner.')
+
+      // Set common parameters (240 is presumably the timeout in minutes and 'beam-perf' the node label -- confirm in CommonJobProperties).
+      commonJobProperties.setTopLevelMainJobProperties(delegate, 'master', 240, true, 'beam-perf')
+      InfluxDBCredentialsHelper.useCredentials(delegate)
+
+      // Gradle goals for this job: one tpcds:run per runner flavour. Each -Ptpcds.args value opens a double quote that the last list element closes.
+      steps {
+        shell('echo "*** RUN TPCDS IN BATCH MODE USING SPARK 2 RUNNER ***"')
+        gradle {
+          rootBuildScriptDir(commonJobProperties.checkoutDir)
+          tasks(':sdks:java:testing:tpcds:run')
+          commonJobProperties.setGradleSwitches(delegate)
+          switches('-Ptpcds.runner=":runners:spark:2"' +
+              ' -Ptpcds.args="' +
+              [
+                '--dataSize=1GB',
+                '--sourceType=PARQUET',
+                '--dataDirectory=gs://beam-tpcds/datasets/parquet/partitioned',
+                '--resultsDirectory=gs://beam-tpcds/results/',
+                '--tpcParallel=1',
+                '--runner=SparkRunner',
+                '--queries=3"' // trailing quote closes -Ptpcds.args="
+              ].join(' '))
+        }
+        shell('echo "*** RUN TPCDS IN BATCH MODE USING SPARK 3 RUNNER ***"')
+        gradle {
+          rootBuildScriptDir(commonJobProperties.checkoutDir)
+          tasks(':sdks:java:testing:tpcds:run')
+          commonJobProperties.setGradleSwitches(delegate)
+          switches('-Ptpcds.runner=":runners:spark:3"' +
+              ' -Ptpcds.args="' +
+              [
+                '--dataSize=1GB',
+                '--sourceType=PARQUET',
+                '--dataDirectory=gs://beam-tpcds/datasets/parquet/partitioned',
+                '--resultsDirectory=gs://beam-tpcds/results/',
+                '--tpcParallel=1',
+                '--runner=SparkRunner',
+                '--queries=3"' // trailing quote closes -Ptpcds.args="
+              ].join(' '))
+        }
+
+        shell('echo "*** RUN TPCDS IN BATCH MODE USING SPARK 2 STRUCTURED STREAMING RUNNER ***"')
+        gradle {
+          rootBuildScriptDir(commonJobProperties.checkoutDir)
+          tasks(':sdks:java:testing:tpcds:run')
+          commonJobProperties.setGradleSwitches(delegate)
+          switches('-Ptpcds.runner=":runners:spark:2"' +
+              ' -Ptpcds.args="' +
+              [
+                '--dataSize=1GB',
+                '--sourceType=PARQUET',
+                '--dataDirectory=gs://beam-tpcds/datasets/parquet/partitioned',
+                '--resultsDirectory=gs://beam-tpcds/results/',
+                '--tpcParallel=1',
+                '--runner=SparkStructuredStreamingRunner', // not SparkRunner: this step must exercise the Structured Streaming runner
+                '--queries=3"' // trailing quote closes -Ptpcds.args="
+              ].join(' '))
+        }
+        shell('echo "*** RUN TPCDS IN BATCH MODE USING SPARK 3 STRUCTURED STREAMING RUNNER ***"')
+        gradle {
+          rootBuildScriptDir(commonJobProperties.checkoutDir)
+          tasks(':sdks:java:testing:tpcds:run')
+          commonJobProperties.setGradleSwitches(delegate)
+          switches('-Ptpcds.runner=":runners:spark:3"' +
+              ' -Ptpcds.args="' +
+              [
+                '--dataSize=1GB',
+                '--sourceType=PARQUET',
+                '--dataDirectory=gs://beam-tpcds/datasets/parquet/partitioned',
+                '--resultsDirectory=gs://beam-tpcds/results/',
+                '--tpcParallel=1',
+                '--runner=SparkStructuredStreamingRunner', // not SparkRunner: this step must exercise the Structured Streaming runner
+                '--queries=3"' // trailing quote closes -Ptpcds.args="
+              ].join(' '))
+        }
+      }
+    }
+
+PhraseTriggeringPostCommitBuilder.postCommitJob('beam_PostCommit_Java_Tpcds_Spark',
+    'Run Spark Runner Tpcds Tests', 'Spark Runner Tpcds Tests', this) {
+      // On-demand variant: runs Tpcds query 3 (1GB Parquet data) on the Spark 2 and Spark 3 runners, classic and Structured Streaming.
+      description('Runs the Tpcds suite on the Spark runner against a Pull Request, on demand.')
+
+      // Set common parameters (240 is presumably the timeout in minutes and 'beam-perf' the node label -- confirm in CommonJobProperties).
+      commonJobProperties.setTopLevelMainJobProperties(delegate, 'master', 240, true, 'beam-perf')
+      InfluxDBCredentialsHelper.useCredentials(delegate)
+
+      // Gradle goals for this job: one tpcds:run per runner flavour. Each -Ptpcds.args value opens a double quote that the last list element closes.
+      steps {
+        shell('echo "*** RUN TPCDS IN BATCH MODE USING SPARK 2 RUNNER ***"')
+        gradle {
+          rootBuildScriptDir(commonJobProperties.checkoutDir)
+          tasks(':sdks:java:testing:tpcds:run')
+          commonJobProperties.setGradleSwitches(delegate)
+          switches('-Ptpcds.runner=":runners:spark:2"' +
+              ' -Ptpcds.args="' +
+              [
+                '--dataSize=1GB',
+                '--sourceType=PARQUET',
+                '--dataDirectory=gs://beam-tpcds/datasets/parquet/partitioned',
+                '--resultsDirectory=gs://beam-tpcds/results/',
+                '--tpcParallel=1',
+                '--runner=SparkRunner',
+                '--queries=3"' // trailing quote closes -Ptpcds.args="
+              ].join(' '))
+        }
+        shell('echo "*** RUN TPCDS IN BATCH MODE USING SPARK 3 RUNNER ***"')
+        gradle {
+          rootBuildScriptDir(commonJobProperties.checkoutDir)
+          tasks(':sdks:java:testing:tpcds:run')
+          commonJobProperties.setGradleSwitches(delegate)
+          switches('-Ptpcds.runner=":runners:spark:3"' +
+              ' -Ptpcds.args="' +
+              [
+                '--dataSize=1GB',
+                '--sourceType=PARQUET',
+                '--dataDirectory=gs://beam-tpcds/datasets/parquet/partitioned',
+                '--resultsDirectory=gs://beam-tpcds/results/',
+                '--tpcParallel=1',
+                '--runner=SparkRunner',
+                '--queries=3"' // trailing quote closes -Ptpcds.args="
+              ].join(' '))
+        }
+
+        shell('echo "*** RUN TPCDS IN BATCH MODE USING SPARK 2 STRUCTURED STREAMING RUNNER ***"')
+        gradle {
+          rootBuildScriptDir(commonJobProperties.checkoutDir)
+          tasks(':sdks:java:testing:tpcds:run')
+          commonJobProperties.setGradleSwitches(delegate)
+          switches('-Ptpcds.runner=":runners:spark:2"' +
+              ' -Ptpcds.args="' +
+              [
+                '--dataSize=1GB',
+                '--sourceType=PARQUET',
+                '--dataDirectory=gs://beam-tpcds/datasets/parquet/partitioned',
+                '--resultsDirectory=gs://beam-tpcds/results/',
+                '--tpcParallel=1',
+                '--runner=SparkStructuredStreamingRunner', // not SparkRunner: this step must exercise the Structured Streaming runner
+                '--queries=3"' // trailing quote closes -Ptpcds.args="
+              ].join(' '))
+        }
+        shell('echo "*** RUN TPCDS IN BATCH MODE USING SPARK 3 STRUCTURED STREAMING RUNNER ***"')
+        gradle {
+          rootBuildScriptDir(commonJobProperties.checkoutDir)
+          tasks(':sdks:java:testing:tpcds:run')
+          commonJobProperties.setGradleSwitches(delegate)
+          switches('-Ptpcds.runner=":runners:spark:3"' +
+              ' -Ptpcds.args="' +
+              [
+                '--dataSize=1GB',
+                '--sourceType=PARQUET',
+                '--dataDirectory=gs://beam-tpcds/datasets/parquet/partitioned',
+                '--resultsDirectory=gs://beam-tpcds/results/',
+                '--tpcParallel=1',
+                '--runner=SparkStructuredStreamingRunner', // not SparkRunner: this step must exercise the Structured Streaming runner
+                '--queries=3"' // trailing quote closes -Ptpcds.args="
+              ].join(' '))
+        }
+      }
+    }
diff --git a/sdks/java/testing/tpcds/src/main/java/org/apache/beam/sdk/tpcds/TpcdsParametersReader.java b/sdks/java/testing/tpcds/src/main/java/org/apache/beam/sdk/tpcds/TpcdsParametersReader.java
index 4a2ed544c96..09f6376344f 100644
--- a/sdks/java/testing/tpcds/src/main/java/org/apache/beam/sdk/tpcds/TpcdsParametersReader.java
+++ b/sdks/java/testing/tpcds/src/main/java/org/apache/beam/sdk/tpcds/TpcdsParametersReader.java
@@ -30,7 +30,8 @@ public class TpcdsParametersReader {
 
   /** The data sizes that have been supported. */
   private static final Set<String> supportedDataSizes =
-      Stream.of("1G", "10G", "100G").collect(Collectors.toCollection(HashSet::new));
+      Stream.of("1G", "1GB", "10G", "10GB", "100G", "100GB")
+          .collect(Collectors.toCollection(HashSet::new));
 
   private static final String QUERY_PREFIX = "query";