Posted to commits@beam.apache.org by mw...@apache.org on 2020/06/23 13:12:48 UTC

[beam] branch master updated: [BEAM-10258] Fix Dataflow-based Jenkins jobs (#12048)

This is an automated email from the ASF dual-hosted git repository.

mwalenia pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
     new 4c24f23  [BEAM-10258] Fix Dataflow-based Jenkins jobs (#12048)
4c24f23 is described below

commit 4c24f230d57049793218e756cb8bb213e906aaaa
Author: Michal Walenia <32...@users.noreply.github.com>
AuthorDate: Tue Jun 23 15:12:24 2020 +0200

    [BEAM-10258] Fix Dataflow-based Jenkins jobs (#12048)
    
    [BEAM-10258] Add TestDataflowRunner as separate option to CommonTestProperties.
    
    Fix randomized UUID usage in Python PubSubIO tests.
    
    Co-authored-by: Kamil Wasilewski <ka...@polidea.com>
---
 .test-infra/jenkins/CommonTestProperties.groovy             | 10 +++++++---
 .../jenkins/job_PerformanceTests_PubsubIO_Python.groovy     |  6 ++----
 sdks/python/apache_beam/io/gcp/pubsub_io_perf_test.py       | 13 +++++++++----
 3 files changed, 18 insertions(+), 11 deletions(-)

diff --git a/.test-infra/jenkins/CommonTestProperties.groovy b/.test-infra/jenkins/CommonTestProperties.groovy
index f46323a..b78901b 100644
--- a/.test-infra/jenkins/CommonTestProperties.groovy
+++ b/.test-infra/jenkins/CommonTestProperties.groovy
@@ -26,7 +26,8 @@ class CommonTestProperties {
     }
 
     enum Runner {
-        DATAFLOW("TestDataflowRunner"),
+        DATAFLOW("DataflowRunner"),
+        TEST_DATAFLOW("TestDataflowRunner"),
         SPARK("SparkRunner"),
         SPARK_STRUCTURED_STREAMING("SparkStructuredStreamingRunner"),
         FLINK("FlinkRunner"),
@@ -36,18 +37,21 @@ class CommonTestProperties {
         def RUNNER_DEPENDENCY_MAP = [
                 JAVA: [
                         DATAFLOW: ":runners:google-cloud-dataflow-java",
+                        TEST_DATAFLOW: ":runners:google-cloud-dataflow-java",
                         SPARK: ":runners:spark",
                         SPARK_STRUCTURED_STREAMING: ":runners:spark",
                         FLINK: ":runners:flink:1.10",
                         DIRECT: ":runners:direct-java"
                 ],
                 PYTHON: [
-                        DATAFLOW: "TestDataflowRunner",
+                        DATAFLOW: "DataflowRunner",
+                        TEST_DATAFLOW: "TestDataflowRunner",
                         DIRECT: "DirectRunner",
                         PORTABLE: "PortableRunner"
                 ],
                 PYTHON_37: [
-                        DATAFLOW: "TestDataflowRunner",
+                        DATAFLOW: "DataflowRunner",
+                        TEST_DATAFLOW: "TestDataflowRunner",
                         DIRECT: "DirectRunner",
                         PORTABLE: "PortableRunner"
                 ]
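
For context, a minimal Python sketch (not part of the commit) of the mapping that the updated Runner enum and RUNNER_DEPENDENCY_MAP now express for the Python SDK: the plain Dataflow runner and its test wrapper are separate keys, so a job chooses explicitly which --runner value it passes. The runner_flag helper below is illustrative only.

# Illustrative only: mirrors the Python-SDK entries of the updated map,
# with DATAFLOW and TEST_DATAFLOW now pointing at different runner names.
PYTHON_RUNNER_MAP = {
    "DATAFLOW": "DataflowRunner",           # regular Dataflow runs
    "TEST_DATAFLOW": "TestDataflowRunner",  # test wrapper used by the perf jobs
    "DIRECT": "DirectRunner",
    "PORTABLE": "PortableRunner",
}

def runner_flag(key):
    """Return the --runner flag a job definition would end up passing."""
    return "--runner=" + PYTHON_RUNNER_MAP[key]

# runner_flag("TEST_DATAFLOW") -> '--runner=TestDataflowRunner'
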
diff --git a/.test-infra/jenkins/job_PerformanceTests_PubsubIO_Python.groovy b/.test-infra/jenkins/job_PerformanceTests_PubsubIO_Python.groovy
index f08e53e..e0b5e2f 100644
--- a/.test-infra/jenkins/job_PerformanceTests_PubsubIO_Python.groovy
+++ b/.test-infra/jenkins/job_PerformanceTests_PubsubIO_Python.groovy
@@ -24,14 +24,12 @@ import static java.util.UUID.randomUUID
 
 def now = new Date().format("MMddHHmmss", TimeZone.getTimeZone('UTC'))
 
-def pubsubId = randomUUID()
-def pubsubNamespace = "pubsub_io_performance_${pubsubId}"
 def withDataflowWorkerJar = true
 
 def psio_test = [
         title          : 'PubsubIO Write Performance Test Python 2GB',
         test           : 'apache_beam.io.gcp.pubsub_io_perf_test',
-        runner         : CommonTestProperties.Runner.DATAFLOW,
+        runner         : CommonTestProperties.Runner.TEST_DATAFLOW,
         pipelineOptions: [
                 job_name                  : 'performance-tests-psio-python-2gb' + now,
                 project                   : 'apache-beam-testing',
@@ -49,7 +47,7 @@ def psio_test = [
                         '"value_size": 1024}\'',
                 num_workers               : 5,
                 autoscaling_algorithm     : 'NONE',  // Disable autoscale the worker pool.
-                pubsub_namespace          : pubsubNamespace,
+                pubsub_namespace_prefix   : 'pubsub_io_performance_',
                 wait_until_finish_duration: 1000 * 60 * 10, // in milliseconds
         ]
 ]
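
A hedged sketch of what the job's pipelineOptions map amounts to once rendered as command-line flags (the actual rendering is done by the shared Jenkins helpers, which this sketch does not reproduce). Note the new pubsub_namespace_prefix option in place of the old precomputed pubsub_namespace:

# Sketch only: option names and values are taken from the psio_test map above;
# the real flag rendering happens in the Jenkins test infrastructure.
pipeline_options = {
    "project": "apache-beam-testing",
    "pubsub_namespace_prefix": "pubsub_io_performance_",
    "wait_until_finish_duration": 1000 * 60 * 10,  # milliseconds
}
flags = ["--%s=%s" % (name, value) for name, value in pipeline_options.items()]
# e.g. ['--project=apache-beam-testing',
#       '--pubsub_namespace_prefix=pubsub_io_performance_',
#       '--wait_until_finish_duration=600000']
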
diff --git a/sdks/python/apache_beam/io/gcp/pubsub_io_perf_test.py b/sdks/python/apache_beam/io/gcp/pubsub_io_perf_test.py
index a0457c0..24bc9b9 100644
--- a/sdks/python/apache_beam/io/gcp/pubsub_io_perf_test.py
+++ b/sdks/python/apache_beam/io/gcp/pubsub_io_perf_test.py
@@ -30,7 +30,7 @@ python -m apache_beam.io.gcp.pubsub_io_perf_test \
     --temp_location=gs://<BUCKET_NAME>/tmp
     --staging_location=gs://<BUCKET_NAME>/staging
     --wait_until_finish_duration=<TIME_IN_MS>
-    --pubsub_namespace=<PUBSUB_NAMESPACE>
+    --pubsub_namespace_prefix=<PUBSUB_NAMESPACE_PREFIX>
     --publish_to_big_query=<OPTIONAL><true/false>
     --metrics_dataset=<OPTIONAL>
     --metrics_table=<OPTIONAL>
@@ -80,15 +80,17 @@ MATCHER_PULL_TIMEOUT = 60 * 5
 
 class PubsubIOPerfTest(LoadTest):
   def _setup_env(self):
-    if not self.pipeline.get_option('pubsub_namespace'):
-      logging.error('--pubsub_namespace argument is required.')
+    if not self.pipeline.get_option('pubsub_namespace_prefix'):
+      logging.error('--pubsub_namespace_prefix argument is required.')
       sys.exit(1)
     if not self.pipeline.get_option('wait_until_finish_duration'):
       logging.error('--wait_until_finish_duration argument is required.')
       sys.exit(1)
 
     self.num_of_messages = int(self.input_options.get('num_records'))
-    self.pubsub_namespace = self.pipeline.get_option('pubsub_namespace')
+    pubsub_namespace_prefix = self.pipeline.get_option(
+        'pubsub_namespace_prefix')
+    self.pubsub_namespace = pubsub_namespace_prefix + unique_id
 
   def _setup_pubsub(self):
     self.pub_client = pubsub.PublisherClient()
@@ -215,6 +217,9 @@ class PubsubReadPerfTest(PubsubIOPerfTest):
 
 
 if __name__ == '__main__':
+  import uuid
+  unique_id = str(uuid.uuid4())
+
   logging.basicConfig(level=logging.INFO)
   PubsubWritePerfTest().run()
   PubsubReadPerfTest().run()
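
To summarize the Python-side fix, a minimal standalone sketch using the names from the diff above: the Jenkins job now supplies only a stable prefix, and each test process appends its own uuid4 at runtime, so runs generated from the same Jenkins seed job no longer end up sharing one fixed Pub/Sub namespace.

# Standalone sketch of the namespace construction introduced above.
import uuid

pubsub_namespace_prefix = 'pubsub_io_performance_'  # from --pubsub_namespace_prefix
unique_id = str(uuid.uuid4())  # generated once per test run in __main__
pubsub_namespace = pubsub_namespace_prefix + unique_id
print(pubsub_namespace)  # e.g. pubsub_io_performance_6f0e4c9a-...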