Posted to commits@beam.apache.org by al...@apache.org on 2018/07/26 01:26:01 UTC

[beam] branch master updated: [BEAM-4859] Enable Python VR tests in streaming in postcommit task (#6053)

This is an automated email from the ASF dual-hosted git repository.

altay pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
     new 31fea5d  [BEAM-4859] Enable Python VR tests in streaming in postcommit task (#6053)
31fea5d is described below

commit 31fea5d86444fe879afba43cf199e7cb7a7a5e73
Author: Mark Liu <ma...@users.noreply.github.com>
AuthorDate: Wed Jul 25 18:25:58 2018 -0700

    [BEAM-4859] Enable Python VR tests in streaming in postcommit task (#6053)
    
    * [BEAM-4859] Enable Python VR tests in streaming in Jenkins postcommit
    * Increase the integration test timeout since streaming tests take longer
    * Disable test_read_metrics in streaming due to BEAM-3544
    * Disable test_multi_valued_singleton_side_input in streaming due to BEAM-5025
---
 ...stCommit_Python_ValidatesRunner_Dataflow.groovy |  3 +-
 .../apache_beam/transforms/ptransform_test.py      |  4 +-
 .../apache_beam/transforms/sideinputs_test.py      |  4 +-
 sdks/python/build.gradle                           | 20 ++++--
 sdks/python/scripts/run_postcommit.sh              | 84 ++++++++++++++--------
 5 files changed, 78 insertions(+), 37 deletions(-)

diff --git a/.test-infra/jenkins/job_PostCommit_Python_ValidatesRunner_Dataflow.groovy b/.test-infra/jenkins/job_PostCommit_Python_ValidatesRunner_Dataflow.groovy
index 6ebda53..cfdd01b 100644
--- a/.test-infra/jenkins/job_PostCommit_Python_ValidatesRunner_Dataflow.groovy
+++ b/.test-infra/jenkins/job_PostCommit_Python_ValidatesRunner_Dataflow.groovy
@@ -32,7 +32,8 @@ PostcommitJobBuilder.postCommitJob('beam_PostCommit_Py_VR_Dataflow', 'Run Python
   steps {
     gradle {
       rootBuildScriptDir(commonJobProperties.checkoutDir)
-      tasks(':beam-sdks-python:validatesRunnerTests')
+      tasks(':beam-sdks-python:validatesRunnerBatchTests')
+      tasks(':beam-sdks-python:validatesRunnerStreamingTests')
       commonJobProperties.setGradleSwitches(delegate)
     }
   }
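
With this change the Jenkins job invokes two Gradle tasks instead of one, so the batch and
streaming ValidatesRunner suites run as separate steps. A minimal local equivalent,
assuming the standard Gradle wrapper at the repository root, would be:

  # Run the batch and streaming Python ValidatesRunner suites, as the post-commit job now does
  ./gradlew :beam-sdks-python:validatesRunnerBatchTests
  ./gradlew :beam-sdks-python:validatesRunnerStreamingTests
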
diff --git a/sdks/python/apache_beam/transforms/ptransform_test.py b/sdks/python/apache_beam/transforms/ptransform_test.py
index e23fad7..907ee04 100644
--- a/sdks/python/apache_beam/transforms/ptransform_test.py
+++ b/sdks/python/apache_beam/transforms/ptransform_test.py
@@ -180,7 +180,9 @@ class PTransformTest(unittest.TestCase):
     assert_that(r2.m, equal_to([3, 4, 5]), label='r2')
     pipeline.run()
 
-  @attr('ValidatesRunner')
+  # TODO(BEAM-3544): Disable this test in streaming temporarily.
+  # Remove sickbay-streaming tag after it's resolved.
+  @attr('ValidatesRunner', 'sickbay-streaming')
   def test_read_metrics(self):
     from apache_beam.io.utils import CountingSource
 
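The sickbay-streaming tag works through nose's attribute selection: a comma-separated
--attr expression requires every listed attribute, and a leading ! excludes one. As a
rough illustration, the streaming suite added in this commit ends up running something
like the following, which skips any test carrying the sickbay tag:

  # Select ValidatesRunner tests while excluding anything tagged sickbay-streaming
  python setup.py nosetests --attr 'ValidatesRunner,!sickbay-streaming'
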
diff --git a/sdks/python/apache_beam/transforms/sideinputs_test.py b/sdks/python/apache_beam/transforms/sideinputs_test.py
index 1d58834..6b93b8e 100644
--- a/sdks/python/apache_beam/transforms/sideinputs_test.py
+++ b/sdks/python/apache_beam/transforms/sideinputs_test.py
@@ -146,7 +146,9 @@ class SideInputsTest(unittest.TestCase):
     assert_that(result, equal_to([(1, 'empty'), (2, 'empty')]))
     pipeline.run()
 
-  @attr('ValidatesRunner')
+  # TODO(BEAM-5025): Disable this test in streaming temporarily.
+  # Remove sickbay-streaming tag after it's fixed.
+  @attr('ValidatesRunner', 'sickbay-streaming')
   def test_multi_valued_singleton_side_input(self):
     pipeline = self.create_pipeline()
     pcol = pipeline | 'start' >> beam.Create([1, 2])
diff --git a/sdks/python/build.gradle b/sdks/python/build.gradle
index a227af5..0cbd61d 100644
--- a/sdks/python/build.gradle
+++ b/sdks/python/build.gradle
@@ -223,20 +223,30 @@ task portableWordCount(dependsOn: 'installGcpTest') {
   }
 }
 
-task postCommitVRTests(dependsOn: 'installGcpTest') {
+task postCommitITTests(dependsOn: 'installGcpTest') {
   doLast {
     exec {
       executable 'sh'
-      args '-c', ". ${envdir}/bin/activate && ./scripts/run_postcommit.sh IT"
+      args '-c', ". ${envdir}/bin/activate && ./scripts/run_postcommit.sh IT batch"
     }
   }
 }
 
-task validatesRunnerTests(dependsOn: 'installGcpTest') {
+task validatesRunnerBatchTests(dependsOn: 'installGcpTest') {
   doLast {
     exec {
       executable 'sh'
-      args '-c', ". ${envdir}/bin/activate && ./scripts/run_postcommit.sh ValidatesRunner"
+      args '-c', ". ${envdir}/bin/activate && ./scripts/run_postcommit.sh ValidatesRunner batch"
+    }
+  }
+}
+
+task validatesRunnerStreamingTests(dependsOn: 'installGcpTest') {
+  doLast {
+    exec {
+      executable 'sh'
+      // TODO(BEAM-3544,BEAM-5025): Disable tests with sickbay-streaming tag.
+      args '-c', ". ${envdir}/bin/activate && ./scripts/run_postcommit.sh ValidatesRunner,'!sickbay-streaming' streaming"
     }
   }
 }
@@ -254,7 +264,7 @@ task postCommit() {
   dependsOn "preCommit"
   dependsOn "localWordCount"
   dependsOn "hdfsIntegrationTest"
-  dependsOn "postCommitVRTests"
+  dependsOn "postCommitITTests"
 }
 
 task dependencyUpdates(dependsOn: ':dependencyUpdates') {
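
Each new task activates the test virtualenv and calls the post-commit script with a test
type and a pipeline type. Stripped of the Gradle wrapping, and assuming the virtualenv
created by installGcpTest is already active, the two ValidatesRunner tasks amount to:

  # What validatesRunnerBatchTests and validatesRunnerStreamingTests effectively execute
  ./scripts/run_postcommit.sh ValidatesRunner batch
  ./scripts/run_postcommit.sh ValidatesRunner,'!sickbay-streaming' streaming
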
diff --git a/sdks/python/scripts/run_postcommit.sh b/sdks/python/scripts/run_postcommit.sh
index d6e31aa..c13ef50 100755
--- a/sdks/python/scripts/run_postcommit.sh
+++ b/sdks/python/scripts/run_postcommit.sh
@@ -16,6 +16,8 @@
 #    limitations under the License.
 #
 
+###########################################################################
+#
 # This script will be run by Jenkins as a post commit test. In order to run
 # locally make the following changes:
 #
@@ -23,6 +25,26 @@
 # PROJECT      -> Project name to use for service jobs.
 #
 
+
+###########################################################################
+# Usage check.
+
+if (( $# < 2 )); then
+  printf "Usage: \n$> ./scripts/run_postcommit.sh <test_type> <pipeline_type> [gcp_location] [gcp_project]"
+  printf "\n\ttest_type: [required] ValidatesRunner or IT"
+  printf "\n\tpipeline_type: [required] streaming or batch"
+  printf "\n\tgcp_location: [optional] A gs:// path to stage artifacts and output results"
+  printf "\n\tgcp_project: [optional] A GCP project to run Dataflow pipelines\n"
+  exit 1
+fi
+
+set -e
+set -v
+
+
+###########################################################################
+# Build tarball and set pipeline options.
+
 # Check that the script is running in a known directory.
 if [[ $PWD != *sdks/python* ]]; then
   echo 'Unable to locate Apache Beam Python SDK root directory'
@@ -34,23 +56,10 @@ if [[ "*sdks/python" != $PWD ]]; then
   cd $(pwd | sed 's/sdks\/python.*/sdks\/python/')
 fi
 
-if [ -z "$1" ]; then
-  printf "Usage: \n$> ./scripts/run_postcommit.sh <test_type> [gcp_location] [gcp_project]"
-  printf "\n\ttest_type: ValidatesRunner or IT"
-  printf "\n\tgcp_location: A gs:// path to stage artifacts and output results"
-  printf "\n\tgcp_project: A GCP project to run Dataflow pipelines\n"
-  exit 1
-fi
-
-set -e
-set -v
-
-# Run tests on the service.
-
 # Where to store integration test outputs.
-GCS_LOCATION=${2:-gs://temp-storage-for-end-to-end-tests}
+GCS_LOCATION=${3:-gs://temp-storage-for-end-to-end-tests}
 
-PROJECT=${3:-apache-beam-testing}
+PROJECT=${4:-apache-beam-testing}
 
 # Create a tarball
 python setup.py sdist
@@ -61,21 +70,38 @@ SDK_LOCATION=$(find dist/apache-beam-*.tar.gz)
 echo "pyhamcrest" > postcommit_requirements.txt
 echo "mock" >> postcommit_requirements.txt
 
-# Run integration tests on the Google Cloud Dataflow service
-# and validate that jobs finish successfully.
-echo ">>> RUNNING TEST DATAFLOW RUNNER it tests"
+# Options used to run testing pipeline on Cloud Dataflow Service.
+PIPELINE_OPTIONS=(
+  "--runner=TestDataflowRunner"
+  "--project=$PROJECT"
+  "--staging_location=$GCS_LOCATION/staging-it"
+  "--temp_location=$GCS_LOCATION/temp-it"
+  "--output=$GCS_LOCATION/py-it-cloud/output"
+  "--sdk_location=$SDK_LOCATION"
+  "--requirements_file=postcommit_requirements.txt"
+  "--num_workers=1"
+  "--sleep_secs=20"
+)
+
+# Add streaming flag if specified.
+if [[ "$2" = "streaming" ]]; then
+  echo ">>> Set test pipeline to streaming"
+  PIPELINE_OPTIONS+=("--streaming")
+else
+  echo ">>> Set test pipeline to batch"
+fi
+
+
+###########################################################################
+# Run tests on the Google Cloud Dataflow service and validate that jobs
+# finish successfully.
+
+JOINED_OPTS=$(IFS=" " ; echo "${PIPELINE_OPTIONS[*]}")
+
+echo ">>> RUNNING TEST DATAFLOW RUNNER $1 tests"
 python setup.py nosetests \
   --attr $1 \
   --nocapture \
   --processes=8 \
-  --process-timeout=2000 \
-  --test-pipeline-options=" \
-    --runner=TestDataflowRunner \
-    --project=$PROJECT \
-    --staging_location=$GCS_LOCATION/staging-it \
-    --temp_location=$GCS_LOCATION/temp-it \
-    --output=$GCS_LOCATION/py-it-cloud/output \
-    --sdk_location=$SDK_LOCATION \
-    --requirements_file=postcommit_requirements.txt \
-    --num_workers=1 \
-    --sleep_secs=20"
+  --process-timeout=3000 \
+  --test-pipeline-options="$JOINED_OPTS"
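
Since GCS_LOCATION and PROJECT now read the third and fourth positional arguments, a
manual run against a personal project would look roughly like the sketch below; the
bucket and project names are placeholders, not values used by Beam's test infrastructure:

  # Hypothetical manual invocation from sdks/python with a custom staging bucket and project
  ./scripts/run_postcommit.sh ValidatesRunner streaming gs://my-bucket/beam-tmp my-gcp-project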