You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@beam.apache.org by al...@apache.org on 2018/07/26 01:26:01 UTC
[beam] branch master updated: [BEAM-4859] Enable Python VR tests in streaming in postcommit task (#6053)
This is an automated email from the ASF dual-hosted git repository.
altay pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git
The following commit(s) were added to refs/heads/master by this push:
new 31fea5d [BEAM-4859] Enable Python VR tests in streaming in postcommit task (#6053)
31fea5d is described below
commit 31fea5d86444fe879afba43cf199e7cb7a7a5e73
Author: Mark Liu <ma...@users.noreply.github.com>
AuthorDate: Wed Jul 25 18:25:58 2018 -0700
[BEAM-4859] Enable Python VR tests in streaming in postcommit task (#6053)
* [BEAM-4859] Enable Python VR tests in streaming in Jenkins postcommit
* Increase integration test timeout since streaming tests take longer to run
* Disable test_read_metrics in streaming due to BEAM-3544
* Disable test_multi_valued_singleton_side_input in streaming due to BEAM-5025
---
...stCommit_Python_ValidatesRunner_Dataflow.groovy | 3 +-
.../apache_beam/transforms/ptransform_test.py | 4 +-
.../apache_beam/transforms/sideinputs_test.py | 4 +-
sdks/python/build.gradle | 20 ++++--
sdks/python/scripts/run_postcommit.sh | 84 ++++++++++++++--------
5 files changed, 78 insertions(+), 37 deletions(-)
diff --git a/.test-infra/jenkins/job_PostCommit_Python_ValidatesRunner_Dataflow.groovy b/.test-infra/jenkins/job_PostCommit_Python_ValidatesRunner_Dataflow.groovy
index 6ebda53..cfdd01b 100644
--- a/.test-infra/jenkins/job_PostCommit_Python_ValidatesRunner_Dataflow.groovy
+++ b/.test-infra/jenkins/job_PostCommit_Python_ValidatesRunner_Dataflow.groovy
@@ -32,7 +32,8 @@ PostcommitJobBuilder.postCommitJob('beam_PostCommit_Py_VR_Dataflow', 'Run Python
steps {
gradle {
rootBuildScriptDir(commonJobProperties.checkoutDir)
- tasks(':beam-sdks-python:validatesRunnerTests')
+ tasks(':beam-sdks-python:validatesRunnerBatchTests')
+ tasks(':beam-sdks-python:validatesRunnerStreamingTests')
commonJobProperties.setGradleSwitches(delegate)
}
}
diff --git a/sdks/python/apache_beam/transforms/ptransform_test.py b/sdks/python/apache_beam/transforms/ptransform_test.py
index e23fad7..907ee04 100644
--- a/sdks/python/apache_beam/transforms/ptransform_test.py
+++ b/sdks/python/apache_beam/transforms/ptransform_test.py
@@ -180,7 +180,9 @@ class PTransformTest(unittest.TestCase):
assert_that(r2.m, equal_to([3, 4, 5]), label='r2')
pipeline.run()
- @attr('ValidatesRunner')
+ # TODO(BEAM-3544): Disable this test in streaming temporarily.
+ # Remove sickbay-streaming tag after it's resolved.
+ @attr('ValidatesRunner', 'sickbay-streaming')
def test_read_metrics(self):
from apache_beam.io.utils import CountingSource
diff --git a/sdks/python/apache_beam/transforms/sideinputs_test.py b/sdks/python/apache_beam/transforms/sideinputs_test.py
index 1d58834..6b93b8e 100644
--- a/sdks/python/apache_beam/transforms/sideinputs_test.py
+++ b/sdks/python/apache_beam/transforms/sideinputs_test.py
@@ -146,7 +146,9 @@ class SideInputsTest(unittest.TestCase):
assert_that(result, equal_to([(1, 'empty'), (2, 'empty')]))
pipeline.run()
- @attr('ValidatesRunner')
+ # TODO(BEAM-5025): Disable this test in streaming temporarily.
+ # Remove sickbay-streaming tag after it's fixed.
+ @attr('ValidatesRunner', 'sickbay-streaming')
def test_multi_valued_singleton_side_input(self):
pipeline = self.create_pipeline()
pcol = pipeline | 'start' >> beam.Create([1, 2])
diff --git a/sdks/python/build.gradle b/sdks/python/build.gradle
index a227af5..0cbd61d 100644
--- a/sdks/python/build.gradle
+++ b/sdks/python/build.gradle
@@ -223,20 +223,30 @@ task portableWordCount(dependsOn: 'installGcpTest') {
}
}
-task postCommitVRTests(dependsOn: 'installGcpTest') {
+task postCommitITTests(dependsOn: 'installGcpTest') {
doLast {
exec {
executable 'sh'
- args '-c', ". ${envdir}/bin/activate && ./scripts/run_postcommit.sh IT"
+ args '-c', ". ${envdir}/bin/activate && ./scripts/run_postcommit.sh IT batch"
}
}
}
-task validatesRunnerTests(dependsOn: 'installGcpTest') {
+task validatesRunnerBatchTests(dependsOn: 'installGcpTest') {
doLast {
exec {
executable 'sh'
- args '-c', ". ${envdir}/bin/activate && ./scripts/run_postcommit.sh ValidatesRunner"
+ args '-c', ". ${envdir}/bin/activate && ./scripts/run_postcommit.sh ValidatesRunner batch"
+ }
+ }
+}
+
+task validatesRunnerStreamingTests(dependsOn: 'installGcpTest') {
+ doLast {
+ exec {
+ executable 'sh'
+ // TODO(BEAM-3544,BEAM-5025): Disable tests with sickbay-streaming tag.
+ args '-c', ". ${envdir}/bin/activate && ./scripts/run_postcommit.sh ValidatesRunner,'!sickbay-streaming' streaming"
}
}
}
@@ -254,7 +264,7 @@ task postCommit() {
dependsOn "preCommit"
dependsOn "localWordCount"
dependsOn "hdfsIntegrationTest"
- dependsOn "postCommitVRTests"
+ dependsOn "postCommitITTests"
}
task dependencyUpdates(dependsOn: ':dependencyUpdates') {
diff --git a/sdks/python/scripts/run_postcommit.sh b/sdks/python/scripts/run_postcommit.sh
index d6e31aa..c13ef50 100755
--- a/sdks/python/scripts/run_postcommit.sh
+++ b/sdks/python/scripts/run_postcommit.sh
@@ -16,6 +16,8 @@
# limitations under the License.
#
+###########################################################################
+#
# This script will be run by Jenkins as a post commit test. In order to run
# locally make the following changes:
#
@@ -23,6 +25,26 @@
# PROJECT -> Project name to use for service jobs.
#
+
+###########################################################################
+# Usage check.
+
+if (( $# < 2 )); then
+ printf "Usage: \n$> ./scripts/run_postcommit.sh <test_type> <pipeline_type> [gcp_location] [gcp_project]"
+ printf "\n\ttest_type: [required] ValidatesRunner or IT"
+ printf "\n\tpipeline_type: [required] streaming or batch"
+ printf "\n\tgcp_location: [optional] A gs:// path to stage artifacts and output results"
+ printf "\n\tgcp_project: [optional] A GCP project to run Dataflow pipelines\n"
+ exit 1
+fi
+
+set -e
+set -v
+
+
+###########################################################################
+# Build tarball and set pipeline options.
+
# Check that the script is running in a known directory.
if [[ $PWD != *sdks/python* ]]; then
echo 'Unable to locate Apache Beam Python SDK root directory'
@@ -34,23 +56,10 @@ if [[ "*sdks/python" != $PWD ]]; then
cd $(pwd | sed 's/sdks\/python.*/sdks\/python/')
fi
-if [ -z "$1" ]; then
- printf "Usage: \n$> ./scripts/run_postcommit.sh <test_type> [gcp_location] [gcp_project]"
- printf "\n\ttest_type: ValidatesRunner or IT"
- printf "\n\tgcp_location: A gs:// path to stage artifacts and output results"
- printf "\n\tgcp_project: A GCP project to run Dataflow pipelines\n"
- exit 1
-fi
-
-set -e
-set -v
-
-# Run tests on the service.
-
# Where to store integration test outputs.
-GCS_LOCATION=${2:-gs://temp-storage-for-end-to-end-tests}
+GCS_LOCATION=${3:-gs://temp-storage-for-end-to-end-tests}
-PROJECT=${3:-apache-beam-testing}
+PROJECT=${4:-apache-beam-testing}
# Create a tarball
python setup.py sdist
@@ -61,21 +70,38 @@ SDK_LOCATION=$(find dist/apache-beam-*.tar.gz)
echo "pyhamcrest" > postcommit_requirements.txt
echo "mock" >> postcommit_requirements.txt
-# Run integration tests on the Google Cloud Dataflow service
-# and validate that jobs finish successfully.
-echo ">>> RUNNING TEST DATAFLOW RUNNER it tests"
+# Options used to run testing pipeline on Cloud Dataflow Service.
+PIPELINE_OPTIONS=(
+ "--runner=TestDataflowRunner"
+ "--project=$PROJECT"
+ "--staging_location=$GCS_LOCATION/staging-it"
+ "--temp_location=$GCS_LOCATION/temp-it"
+ "--output=$GCS_LOCATION/py-it-cloud/output"
+ "--sdk_location=$SDK_LOCATION"
+ "--requirements_file=postcommit_requirements.txt"
+ "--num_workers=1"
+ "--sleep_secs=20"
+)
+
+# Add streaming flag if specified.
+if [[ "$2" = "streaming" ]]; then
+ echo ">>> Set test pipeline to streaming"
+ PIPELINE_OPTIONS+=("--streaming")
+else
+ echo ">>> Set test pipeline to batch"
+fi
+
+
+###########################################################################
+# Run tests on the Google Cloud Dataflow service and validate that jobs
+# finish successfully.
+
+JOINED_OPTS=$(IFS=" " ; echo "${PIPELINE_OPTIONS[*]}")
+
+echo ">>> RUNNING TEST DATAFLOW RUNNER $1 tests"
python setup.py nosetests \
--attr $1 \
--nocapture \
--processes=8 \
- --process-timeout=2000 \
- --test-pipeline-options=" \
- --runner=TestDataflowRunner \
- --project=$PROJECT \
- --staging_location=$GCS_LOCATION/staging-it \
- --temp_location=$GCS_LOCATION/temp-it \
- --output=$GCS_LOCATION/py-it-cloud/output \
- --sdk_location=$SDK_LOCATION \
- --requirements_file=postcommit_requirements.txt \
- --num_workers=1 \
- --sleep_secs=20"
+ --process-timeout=3000 \
+ --test-pipeline-options="$JOINED_OPTS"