You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@beam.apache.org by tg...@apache.org on 2017/04/12 20:52:41 UTC

[1/6] beam git commit: This closes #2466

Repository: beam
Updated Branches:
  refs/heads/master 1788cef96 -> 94c9e3817


This closes #2466


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/94c9e381
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/94c9e381
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/94c9e381

Branch: refs/heads/master
Commit: 94c9e38177d2ea3b6aab74ae50a02ac89e3c8152
Parents: 1788cef 34b9507
Author: Thomas Groh <tg...@google.com>
Authored: Wed Apr 12 13:52:28 2017 -0700
Committer: Thomas Groh <tg...@google.com>
Committed: Wed Apr 12 13:52:28 2017 -0700

----------------------------------------------------------------------
 .jenkins/common_job_properties.groovy           | 261 ----------------
 .../job_beam_PerformanceTests_Dataflow.groovy   |  43 ---
 .jenkins/job_beam_PerformanceTests_JDBC.groovy  |  60 ----
 .jenkins/job_beam_PerformanceTests_Spark.groovy |  44 ---
 ...job_beam_PostCommit_Java_MavenInstall.groovy |  42 ---
 ..._PostCommit_Java_ValidatesRunner_Apex.groovy |  48 ---
 ...tCommit_Java_ValidatesRunner_Dataflow.groovy |  45 ---
 ...PostCommit_Java_ValidatesRunner_Flink.groovy |  43 ---
 ...tCommit_Java_ValidatesRunner_Gearpump.groovy |  49 ---
 ...PostCommit_Java_ValidatesRunner_Spark.groovy |  44 ---
 .../job_beam_PostCommit_Python_Verify.groovy    |  55 ----
 .../job_beam_PreCommit_Java_MavenInstall.groovy |  42 ---
 .../job_beam_PreCommit_Website_Stage.groovy     |  80 -----
 .jenkins/job_beam_PreCommit_Website_Test.groovy |  65 ----
 .../job_beam_Release_NightlySnapshot.groovy     |  45 ---
 .jenkins/job_seed.groovy                        |  53 ----
 .../jenkins/common_job_properties.groovy        | 261 ++++++++++++++++
 .../job_beam_PerformanceTests_Dataflow.groovy   |  43 +++
 .../job_beam_PerformanceTests_JDBC.groovy       |  60 ++++
 .../job_beam_PerformanceTests_Spark.groovy      |  44 +++
 ...job_beam_PostCommit_Java_MavenInstall.groovy |  42 +++
 ..._PostCommit_Java_ValidatesRunner_Apex.groovy |  48 +++
 ...tCommit_Java_ValidatesRunner_Dataflow.groovy |  45 +++
 ...PostCommit_Java_ValidatesRunner_Flink.groovy |  43 +++
 ...tCommit_Java_ValidatesRunner_Gearpump.groovy |  49 +++
 ...PostCommit_Java_ValidatesRunner_Spark.groovy |  44 +++
 .../job_beam_PostCommit_Python_Verify.groovy    |  55 ++++
 .../job_beam_PreCommit_Java_MavenInstall.groovy |  42 +++
 .../job_beam_PreCommit_Website_Stage.groovy     |  80 +++++
 .../job_beam_PreCommit_Website_Test.groovy      |  65 ++++
 .../job_beam_Release_NightlySnapshot.groovy     |  45 +++
 .test-infra/jenkins/job_seed.groovy             |  53 ++++
 .../SmallITCluster/cassandra-svc-rc.yaml        |  88 ++++++
 .../cassandra/SmallITCluster/start-up.sh        |  21 ++
 .../cassandra/SmallITCluster/teardown.sh        |  21 ++
 .../kubernetes/cassandra/data-load-setup.sh     |  29 ++
 .test-infra/kubernetes/cassandra/data-load.sh   |  67 +++++
 .../LargeProductionCluster/es-services.yaml     | 277 +++++++++++++++++
 .../LargeProductionCluster/start-up.sh          |  21 ++
 .../LargeProductionCluster/teardown.sh          |  20 ++
 .../SmallITCluster/elasticsearch-svc-rc.yaml    |  84 ++++++
 .../elasticsearch/SmallITCluster/start-up.sh    |  22 ++
 .../elasticsearch/SmallITCluster/teardown.sh    |  20 ++
 .../kubernetes/elasticsearch/data-load-setup.sh |  26 ++
 .../kubernetes/elasticsearch/data-load.sh       |  33 ++
 .../kubernetes/elasticsearch/es_test_data.py    | 299 +++++++++++++++++++
 .../kubernetes/elasticsearch/show-health.sh     |  25 ++
 .../postgres/postgres-service-for-local-dev.yml |  28 ++
 .test-infra/kubernetes/postgres/postgres.yml    |  56 ++++
 .test-infra/travis/README.md                    |  23 ++
 .test-infra/travis/settings.xml                 |  33 ++
 .test-infra/travis/test_wordcount.sh            | 125 ++++++++
 .travis.yml                                     |   4 +-
 .travis/README.md                               |  23 --
 .travis/settings.xml                            |  33 --
 .travis/test_wordcount.sh                       | 125 --------
 .../SmallITCluster/cassandra-svc-rc.yaml        |  88 ------
 .../cassandra/SmallITCluster/start-up.sh        |  21 --
 .../cassandra/SmallITCluster/teardown.sh        |  21 --
 .../kubernetes/cassandra/data-load-setup.sh     |  29 --
 .../resources/kubernetes/cassandra/data-load.sh |  67 -----
 .../LargeProductionCluster/es-services.yaml     | 277 -----------------
 .../LargeProductionCluster/start-up.sh          |  21 --
 .../LargeProductionCluster/teardown.sh          |  20 --
 .../SmallITCluster/elasticsearch-svc-rc.yaml    |  84 ------
 .../elasticsearch/SmallITCluster/start-up.sh    |  22 --
 .../elasticsearch/SmallITCluster/teardown.sh    |  20 --
 .../kubernetes/elasticsearch/data-load-setup.sh |  26 --
 .../kubernetes/elasticsearch/data-load.sh       |  33 --
 .../kubernetes/elasticsearch/es_test_data.py    | 299 -------------------
 .../kubernetes/elasticsearch/show-health.sh     |  25 --
 .../postgres-service-for-local-dev.yml          |  28 --
 .../src/test/resources/kubernetes/postgres.yml  |  56 ----
 .../jdbc/src/test/resources/kubernetes/setup.sh |  19 --
 .../src/test/resources/kubernetes/teardown.sh   |  19 --
 75 files changed, 2339 insertions(+), 2377 deletions(-)
----------------------------------------------------------------------



[4/6] beam git commit: Move HIFIO k8s scripts into shared dir

Posted by tg...@apache.org.
Move HIFIO k8s scripts into shared dir


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/34b95076
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/34b95076
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/34b95076

Branch: refs/heads/master
Commit: 34b95076d57baf7af6f7b8b0aaf748868952620b
Parents: 5758899
Author: Stephen Sisk <si...@google.com>
Authored: Fri Apr 7 16:11:19 2017 -0700
Committer: Thomas Groh <tg...@google.com>
Committed: Wed Apr 12 13:52:28 2017 -0700

----------------------------------------------------------------------
 .../SmallITCluster/cassandra-svc-rc.yaml        |  88 ++++++
 .../cassandra/SmallITCluster/start-up.sh        |  21 ++
 .../cassandra/SmallITCluster/teardown.sh        |  21 ++
 .../kubernetes/cassandra/data-load-setup.sh     |  29 ++
 .test-infra/kubernetes/cassandra/data-load.sh   |  67 +++++
 .../LargeProductionCluster/es-services.yaml     | 277 +++++++++++++++++
 .../LargeProductionCluster/start-up.sh          |  21 ++
 .../LargeProductionCluster/teardown.sh          |  20 ++
 .../SmallITCluster/elasticsearch-svc-rc.yaml    |  84 ++++++
 .../elasticsearch/SmallITCluster/start-up.sh    |  22 ++
 .../elasticsearch/SmallITCluster/teardown.sh    |  20 ++
 .../kubernetes/elasticsearch/data-load-setup.sh |  26 ++
 .../kubernetes/elasticsearch/data-load.sh       |  33 ++
 .../kubernetes/elasticsearch/es_test_data.py    | 299 +++++++++++++++++++
 .../kubernetes/elasticsearch/show-health.sh     |  25 ++
 .../SmallITCluster/cassandra-svc-rc.yaml        |  88 ------
 .../cassandra/SmallITCluster/start-up.sh        |  21 --
 .../cassandra/SmallITCluster/teardown.sh        |  21 --
 .../kubernetes/cassandra/data-load-setup.sh     |  29 --
 .../resources/kubernetes/cassandra/data-load.sh |  67 -----
 .../LargeProductionCluster/es-services.yaml     | 277 -----------------
 .../LargeProductionCluster/start-up.sh          |  21 --
 .../LargeProductionCluster/teardown.sh          |  20 --
 .../SmallITCluster/elasticsearch-svc-rc.yaml    |  84 ------
 .../elasticsearch/SmallITCluster/start-up.sh    |  22 --
 .../elasticsearch/SmallITCluster/teardown.sh    |  20 --
 .../kubernetes/elasticsearch/data-load-setup.sh |  26 --
 .../kubernetes/elasticsearch/data-load.sh       |  33 --
 .../kubernetes/elasticsearch/es_test_data.py    | 299 -------------------
 .../kubernetes/elasticsearch/show-health.sh     |  25 --
 30 files changed, 1053 insertions(+), 1053 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/34b95076/.test-infra/kubernetes/cassandra/SmallITCluster/cassandra-svc-rc.yaml
----------------------------------------------------------------------
diff --git a/.test-infra/kubernetes/cassandra/SmallITCluster/cassandra-svc-rc.yaml b/.test-infra/kubernetes/cassandra/SmallITCluster/cassandra-svc-rc.yaml
new file mode 100644
index 0000000..7c36e34
--- /dev/null
+++ b/.test-infra/kubernetes/cassandra/SmallITCluster/cassandra-svc-rc.yaml
@@ -0,0 +1,88 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Headless service that allows us to get the IP addresses of our Cassandra nodes
+apiVersion: v1
+kind: Service
+metadata:
+  labels:
+    name: cassandra-peers
+  name: cassandra-peers
+spec:
+  clusterIP: None
+  ports:
+    - port: 7000
+      name: intra-node-communication
+    - port: 7001
+      name: tls-intra-node-communication
+  selector:
+    name: cassandra
+---
+# Kubernetes service file exposing Cassandra endpoint used by clients.
+apiVersion: v1
+kind: Service
+metadata:
+  labels:
+    name: cassandra
+  name: cassandra
+spec:
+  ports:
+    - port: 9042
+      name: cql
+  selector:
+    name: cassandra
+  type: LoadBalancer
+---
+# Replication Controller for Cassandra which tracks the Cassandra pods.
+apiVersion: v1
+kind: ReplicationController
+metadata:
+  labels:
+    name: cassandra
+  name: cassandra
+spec:
+  replicas: 1
+  selector:
+    name: cassandra
+  template:
+    metadata:
+      labels:
+        name: cassandra
+    spec:
+      containers:
+        - image: cassandra
+          name: cassandra
+          env:
+            - name: PEER_DISCOVERY_SERVICE
+              value: cassandra-peers
+            - name: CASSANDRA_CLUSTER_NAME
+              value: Cassandra
+            - name: CASSANDRA_DC
+              value: DC1
+            - name: CASSANDRA_RACK
+              value: Kubernetes Cluster
+# Number of tokens currently configured to 1. If this is not configured, default value is 256. You can change it as per requirement.			  
+            - name: CASSANDRA_NUM_TOKENS
+              value: '1'
+          ports:
+            - containerPort: 9042
+              name: cql
+          volumeMounts:
+            - mountPath: /var/lib/cassandra/data
+              name: data
+      volumes:
+        - name: data
+          emptyDir: {}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/beam/blob/34b95076/.test-infra/kubernetes/cassandra/SmallITCluster/start-up.sh
----------------------------------------------------------------------
diff --git a/.test-infra/kubernetes/cassandra/SmallITCluster/start-up.sh b/.test-infra/kubernetes/cassandra/SmallITCluster/start-up.sh
new file mode 100644
index 0000000..c05b771
--- /dev/null
+++ b/.test-infra/kubernetes/cassandra/SmallITCluster/start-up.sh
@@ -0,0 +1,21 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#!/bin/bash
+set -e
+
+# Create Cassandra services and Replication controller.
+kubectl create -f cassandra-svc-rc.yaml

http://git-wip-us.apache.org/repos/asf/beam/blob/34b95076/.test-infra/kubernetes/cassandra/SmallITCluster/teardown.sh
----------------------------------------------------------------------
diff --git a/.test-infra/kubernetes/cassandra/SmallITCluster/teardown.sh b/.test-infra/kubernetes/cassandra/SmallITCluster/teardown.sh
new file mode 100644
index 0000000..f538a75
--- /dev/null
+++ b/.test-infra/kubernetes/cassandra/SmallITCluster/teardown.sh
@@ -0,0 +1,21 @@
+#
+#    Licensed to the Apache Software Foundation (ASF) under one or more
+#    contributor license agreements.  See the NOTICE file distributed with
+#    this work for additional information regarding copyright ownership.
+#    The ASF licenses this file to You under the Apache License, Version 2.0
+#    (the "License"); you may not use this file except in compliance with
+#    the License.  You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#    Unless required by applicable law or agreed to in writing, software
+#    distributed under the License is distributed on an "AS IS" BASIS,
+#    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#    See the License for the specific language governing permissions and
+#    limitations under the License.
+#
+#!/bin/bash
+set -e
+
+# Delete Cassandra services and Replication controller.
+kubectl delete -f cassandra-svc-rc.yaml

http://git-wip-us.apache.org/repos/asf/beam/blob/34b95076/.test-infra/kubernetes/cassandra/data-load-setup.sh
----------------------------------------------------------------------
diff --git a/.test-infra/kubernetes/cassandra/data-load-setup.sh b/.test-infra/kubernetes/cassandra/data-load-setup.sh
new file mode 100644
index 0000000..4e12f89
--- /dev/null
+++ b/.test-infra/kubernetes/cassandra/data-load-setup.sh
@@ -0,0 +1,29 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#!/bin/bash
+set -e
+
+# Load YCSB tool
+echo "Downloading YCSB tool"
+echo "------------------------------"
+curl -O --location https://github.com/brianfrankcooper/YCSB/releases/download/0.12.0/ycsb-0.12.0.tar.gz
+tar xfz ycsb-0.12.0.tar.gz
+wget https://www.slf4j.org/dist/slf4j-1.7.22.tar.gz
+tar xfz slf4j-1.7.22.tar.gz
+cp slf4j-1.7.22/slf4j-simple-*.jar ycsb-0.12.0/lib/
+cp slf4j-1.7.22/slf4j-api-*.jar ycsb-0.12.0/lib/
+echo "YCSB tool loaded"

http://git-wip-us.apache.org/repos/asf/beam/blob/34b95076/.test-infra/kubernetes/cassandra/data-load.sh
----------------------------------------------------------------------
diff --git a/.test-infra/kubernetes/cassandra/data-load.sh b/.test-infra/kubernetes/cassandra/data-load.sh
new file mode 100644
index 0000000..59d0e22
--- /dev/null
+++ b/.test-infra/kubernetes/cassandra/data-load.sh
@@ -0,0 +1,67 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#!/bin/bash
+set -e
+
+recordcount=1000
+# Identify the pod
+cassandra_pods="kubectl get pods -l name=cassandra"
+running_seed="$(kubectl get pods -o json -l name=cassandra -o jsonpath=\
+'{.items[0].metadata.name}')"
+echo "Detected Running Pod $running_seed"
+
+# After starting the service, it takes couple of minutes to generate the external IP for the
+# service. Hence, wait for sometime.
+
+# Identify external IP of the pod
+external_ip="$(kubectl get svc cassandra -o jsonpath='{.status.loadBalancer.ingress[0].ip}')"
+echo "Waiting for the Cassandra service to come up ........"
+while [ -z "$external_ip" ]
+do
+   sleep 10s
+   external_ip="$(kubectl get svc cassandra -o jsonpath='{.status.loadBalancer.ingress[0].ip}')"
+   echo "."
+done
+echo "External IP - $external_ip"
+
+# Create keyspace
+keyspace_creation_command="drop keyspace if exists ycsb;create keyspace ycsb WITH REPLICATION = {\
+'class' : 'SimpleStrategy', 'replication_factor': 3 };"
+kubectl exec -ti $running_seed -- cqlsh -e "$keyspace_creation_command"
+echo "Keyspace creation............"
+echo "-----------------------------"
+echo "$keyspace_creation_command"
+echo
+
+# Create table
+table_creation_command="use ycsb;drop table if exists usertable;create table usertable (\
+y_id varchar primary key,field0 varchar,field1 varchar,field2 varchar,field3 varchar,\
+field4 varchar,field5 varchar,field6 varchar,field7 varchar,field8 varchar,field9 varchar);"
+kubectl exec -ti $running_seed -- cqlsh -e "$table_creation_command"
+echo "Table creation .............."
+echo "-----------------------------"
+echo "$table_creation_command"
+
+cd ycsb-0.12.0
+
+echo "Starting to load data on ${external_ip}"
+echo "-----------------------------"
+# Record count set to 1000, change this value to load as per requirement.
+# dataintegrity flag is set to true to load deterministic data
+./bin/ycsb load cassandra-cql -p hosts=${external_ip} -p dataintegrity=true -p recordcount=\
+${recordcount} -p insertorder=ordered -p fieldlength=20 -P workloads/workloadd \
+-s > workloada_load_res.txt

http://git-wip-us.apache.org/repos/asf/beam/blob/34b95076/.test-infra/kubernetes/elasticsearch/LargeProductionCluster/es-services.yaml
----------------------------------------------------------------------
diff --git a/.test-infra/kubernetes/elasticsearch/LargeProductionCluster/es-services.yaml b/.test-infra/kubernetes/elasticsearch/LargeProductionCluster/es-services.yaml
new file mode 100644
index 0000000..38c820e
--- /dev/null
+++ b/.test-infra/kubernetes/elasticsearch/LargeProductionCluster/es-services.yaml
@@ -0,0 +1,277 @@
+#    Licensed to the Apache Software Foundation (ASF) under one or more
+#    contributor license agreements.  See the NOTICE file distributed with
+#    this work for additional information regarding copyright ownership.
+#    The ASF licenses this file to You under the Apache License, Version 2.0
+#    (the "License"); you may not use this file except in compliance with
+#    the License.  You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#    Unless required by applicable law or agreed to in writing, software
+#    distributed under the License is distributed on an "AS IS" BASIS,
+#    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#    See the License for the specific language governing permissions and
+#    limitations under the License.
+
+# Service file containing services for ES discovery, elasticsearch and master node deployment.
+
+# Kubernetes headless service for Elasticsearch discovery of nodes.
+apiVersion: v1
+kind: Service
+metadata:
+  name: elasticsearch-discovery
+  labels:
+    component: elasticsearch
+    role: master
+spec:
+  selector:
+    component: elasticsearch
+    role: master
+  ports:
+  - name: transport
+    port: 9300
+    protocol: TCP
+---
+# To create Elasticsearch frontend cluster Kubernetes service.
+# It sets up a load balancer on TCP port 9200 that distributes network traffic to the ES client nodes.
+apiVersion: v1
+kind: Service
+metadata:
+  name: elasticsearch
+  labels:
+    component: elasticsearch
+    role: client
+spec:
+  type: LoadBalancer
+  selector:
+    component: elasticsearch
+    role: client
+  ports:
+  - name: http
+    port: 9200
+    protocol: TCP
+---
+# The Kubernetes deployment script for Elasticsearch master nodes.
+apiVersion: extensions/v1beta1
+kind: Deployment
+metadata:
+  name: es-master
+  labels:
+    component: elasticsearch
+    role: master
+spec:
+  replicas: 3
+  template:
+    metadata:
+      labels:
+        component: elasticsearch
+        role: master
+      annotations:
+        pod.beta.kubernetes.io/init-containers: '[
+          {
+          "name": "sysctl",
+            "image": "busybox",
+            "imagePullPolicy": "IfNotPresent",
+            "command": ["sysctl", "-w", "vm.max_map_count=262144"],
+            "securityContext": {
+              "privileged": true
+            }
+          }
+        ]'
+    spec:
+      containers:
+      - name: es-master
+        securityContext:
+          privileged: false
+          capabilities:
+            add:
+# IPC_LOCK capability is enabled to allow Elasticsearch to lock the heap in memory so it will not be swapped.
+              - IPC_LOCK
+# SYS_RESOURCE is docker capability key to control and override the resource limits.
+# This could be needed to increase base limits.(e.g. File descriptor limit for elasticsearch)
+              - SYS_RESOURCE
+        image: quay.io/pires/docker-elasticsearch-kubernetes:5.2.2
+        env:
+        - name: NAMESPACE
+          valueFrom:
+            fieldRef:
+              fieldPath: metadata.namespace
+        - name: NODE_NAME
+          valueFrom:
+            fieldRef:
+              fieldPath: metadata.name
+        - name: "CLUSTER_NAME"
+          value: "myesdb"
+        - name: "NUMBER_OF_MASTERS"
+          value: "2"
+        - name: NODE_MASTER
+          value: "true"
+        - name: NODE_INGEST
+          value: "false"
+        - name: NODE_DATA
+          value: "false"
+        - name: HTTP_ENABLE
+          value: "false"
+        - name: "ES_JAVA_OPTS"
+          value: "-Xms256m -Xmx256m"
+        ports:
+        - containerPort: 9300
+          name: transport
+          protocol: TCP
+        volumeMounts:
+        - name: storage
+          mountPath: /data
+      volumes:
+          - emptyDir:
+              medium: ""
+            name: "storage"
+---
+# Kubernetes deployment script for Elasticsearch client nodes (aka load balancing proxies).
+apiVersion: extensions/v1beta1
+kind: Deployment
+metadata:
+  name: es-client
+  labels:
+    component: elasticsearch
+    role: client
+spec:
+  # The no. of replicas can be incremented based on the client usage using HTTP API.
+  replicas: 1
+  template:
+    metadata:
+      labels:
+        component: elasticsearch
+        role: client
+      annotations:
+      # Elasticsearch uses a hybrid mmapfs / niofs directory by default to store its indices.
+      # The default operating system limits on mmap counts is likely to be too low, which may result
+      # in out of memory exceptions. Therefore, the need to increase virtual memory
+      # vm.max_map_count for large amount of data in the pod initialization annotation.
+        pod.beta.kubernetes.io/init-containers: '[
+          {
+          "name": "sysctl",
+            "image": "busybox",
+            "imagePullPolicy": "IfNotPresent",
+            "command": ["sysctl", "-w", "vm.max_map_count=262144"],
+            "securityContext": {
+              "privileged": true
+            }
+          }
+        ]'
+    spec:
+      containers:
+      - name: es-client
+        securityContext:
+          privileged: false
+          capabilities:
+            add:
+# IPC_LOCK capability is enabled to allow Elasticsearch to lock the heap in memory so it will not be swapped.
+              - IPC_LOCK
+# SYS_RESOURCE is docker capability key to control and override the resource limits.
+# This could be needed to increase base limits.(e.g. File descriptor limit for elasticsearch)
+              - SYS_RESOURCE
+        image: quay.io/pires/docker-elasticsearch-kubernetes:5.2.2
+        env:
+        - name: NAMESPACE
+          valueFrom:
+            fieldRef:
+              fieldPath: metadata.namespace
+        - name: NODE_NAME
+          valueFrom:
+            fieldRef:
+              fieldPath: metadata.name
+        - name: "CLUSTER_NAME"
+          value: "myesdb"
+        - name: NODE_MASTER
+          value: "false"
+        - name: NODE_DATA
+          value: "false"
+        - name: HTTP_ENABLE
+          value: "true"
+        - name: "ES_JAVA_OPTS"
+          value: "-Xms256m -Xmx256m"
+        ports:
+        - containerPort: 9200
+          name: http
+          protocol: TCP
+        - containerPort: 9300
+          name: transport
+          protocol: TCP
+        volumeMounts:
+        - name: storage
+          mountPath: /data
+      volumes:
+          - emptyDir:
+              medium: ""
+            name: "storage"
+---
+# Kubernetes deployment script for Elasticsearch data nodes which store and index data.
+apiVersion: extensions/v1beta1
+kind: Deployment
+metadata:
+  name: es-data
+  labels:
+    component: elasticsearch
+    role: data
+spec:
+  replicas: 2
+  template:
+    metadata:
+      labels:
+        component: elasticsearch
+        role: data
+      annotations:
+        pod.beta.kubernetes.io/init-containers: '[
+          {
+          "name": "sysctl",
+            "image": "busybox",
+            "imagePullPolicy": "IfNotPresent",
+            "command": ["sysctl", "-w", "vm.max_map_count=1048575"],
+            "securityContext": {
+              "privileged": true
+            }
+          }
+        ]'
+    spec:
+      containers:
+      - name: es-data
+        securityContext:
+          privileged: false
+          capabilities:
+            add:
+# IPC_LOCK capability is enabled to allow Elasticsearch to lock the heap in memory so it will not be swapped.
+              - IPC_LOCK
+# SYS_RESOURCE is docker capability key to control and override the resource limits.
+# This could be needed to increase base limits.(e.g. File descriptor limit for elasticsearch)
+              - SYS_RESOURCE
+        image: quay.io/pires/docker-elasticsearch-kubernetes:5.2.2
+        env:
+        - name: NAMESPACE
+          valueFrom:
+            fieldRef:
+              fieldPath: metadata.namespace
+        - name: NODE_NAME
+          valueFrom:
+            fieldRef:
+              fieldPath: metadata.name
+        - name: "CLUSTER_NAME"
+          value: "myesdb"
+        - name: NODE_MASTER
+          value: "false"
+        - name: NODE_INGEST
+          value: "false"
+        - name: HTTP_ENABLE
+          value: "false"
+        - name: "ES_JAVA_OPTS"
+          value: "-Xms256m -Xmx256m"
+        ports:
+        - containerPort: 9300
+          name: transport
+          protocol: TCP
+        volumeMounts:
+        - name: storage
+          mountPath: /data
+      volumes:
+          - emptyDir:
+              medium: ""
+            name: "storage"
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/beam/blob/34b95076/.test-infra/kubernetes/elasticsearch/LargeProductionCluster/start-up.sh
----------------------------------------------------------------------
diff --git a/.test-infra/kubernetes/elasticsearch/LargeProductionCluster/start-up.sh b/.test-infra/kubernetes/elasticsearch/LargeProductionCluster/start-up.sh
new file mode 100644
index 0000000..4d277c8
--- /dev/null
+++ b/.test-infra/kubernetes/elasticsearch/LargeProductionCluster/start-up.sh
@@ -0,0 +1,21 @@
+#    Licensed to the Apache Software Foundation (ASF) under one or more
+#    contributor license agreements.  See the NOTICE file distributed with
+#    this work for additional information regarding copyright ownership.
+#    The ASF licenses this file to You under the Apache License, Version 2.0
+#    (the "License"); you may not use this file except in compliance with
+#    the License.  You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#    Unless required by applicable law or agreed to in writing, software
+#    distributed under the License is distributed on an "AS IS" BASIS,
+#    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#    See the License for the specific language governing permissions and
+#    limitations under the License.
+#
+
+#!/bin/sh
+set -e
+
+# Create Elasticsearch services and deployments.
+kubectl create -f es-services.yaml

http://git-wip-us.apache.org/repos/asf/beam/blob/34b95076/.test-infra/kubernetes/elasticsearch/LargeProductionCluster/teardown.sh
----------------------------------------------------------------------
diff --git a/.test-infra/kubernetes/elasticsearch/LargeProductionCluster/teardown.sh b/.test-infra/kubernetes/elasticsearch/LargeProductionCluster/teardown.sh
new file mode 100644
index 0000000..a30793b
--- /dev/null
+++ b/.test-infra/kubernetes/elasticsearch/LargeProductionCluster/teardown.sh
@@ -0,0 +1,20 @@
+#    Licensed to the Apache Software Foundation (ASF) under one or more
+#    contributor license agreements.  See the NOTICE file distributed with
+#    this work for additional information regarding copyright ownership.
+#    The ASF licenses this file to You under the Apache License, Version 2.0
+#    (the "License"); you may not use this file except in compliance with
+#    the License.  You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#    Unless required by applicable law or agreed to in writing, software
+#    distributed under the License is distributed on an "AS IS" BASIS,
+#    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#    See the License for the specific language governing permissions and
+#    limitations under the License.
+
+#!/bin/bash
+set -e
+
+# Delete elasticsearch services and deployments.
+kubectl delete -f es-services.yaml
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/beam/blob/34b95076/.test-infra/kubernetes/elasticsearch/SmallITCluster/elasticsearch-svc-rc.yaml
----------------------------------------------------------------------
diff --git a/.test-infra/kubernetes/elasticsearch/SmallITCluster/elasticsearch-svc-rc.yaml b/.test-infra/kubernetes/elasticsearch/SmallITCluster/elasticsearch-svc-rc.yaml
new file mode 100644
index 0000000..9a7ac3d
--- /dev/null
+++ b/.test-infra/kubernetes/elasticsearch/SmallITCluster/elasticsearch-svc-rc.yaml
@@ -0,0 +1,84 @@
+#    Licensed to the Apache Software Foundation (ASF) under one or more
+#    contributor license agreements.  See the NOTICE file distributed with
+#    this work for additional information regarding copyright ownership.
+#    The ASF licenses this file to You under the Apache License, Version 2.0
+#    (the "License"); you may not use this file except in compliance with
+#    the License.  You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#    Unless required by applicable law or agreed to in writing, software
+#    distributed under the License is distributed on an "AS IS" BASIS,
+#    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#    See the License for the specific language governing permissions and
+#    limitations under the License.
+
+# To create Elasticsearch frontend cluster Kubernetes service. 
+# It sets up a load balancer on TCP port 9200 that distributes network traffic to the ES nodes.
+apiVersion: v1
+kind: Service
+metadata:
+  name: elasticsearch
+  labels:
+    component: elasticsearch
+spec:
+  type: LoadBalancer
+  selector:
+    component: elasticsearch
+  ports:
+  - name: http
+    port: 9200
+    protocol: TCP
+  - name: transport
+    port: 9300
+    protocol: TCP
+---
+# The Kubernetes deployment script for Elasticsearch replication nodes. It will create 1 node cluster.
+# To scale the cluster as desired, you can create replicas of node use 'kubectl scale --replicas=3 rc es' command
+apiVersion: extensions/v1beta1
+kind: Deployment
+metadata:
+  name: es
+  labels:
+    component: elasticsearch
+spec:
+  replicas: 1
+  template:
+    metadata:
+      labels:
+        component: elasticsearch
+    spec:
+      containers:
+      - name: es
+        securityContext:
+          capabilities:
+            add:
+# IPC_LOCK capability is enabled to allow Elasticsearch to lock the heap in memory so it will not be swapped.   
+              - IPC_LOCK
+# SYS_RESOURCE capability is set to control and override various resource limits.
+              - SYS_RESOURCE
+        image: quay.io/pires/docker-elasticsearch-kubernetes:5.2.2
+        env:
+        - name: "CLUSTER_NAME"
+          value: "myesdb"
+        - name: "DISCOVERY_SERVICE"
+          value: "elasticsearch"
+        - name: NODE_MASTER
+          value: "true"
+        - name: NODE_DATA
+          value: "true"
+        - name: HTTP_ENABLE
+          value: "true"
+        ports:
+        - containerPort: 9200
+          name: http
+          protocol: TCP
+        - containerPort: 9300
+          name: transport
+          protocol: TCP
+        volumeMounts:
+        - mountPath: /data
+          name: storage
+      volumes:
+      - name: storage
+        emptyDir: {}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/beam/blob/34b95076/.test-infra/kubernetes/elasticsearch/SmallITCluster/start-up.sh
----------------------------------------------------------------------
diff --git a/.test-infra/kubernetes/elasticsearch/SmallITCluster/start-up.sh b/.test-infra/kubernetes/elasticsearch/SmallITCluster/start-up.sh
new file mode 100644
index 0000000..e8cf275
--- /dev/null
+++ b/.test-infra/kubernetes/elasticsearch/SmallITCluster/start-up.sh
@@ -0,0 +1,22 @@
+#    Licensed to the Apache Software Foundation (ASF) under one or more
+#    contributor license agreements.  See the NOTICE file distributed with
+#    this work for additional information regarding copyright ownership.
+#    The ASF licenses this file to You under the Apache License, Version 2.0
+#    (the "License"); you may not use this file except in compliance with
+#    the License.  You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#    Unless required by applicable law or agreed to in writing, software
+#    distributed under the License is distributed on an "AS IS" BASIS,
+#    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#    See the License for the specific language governing permissions and
+#    limitations under the License.
+#
+
+#!/bin/sh
+set -e
+
+# Create Elasticsearch services and deployments.
+kubectl create -f elasticsearch-svc-rc.yaml
+

http://git-wip-us.apache.org/repos/asf/beam/blob/34b95076/.test-infra/kubernetes/elasticsearch/SmallITCluster/teardown.sh
----------------------------------------------------------------------
diff --git a/.test-infra/kubernetes/elasticsearch/SmallITCluster/teardown.sh b/.test-infra/kubernetes/elasticsearch/SmallITCluster/teardown.sh
new file mode 100644
index 0000000..079141d
--- /dev/null
+++ b/.test-infra/kubernetes/elasticsearch/SmallITCluster/teardown.sh
@@ -0,0 +1,20 @@
+#    Licensed to the Apache Software Foundation (ASF) under one or more
+#    contributor license agreements.  See the NOTICE file distributed with
+#    this work for additional information regarding copyright ownership.
+#    The ASF licenses this file to You under the Apache License, Version 2.0
+#    (the "License"); you may not use this file except in compliance with
+#    the License.  You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#    Unless required by applicable law or agreed to in writing, software
+#    distributed under the License is distributed on an "AS IS" BASIS,
+#    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#    See the License for the specific language governing permissions and
+#    limitations under the License.
+
+#!/bin/bash
+set -e
+
+# Delete elasticsearch services and deployments.
+kubectl delete -f elasticsearch-svc-rc.yaml

http://git-wip-us.apache.org/repos/asf/beam/blob/34b95076/.test-infra/kubernetes/elasticsearch/data-load-setup.sh
----------------------------------------------------------------------
diff --git a/.test-infra/kubernetes/elasticsearch/data-load-setup.sh b/.test-infra/kubernetes/elasticsearch/data-load-setup.sh
new file mode 100644
index 0000000..00991bc
--- /dev/null
+++ b/.test-infra/kubernetes/elasticsearch/data-load-setup.sh
@@ -0,0 +1,26 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#!/bin/bash
+set -e
+
+# Install python
+sudo apt-get update
+sudo apt-get install python-pip
+sudo pip install --upgrade pip
+sudo apt-get install python-dev
+sudo pip install tornado numpy
+echo

http://git-wip-us.apache.org/repos/asf/beam/blob/34b95076/.test-infra/kubernetes/elasticsearch/data-load.sh
----------------------------------------------------------------------
diff --git a/.test-infra/kubernetes/elasticsearch/data-load.sh b/.test-infra/kubernetes/elasticsearch/data-load.sh
new file mode 100644
index 0000000..21150fb
--- /dev/null
+++ b/.test-infra/kubernetes/elasticsearch/data-load.sh
@@ -0,0 +1,33 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#!/bin/bash
+set -e
+
+# Identify external IP
+external_ip="$(kubectl get svc elasticsearch -o jsonpath='{.status.loadBalancer.ingress[0].ip}')"
+echo "Waiting for the Elasticsearch service to come up ........"
+while [ -z "$external_ip" ]
+do
+   sleep 10s
+   external_ip="$(kubectl get svc elasticsearch -o jsonpath='{.status.loadBalancer.ingress[0].ip}')"
+   echo "."
+done
+echo "External IP - $external_ip"
+echo
+
+# Run the script
+/usr/bin/python es_test_data.py --count=1000 --format=Txn_ID:int,Item_Code:int,Item_ID:int,User_Name:str,last_updated:ts,Price:int,Title:str,Description:str,Age:int,Item_Name:str,Item_Price:int,Availability:bool,Batch_Num:int,Last_Ordered:tstxt,City:text --es_url=http://$external_ip:9200 &
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/beam/blob/34b95076/.test-infra/kubernetes/elasticsearch/es_test_data.py
----------------------------------------------------------------------
diff --git a/.test-infra/kubernetes/elasticsearch/es_test_data.py b/.test-infra/kubernetes/elasticsearch/es_test_data.py
new file mode 100644
index 0000000..1658e2c
--- /dev/null
+++ b/.test-infra/kubernetes/elasticsearch/es_test_data.py
@@ -0,0 +1,299 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Script to populate data on Elasticsearch
+# Hashcode for 1000 records is ed36c09b5e24a95fd8d3cc711a043a85320bb47d, 
+# For test with query to select one record from 1000 docs, 
+# hashcode is 83c108ff81e87b6f3807c638e6bb9a9e3d430dc7
+# Hashcode for 50m records (~20 gigs) is aff7390ee25c4c330f0a58dfbfe335421b11e405 
+#!/usr/bin/python
+
+import json
+import time
+import logging
+import random
+import string
+import uuid
+import datetime
+
+import tornado.gen
+import tornado.httpclient
+import tornado.ioloop
+import tornado.options
+
+async_http_client = tornado.httpclient.AsyncHTTPClient()
+id_counter = 0
+upload_data_count = 0
+_dict_data = None
+
+
+
+def delete_index(idx_name):
+    try:
+        url = "%s/%s?refresh=true" % (tornado.options.options.es_url, idx_name)
+        request = tornado.httpclient.HTTPRequest(url, method="DELETE", request_timeout=240, 
+                                                 auth_username=tornado.options.options.username, 
+                                                 auth_password=tornado.options.options.password)
+        response = tornado.httpclient.HTTPClient().fetch(request)
+        logging.info('Deleting index  "%s" done   %s' % (idx_name, response.body))
+    except tornado.httpclient.HTTPError:
+        pass
+
+
+def create_index(idx_name):
+    schema = {
+        "settings": {
+            "number_of_shards":   tornado.options.options.num_of_shards,
+            "number_of_replicas": tornado.options.options.num_of_replicas
+        },
+        "refresh": True
+    }
+
+    body = json.dumps(schema)
+    url = "%s/%s" % (tornado.options.options.es_url, idx_name)
+    try:
+        logging.info('Trying to create index %s' % (url))
+        request = tornado.httpclient.HTTPRequest(url, method="PUT", body=body, request_timeout=240,
+                                                 auth_username=tornado.options.options.username, 
+                                                 auth_password=tornado.options.options.password)
+        response = tornado.httpclient.HTTPClient().fetch(request)
+        logging.info('Creating index "%s" done   %s' % (idx_name, response.body))
+    except tornado.httpclient.HTTPError:
+        logging.info('Looks like the index exists already')
+        pass
+
+
+@tornado.gen.coroutine
+def upload_batch(upload_data_txt):
+    try:
+        request = tornado.httpclient.HTTPRequest(tornado.options.options.es_url + "/_bulk",
+                                                 method="POST", body=upload_data_txt,
+                                                 request_timeout=
+                                                 tornado.options.options.http_upload_timeout,
+                                                 auth_username=tornado.options.options.username, 
+                                                 auth_password=tornado.options.options.password)
+        response = yield async_http_client.fetch(request)
+    except Exception as ex:
+        logging.error("upload failed, error: %s" % ex)
+        return
+
+    result = json.loads(response.body.decode('utf-8'))
+    res_txt = "OK" if not result['errors'] else "FAILED"
+    took = int(result['took'])
+    logging.info("Upload: %s - upload took: %5dms, total docs uploaded: %7d" % (res_txt, took, 
+                                                                                upload_data_count))
+
+
+def get_data_for_format(format,count):
+    split_f = format.split(":")
+    if not split_f:
+        return None, None
+
+    field_name = split_f[0]
+    field_type = split_f[1]
+
+    return_val = ''
+
+    if field_type == "bool":
+        if count%2 == 0:
+           return_val = True
+        else:
+           return_val = False
+
+    elif field_type == "str":
+        return_val = field_name + str(count)
+
+    elif field_type == "int":
+        return_val = count
+    
+    elif field_type == "ipv4":
+        return_val = "{0}.{1}.{2}.{3}".format(1,2,3,count%255)
+
+    elif field_type in ["ts", "tstxt"]:
+        return_val = int(count * 1000) if field_type == "ts" else\
+        			 datetime.datetime.fromtimestamp(count)\
+        			 .strftime("%Y-%m-%dT%H:%M:%S.000-0000")
+
+    elif field_type == "words":
+        return_val = field_name + str(count)
+
+    elif field_type == "dict":
+        mydict = dict(a=field_name + str(count), b=field_name + str(count), c=field_name + str(count),
+                      d=field_name + str(count), e=field_name + str(count), f=field_name + str(count),
+                      g=field_name + str(count), h=field_name + str(count), i=field_name + str(count), 
+                      j=field_name + str(count))
+        return_val = ", ".join("=".join(_) for _ in mydict.items())
+
+    elif field_type == "text":
+        return_val = field_name + str(count)
+
+    return field_name, return_val
+
+
+def generate_count(min, max):
+    if min == max:
+        return max
+    elif min > max:
+        return random.randrange(max, min);
+    else:
+        return random.randrange(min, max);
+
+
+def generate_random_doc(format,count):
+    global id_counter
+
+    res = {}
+
+    for f in format:
+        f_key, f_val = get_data_for_format(f,count)
+        if f_key:
+            res[f_key] = f_val
+
+    if not tornado.options.options.id_type:
+        return res
+
+    if tornado.options.options.id_type == 'int':
+        res['_id'] = id_counter
+        id_counter += 1
+    elif tornado.options.options.id_type == 'uuid4':
+        res['_id'] = str(uuid.uuid4())
+
+    return res
+
+
+def set_index_refresh(val):
+
+    params = {"index": {"refresh_interval": val}}
+    body = json.dumps(params)
+    url = "%s/%s/_settings" % (tornado.options.options.es_url, tornado.options.options.index_name)
+    try:
+        request = tornado.httpclient.HTTPRequest(url, method="PUT", body=body, request_timeout=240,
+                                                 auth_username=tornado.options.options.username, 
+                                                 auth_password=tornado.options.options.password)
+        http_client = tornado.httpclient.HTTPClient()
+        http_client.fetch(request)
+        logging.info('Set index refresh to %s' % val)
+    except Exception as ex:
+        logging.exception(ex)
+
+
+@tornado.gen.coroutine
+def generate_test_data():
+
+    global upload_data_count
+
+    if tornado.options.options.force_init_index:
+        delete_index(tornado.options.options.index_name)
+
+    create_index(tornado.options.options.index_name)
+
+    # todo: query what refresh is set to, then restore later
+    if tornado.options.options.set_refresh:
+        set_index_refresh("-1")
+
+    if tornado.options.options.out_file:
+        out_file = open(tornado.options.options.out_file, "w")
+    else:
+        out_file = None
+
+    if tornado.options.options.dict_file:
+        global _dict_data
+        with open(tornado.options.options.dict_file, 'r') as f:
+            _dict_data = f.readlines()
+        logging.info("Loaded %d words from the %s" % (len(_dict_data), 
+                                                      tornado.options.options.dict_file))
+
+    format = tornado.options.options.format.split(',')
+    if not format:
+        logging.error('invalid format')
+        exit(1)
+
+    ts_start = int(time.time())
+    upload_data_txt = ""
+    total_uploaded = 0
+
+    logging.info("Generating %d docs, upload batch size is %d" % (tornado.options.options.count,
+                                                                  tornado.options
+                                                                  .options.batch_size))
+    for num in range(0, tornado.options.options.count):
+
+        item = generate_random_doc(format,num)
+
+        if out_file:
+            out_file.write("%s\n" % json.dumps(item))
+
+        cmd = {'index': {'_index': tornado.options.options.index_name,
+                         '_type': tornado.options.options.index_type}}
+        if '_id' in item:
+            cmd['index']['_id'] = item['_id']
+
+        upload_data_txt += json.dumps(cmd) + "\n"
+        upload_data_txt += json.dumps(item) + "\n"
+        upload_data_count += 1
+
+        if upload_data_count % tornado.options.options.batch_size == 0:
+            yield upload_batch(upload_data_txt)
+            upload_data_txt = ""
+
+    # upload remaining items in `upload_data_txt`
+    if upload_data_txt:
+        yield upload_batch(upload_data_txt)
+
+    if tornado.options.options.set_refresh:
+        set_index_refresh("1s")
+
+    if out_file:
+        out_file.close()
+
+    took_secs = int(time.time() - ts_start)
+
+    logging.info("Done - total docs uploaded: %d, took %d seconds" % 
+    			 (tornado.options.options.count, took_secs))
+
+
+if __name__ == '__main__':
+    tornado.options.define("es_url", type=str, default='http://localhost:9200/', 
+                           help="URL of your Elasticsearch node")
+    tornado.options.define("index_name", type=str, default='test_data', 
+                           help="Name of the index to store your messages")
+    tornado.options.define("index_type", type=str, default='test_type', help="Type")
+    tornado.options.define("batch_size", type=int, default=1000, 
+                           help="Elasticsearch bulk index batch size")
+    tornado.options.define("num_of_shards", type=int, default=2, 
+                           help="Number of shards for ES index")
+    tornado.options.define("http_upload_timeout", type=int, default=3, 
+                           help="Timeout in seconds when uploading data")
+    tornado.options.define("count", type=int, default=100000, help="Number of docs to generate")
+    tornado.options.define("format", type=str, default='name:str,age:int,last_updated:ts', 
+                           help="message format")
+    tornado.options.define("num_of_replicas", type=int, default=0, 
+                           help="Number of replicas for ES index")
+    tornado.options.define("force_init_index", type=bool, default=False, 
+                           help="Force deleting and re-initializing the Elasticsearch index")
+    tornado.options.define("set_refresh", type=bool, default=False, 
+                           help="Set refresh rate to -1 before starting the upload")
+    tornado.options.define("out_file", type=str, default=False, 
+                           help="If set, write test data to out_file as well.")
+    tornado.options.define("id_type", type=str, default=None, 
+                           help="Type of 'id' to use for the docs, \
+                           valid settings are int and uuid4, None is default")
+    tornado.options.define("dict_file", type=str, default=None, 
+                           help="Name of dictionary file to use")
+    tornado.options.define("username", type=str, default=None, help="Username for elasticsearch")
+    tornado.options.define("password", type=str, default=None, help="Password for elasticsearch")
+    tornado.options.parse_command_line()
+
+    tornado.ioloop.IOLoop.instance().run_sync(generate_test_data)

http://git-wip-us.apache.org/repos/asf/beam/blob/34b95076/.test-infra/kubernetes/elasticsearch/show-health.sh
----------------------------------------------------------------------
diff --git a/.test-infra/kubernetes/elasticsearch/show-health.sh b/.test-infra/kubernetes/elasticsearch/show-health.sh
new file mode 100644
index 0000000..8fa912c
--- /dev/null
+++ b/.test-infra/kubernetes/elasticsearch/show-health.sh
@@ -0,0 +1,25 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#!/bin/sh
+set -e
+
+external_ip="$(kubectl get svc elasticsearch -o jsonpath='{.status.loadBalancer.ingress[0].ip}')"
+
+echo "Elasticsearch cluster health info"
+echo "---------------------------------"
+curl $external_ip:9200/_cluster/health
+echo # empty line since curl doesn't output CRLF
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/beam/blob/34b95076/sdks/java/io/hadoop/jdk1.8-tests/src/test/resources/kubernetes/cassandra/SmallITCluster/cassandra-svc-rc.yaml
----------------------------------------------------------------------
diff --git a/sdks/java/io/hadoop/jdk1.8-tests/src/test/resources/kubernetes/cassandra/SmallITCluster/cassandra-svc-rc.yaml b/sdks/java/io/hadoop/jdk1.8-tests/src/test/resources/kubernetes/cassandra/SmallITCluster/cassandra-svc-rc.yaml
deleted file mode 100644
index 7c36e34..0000000
--- a/sdks/java/io/hadoop/jdk1.8-tests/src/test/resources/kubernetes/cassandra/SmallITCluster/cassandra-svc-rc.yaml
+++ /dev/null
@@ -1,88 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# Headless service that allows us to get the IP addresses of our Cassandra nodes
-apiVersion: v1
-kind: Service
-metadata:
-  labels:
-    name: cassandra-peers
-  name: cassandra-peers
-spec:
-  clusterIP: None
-  ports:
-    - port: 7000
-      name: intra-node-communication
-    - port: 7001
-      name: tls-intra-node-communication
-  selector:
-    name: cassandra
----
-# Kubernetes service file exposing Cassandra endpoint used by clients.
-apiVersion: v1
-kind: Service
-metadata:
-  labels:
-    name: cassandra
-  name: cassandra
-spec:
-  ports:
-    - port: 9042
-      name: cql
-  selector:
-    name: cassandra
-  type: LoadBalancer
----
-# Replication Controller for Cassandra which tracks the Cassandra pods.
-apiVersion: v1
-kind: ReplicationController
-metadata:
-  labels:
-    name: cassandra
-  name: cassandra
-spec:
-  replicas: 1
-  selector:
-    name: cassandra
-  template:
-    metadata:
-      labels:
-        name: cassandra
-    spec:
-      containers:
-        - image: cassandra
-          name: cassandra
-          env:
-            - name: PEER_DISCOVERY_SERVICE
-              value: cassandra-peers
-            - name: CASSANDRA_CLUSTER_NAME
-              value: Cassandra
-            - name: CASSANDRA_DC
-              value: DC1
-            - name: CASSANDRA_RACK
-              value: Kubernetes Cluster
-# Number of tokens currently configured to 1. If this is not configured, default value is 256. You can change it as per requirement.			  
-            - name: CASSANDRA_NUM_TOKENS
-              value: '1'
-          ports:
-            - containerPort: 9042
-              name: cql
-          volumeMounts:
-            - mountPath: /var/lib/cassandra/data
-              name: data
-      volumes:
-        - name: data
-          emptyDir: {}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/beam/blob/34b95076/sdks/java/io/hadoop/jdk1.8-tests/src/test/resources/kubernetes/cassandra/SmallITCluster/start-up.sh
----------------------------------------------------------------------
diff --git a/sdks/java/io/hadoop/jdk1.8-tests/src/test/resources/kubernetes/cassandra/SmallITCluster/start-up.sh b/sdks/java/io/hadoop/jdk1.8-tests/src/test/resources/kubernetes/cassandra/SmallITCluster/start-up.sh
deleted file mode 100644
index c05b771..0000000
--- a/sdks/java/io/hadoop/jdk1.8-tests/src/test/resources/kubernetes/cassandra/SmallITCluster/start-up.sh
+++ /dev/null
@@ -1,21 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-#!/bin/bash
-set -e
-
-# Create Cassandra services and Replication controller.
-kubectl create -f cassandra-svc-rc.yaml

http://git-wip-us.apache.org/repos/asf/beam/blob/34b95076/sdks/java/io/hadoop/jdk1.8-tests/src/test/resources/kubernetes/cassandra/SmallITCluster/teardown.sh
----------------------------------------------------------------------
diff --git a/sdks/java/io/hadoop/jdk1.8-tests/src/test/resources/kubernetes/cassandra/SmallITCluster/teardown.sh b/sdks/java/io/hadoop/jdk1.8-tests/src/test/resources/kubernetes/cassandra/SmallITCluster/teardown.sh
deleted file mode 100644
index f538a75..0000000
--- a/sdks/java/io/hadoop/jdk1.8-tests/src/test/resources/kubernetes/cassandra/SmallITCluster/teardown.sh
+++ /dev/null
@@ -1,21 +0,0 @@
-#
-#    Licensed to the Apache Software Foundation (ASF) under one or more
-#    contributor license agreements.  See the NOTICE file distributed with
-#    this work for additional information regarding copyright ownership.
-#    The ASF licenses this file to You under the Apache License, Version 2.0
-#    (the "License"); you may not use this file except in compliance with
-#    the License.  You may obtain a copy of the License at
-#
-#       http://www.apache.org/licenses/LICENSE-2.0
-#
-#    Unless required by applicable law or agreed to in writing, software
-#    distributed under the License is distributed on an "AS IS" BASIS,
-#    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#    See the License for the specific language governing permissions and
-#    limitations under the License.
-#
-#!/bin/bash
-set -e
-
-# Delete Cassandra services and Replication controller.
-kubectl delete -f cassandra-svc-rc.yaml

http://git-wip-us.apache.org/repos/asf/beam/blob/34b95076/sdks/java/io/hadoop/jdk1.8-tests/src/test/resources/kubernetes/cassandra/data-load-setup.sh
----------------------------------------------------------------------
diff --git a/sdks/java/io/hadoop/jdk1.8-tests/src/test/resources/kubernetes/cassandra/data-load-setup.sh b/sdks/java/io/hadoop/jdk1.8-tests/src/test/resources/kubernetes/cassandra/data-load-setup.sh
deleted file mode 100644
index 4e12f89..0000000
--- a/sdks/java/io/hadoop/jdk1.8-tests/src/test/resources/kubernetes/cassandra/data-load-setup.sh
+++ /dev/null
@@ -1,29 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-#!/bin/bash
-set -e
-
-# Load YCSB tool
-echo "Downloading YCSB tool"
-echo "------------------------------"
-curl -O --location https://github.com/brianfrankcooper/YCSB/releases/download/0.12.0/ycsb-0.12.0.tar.gz
-tar xfz ycsb-0.12.0.tar.gz
-wget https://www.slf4j.org/dist/slf4j-1.7.22.tar.gz
-tar xfz slf4j-1.7.22.tar.gz
-cp slf4j-1.7.22/slf4j-simple-*.jar ycsb-0.12.0/lib/
-cp slf4j-1.7.22/slf4j-api-*.jar ycsb-0.12.0/lib/
-echo "YCSB tool loaded"

http://git-wip-us.apache.org/repos/asf/beam/blob/34b95076/sdks/java/io/hadoop/jdk1.8-tests/src/test/resources/kubernetes/cassandra/data-load.sh
----------------------------------------------------------------------
diff --git a/sdks/java/io/hadoop/jdk1.8-tests/src/test/resources/kubernetes/cassandra/data-load.sh b/sdks/java/io/hadoop/jdk1.8-tests/src/test/resources/kubernetes/cassandra/data-load.sh
deleted file mode 100644
index 59d0e22..0000000
--- a/sdks/java/io/hadoop/jdk1.8-tests/src/test/resources/kubernetes/cassandra/data-load.sh
+++ /dev/null
@@ -1,67 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-#!/bin/bash
-set -e
-
-recordcount=1000
-# Identify the pod
-cassandra_pods="kubectl get pods -l name=cassandra"
-running_seed="$(kubectl get pods -o json -l name=cassandra -o jsonpath=\
-'{.items[0].metadata.name}')"
-echo "Detected Running Pod $running_seed"
-
-# After starting the service, it takes couple of minutes to generate the external IP for the
-# service. Hence, wait for sometime.
-
-# Identify external IP of the pod
-external_ip="$(kubectl get svc cassandra -o jsonpath='{.status.loadBalancer.ingress[0].ip}')"
-echo "Waiting for the Cassandra service to come up ........"
-while [ -z "$external_ip" ]
-do
-   sleep 10s
-   external_ip="$(kubectl get svc cassandra -o jsonpath='{.status.loadBalancer.ingress[0].ip}')"
-   echo "."
-done
-echo "External IP - $external_ip"
-
-# Create keyspace
-keyspace_creation_command="drop keyspace if exists ycsb;create keyspace ycsb WITH REPLICATION = {\
-'class' : 'SimpleStrategy', 'replication_factor': 3 };"
-kubectl exec -ti $running_seed -- cqlsh -e "$keyspace_creation_command"
-echo "Keyspace creation............"
-echo "-----------------------------"
-echo "$keyspace_creation_command"
-echo
-
-# Create table
-table_creation_command="use ycsb;drop table if exists usertable;create table usertable (\
-y_id varchar primary key,field0 varchar,field1 varchar,field2 varchar,field3 varchar,\
-field4 varchar,field5 varchar,field6 varchar,field7 varchar,field8 varchar,field9 varchar);"
-kubectl exec -ti $running_seed -- cqlsh -e "$table_creation_command"
-echo "Table creation .............."
-echo "-----------------------------"
-echo "$table_creation_command"
-
-cd ycsb-0.12.0
-
-echo "Starting to load data on ${external_ip}"
-echo "-----------------------------"
-# Record count set to 1000, change this value to load as per requirement.
-# dataintegrity flag is set to true to load deterministic data
-./bin/ycsb load cassandra-cql -p hosts=${external_ip} -p dataintegrity=true -p recordcount=\
-${recordcount} -p insertorder=ordered -p fieldlength=20 -P workloads/workloadd \
--s > workloada_load_res.txt

http://git-wip-us.apache.org/repos/asf/beam/blob/34b95076/sdks/java/io/hadoop/jdk1.8-tests/src/test/resources/kubernetes/elasticsearch/LargeProductionCluster/es-services.yaml
----------------------------------------------------------------------
diff --git a/sdks/java/io/hadoop/jdk1.8-tests/src/test/resources/kubernetes/elasticsearch/LargeProductionCluster/es-services.yaml b/sdks/java/io/hadoop/jdk1.8-tests/src/test/resources/kubernetes/elasticsearch/LargeProductionCluster/es-services.yaml
deleted file mode 100644
index 38c820e..0000000
--- a/sdks/java/io/hadoop/jdk1.8-tests/src/test/resources/kubernetes/elasticsearch/LargeProductionCluster/es-services.yaml
+++ /dev/null
@@ -1,277 +0,0 @@
-#    Licensed to the Apache Software Foundation (ASF) under one or more
-#    contributor license agreements.  See the NOTICE file distributed with
-#    this work for additional information regarding copyright ownership.
-#    The ASF licenses this file to You under the Apache License, Version 2.0
-#    (the "License"); you may not use this file except in compliance with
-#    the License.  You may obtain a copy of the License at
-#
-#       http://www.apache.org/licenses/LICENSE-2.0
-#
-#    Unless required by applicable law or agreed to in writing, software
-#    distributed under the License is distributed on an "AS IS" BASIS,
-#    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#    See the License for the specific language governing permissions and
-#    limitations under the License.
-
-# Service file containing services for ES discovery, elasticsearch and master node deployment.
-
-# Kubernetes headless service for Elasticsearch discovery of nodes.
-apiVersion: v1
-kind: Service
-metadata:
-  name: elasticsearch-discovery
-  labels:
-    component: elasticsearch
-    role: master
-spec:
-  selector:
-    component: elasticsearch
-    role: master
-  ports:
-  - name: transport
-    port: 9300
-    protocol: TCP
----
-# To create Elasticsearch frontend cluster Kubernetes service.
-# It sets up a load balancer on TCP port 9200 that distributes network traffic to the ES client nodes.
-apiVersion: v1
-kind: Service
-metadata:
-  name: elasticsearch
-  labels:
-    component: elasticsearch
-    role: client
-spec:
-  type: LoadBalancer
-  selector:
-    component: elasticsearch
-    role: client
-  ports:
-  - name: http
-    port: 9200
-    protocol: TCP
----
-# The Kubernetes deployment script for Elasticsearch master nodes.
-apiVersion: extensions/v1beta1
-kind: Deployment
-metadata:
-  name: es-master
-  labels:
-    component: elasticsearch
-    role: master
-spec:
-  replicas: 3
-  template:
-    metadata:
-      labels:
-        component: elasticsearch
-        role: master
-      annotations:
-        pod.beta.kubernetes.io/init-containers: '[
-          {
-          "name": "sysctl",
-            "image": "busybox",
-            "imagePullPolicy": "IfNotPresent",
-            "command": ["sysctl", "-w", "vm.max_map_count=262144"],
-            "securityContext": {
-              "privileged": true
-            }
-          }
-        ]'
-    spec:
-      containers:
-      - name: es-master
-        securityContext:
-          privileged: false
-          capabilities:
-            add:
-# IPC_LOCK capability is enabled to allow Elasticsearch to lock the heap in memory so it will not be swapped.
-              - IPC_LOCK
-# SYS_RESOURCE is docker capability key to control and override the resource limits.
-# This could be needed to increase base limits.(e.g. File descriptor limit for elasticsearch)
-              - SYS_RESOURCE
-        image: quay.io/pires/docker-elasticsearch-kubernetes:5.2.2
-        env:
-        - name: NAMESPACE
-          valueFrom:
-            fieldRef:
-              fieldPath: metadata.namespace
-        - name: NODE_NAME
-          valueFrom:
-            fieldRef:
-              fieldPath: metadata.name
-        - name: "CLUSTER_NAME"
-          value: "myesdb"
-        - name: "NUMBER_OF_MASTERS"
-          value: "2"
-        - name: NODE_MASTER
-          value: "true"
-        - name: NODE_INGEST
-          value: "false"
-        - name: NODE_DATA
-          value: "false"
-        - name: HTTP_ENABLE
-          value: "false"
-        - name: "ES_JAVA_OPTS"
-          value: "-Xms256m -Xmx256m"
-        ports:
-        - containerPort: 9300
-          name: transport
-          protocol: TCP
-        volumeMounts:
-        - name: storage
-          mountPath: /data
-      volumes:
-          - emptyDir:
-              medium: ""
-            name: "storage"
----
-# Kubernetes deployment script for Elasticsearch client nodes (aka load balancing proxies).
-apiVersion: extensions/v1beta1
-kind: Deployment
-metadata:
-  name: es-client
-  labels:
-    component: elasticsearch
-    role: client
-spec:
-  # The no. of replicas can be incremented based on the client usage using HTTP API.
-  replicas: 1
-  template:
-    metadata:
-      labels:
-        component: elasticsearch
-        role: client
-      annotations:
-      # Elasticsearch uses a hybrid mmapfs / niofs directory by default to store its indices.
-      # The default operating system limits on mmap counts is likely to be too low, which may result
-      # in out of memory exceptions. Therefore, the need to increase virtual memory
-      # vm.max_map_count for large amount of data in the pod initialization annotation.
-        pod.beta.kubernetes.io/init-containers: '[
-          {
-          "name": "sysctl",
-            "image": "busybox",
-            "imagePullPolicy": "IfNotPresent",
-            "command": ["sysctl", "-w", "vm.max_map_count=262144"],
-            "securityContext": {
-              "privileged": true
-            }
-          }
-        ]'
-    spec:
-      containers:
-      - name: es-client
-        securityContext:
-          privileged: false
-          capabilities:
-            add:
-# IPC_LOCK capability is enabled to allow Elasticsearch to lock the heap in memory so it will not be swapped.
-              - IPC_LOCK
-# SYS_RESOURCE is docker capability key to control and override the resource limits.
-# This could be needed to increase base limits.(e.g. File descriptor limit for elasticsearch)
-              - SYS_RESOURCE
-        image: quay.io/pires/docker-elasticsearch-kubernetes:5.2.2
-        env:
-        - name: NAMESPACE
-          valueFrom:
-            fieldRef:
-              fieldPath: metadata.namespace
-        - name: NODE_NAME
-          valueFrom:
-            fieldRef:
-              fieldPath: metadata.name
-        - name: "CLUSTER_NAME"
-          value: "myesdb"
-        - name: NODE_MASTER
-          value: "false"
-        - name: NODE_DATA
-          value: "false"
-        - name: HTTP_ENABLE
-          value: "true"
-        - name: "ES_JAVA_OPTS"
-          value: "-Xms256m -Xmx256m"
-        ports:
-        - containerPort: 9200
-          name: http
-          protocol: TCP
-        - containerPort: 9300
-          name: transport
-          protocol: TCP
-        volumeMounts:
-        - name: storage
-          mountPath: /data
-      volumes:
-          - emptyDir:
-              medium: ""
-            name: "storage"
----
-# Kubernetes deployment script for Elasticsearch data nodes which store and index data.
-apiVersion: extensions/v1beta1
-kind: Deployment
-metadata:
-  name: es-data
-  labels:
-    component: elasticsearch
-    role: data
-spec:
-  replicas: 2
-  template:
-    metadata:
-      labels:
-        component: elasticsearch
-        role: data
-      annotations:
-        pod.beta.kubernetes.io/init-containers: '[
-          {
-          "name": "sysctl",
-            "image": "busybox",
-            "imagePullPolicy": "IfNotPresent",
-            "command": ["sysctl", "-w", "vm.max_map_count=1048575"],
-            "securityContext": {
-              "privileged": true
-            }
-          }
-        ]'
-    spec:
-      containers:
-      - name: es-data
-        securityContext:
-          privileged: false
-          capabilities:
-            add:
-# IPC_LOCK capability is enabled to allow Elasticsearch to lock the heap in memory so it will not be swapped.
-              - IPC_LOCK
-# SYS_RESOURCE is docker capability key to control and override the resource limits.
-# This could be needed to increase base limits.(e.g. File descriptor limit for elasticsearch)
-              - SYS_RESOURCE
-        image: quay.io/pires/docker-elasticsearch-kubernetes:5.2.2
-        env:
-        - name: NAMESPACE
-          valueFrom:
-            fieldRef:
-              fieldPath: metadata.namespace
-        - name: NODE_NAME
-          valueFrom:
-            fieldRef:
-              fieldPath: metadata.name
-        - name: "CLUSTER_NAME"
-          value: "myesdb"
-        - name: NODE_MASTER
-          value: "false"
-        - name: NODE_INGEST
-          value: "false"
-        - name: HTTP_ENABLE
-          value: "false"
-        - name: "ES_JAVA_OPTS"
-          value: "-Xms256m -Xmx256m"
-        ports:
-        - containerPort: 9300
-          name: transport
-          protocol: TCP
-        volumeMounts:
-        - name: storage
-          mountPath: /data
-      volumes:
-          - emptyDir:
-              medium: ""
-            name: "storage"
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/beam/blob/34b95076/sdks/java/io/hadoop/jdk1.8-tests/src/test/resources/kubernetes/elasticsearch/LargeProductionCluster/start-up.sh
----------------------------------------------------------------------
diff --git a/sdks/java/io/hadoop/jdk1.8-tests/src/test/resources/kubernetes/elasticsearch/LargeProductionCluster/start-up.sh b/sdks/java/io/hadoop/jdk1.8-tests/src/test/resources/kubernetes/elasticsearch/LargeProductionCluster/start-up.sh
deleted file mode 100644
index 4d277c8..0000000
--- a/sdks/java/io/hadoop/jdk1.8-tests/src/test/resources/kubernetes/elasticsearch/LargeProductionCluster/start-up.sh
+++ /dev/null
@@ -1,21 +0,0 @@
-#    Licensed to the Apache Software Foundation (ASF) under one or more
-#    contributor license agreements.  See the NOTICE file distributed with
-#    this work for additional information regarding copyright ownership.
-#    The ASF licenses this file to You under the Apache License, Version 2.0
-#    (the "License"); you may not use this file except in compliance with
-#    the License.  You may obtain a copy of the License at
-#
-#       http://www.apache.org/licenses/LICENSE-2.0
-#
-#    Unless required by applicable law or agreed to in writing, software
-#    distributed under the License is distributed on an "AS IS" BASIS,
-#    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#    See the License for the specific language governing permissions and
-#    limitations under the License.
-#
-
-#!/bin/sh
-set -e
-
-# Create Elasticsearch services and deployments.
-kubectl create -f es-services.yaml

http://git-wip-us.apache.org/repos/asf/beam/blob/34b95076/sdks/java/io/hadoop/jdk1.8-tests/src/test/resources/kubernetes/elasticsearch/LargeProductionCluster/teardown.sh
----------------------------------------------------------------------
diff --git a/sdks/java/io/hadoop/jdk1.8-tests/src/test/resources/kubernetes/elasticsearch/LargeProductionCluster/teardown.sh b/sdks/java/io/hadoop/jdk1.8-tests/src/test/resources/kubernetes/elasticsearch/LargeProductionCluster/teardown.sh
deleted file mode 100644
index a30793b..0000000
--- a/sdks/java/io/hadoop/jdk1.8-tests/src/test/resources/kubernetes/elasticsearch/LargeProductionCluster/teardown.sh
+++ /dev/null
@@ -1,20 +0,0 @@
-#    Licensed to the Apache Software Foundation (ASF) under one or more
-#    contributor license agreements.  See the NOTICE file distributed with
-#    this work for additional information regarding copyright ownership.
-#    The ASF licenses this file to You under the Apache License, Version 2.0
-#    (the "License"); you may not use this file except in compliance with
-#    the License.  You may obtain a copy of the License at
-#
-#       http://www.apache.org/licenses/LICENSE-2.0
-#
-#    Unless required by applicable law or agreed to in writing, software
-#    distributed under the License is distributed on an "AS IS" BASIS,
-#    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#    See the License for the specific language governing permissions and
-#    limitations under the License.
-
-#!/bin/bash
-set -e
-
-# Delete elasticsearch services and deployments.
-kubectl delete -f es-services.yaml
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/beam/blob/34b95076/sdks/java/io/hadoop/jdk1.8-tests/src/test/resources/kubernetes/elasticsearch/SmallITCluster/elasticsearch-svc-rc.yaml
----------------------------------------------------------------------
diff --git a/sdks/java/io/hadoop/jdk1.8-tests/src/test/resources/kubernetes/elasticsearch/SmallITCluster/elasticsearch-svc-rc.yaml b/sdks/java/io/hadoop/jdk1.8-tests/src/test/resources/kubernetes/elasticsearch/SmallITCluster/elasticsearch-svc-rc.yaml
deleted file mode 100644
index 9a7ac3d..0000000
--- a/sdks/java/io/hadoop/jdk1.8-tests/src/test/resources/kubernetes/elasticsearch/SmallITCluster/elasticsearch-svc-rc.yaml
+++ /dev/null
@@ -1,84 +0,0 @@
-#    Licensed to the Apache Software Foundation (ASF) under one or more
-#    contributor license agreements.  See the NOTICE file distributed with
-#    this work for additional information regarding copyright ownership.
-#    The ASF licenses this file to You under the Apache License, Version 2.0
-#    (the "License"); you may not use this file except in compliance with
-#    the License.  You may obtain a copy of the License at
-#
-#       http://www.apache.org/licenses/LICENSE-2.0
-#
-#    Unless required by applicable law or agreed to in writing, software
-#    distributed under the License is distributed on an "AS IS" BASIS,
-#    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#    See the License for the specific language governing permissions and
-#    limitations under the License.
-
-# To create Elasticsearch frontend cluster Kubernetes service. 
-# It sets up a load balancer on TCP port 9200 that distributes network traffic to the ES nodes.
-apiVersion: v1
-kind: Service
-metadata:
-  name: elasticsearch
-  labels:
-    component: elasticsearch
-spec:
-  type: LoadBalancer
-  selector:
-    component: elasticsearch
-  ports:
-  - name: http
-    port: 9200
-    protocol: TCP
-  - name: transport
-    port: 9300
-    protocol: TCP
----
-# The Kubernetes deployment script for Elasticsearch replication nodes. It will create 1 node cluster.
-# To scale the cluster as desired, you can create replicas of node use 'kubectl scale --replicas=3 rc es' command
-apiVersion: extensions/v1beta1
-kind: Deployment
-metadata:
-  name: es
-  labels:
-    component: elasticsearch
-spec:
-  replicas: 1
-  template:
-    metadata:
-      labels:
-        component: elasticsearch
-    spec:
-      containers:
-      - name: es
-        securityContext:
-          capabilities:
-            add:
-# IPC_LOCK capability is enabled to allow Elasticsearch to lock the heap in memory so it will not be swapped.   
-              - IPC_LOCK
-# SYS_RESOURCE capability is set to control and override various resource limits.
-              - SYS_RESOURCE
-        image: quay.io/pires/docker-elasticsearch-kubernetes:5.2.2
-        env:
-        - name: "CLUSTER_NAME"
-          value: "myesdb"
-        - name: "DISCOVERY_SERVICE"
-          value: "elasticsearch"
-        - name: NODE_MASTER
-          value: "true"
-        - name: NODE_DATA
-          value: "true"
-        - name: HTTP_ENABLE
-          value: "true"
-        ports:
-        - containerPort: 9200
-          name: http
-          protocol: TCP
-        - containerPort: 9300
-          name: transport
-          protocol: TCP
-        volumeMounts:
-        - mountPath: /data
-          name: storage
-      volumes:
-      - name: storage
-        emptyDir: {}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/beam/blob/34b95076/sdks/java/io/hadoop/jdk1.8-tests/src/test/resources/kubernetes/elasticsearch/SmallITCluster/start-up.sh
----------------------------------------------------------------------
diff --git a/sdks/java/io/hadoop/jdk1.8-tests/src/test/resources/kubernetes/elasticsearch/SmallITCluster/start-up.sh b/sdks/java/io/hadoop/jdk1.8-tests/src/test/resources/kubernetes/elasticsearch/SmallITCluster/start-up.sh
deleted file mode 100644
index e8cf275..0000000
--- a/sdks/java/io/hadoop/jdk1.8-tests/src/test/resources/kubernetes/elasticsearch/SmallITCluster/start-up.sh
+++ /dev/null
@@ -1,22 +0,0 @@
-#    Licensed to the Apache Software Foundation (ASF) under one or more
-#    contributor license agreements.  See the NOTICE file distributed with
-#    this work for additional information regarding copyright ownership.
-#    The ASF licenses this file to You under the Apache License, Version 2.0
-#    (the "License"); you may not use this file except in compliance with
-#    the License.  You may obtain a copy of the License at
-#
-#       http://www.apache.org/licenses/LICENSE-2.0
-#
-#    Unless required by applicable law or agreed to in writing, software
-#    distributed under the License is distributed on an "AS IS" BASIS,
-#    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#    See the License for the specific language governing permissions and
-#    limitations under the License.
-#
-
-#!/bin/sh
-set -e
-
-# Create Elasticsearch services and deployments.
-kubectl create -f elasticsearch-svc-rc.yaml
-

http://git-wip-us.apache.org/repos/asf/beam/blob/34b95076/sdks/java/io/hadoop/jdk1.8-tests/src/test/resources/kubernetes/elasticsearch/SmallITCluster/teardown.sh
----------------------------------------------------------------------
diff --git a/sdks/java/io/hadoop/jdk1.8-tests/src/test/resources/kubernetes/elasticsearch/SmallITCluster/teardown.sh b/sdks/java/io/hadoop/jdk1.8-tests/src/test/resources/kubernetes/elasticsearch/SmallITCluster/teardown.sh
deleted file mode 100644
index 079141d..0000000
--- a/sdks/java/io/hadoop/jdk1.8-tests/src/test/resources/kubernetes/elasticsearch/SmallITCluster/teardown.sh
+++ /dev/null
@@ -1,20 +0,0 @@
-#    Licensed to the Apache Software Foundation (ASF) under one or more
-#    contributor license agreements.  See the NOTICE file distributed with
-#    this work for additional information regarding copyright ownership.
-#    The ASF licenses this file to You under the Apache License, Version 2.0
-#    (the "License"); you may not use this file except in compliance with
-#    the License.  You may obtain a copy of the License at
-#
-#       http://www.apache.org/licenses/LICENSE-2.0
-#
-#    Unless required by applicable law or agreed to in writing, software
-#    distributed under the License is distributed on an "AS IS" BASIS,
-#    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#    See the License for the specific language governing permissions and
-#    limitations under the License.
-
-#!/bin/bash
-set -e
-
-# Delete elasticsearch services and deployments.
-kubectl delete -f elasticsearch-svc-rc.yaml

http://git-wip-us.apache.org/repos/asf/beam/blob/34b95076/sdks/java/io/hadoop/jdk1.8-tests/src/test/resources/kubernetes/elasticsearch/data-load-setup.sh
----------------------------------------------------------------------
diff --git a/sdks/java/io/hadoop/jdk1.8-tests/src/test/resources/kubernetes/elasticsearch/data-load-setup.sh b/sdks/java/io/hadoop/jdk1.8-tests/src/test/resources/kubernetes/elasticsearch/data-load-setup.sh
deleted file mode 100644
index 00991bc..0000000
--- a/sdks/java/io/hadoop/jdk1.8-tests/src/test/resources/kubernetes/elasticsearch/data-load-setup.sh
+++ /dev/null
@@ -1,26 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-#!/bin/bash
-set -e
-
-# Install python
-sudo apt-get update
-sudo apt-get install python-pip
-sudo pip install --upgrade pip
-sudo apt-get install python-dev
-sudo pip install tornado numpy
-echo

http://git-wip-us.apache.org/repos/asf/beam/blob/34b95076/sdks/java/io/hadoop/jdk1.8-tests/src/test/resources/kubernetes/elasticsearch/data-load.sh
----------------------------------------------------------------------
diff --git a/sdks/java/io/hadoop/jdk1.8-tests/src/test/resources/kubernetes/elasticsearch/data-load.sh b/sdks/java/io/hadoop/jdk1.8-tests/src/test/resources/kubernetes/elasticsearch/data-load.sh
deleted file mode 100644
index 21150fb..0000000
--- a/sdks/java/io/hadoop/jdk1.8-tests/src/test/resources/kubernetes/elasticsearch/data-load.sh
+++ /dev/null
@@ -1,33 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-#!/bin/bash
-set -e
-
-# Identify external IP
-external_ip="$(kubectl get svc elasticsearch -o jsonpath='{.status.loadBalancer.ingress[0].ip}')"
-echo "Waiting for the Elasticsearch service to come up ........"
-while [ -z "$external_ip" ]
-do
-   sleep 10s
-   external_ip="$(kubectl get svc elasticsearch -o jsonpath='{.status.loadBalancer.ingress[0].ip}')"
-   echo "."
-done
-echo "External IP - $external_ip"
-echo
-
-# Run the script
-/usr/bin/python es_test_data.py --count=1000 --format=Txn_ID:int,Item_Code:int,Item_ID:int,User_Name:str,last_updated:ts,Price:int,Title:str,Description:str,Age:int,Item_Name:str,Item_Price:int,Availability:bool,Batch_Num:int,Last_Ordered:tstxt,City:text --es_url=http://$external_ip:9200 &
\ No newline at end of file


[3/6] beam git commit: Move HIFIO k8s scripts into shared dir

Posted by tg...@apache.org.
http://git-wip-us.apache.org/repos/asf/beam/blob/34b95076/sdks/java/io/hadoop/jdk1.8-tests/src/test/resources/kubernetes/elasticsearch/es_test_data.py
----------------------------------------------------------------------
diff --git a/sdks/java/io/hadoop/jdk1.8-tests/src/test/resources/kubernetes/elasticsearch/es_test_data.py b/sdks/java/io/hadoop/jdk1.8-tests/src/test/resources/kubernetes/elasticsearch/es_test_data.py
deleted file mode 100644
index 1658e2c..0000000
--- a/sdks/java/io/hadoop/jdk1.8-tests/src/test/resources/kubernetes/elasticsearch/es_test_data.py
+++ /dev/null
@@ -1,299 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# Script to populate data on Elasticsearch
-# Hashcode for 1000 records is ed36c09b5e24a95fd8d3cc711a043a85320bb47d, 
-# For test with query to select one record from 1000 docs, 
-# hashcode is 83c108ff81e87b6f3807c638e6bb9a9e3d430dc7
-# Hashcode for 50m records (~20 gigs) is aff7390ee25c4c330f0a58dfbfe335421b11e405 
-#!/usr/bin/python
-
-import json
-import time
-import logging
-import random
-import string
-import uuid
-import datetime
-
-import tornado.gen
-import tornado.httpclient
-import tornado.ioloop
-import tornado.options
-
-async_http_client = tornado.httpclient.AsyncHTTPClient()
-id_counter = 0
-upload_data_count = 0
-_dict_data = None
-
-
-
-def delete_index(idx_name):
-    try:
-        url = "%s/%s?refresh=true" % (tornado.options.options.es_url, idx_name)
-        request = tornado.httpclient.HTTPRequest(url, method="DELETE", request_timeout=240, 
-                                                 auth_username=tornado.options.options.username, 
-                                                 auth_password=tornado.options.options.password)
-        response = tornado.httpclient.HTTPClient().fetch(request)
-        logging.info('Deleting index  "%s" done   %s' % (idx_name, response.body))
-    except tornado.httpclient.HTTPError:
-        pass
-
-
-def create_index(idx_name):
-    schema = {
-        "settings": {
-            "number_of_shards":   tornado.options.options.num_of_shards,
-            "number_of_replicas": tornado.options.options.num_of_replicas
-        },
-        "refresh": True
-    }
-
-    body = json.dumps(schema)
-    url = "%s/%s" % (tornado.options.options.es_url, idx_name)
-    try:
-        logging.info('Trying to create index %s' % (url))
-        request = tornado.httpclient.HTTPRequest(url, method="PUT", body=body, request_timeout=240,
-                                                 auth_username=tornado.options.options.username, 
-                                                 auth_password=tornado.options.options.password)
-        response = tornado.httpclient.HTTPClient().fetch(request)
-        logging.info('Creating index "%s" done   %s' % (idx_name, response.body))
-    except tornado.httpclient.HTTPError:
-        logging.info('Looks like the index exists already')
-        pass
-
-
-@tornado.gen.coroutine
-def upload_batch(upload_data_txt):
-    try:
-        request = tornado.httpclient.HTTPRequest(tornado.options.options.es_url + "/_bulk",
-                                                 method="POST", body=upload_data_txt,
-                                                 request_timeout=
-                                                 tornado.options.options.http_upload_timeout,
-                                                 auth_username=tornado.options.options.username, 
-                                                 auth_password=tornado.options.options.password)
-        response = yield async_http_client.fetch(request)
-    except Exception as ex:
-        logging.error("upload failed, error: %s" % ex)
-        return
-
-    result = json.loads(response.body.decode('utf-8'))
-    res_txt = "OK" if not result['errors'] else "FAILED"
-    took = int(result['took'])
-    logging.info("Upload: %s - upload took: %5dms, total docs uploaded: %7d" % (res_txt, took, 
-                                                                                upload_data_count))
-
-
-def get_data_for_format(format,count):
-    split_f = format.split(":")
-    if not split_f:
-        return None, None
-
-    field_name = split_f[0]
-    field_type = split_f[1]
-
-    return_val = ''
-
-    if field_type == "bool":
-        if count%2 == 0:
-           return_val = True
-        else:
-           return_val = False
-
-    elif field_type == "str":
-        return_val = field_name + str(count)
-
-    elif field_type == "int":
-        return_val = count
-    
-    elif field_type == "ipv4":
-        return_val = "{0}.{1}.{2}.{3}".format(1,2,3,count%255)
-
-    elif field_type in ["ts", "tstxt"]:
-        return_val = int(count * 1000) if field_type == "ts" else\
-        			 datetime.datetime.fromtimestamp(count)\
-        			 .strftime("%Y-%m-%dT%H:%M:%S.000-0000")
-
-    elif field_type == "words":
-        return_val = field_name + str(count)
-
-    elif field_type == "dict":
-        mydict = dict(a=field_name + str(count), b=field_name + str(count), c=field_name + str(count),
-                      d=field_name + str(count), e=field_name + str(count), f=field_name + str(count),
-                      g=field_name + str(count), h=field_name + str(count), i=field_name + str(count), 
-                      j=field_name + str(count))
-        return_val = ", ".join("=".join(_) for _ in mydict.items())
-
-    elif field_type == "text":
-        return_val = field_name + str(count)
-
-    return field_name, return_val
-
-
-def generate_count(min, max):
-    if min == max:
-        return max
-    elif min > max:
-        return random.randrange(max, min);
-    else:
-        return random.randrange(min, max);
-
-
-def generate_random_doc(format,count):
-    global id_counter
-
-    res = {}
-
-    for f in format:
-        f_key, f_val = get_data_for_format(f,count)
-        if f_key:
-            res[f_key] = f_val
-
-    if not tornado.options.options.id_type:
-        return res
-
-    if tornado.options.options.id_type == 'int':
-        res['_id'] = id_counter
-        id_counter += 1
-    elif tornado.options.options.id_type == 'uuid4':
-        res['_id'] = str(uuid.uuid4())
-
-    return res
-
-
-def set_index_refresh(val):
-
-    params = {"index": {"refresh_interval": val}}
-    body = json.dumps(params)
-    url = "%s/%s/_settings" % (tornado.options.options.es_url, tornado.options.options.index_name)
-    try:
-        request = tornado.httpclient.HTTPRequest(url, method="PUT", body=body, request_timeout=240,
-                                                 auth_username=tornado.options.options.username, 
-                                                 auth_password=tornado.options.options.password)
-        http_client = tornado.httpclient.HTTPClient()
-        http_client.fetch(request)
-        logging.info('Set index refresh to %s' % val)
-    except Exception as ex:
-        logging.exception(ex)
-
-
-@tornado.gen.coroutine
-def generate_test_data():
-
-    global upload_data_count
-
-    if tornado.options.options.force_init_index:
-        delete_index(tornado.options.options.index_name)
-
-    create_index(tornado.options.options.index_name)
-
-    # todo: query what refresh is set to, then restore later
-    if tornado.options.options.set_refresh:
-        set_index_refresh("-1")
-
-    if tornado.options.options.out_file:
-        out_file = open(tornado.options.options.out_file, "w")
-    else:
-        out_file = None
-
-    if tornado.options.options.dict_file:
-        global _dict_data
-        with open(tornado.options.options.dict_file, 'r') as f:
-            _dict_data = f.readlines()
-        logging.info("Loaded %d words from the %s" % (len(_dict_data), 
-                                                      tornado.options.options.dict_file))
-
-    format = tornado.options.options.format.split(',')
-    if not format:
-        logging.error('invalid format')
-        exit(1)
-
-    ts_start = int(time.time())
-    upload_data_txt = ""
-    total_uploaded = 0
-
-    logging.info("Generating %d docs, upload batch size is %d" % (tornado.options.options.count,
-                                                                  tornado.options
-                                                                  .options.batch_size))
-    for num in range(0, tornado.options.options.count):
-
-        item = generate_random_doc(format,num)
-
-        if out_file:
-            out_file.write("%s\n" % json.dumps(item))
-
-        cmd = {'index': {'_index': tornado.options.options.index_name,
-                         '_type': tornado.options.options.index_type}}
-        if '_id' in item:
-            cmd['index']['_id'] = item['_id']
-
-        upload_data_txt += json.dumps(cmd) + "\n"
-        upload_data_txt += json.dumps(item) + "\n"
-        upload_data_count += 1
-
-        if upload_data_count % tornado.options.options.batch_size == 0:
-            yield upload_batch(upload_data_txt)
-            upload_data_txt = ""
-
-    # upload remaining items in `upload_data_txt`
-    if upload_data_txt:
-        yield upload_batch(upload_data_txt)
-
-    if tornado.options.options.set_refresh:
-        set_index_refresh("1s")
-
-    if out_file:
-        out_file.close()
-
-    took_secs = int(time.time() - ts_start)
-
-    logging.info("Done - total docs uploaded: %d, took %d seconds" % 
-    			 (tornado.options.options.count, took_secs))
-
-
-if __name__ == '__main__':
-    tornado.options.define("es_url", type=str, default='http://localhost:9200/', 
-                           help="URL of your Elasticsearch node")
-    tornado.options.define("index_name", type=str, default='test_data', 
-                           help="Name of the index to store your messages")
-    tornado.options.define("index_type", type=str, default='test_type', help="Type")
-    tornado.options.define("batch_size", type=int, default=1000, 
-                           help="Elasticsearch bulk index batch size")
-    tornado.options.define("num_of_shards", type=int, default=2, 
-                           help="Number of shards for ES index")
-    tornado.options.define("http_upload_timeout", type=int, default=3, 
-                           help="Timeout in seconds when uploading data")
-    tornado.options.define("count", type=int, default=100000, help="Number of docs to generate")
-    tornado.options.define("format", type=str, default='name:str,age:int,last_updated:ts', 
-                           help="message format")
-    tornado.options.define("num_of_replicas", type=int, default=0, 
-                           help="Number of replicas for ES index")
-    tornado.options.define("force_init_index", type=bool, default=False, 
-                           help="Force deleting and re-initializing the Elasticsearch index")
-    tornado.options.define("set_refresh", type=bool, default=False, 
-                           help="Set refresh rate to -1 before starting the upload")
-    tornado.options.define("out_file", type=str, default=False, 
-                           help="If set, write test data to out_file as well.")
-    tornado.options.define("id_type", type=str, default=None, 
-                           help="Type of 'id' to use for the docs, \
-                           valid settings are int and uuid4, None is default")
-    tornado.options.define("dict_file", type=str, default=None, 
-                           help="Name of dictionary file to use")
-    tornado.options.define("username", type=str, default=None, help="Username for elasticsearch")
-    tornado.options.define("password", type=str, default=None, help="Password for elasticsearch")
-    tornado.options.parse_command_line()
-
-    tornado.ioloop.IOLoop.instance().run_sync(generate_test_data)

http://git-wip-us.apache.org/repos/asf/beam/blob/34b95076/sdks/java/io/hadoop/jdk1.8-tests/src/test/resources/kubernetes/elasticsearch/show-health.sh
----------------------------------------------------------------------
diff --git a/sdks/java/io/hadoop/jdk1.8-tests/src/test/resources/kubernetes/elasticsearch/show-health.sh b/sdks/java/io/hadoop/jdk1.8-tests/src/test/resources/kubernetes/elasticsearch/show-health.sh
deleted file mode 100644
index 8fa912c..0000000
--- a/sdks/java/io/hadoop/jdk1.8-tests/src/test/resources/kubernetes/elasticsearch/show-health.sh
+++ /dev/null
@@ -1,25 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-#!/bin/sh
-set -e
-
-external_ip="$(kubectl get svc elasticsearch -o jsonpath='{.status.loadBalancer.ingress[0].ip}')"
-
-echo "Elasticsearch cluster health info"
-echo "---------------------------------"
-curl $external_ip:9200/_cluster/health
-echo # empty line since curl doesn't output CRLF
\ No newline at end of file


[5/6] beam git commit: Move travis/jenkins folders in a test-infra folder

Posted by tg...@apache.org.
http://git-wip-us.apache.org/repos/asf/beam/blob/66b20af9/.test-infra/jenkins/job_beam_PreCommit_Website_Stage.groovy
----------------------------------------------------------------------
diff --git a/.test-infra/jenkins/job_beam_PreCommit_Website_Stage.groovy b/.test-infra/jenkins/job_beam_PreCommit_Website_Stage.groovy
new file mode 100644
index 0000000..7c64f11
--- /dev/null
+++ b/.test-infra/jenkins/job_beam_PreCommit_Website_Stage.groovy
@@ -0,0 +1,80 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import common_job_properties
+
+// Defines a job.
+job('beam_PreCommit_Website_Stage') {
+  description('Stages the pull requests proposed for the Apache Beam ' +
+              'website to a temporary location to ease reviews.')
+
+  // Set common parameters.
+  common_job_properties.setTopLevelWebsiteJobProperties(delegate)
+
+  // Set pull request build trigger.
+  common_job_properties.setPreCommit(
+      delegate,
+      'Automatic staging of pull requests',
+      '\nJenkins built the site at commit id ${ghprbActualCommit} with ' +
+      'Jekyll and staged it [here](http://apache-beam-website-pull-' +
+      'requests.storage.googleapis.com/${ghprbPullId}/index.html). ' +
+      'Happy reviewing.\n\nNote that any previous site has been deleted. ' +
+      'This staged site will be automatically deleted after its TTL ' +
+      'expires. Push any commit to the pull request branch or re-trigger ' +
+      'the build to get it staged again.')
+
+  steps {
+    // Run the following shell script as a build step.
+    shell '''
+        # Install RVM.
+        gpg --keyserver hkp://keys.gnupg.net --recv-keys \\
+            409B6B1796C275462A1703113804BB82D39DC0E3
+        \\curl -sSL https://get.rvm.io | bash
+        source /home/jenkins/.rvm/scripts/rvm
+
+        # Install Ruby.
+        RUBY_VERSION_NUM=2.3.0
+        rvm install ruby $RUBY_VERSION_NUM --autolibs=read-only
+
+        # Install Bundler gem
+        PATH=~/.gem/ruby/$RUBY_VERSION_NUM/bin:$PATH
+        GEM_PATH=~/.gem/ruby/$RUBY_VERSION_NUM/:$GEM_PATH
+        gem install bundler --user-install
+
+        # Install all needed gems.
+        bundle install --path ~/.gem/
+
+        # Remove current site if it exists.
+        GCS_PATH="gs://apache-beam-website-pull-requests/${ghprbPullId}/"
+        gsutil -m rm -r -f ${GCS_PATH} || true
+
+        # Build the new site with the baseurl specified.
+        rm -fr ./content/
+        bundle exec jekyll build --baseurl=/${ghprbPullId}
+
+        # Install BeautifulSoup HTML Parser for python.
+        pip install --user beautifulsoup4
+
+        # Fix links on staged website.
+        python .jenkins/append_index_html_to_internal_links.py
+
+        # Upload the new site.
+        gsutil -m cp -R ./content/* ${GCS_PATH}
+    '''.stripIndent().trim()
+  }
+}

http://git-wip-us.apache.org/repos/asf/beam/blob/66b20af9/.test-infra/jenkins/job_beam_PreCommit_Website_Test.groovy
----------------------------------------------------------------------
diff --git a/.test-infra/jenkins/job_beam_PreCommit_Website_Test.groovy b/.test-infra/jenkins/job_beam_PreCommit_Website_Test.groovy
new file mode 100644
index 0000000..421b58a
--- /dev/null
+++ b/.test-infra/jenkins/job_beam_PreCommit_Website_Test.groovy
@@ -0,0 +1,65 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import common_job_properties
+
+// Defines a job.
+job('beam_PreCommit_Website_Test') {
+  description('Runs tests on the pull requests proposed for the Apache Beam ' +
+              'website.')
+
+  // Set common parameters.
+  common_job_properties.setTopLevelWebsiteJobProperties(delegate)
+
+  // Execute concurrent builds. Multiple builds of this project may be executed
+  // in parallel. This is safe because this build does not require exclusive
+  // access to any shared resources.
+  concurrentBuild()
+
+  // Set pull request build trigger.
+  common_job_properties.setPreCommit(
+      delegate,
+      'Test website (dead links, etc.)')
+
+  steps {
+    // Run the following shell script as a build step.
+    shell '''
+        # Install RVM.
+        gpg --keyserver hkp://keys.gnupg.net --recv-keys \\
+            409B6B1796C275462A1703113804BB82D39DC0E3
+        \\curl -sSL https://get.rvm.io | bash
+        source /home/jenkins/.rvm/scripts/rvm
+
+        # Install Ruby.
+        RUBY_VERSION_NUM=2.3.0
+        rvm install ruby $RUBY_VERSION_NUM --autolibs=read-only
+
+        # Install Bundler gem
+        PATH=~/.gem/ruby/$RUBY_VERSION_NUM/bin:$PATH
+        GEM_PATH=~/.gem/ruby/$RUBY_VERSION_NUM/:$GEM_PATH
+        gem install bundler --user-install
+
+        # Install all needed gems.
+        bundle install --path ~/.gem/
+
+        # Build the new site and test it.
+        rm -fr ./content/
+        bundle exec rake test
+    '''.stripIndent().trim()
+  }
+}

http://git-wip-us.apache.org/repos/asf/beam/blob/66b20af9/.test-infra/jenkins/job_beam_Release_NightlySnapshot.groovy
----------------------------------------------------------------------
diff --git a/.test-infra/jenkins/job_beam_Release_NightlySnapshot.groovy b/.test-infra/jenkins/job_beam_Release_NightlySnapshot.groovy
new file mode 100644
index 0000000..f2c3ff0
--- /dev/null
+++ b/.test-infra/jenkins/job_beam_Release_NightlySnapshot.groovy
@@ -0,0 +1,45 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import common_job_properties
+
+// This is the nightly snapshot build -- we use this to deploy a daily snapshot
+// to https://repository.apache.org/content/groups/snapshots/org/apache/beam.
+// Runs the postsubmit suite before deploying.
+mavenJob('beam_Release_NightlySnapshot') {
+  description('Runs a mvn clean deploy of the nightly snapshot.')
+
+  // Execute concurrent builds if necessary.
+  concurrentBuild()
+
+  // Set common parameters.
+  common_job_properties.setTopLevelMainJobProperties(delegate)
+
+  // Set maven paramaters.
+  common_job_properties.setMavenConfig(delegate)
+
+  // This is a post-commit job that runs once per day, not for every push.
+  common_job_properties.setPostCommit(
+      delegate,
+      '0 7 * * *',
+      false,
+      'dev@beam.apache.org')
+
+  // Maven goals for this job.
+  goals('-B -e clean deploy -P release,dataflow-runner -DskipITs=false -DintegrationTestPipelineOptions=\'[ "--project=apache-beam-testing", "--tempRoot=gs://temp-storage-for-end-to-end-tests", "--runner=org.apache.beam.runners.dataflow.testing.TestDataflowRunner" ]\'')
+}

http://git-wip-us.apache.org/repos/asf/beam/blob/66b20af9/.test-infra/jenkins/job_seed.groovy
----------------------------------------------------------------------
diff --git a/.test-infra/jenkins/job_seed.groovy b/.test-infra/jenkins/job_seed.groovy
new file mode 100644
index 0000000..2d1b07c
--- /dev/null
+++ b/.test-infra/jenkins/job_seed.groovy
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import common_job_properties
+
+// Defines the seed job, which creates or updates all other Jenkins projects.
+job('beam_SeedJob') {
+  description('Automatically configures all Apache Beam Jenkins projects based' +
+              ' on Jenkins DSL groovy files checked into the code repository.')
+
+  previousNames('beam_SeedJob_Main')
+
+  // Set common parameters.
+  common_job_properties.setTopLevelMainJobProperties(delegate)
+
+  // This is a post-commit job that runs once per day, not for every push.
+  common_job_properties.setPostCommit(
+      delegate,
+      '0 6 * * *',
+      false,
+      'dev@beam.apache.org')
+
+  // Allows triggering this build against pull requests.
+  common_job_properties.enablePhraseTriggeringFromPullRequest(
+    delegate,
+    'Seed Job',
+    'Run Seed Job')
+
+  steps {
+    dsl {
+      // A list or a glob of other groovy files to process.
+      external('.test-infra/jenkins/job_*.groovy')
+
+      // If a job is removed from the script, disable it (rather than deleting).
+      removeAction('DISABLE')
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/beam/blob/66b20af9/.test-infra/travis/README.md
----------------------------------------------------------------------
diff --git a/.test-infra/travis/README.md b/.test-infra/travis/README.md
new file mode 100644
index 0000000..526995a
--- /dev/null
+++ b/.test-infra/travis/README.md
@@ -0,0 +1,23 @@
+<!--
+    Licensed to the Apache Software Foundation (ASF) under one
+    or more contributor license agreements.  See the NOTICE file
+    distributed with this work for additional information
+    regarding copyright ownership.  The ASF licenses this file
+    to you under the Apache License, Version 2.0 (the
+    "License"); you may not use this file except in compliance
+    with the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing,
+    software distributed under the License is distributed on an
+    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+    KIND, either express or implied.  See the License for the
+    specific language governing permissions and limitations
+    under the License.
+-->
+
+# Travis Scripts
+
+This directory contains scripts used for [Travis CI](https://travis-ci.org/apache/beam/)
+testing.

http://git-wip-us.apache.org/repos/asf/beam/blob/66b20af9/.test-infra/travis/settings.xml
----------------------------------------------------------------------
diff --git a/.test-infra/travis/settings.xml b/.test-infra/travis/settings.xml
new file mode 100644
index 0000000..e086aec
--- /dev/null
+++ b/.test-infra/travis/settings.xml
@@ -0,0 +1,33 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+
+    Licensed to the Apache Software Foundation (ASF) under one or more
+    contributor license agreements.  See the NOTICE file distributed with
+    this work for additional information regarding copyright ownership.
+    The ASF licenses this file to You under the Apache License, Version 2.0
+    (the "License"); you may not use this file except in compliance with
+    the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+-->
+<settings>
+  <servers>
+    <server>
+      <id>central</id>
+      <configuration>
+        <httpConfiguration>
+          <all>
+            <connectionTimeout>1000</connectionTimeout>
+          </all>
+        </httpConfiguration>
+        <timeout>5000</timeout>
+      </configuration>
+    </server>
+  </servers>
+</settings>

http://git-wip-us.apache.org/repos/asf/beam/blob/66b20af9/.test-infra/travis/test_wordcount.sh
----------------------------------------------------------------------
diff --git a/.test-infra/travis/test_wordcount.sh b/.test-infra/travis/test_wordcount.sh
new file mode 100755
index 0000000..e059a35
--- /dev/null
+++ b/.test-infra/travis/test_wordcount.sh
@@ -0,0 +1,125 @@
+#!/bin/bash
+#
+#    Licensed to the Apache Software Foundation (ASF) under one or more
+#    contributor license agreements.  See the NOTICE file distributed with
+#    this work for additional information regarding copyright ownership.
+#    The ASF licenses this file to You under the Apache License, Version 2.0
+#    (the "License"); you may not use this file except in compliance with
+#    the License.  You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#    Unless required by applicable law or agreed to in writing, software
+#    distributed under the License is distributed on an "AS IS" BASIS,
+#    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#    See the License for the specific language governing permissions and
+#    limitations under the License.
+#
+
+# This script runs WordCount example locally in a few different ways.
+# Specifically, all combinations of:
+#  a) using mvn exec, or java -cp with a bundled jar file;
+#  b) input filename with no directory component, with a relative directory, or
+#     with an absolute directory; AND
+#  c) input filename containing wildcards or not.
+#
+# The one optional parameter is a path from the directory containing the script
+# to the directory containing the top-level (parent) pom.xml.  If no parameter
+# is provided, the script assumes that directory is equal to the directory
+# containing the script itself.
+#
+# The exit-code of the script indicates success or a failure.
+
+set -e
+set -o pipefail
+
+PASS=1
+VERSION=$(mvn org.apache.maven.plugins:maven-help-plugin:2.1.1:evaluate -Dexpression=project.version | grep -v '\[')
+JAR_FILE=examples/java/target/beam-examples-java-bundled-${VERSION}.jar
+
+function check_result_hash {
+  local name=$1
+  local outfile_prefix=$2
+  local expected=$3
+
+  local actual=$(LC_ALL=C sort $outfile_prefix-* | md5sum | awk '{print $1}' \
+    || LC_ALL=C sort $outfile_prefix-* | md5 -q) || exit 2  # OSX
+  if [[ "$actual" != "$expected" ]]
+  then
+    echo "FAIL $name: Output hash mismatch.  Got $actual, expected $expected."
+    PASS=""
+    echo "head hexdump of actual:"
+    head $outfile_prefix-* | hexdump -c
+  else
+    echo "pass $name"
+    # Output files are left behind in /tmp
+  fi
+}
+
+function get_outfile_prefix {
+  local name=$1
+  # NOTE: mktemp on OSX doesn't support --tmpdir
+  mktemp -u "/tmp/$name.out.XXXXXXXXXX"
+}
+
+function run_via_mvn {
+  local name=$1
+  local input=$2
+  local expected_hash=$3
+
+  local outfile_prefix="$(get_outfile_prefix "$name")" || exit 2
+  local cmd='mvn exec:java -f pom.xml -pl examples/java \
+    -Dexec.mainClass=org.apache.beam.examples.WordCount \
+    -Dexec.args="--runner=DirectRunner --inputFile='"$input"' --output='"$outfile_prefix"'"'
+  echo "$name: Running $cmd" >&2
+  sh -c "$cmd"
+  check_result_hash "$name" "$outfile_prefix" "$expected_hash"
+}
+
+function run_bundled {
+  local name=$1
+  local input=$2
+  local expected_hash=$3
+
+  local outfile_prefix="$(get_outfile_prefix "$name")" || exit 2
+  local cmd='java -cp '"$JAR_FILE"' \
+    org.apache.beam.examples.WordCount \
+    --runner=DirectRunner \
+    --inputFile='"'$input'"' \
+    --output='"$outfile_prefix"
+  echo "$name: Running $cmd" >&2
+  sh -c "$cmd"
+  check_result_hash "$name" "$outfile_prefix" "$expected_hash"
+}
+
+function run_all_ways {
+  local name=$1
+  local input=$2
+  local expected_hash=$3
+
+  run_via_mvn ${name}a "$input" $expected_hash
+  check_for_jar_file
+  run_bundled ${name}b "$input" $expected_hash
+}
+
+function check_for_jar_file {
+  if [[ ! -f $JAR_FILE ]]
+  then
+    echo "Jar file $JAR_FILE not created" >&2
+    exit 2
+  fi
+}
+
+run_all_ways wordcount1 "LICENSE" c5350a5ad4bb51e3e018612b4b044097
+run_all_ways wordcount2 "./LICENSE" c5350a5ad4bb51e3e018612b4b044097
+run_all_ways wordcount3 "$PWD/LICENSE" c5350a5ad4bb51e3e018612b4b044097
+run_all_ways wordcount4 "L*N?E*" c5350a5ad4bb51e3e018612b4b044097
+run_all_ways wordcount5 "./LICE*N?E" c5350a5ad4bb51e3e018612b4b044097
+run_all_ways wordcount6 "$PWD/*LIC?NSE" c5350a5ad4bb51e3e018612b4b044097
+
+if [[ ! "$PASS" ]]
+then
+  echo "One or more tests FAILED."
+  exit 1
+fi
+echo "All tests PASS"

http://git-wip-us.apache.org/repos/asf/beam/blob/66b20af9/.travis.yml
----------------------------------------------------------------------
diff --git a/.travis.yml b/.travis.yml
index 2a75ebc..0b5d700 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -34,7 +34,7 @@ addons:
     - python2.7
 env:
   global:
-   - MAVEN_OVERRIDE="--settings=.travis/settings.xml"
+   - MAVEN_OVERRIDE="--settings=.test-infra/travis/settings.xml"
    - MAVEN_CONTAINER_OVERRIDE="-DbeamSurefireArgline='-Xmx512m'"
 
 matrix:
@@ -80,7 +80,7 @@ install:
 
 script:
   - if [ "$TEST_PYTHON" ]; then travis_retry $TOX_HOME/tox -c sdks/python/tox.ini; fi
-  - if [ ! "$TEST_PYTHON" ]; then travis_retry mvn --batch-mode --update-snapshots --no-snapshot-updates --threads 1C $MAVEN_OVERRIDE install && travis_retry bash -ex .travis/test_wordcount.sh; fi
+  - if [ ! "$TEST_PYTHON" ]; then travis_retry mvn --batch-mode --update-snapshots --no-snapshot-updates --threads 1C $MAVEN_OVERRIDE install && travis_retry bash -ex .test-infra/travis/test_wordcount.sh; fi
 
 cache:
   directories:

http://git-wip-us.apache.org/repos/asf/beam/blob/66b20af9/.travis/README.md
----------------------------------------------------------------------
diff --git a/.travis/README.md b/.travis/README.md
deleted file mode 100644
index 526995a..0000000
--- a/.travis/README.md
+++ /dev/null
@@ -1,23 +0,0 @@
-<!--
-    Licensed to the Apache Software Foundation (ASF) under one
-    or more contributor license agreements.  See the NOTICE file
-    distributed with this work for additional information
-    regarding copyright ownership.  The ASF licenses this file
-    to you under the Apache License, Version 2.0 (the
-    "License"); you may not use this file except in compliance
-    with the License.  You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-    Unless required by applicable law or agreed to in writing,
-    software distributed under the License is distributed on an
-    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-    KIND, either express or implied.  See the License for the
-    specific language governing permissions and limitations
-    under the License.
--->
-
-# Travis Scripts
-
-This directory contains scripts used for [Travis CI](https://travis-ci.org/apache/beam/)
-testing.

http://git-wip-us.apache.org/repos/asf/beam/blob/66b20af9/.travis/settings.xml
----------------------------------------------------------------------
diff --git a/.travis/settings.xml b/.travis/settings.xml
deleted file mode 100644
index e086aec..0000000
--- a/.travis/settings.xml
+++ /dev/null
@@ -1,33 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!--
-
-    Licensed to the Apache Software Foundation (ASF) under one or more
-    contributor license agreements.  See the NOTICE file distributed with
-    this work for additional information regarding copyright ownership.
-    The ASF licenses this file to You under the Apache License, Version 2.0
-    (the "License"); you may not use this file except in compliance with
-    the License.  You may obtain a copy of the License at
-
-       http://www.apache.org/licenses/LICENSE-2.0
-
-    Unless required by applicable law or agreed to in writing, software
-    distributed under the License is distributed on an "AS IS" BASIS,
-    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-    See the License for the specific language governing permissions and
-    limitations under the License.
--->
-<settings>
-  <servers>
-    <server>
-      <id>central</id>
-      <configuration>
-        <httpConfiguration>
-          <all>
-            <connectionTimeout>1000</connectionTimeout>
-          </all>
-        </httpConfiguration>
-        <timeout>5000</timeout>
-      </configuration>
-    </server>
-  </servers>
-</settings>

http://git-wip-us.apache.org/repos/asf/beam/blob/66b20af9/.travis/test_wordcount.sh
----------------------------------------------------------------------
diff --git a/.travis/test_wordcount.sh b/.travis/test_wordcount.sh
deleted file mode 100755
index e059a35..0000000
--- a/.travis/test_wordcount.sh
+++ /dev/null
@@ -1,125 +0,0 @@
-#!/bin/bash
-#
-#    Licensed to the Apache Software Foundation (ASF) under one or more
-#    contributor license agreements.  See the NOTICE file distributed with
-#    this work for additional information regarding copyright ownership.
-#    The ASF licenses this file to You under the Apache License, Version 2.0
-#    (the "License"); you may not use this file except in compliance with
-#    the License.  You may obtain a copy of the License at
-#
-#       http://www.apache.org/licenses/LICENSE-2.0
-#
-#    Unless required by applicable law or agreed to in writing, software
-#    distributed under the License is distributed on an "AS IS" BASIS,
-#    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#    See the License for the specific language governing permissions and
-#    limitations under the License.
-#
-
-# This script runs WordCount example locally in a few different ways.
-# Specifically, all combinations of:
-#  a) using mvn exec, or java -cp with a bundled jar file;
-#  b) input filename with no directory component, with a relative directory, or
-#     with an absolute directory; AND
-#  c) input filename containing wildcards or not.
-#
-# The one optional parameter is a path from the directory containing the script
-# to the directory containing the top-level (parent) pom.xml.  If no parameter
-# is provided, the script assumes that directory is equal to the directory
-# containing the script itself.
-#
-# The exit-code of the script indicates success or a failure.
-
-set -e
-set -o pipefail
-
-PASS=1
-VERSION=$(mvn org.apache.maven.plugins:maven-help-plugin:2.1.1:evaluate -Dexpression=project.version | grep -v '\[')
-JAR_FILE=examples/java/target/beam-examples-java-bundled-${VERSION}.jar
-
-function check_result_hash {
-  local name=$1
-  local outfile_prefix=$2
-  local expected=$3
-
-  local actual=$(LC_ALL=C sort $outfile_prefix-* | md5sum | awk '{print $1}' \
-    || LC_ALL=C sort $outfile_prefix-* | md5 -q) || exit 2  # OSX
-  if [[ "$actual" != "$expected" ]]
-  then
-    echo "FAIL $name: Output hash mismatch.  Got $actual, expected $expected."
-    PASS=""
-    echo "head hexdump of actual:"
-    head $outfile_prefix-* | hexdump -c
-  else
-    echo "pass $name"
-    # Output files are left behind in /tmp
-  fi
-}
-
-function get_outfile_prefix {
-  local name=$1
-  # NOTE: mktemp on OSX doesn't support --tmpdir
-  mktemp -u "/tmp/$name.out.XXXXXXXXXX"
-}
-
-function run_via_mvn {
-  local name=$1
-  local input=$2
-  local expected_hash=$3
-
-  local outfile_prefix="$(get_outfile_prefix "$name")" || exit 2
-  local cmd='mvn exec:java -f pom.xml -pl examples/java \
-    -Dexec.mainClass=org.apache.beam.examples.WordCount \
-    -Dexec.args="--runner=DirectRunner --inputFile='"$input"' --output='"$outfile_prefix"'"'
-  echo "$name: Running $cmd" >&2
-  sh -c "$cmd"
-  check_result_hash "$name" "$outfile_prefix" "$expected_hash"
-}
-
-function run_bundled {
-  local name=$1
-  local input=$2
-  local expected_hash=$3
-
-  local outfile_prefix="$(get_outfile_prefix "$name")" || exit 2
-  local cmd='java -cp '"$JAR_FILE"' \
-    org.apache.beam.examples.WordCount \
-    --runner=DirectRunner \
-    --inputFile='"'$input'"' \
-    --output='"$outfile_prefix"
-  echo "$name: Running $cmd" >&2
-  sh -c "$cmd"
-  check_result_hash "$name" "$outfile_prefix" "$expected_hash"
-}
-
-function run_all_ways {
-  local name=$1
-  local input=$2
-  local expected_hash=$3
-
-  run_via_mvn ${name}a "$input" $expected_hash
-  check_for_jar_file
-  run_bundled ${name}b "$input" $expected_hash
-}
-
-function check_for_jar_file {
-  if [[ ! -f $JAR_FILE ]]
-  then
-    echo "Jar file $JAR_FILE not created" >&2
-    exit 2
-  fi
-}
-
-run_all_ways wordcount1 "LICENSE" c5350a5ad4bb51e3e018612b4b044097
-run_all_ways wordcount2 "./LICENSE" c5350a5ad4bb51e3e018612b4b044097
-run_all_ways wordcount3 "$PWD/LICENSE" c5350a5ad4bb51e3e018612b4b044097
-run_all_ways wordcount4 "L*N?E*" c5350a5ad4bb51e3e018612b4b044097
-run_all_ways wordcount5 "./LICE*N?E" c5350a5ad4bb51e3e018612b4b044097
-run_all_ways wordcount6 "$PWD/*LIC?NSE" c5350a5ad4bb51e3e018612b4b044097
-
-if [[ ! "$PASS" ]]
-then
-  echo "One or more tests FAILED."
-  exit 1
-fi
-echo "All tests PASS"


[2/6] beam git commit: Move jdbc's postgres k8s scripts into shared k8s dir

Posted by tg...@apache.org.
Move jdbc's postgres k8s scripts into shared k8s dir


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/57588990
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/57588990
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/57588990

Branch: refs/heads/master
Commit: 57588990ae37024c1fa21c547b4626b4447107cf
Parents: 66b20af
Author: Stephen Sisk <si...@google.com>
Authored: Fri Apr 7 16:06:15 2017 -0700
Committer: Thomas Groh <tg...@google.com>
Committed: Wed Apr 12 13:52:28 2017 -0700

----------------------------------------------------------------------
 .../postgres/postgres-service-for-local-dev.yml | 28 ++++++++++
 .test-infra/kubernetes/postgres/postgres.yml    | 56 ++++++++++++++++++++
 .../postgres-service-for-local-dev.yml          | 28 ----------
 .../src/test/resources/kubernetes/postgres.yml  | 56 --------------------
 .../jdbc/src/test/resources/kubernetes/setup.sh | 19 -------
 .../src/test/resources/kubernetes/teardown.sh   | 19 -------
 6 files changed, 84 insertions(+), 122 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/57588990/.test-infra/kubernetes/postgres/postgres-service-for-local-dev.yml
----------------------------------------------------------------------
diff --git a/.test-infra/kubernetes/postgres/postgres-service-for-local-dev.yml b/.test-infra/kubernetes/postgres/postgres-service-for-local-dev.yml
new file mode 100644
index 0000000..5d2c664
--- /dev/null
+++ b/.test-infra/kubernetes/postgres/postgres-service-for-local-dev.yml
@@ -0,0 +1,28 @@
+#    Licensed to the Apache Software Foundation (ASF) under one or more
+#    contributor license agreements.  See the NOTICE file distributed with
+#    this work for additional information regarding copyright ownership.
+#    The ASF licenses this file to You under the Apache License, Version 2.0
+#    (the "License"); you may not use this file except in compliance with
+#    the License.  You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#    Unless required by applicable law or agreed to in writing, software
+#    distributed under the License is distributed on an "AS IS" BASIS,
+#    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#    See the License for the specific language governing permissions and
+#    limitations under the License.
+
+
+apiVersion: v1
+kind: Service
+metadata:
+  name: postgres-for-dev
+  labels:
+    name: postgres
+spec:
+  ports:
+    - port: 5432
+  selector:
+    name: postgres
+  type: LoadBalancer

http://git-wip-us.apache.org/repos/asf/beam/blob/57588990/.test-infra/kubernetes/postgres/postgres.yml
----------------------------------------------------------------------
diff --git a/.test-infra/kubernetes/postgres/postgres.yml b/.test-infra/kubernetes/postgres/postgres.yml
new file mode 100644
index 0000000..6244968
--- /dev/null
+++ b/.test-infra/kubernetes/postgres/postgres.yml
@@ -0,0 +1,56 @@
+#    Licensed to the Apache Software Foundation (ASF) under one or more
+#    contributor license agreements.  See the NOTICE file distributed with
+#    this work for additional information regarding copyright ownership.
+#    The ASF licenses this file to You under the Apache License, Version 2.0
+#    (the "License"); you may not use this file except in compliance with
+#    the License.  You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#    Unless required by applicable law or agreed to in writing, software
+#    distributed under the License is distributed on an "AS IS" BASIS,
+#    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#    See the License for the specific language governing permissions and
+#    limitations under the License.
+
+
+apiVersion: v1
+kind: Service
+metadata:
+  name: postgres
+  labels:
+    name: postgres
+spec:
+  ports:
+    - port: 5432
+      nodePort: 31234
+  selector:
+    name: postgres
+  type: NodePort
+
+---
+
+apiVersion: v1
+kind: ReplicationController
+metadata:
+  name: postgres
+spec:
+  replicas: 1
+  selector:
+    name: postgres
+  template:
+    metadata:
+      name: postgres
+      labels:
+        name: postgres
+    spec:
+      containers:
+        - name: postgres
+          image: postgres
+          env:
+            - name: POSTGRES_PASSWORD
+              value: uuinkks
+            - name: PGDATA
+              value: /var/lib/postgresql/data/pgdata
+          ports:
+            - containerPort: 5432

http://git-wip-us.apache.org/repos/asf/beam/blob/57588990/sdks/java/io/jdbc/src/test/resources/kubernetes/postgres-service-for-local-dev.yml
----------------------------------------------------------------------
diff --git a/sdks/java/io/jdbc/src/test/resources/kubernetes/postgres-service-for-local-dev.yml b/sdks/java/io/jdbc/src/test/resources/kubernetes/postgres-service-for-local-dev.yml
deleted file mode 100644
index 5d2c664..0000000
--- a/sdks/java/io/jdbc/src/test/resources/kubernetes/postgres-service-for-local-dev.yml
+++ /dev/null
@@ -1,28 +0,0 @@
-#    Licensed to the Apache Software Foundation (ASF) under one or more
-#    contributor license agreements.  See the NOTICE file distributed with
-#    this work for additional information regarding copyright ownership.
-#    The ASF licenses this file to You under the Apache License, Version 2.0
-#    (the "License"); you may not use this file except in compliance with
-#    the License.  You may obtain a copy of the License at
-#
-#       http://www.apache.org/licenses/LICENSE-2.0
-#
-#    Unless required by applicable law or agreed to in writing, software
-#    distributed under the License is distributed on an "AS IS" BASIS,
-#    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#    See the License for the specific language governing permissions and
-#    limitations under the License.
-
-
-apiVersion: v1
-kind: Service
-metadata:
-  name: postgres-for-dev
-  labels:
-    name: postgres
-spec:
-  ports:
-    - port: 5432
-  selector:
-    name: postgres
-  type: LoadBalancer

http://git-wip-us.apache.org/repos/asf/beam/blob/57588990/sdks/java/io/jdbc/src/test/resources/kubernetes/postgres.yml
----------------------------------------------------------------------
diff --git a/sdks/java/io/jdbc/src/test/resources/kubernetes/postgres.yml b/sdks/java/io/jdbc/src/test/resources/kubernetes/postgres.yml
deleted file mode 100644
index 6244968..0000000
--- a/sdks/java/io/jdbc/src/test/resources/kubernetes/postgres.yml
+++ /dev/null
@@ -1,56 +0,0 @@
-#    Licensed to the Apache Software Foundation (ASF) under one or more
-#    contributor license agreements.  See the NOTICE file distributed with
-#    this work for additional information regarding copyright ownership.
-#    The ASF licenses this file to You under the Apache License, Version 2.0
-#    (the "License"); you may not use this file except in compliance with
-#    the License.  You may obtain a copy of the License at
-#
-#       http://www.apache.org/licenses/LICENSE-2.0
-#
-#    Unless required by applicable law or agreed to in writing, software
-#    distributed under the License is distributed on an "AS IS" BASIS,
-#    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#    See the License for the specific language governing permissions and
-#    limitations under the License.
-
-
-apiVersion: v1
-kind: Service
-metadata:
-  name: postgres
-  labels:
-    name: postgres
-spec:
-  ports:
-    - port: 5432
-      nodePort: 31234
-  selector:
-    name: postgres
-  type: NodePort
-
----
-
-apiVersion: v1
-kind: ReplicationController
-metadata:
-  name: postgres
-spec:
-  replicas: 1
-  selector:
-    name: postgres
-  template:
-    metadata:
-      name: postgres
-      labels:
-        name: postgres
-    spec:
-      containers:
-        - name: postgres
-          image: postgres
-          env:
-            - name: POSTGRES_PASSWORD
-              value: uuinkks
-            - name: PGDATA
-              value: /var/lib/postgresql/data/pgdata
-          ports:
-            - containerPort: 5432

http://git-wip-us.apache.org/repos/asf/beam/blob/57588990/sdks/java/io/jdbc/src/test/resources/kubernetes/setup.sh
----------------------------------------------------------------------
diff --git a/sdks/java/io/jdbc/src/test/resources/kubernetes/setup.sh b/sdks/java/io/jdbc/src/test/resources/kubernetes/setup.sh
deleted file mode 100644
index 76e4e34..0000000
--- a/sdks/java/io/jdbc/src/test/resources/kubernetes/setup.sh
+++ /dev/null
@@ -1,19 +0,0 @@
-#!/bin/bash
-#
-#    Licensed to the Apache Software Foundation (ASF) under one or more
-#    contributor license agreements.  See the NOTICE file distributed with
-#    this work for additional information regarding copyright ownership.
-#    The ASF licenses this file to You under the Apache License, Version 2.0
-#    (the "License"); you may not use this file except in compliance with
-#    the License.  You may obtain a copy of the License at
-#
-#       http://www.apache.org/licenses/LICENSE-2.0
-#
-#    Unless required by applicable law or agreed to in writing, software
-#    distributed under the License is distributed on an "AS IS" BASIS,
-#    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#    See the License for the specific language governing permissions and
-#    limitations under the License.
-#
-
-kubectl create -f ./sdks/java/io/jdbc/src/test/resources/kubernetes/postgres.yml

http://git-wip-us.apache.org/repos/asf/beam/blob/57588990/sdks/java/io/jdbc/src/test/resources/kubernetes/teardown.sh
----------------------------------------------------------------------
diff --git a/sdks/java/io/jdbc/src/test/resources/kubernetes/teardown.sh b/sdks/java/io/jdbc/src/test/resources/kubernetes/teardown.sh
deleted file mode 100644
index 4fca74a..0000000
--- a/sdks/java/io/jdbc/src/test/resources/kubernetes/teardown.sh
+++ /dev/null
@@ -1,19 +0,0 @@
-#!/bin/bash
-#
-#    Licensed to the Apache Software Foundation (ASF) under one or more
-#    contributor license agreements.  See the NOTICE file distributed with
-#    this work for additional information regarding copyright ownership.
-#    The ASF licenses this file to You under the Apache License, Version 2.0
-#    (the "License"); you may not use this file except in compliance with
-#    the License.  You may obtain a copy of the License at
-#
-#       http://www.apache.org/licenses/LICENSE-2.0
-#
-#    Unless required by applicable law or agreed to in writing, software
-#    distributed under the License is distributed on an "AS IS" BASIS,
-#    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-#    See the License for the specific language governing permissions and
-#    limitations under the License.
-#
-
-kubectl delete -f ./sdks/java/io/jdbc/src/test/resources/kubernetes/postgres.yml


[6/6] beam git commit: Move travis/jenkins folders in a test-infra folder

Posted by tg...@apache.org.
Move travis/jenkins folders in a test-infra folder


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/66b20af9
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/66b20af9
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/66b20af9

Branch: refs/heads/master
Commit: 66b20af9fed36ff419ca6da3de4b1b1a02e66464
Parents: 1788cef
Author: Stephen Sisk <si...@google.com>
Authored: Fri Apr 7 15:57:33 2017 -0700
Committer: Thomas Groh <tg...@google.com>
Committed: Wed Apr 12 13:52:28 2017 -0700

----------------------------------------------------------------------
 .jenkins/common_job_properties.groovy           | 261 -------------------
 .../job_beam_PerformanceTests_Dataflow.groovy   |  43 ---
 .jenkins/job_beam_PerformanceTests_JDBC.groovy  |  60 -----
 .jenkins/job_beam_PerformanceTests_Spark.groovy |  44 ----
 ...job_beam_PostCommit_Java_MavenInstall.groovy |  42 ---
 ..._PostCommit_Java_ValidatesRunner_Apex.groovy |  48 ----
 ...tCommit_Java_ValidatesRunner_Dataflow.groovy |  45 ----
 ...PostCommit_Java_ValidatesRunner_Flink.groovy |  43 ---
 ...tCommit_Java_ValidatesRunner_Gearpump.groovy |  49 ----
 ...PostCommit_Java_ValidatesRunner_Spark.groovy |  44 ----
 .../job_beam_PostCommit_Python_Verify.groovy    |  55 ----
 .../job_beam_PreCommit_Java_MavenInstall.groovy |  42 ---
 .../job_beam_PreCommit_Website_Stage.groovy     |  80 ------
 .jenkins/job_beam_PreCommit_Website_Test.groovy |  65 -----
 .../job_beam_Release_NightlySnapshot.groovy     |  45 ----
 .jenkins/job_seed.groovy                        |  53 ----
 .../jenkins/common_job_properties.groovy        | 261 +++++++++++++++++++
 .../job_beam_PerformanceTests_Dataflow.groovy   |  43 +++
 .../job_beam_PerformanceTests_JDBC.groovy       |  60 +++++
 .../job_beam_PerformanceTests_Spark.groovy      |  44 ++++
 ...job_beam_PostCommit_Java_MavenInstall.groovy |  42 +++
 ..._PostCommit_Java_ValidatesRunner_Apex.groovy |  48 ++++
 ...tCommit_Java_ValidatesRunner_Dataflow.groovy |  45 ++++
 ...PostCommit_Java_ValidatesRunner_Flink.groovy |  43 +++
 ...tCommit_Java_ValidatesRunner_Gearpump.groovy |  49 ++++
 ...PostCommit_Java_ValidatesRunner_Spark.groovy |  44 ++++
 .../job_beam_PostCommit_Python_Verify.groovy    |  55 ++++
 .../job_beam_PreCommit_Java_MavenInstall.groovy |  42 +++
 .../job_beam_PreCommit_Website_Stage.groovy     |  80 ++++++
 .../job_beam_PreCommit_Website_Test.groovy      |  65 +++++
 .../job_beam_Release_NightlySnapshot.groovy     |  45 ++++
 .test-infra/jenkins/job_seed.groovy             |  53 ++++
 .test-infra/travis/README.md                    |  23 ++
 .test-infra/travis/settings.xml                 |  33 +++
 .test-infra/travis/test_wordcount.sh            | 125 +++++++++
 .travis.yml                                     |   4 +-
 .travis/README.md                               |  23 --
 .travis/settings.xml                            |  33 ---
 .travis/test_wordcount.sh                       | 125 ---------
 39 files changed, 1202 insertions(+), 1202 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/66b20af9/.jenkins/common_job_properties.groovy
----------------------------------------------------------------------
diff --git a/.jenkins/common_job_properties.groovy b/.jenkins/common_job_properties.groovy
deleted file mode 100644
index ee10281..0000000
--- a/.jenkins/common_job_properties.groovy
+++ /dev/null
@@ -1,261 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-// Contains functions that help build Jenkins projects. Functions typically set
-// common properties that are shared among all Jenkins projects.
-// Code in this directory should conform to the Groovy style guide.
-//  http://groovy-lang.org/style-guide.html
-class common_job_properties {
-
-  // Sets common top-level job properties for website repository jobs.
-  static void setTopLevelWebsiteJobProperties(context) {
-    setTopLevelJobProperties(context, 'beam-site', 'asf-site', 30)
-  }
-
-  // Sets common top-level job properties for main repository jobs.
-  static void setTopLevelMainJobProperties(context,
-                                           String defaultBranch = 'master',
-                                           int defaultTimeout = 100) {
-    setTopLevelJobProperties(context, 'beam', defaultBranch, defaultTimeout)
-  }
-
-  // Sets common top-level job properties. Accessed through one of the above
-  // methods to protect jobs from internal details of param defaults.
-  private static void setTopLevelJobProperties(context,
-                                               String repositoryName,
-                                               String defaultBranch,
-                                               int defaultTimeout) {
-
-    // GitHub project.
-    context.properties {
-      githubProjectUrl('https://github.com/apache/' + repositoryName + '/')
-    }
-
-    // Set JDK version.
-    context.jdk('JDK 1.8 (latest)')
-
-    // Restrict this project to run only on Jenkins executors dedicated to the
-    // Apache Beam project.
-    context.label('beam')
-
-    // Discard old builds. Build records are only kept up to this number of days.
-    context.logRotator {
-      daysToKeep(14)
-    }
-
-    // Source code management.
-    context.scm {
-      git {
-        remote {
-          url('https://github.com/apache/' + repositoryName + '.git')
-          refspec('+refs/heads/*:refs/remotes/origin/* ' +
-                  '+refs/pull/*:refs/remotes/origin/pr/*')
-        }
-        branch('${sha1}')
-        extensions {
-          cleanAfterCheckout()
-          pruneBranches()
-        }
-      }
-    }
-
-    context.parameters {
-      // This is a recommended setup if you want to run the job manually. The
-      // ${sha1} parameter needs to be provided, and defaults to the main branch.
-      stringParam(
-          'sha1',
-          defaultBranch,
-          'Commit id or refname (eg: origin/pr/9/head) you want to build.')
-    }
-
-    context.wrappers {
-      // Abort the build if it's stuck for more minutes than specified.
-      timeout {
-        absolute(defaultTimeout)
-        abortBuild()
-      }
-
-      // Set SPARK_LOCAL_IP for spark tests.
-      environmentVariables {
-        env('SPARK_LOCAL_IP', '127.0.0.1')
-      }
-      credentialsBinding {
-        string("COVERALLS_REPO_TOKEN", "beam-coveralls-token")
-      }
-    }
-  }
-
-  // Sets the pull request build trigger. Accessed through precommit methods
-  // below to insulate callers from internal parameter defaults.
-  private static void setPullRequestBuildTrigger(context,
-                                                 String commitStatusContext,
-                                                 String successComment = '--none--',
-                                                 String prTriggerPhrase = '') {
-    context.triggers {
-      githubPullRequest {
-        admins(['asfbot'])
-        useGitHubHooks()
-        orgWhitelist(['apache'])
-        allowMembersOfWhitelistedOrgsAsAdmin()
-        permitAll()
-        // prTriggerPhrase is the argument which gets set when we want to allow
-        // post-commit builds to run against pending pull requests. This block
-        // overrides the default trigger phrase with the new one. Setting this
-        // will disable automatic invocation of this build; the phrase will be
-        // required to start it.
-        if (prTriggerPhrase) {
-          triggerPhrase(prTriggerPhrase)
-          onlyTriggerPhrase()
-        }
-
-        extensions {
-          commitStatus {
-            // This is the name that will show up in the GitHub pull request UI
-            // for this Jenkins project.
-            delegate.context("Jenkins: " + commitStatusContext)
-          }
-
-          /*
-            This section is disabled, because of jenkinsci/ghprb-plugin#417 issue.
-            For the time being, an equivalent configure section below is added.
-
-          // Comment messages after build completes.
-          buildStatus {
-            completedStatus('SUCCESS', successComment)
-            completedStatus('FAILURE', '--none--')
-            completedStatus('ERROR', '--none--')
-          }
-          */
-        }
-      }
-    }
-
-    // Comment messages after build completes.
-    context.configure {
-      def messages = it / triggers / 'org.jenkinsci.plugins.ghprb.GhprbTrigger' / extensions / 'org.jenkinsci.plugins.ghprb.extensions.comments.GhprbBuildStatus' / messages
-      messages << 'org.jenkinsci.plugins.ghprb.extensions.comments.GhprbBuildResultMessage' {
-        message(successComment)
-        result('SUCCESS')
-      }
-      messages << 'org.jenkinsci.plugins.ghprb.extensions.comments.GhprbBuildResultMessage' {
-        message('--none--')
-        result('ERROR')
-      }
-      messages << 'org.jenkinsci.plugins.ghprb.extensions.comments.GhprbBuildResultMessage' {
-        message('--none--')
-        result('FAILURE')
-      }
-    }
-  }
-
-  // Sets common config for Maven jobs.
-  static void setMavenConfig(context) {
-    context.mavenInstallation('Maven 3.3.3')
-    context.mavenOpts('-Dorg.slf4j.simpleLogger.showDateTime=true')
-    context.mavenOpts('-Dorg.slf4j.simpleLogger.dateTimeFormat=yyyy-MM-dd\\\'T\\\'HH:mm:ss.SSS')
-    // The -XX:+TieredCompilation -XX:TieredStopAtLevel=1 JVM options enable
-    // tiered compilation to make the JVM startup times faster during the tests.
-    context.mavenOpts('-XX:+TieredCompilation')
-    context.mavenOpts('-XX:TieredStopAtLevel=1')
-    context.rootPOM('pom.xml')
-    // Use a repository local to the workspace for better isolation of jobs.
-    context.localRepository(LocalRepositoryLocation.LOCAL_TO_WORKSPACE)
-    // Disable archiving the built artifacts by default, as this is slow and flaky.
-    // We can usually recreate them easily, and we can also opt-in individual jobs
-    // to artifact archiving.
-    context.archivingDisabled(true)
-  }
-
-  // Sets common config for PreCommit jobs.
-  static void setPreCommit(context,
-                           String commitStatusName,
-                           String successComment = '--none--') {
-    // Set pull request build trigger.
-    setPullRequestBuildTrigger(context, commitStatusName, successComment)
-  }
-
-  // Enable triggering postcommit runs against pull requests. Users can comment the trigger phrase
-  // specified in the postcommit job and have the job run against their PR to run
-  // tests not in the presubmit suite for additional confidence.
-  static void enablePhraseTriggeringFromPullRequest(context,
-                                                    String commitStatusName,
-                                                    String prTriggerPhrase) {
-    setPullRequestBuildTrigger(
-      context,
-      commitStatusName,
-      '--none--',
-      prTriggerPhrase)
-  }
-
-  // Sets common config for PostCommit jobs.
-  static void setPostCommit(context,
-                            String buildSchedule = '0 */6 * * *',
-                            boolean triggerEveryPush = true,
-                            String notifyAddress = 'commits@beam.apache.org',
-                            boolean emailIndividuals = true) {
-    // Set build triggers
-    context.triggers {
-      // By default runs every 6 hours.
-      cron(buildSchedule)
-      if (triggerEveryPush) {
-        githubPush()
-      }
-    }
-
-    context.publishers {
-      // Notify an email address for each failed build (defaults to commits@).
-      mailer(notifyAddress, false, emailIndividuals)
-    }
-  }
-
-  // Configures the argument list for performance tests, adding the standard
-  // performance test job arguments.
-  private static def genPerformanceArgs(def argMap) {
-    def standard_args = [
-      project: 'apache-beam-testing',
-      dpb_log_level: 'INFO',
-      maven_binary: '/home/jenkins/tools/maven/latest/bin/mvn',
-      bigquery_table: 'beam_performance.pkb_results',
-      // Publishes results with official tag, for use in dashboards.
-      official: 'true'
-    ]
-    // Note: in case of key collision, keys present in ArgMap win.
-    def joined_args = standard_args.plus(argMap)
-    def argList = []
-    joined_args.each({
-        // FYI: Replacement only works with double quotes.
-        key, value -> argList.add("--$key=$value")
-    })
-    return argList.join(' ')
-  }
-
-  // Adds the standard performance test job steps.
-  static def buildPerformanceTest(def context, def argMap) {
-    def pkbArgs = genPerformanceArgs(argMap)
-    context.steps {
-        // Clean up environment.
-        shell('rm -rf PerfKitBenchmarker')
-        // Clone appropriate perfkit branch
-        shell('git clone https://github.com/GoogleCloudPlatform/PerfKitBenchmarker.git')
-        // Install job requirements.
-        shell('pip install --user -r PerfKitBenchmarker/requirements.txt')
-        // Launch performance test.
-        shell("python PerfKitBenchmarker/pkb.py $pkbArgs")
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/beam/blob/66b20af9/.jenkins/job_beam_PerformanceTests_Dataflow.groovy
----------------------------------------------------------------------
diff --git a/.jenkins/job_beam_PerformanceTests_Dataflow.groovy b/.jenkins/job_beam_PerformanceTests_Dataflow.groovy
deleted file mode 100644
index 51c73f3..0000000
--- a/.jenkins/job_beam_PerformanceTests_Dataflow.groovy
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import common_job_properties
-
-// This job runs the Beam performance tests on PerfKit Benchmarker.
-job('beam_PerformanceTests_Dataflow'){
-    // Set default Beam job properties.
-    common_job_properties.setTopLevelMainJobProperties(delegate)
-
-    // Run job in postcommit every 6 hours, don't trigger every push, and
-    // don't email individual committers.
-    common_job_properties.setPostCommit(
-        delegate,
-        '0 */6 * * *',
-        false,
-        'commits@beam.apache.org',
-        false)
-
-    def argMap = [
-      benchmarks: 'dpb_wordcount_benchmark',
-      dpb_dataflow_staging_location: 'gs://temp-storage-for-perf-tests/staging',
-      dpb_wordcount_input: 'dataflow-samples/shakespeare/kinglear.txt',
-      config_override: 'dpb_wordcount_benchmark.dpb_service.service_type=dataflow'
-    ]
-
-    common_job_properties.buildPerformanceTest(delegate, argMap)
-}

http://git-wip-us.apache.org/repos/asf/beam/blob/66b20af9/.jenkins/job_beam_PerformanceTests_JDBC.groovy
----------------------------------------------------------------------
diff --git a/.jenkins/job_beam_PerformanceTests_JDBC.groovy b/.jenkins/job_beam_PerformanceTests_JDBC.groovy
deleted file mode 100644
index 8e581c2..0000000
--- a/.jenkins/job_beam_PerformanceTests_JDBC.groovy
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import common_job_properties
-
-// This job runs the Beam performance tests on PerfKit Benchmarker.
-job('beam_PerformanceTests_JDBC'){
-    // Set default Beam job properties.
-    common_job_properties.setTopLevelMainJobProperties(delegate)
-
-    // Run job in postcommit every 6 hours, don't trigger every push, and
-    // don't email individual committers.
-    common_job_properties.setPostCommit(
-        delegate,
-        '0 */6 * * *',
-        false,
-        'commits@beam.apache.org',
-        false)
-
-    def pipelineArgs = [
-        tempRoot: 'gs://temp-storage-for-end-to-end-tests',
-        project: 'apache-beam-testing',
-        postgresServerName: '10.36.0.11',
-        postgresUsername: 'postgres',
-        postgresDatabaseName: 'postgres',
-        postgresPassword: 'uuinkks',
-        postgresSsl: 'false'
-    ]
-    def pipelineArgList = []
-    pipelineArgs.each({
-        key, value -> pipelineArgList.add("--$key=$value")
-    })
-    def pipelineArgsJoined = pipelineArgList.join(',')
-
-    def argMap = [
-      benchmarks: 'beam_integration_benchmark',
-      beam_it_module: 'sdks/java/io/jdbc',
-      beam_it_args: pipelineArgsJoined,
-      beam_it_class: 'org.apache.beam.sdk.io.jdbc.JdbcIOIT',
-      // Profile is located in $BEAM_ROOT/sdks/java/io/pom.xml.
-      beam_it_profile: 'io-it'
-    ]
-
-    common_job_properties.buildPerformanceTest(delegate, argMap)
-}

http://git-wip-us.apache.org/repos/asf/beam/blob/66b20af9/.jenkins/job_beam_PerformanceTests_Spark.groovy
----------------------------------------------------------------------
diff --git a/.jenkins/job_beam_PerformanceTests_Spark.groovy b/.jenkins/job_beam_PerformanceTests_Spark.groovy
deleted file mode 100644
index ba719bf..0000000
--- a/.jenkins/job_beam_PerformanceTests_Spark.groovy
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import common_job_properties
-
-// This job runs the Beam performance tests on PerfKit Benchmarker.
-job('beam_PerformanceTests_Spark'){
-    // Set default Beam job properties.
-    common_job_properties.setTopLevelMainJobProperties(delegate)
-
-    // Run job in postcommit every 6 hours, don't trigger every push, and
-    // don't email individual committers.
-    common_job_properties.setPostCommit(
-        delegate,
-        '0 */6 * * *',
-        false,
-        'commits@beam.apache.org',
-        false)
-
-    def argMap = [
-      benchmarks: 'dpb_wordcount_benchmark',
-      // There are currently problems uploading to Dataproc, so we use a file
-      // already present on the machines as input.
-      dpb_wordcount_input: '/etc/hosts',
-      config_override: 'dpb_wordcount_benchmark.dpb_service.service_type=dataproc'
-    ]
-
-    common_job_properties.buildPerformanceTest(delegate, argMap)
-}

http://git-wip-us.apache.org/repos/asf/beam/blob/66b20af9/.jenkins/job_beam_PostCommit_Java_MavenInstall.groovy
----------------------------------------------------------------------
diff --git a/.jenkins/job_beam_PostCommit_Java_MavenInstall.groovy b/.jenkins/job_beam_PostCommit_Java_MavenInstall.groovy
deleted file mode 100644
index a288a84..0000000
--- a/.jenkins/job_beam_PostCommit_Java_MavenInstall.groovy
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import common_job_properties
-
-// This job runs the Java postcommit tests, including the suite of integration
-// tests.
-mavenJob('beam_PostCommit_Java_MavenInstall') {
-  description('Runs postcommit tests on the Java SDK.')
-
-  previousNames('beam_PostCommit_MavenVerify')
-
-  // Execute concurrent builds if necessary.
-  concurrentBuild()
-
-  // Set common parameters.
-  common_job_properties.setTopLevelMainJobProperties(delegate)
-
-  // Set maven parameters.
-  common_job_properties.setMavenConfig(delegate)
-
-  // Sets that this is a PostCommit job.
-  common_job_properties.setPostCommit(delegate)
-
-  // Maven goals for this job.
-  goals('-B -e -P release,dataflow-runner clean install coveralls:report -DrepoToken=$COVERALLS_REPO_TOKEN -DskipITs=false -DintegrationTestPipelineOptions=\'[ "--project=apache-beam-testing", "--tempRoot=gs://temp-storage-for-end-to-end-tests", "--runner=org.apache.beam.runners.dataflow.testing.TestDataflowRunner" ]\'')
-}

http://git-wip-us.apache.org/repos/asf/beam/blob/66b20af9/.jenkins/job_beam_PostCommit_Java_ValidatesRunner_Apex.groovy
----------------------------------------------------------------------
diff --git a/.jenkins/job_beam_PostCommit_Java_ValidatesRunner_Apex.groovy b/.jenkins/job_beam_PostCommit_Java_ValidatesRunner_Apex.groovy
deleted file mode 100644
index c16a1e2..0000000
--- a/.jenkins/job_beam_PostCommit_Java_ValidatesRunner_Apex.groovy
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import common_job_properties
-
-// This job runs the suite of ValidatesRunner tests against the Apex runner.
-mavenJob('beam_PostCommit_Java_ValidatesRunner_Apex') {
-  description('Runs the ValidatesRunner suite on the Apex runner.')
-  previousNames('beam_PostCommit_Java_RunnableOnService_Apex')
-
-  // Set common parameters.
-  common_job_properties.setTopLevelMainJobProperties(delegate)
-
-  // Set maven parameters.
-  common_job_properties.setMavenConfig(delegate)
-
-  // Sets that this is a PostCommit job.
-  common_job_properties.setPostCommit(delegate)
-
-  // Allows triggering this build against pull requests.
-  common_job_properties.enablePhraseTriggeringFromPullRequest(
-    delegate,
-    'Apache Apex Runner ValidatesRunner Tests',
-    'Run Apex ValidatesRunner')
-
-  // Maven goals for this job.
-  goals('''clean verify --projects runners/apex \
-      --also-make \
-      --batch-mode \
-      --errors \
-      --activate-profiles validates-runner-tests \
-      --activate-profiles local-validates-runner-tests''')
-}

http://git-wip-us.apache.org/repos/asf/beam/blob/66b20af9/.jenkins/job_beam_PostCommit_Java_ValidatesRunner_Dataflow.groovy
----------------------------------------------------------------------
diff --git a/.jenkins/job_beam_PostCommit_Java_ValidatesRunner_Dataflow.groovy b/.jenkins/job_beam_PostCommit_Java_ValidatesRunner_Dataflow.groovy
deleted file mode 100644
index 33235ff..0000000
--- a/.jenkins/job_beam_PostCommit_Java_ValidatesRunner_Dataflow.groovy
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import common_job_properties
-
-// This job runs the suite of ValidatesRunner tests against the Dataflow
-// runner.
-mavenJob('beam_PostCommit_Java_ValidatesRunner_Dataflow') {
-  description('Runs the ValidatesRunner suite on the Dataflow runner.')
-  previousNames('beam_PostCommit_Java_RunnableOnService_Dataflow')
-
-
-  // Set common parameters.
-  common_job_properties.setTopLevelMainJobProperties(delegate, 'master', 120)
-
-  // Set maven parameters.
-  common_job_properties.setMavenConfig(delegate)
-
-  // Sets that this is a PostCommit job.
-  common_job_properties.setPostCommit(delegate)
-
-  // Allows triggering this build against pull requests.
-  common_job_properties.enablePhraseTriggeringFromPullRequest(
-    delegate,
-    'Google Cloud Dataflow Runner ValidatesRunner Tests',
-    'Run Dataflow ValidatesRunner')
-
-  // Maven goals for this job.
-  goals('-B -e clean verify -am -pl runners/google-cloud-dataflow-java -DforkCount=0 -DvalidatesRunnerPipelineOptions=\'[ "--runner=org.apache.beam.runners.dataflow.testing.TestDataflowRunner", "--project=apache-beam-testing", "--tempRoot=gs://temp-storage-for-validates-runner-tests/" ]\'')
-}

http://git-wip-us.apache.org/repos/asf/beam/blob/66b20af9/.jenkins/job_beam_PostCommit_Java_ValidatesRunner_Flink.groovy
----------------------------------------------------------------------
diff --git a/.jenkins/job_beam_PostCommit_Java_ValidatesRunner_Flink.groovy b/.jenkins/job_beam_PostCommit_Java_ValidatesRunner_Flink.groovy
deleted file mode 100644
index 411106d..0000000
--- a/.jenkins/job_beam_PostCommit_Java_ValidatesRunner_Flink.groovy
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import common_job_properties
-
-// This job runs the suite of ValidatesRunner tests against the Flink runner.
-mavenJob('beam_PostCommit_Java_ValidatesRunner_Flink') {
-  description('Runs the ValidatesRunner suite on the Flink runner.')
-  previousNames('beam_PostCommit_Java_RunnableOnService_Flink')
-
-  // Set common parameters.
-  common_job_properties.setTopLevelMainJobProperties(delegate)
-
-  // Set maven parameters.
-  common_job_properties.setMavenConfig(delegate)
-
-  // Sets that this is a PostCommit job.
-  common_job_properties.setPostCommit(delegate)
-
-  // Allows triggering this build against pull requests.
-  common_job_properties.enablePhraseTriggeringFromPullRequest(
-    delegate,
-    'Apache Flink Runner ValidatesRunner Tests',
-    'Run Flink ValidatesRunner')
-
-  // Maven goals for this job.
-  goals('-B -e clean verify -am -pl runners/flink/runner -Plocal-validates-runner-tests -Pvalidates-runner-tests')
-}

http://git-wip-us.apache.org/repos/asf/beam/blob/66b20af9/.jenkins/job_beam_PostCommit_Java_ValidatesRunner_Gearpump.groovy
----------------------------------------------------------------------
diff --git a/.jenkins/job_beam_PostCommit_Java_ValidatesRunner_Gearpump.groovy b/.jenkins/job_beam_PostCommit_Java_ValidatesRunner_Gearpump.groovy
deleted file mode 100644
index 1348a19..0000000
--- a/.jenkins/job_beam_PostCommit_Java_ValidatesRunner_Gearpump.groovy
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import common_job_properties
-
-// This job runs the suite of ValidatesRunner tests against the Gearpump
-// runner.
-mavenJob('beam_PostCommit_Java_ValidatesRunner_Gearpump') {
-  description('Runs the ValidatesRunner suite on the Gearpump runner.')
-
-  previousNames('beam_PostCommit_Java_RunnableOnService_Gearpump')
-
-  // Set common parameters.
-  common_job_properties.setTopLevelMainJobProperties(
-      delegate,
-      'gearpump-runner')
-
-  // Set maven parameters.
-  common_job_properties.setMavenConfig(delegate)
-
-  // Sets that this is a PostCommit job.
-  // 0 5 31 2 * will run on Feb 31 (i.e. never) according to job properties.
-  // In post-commit this job triggers only on SCM changes.
-  common_job_properties.setPostCommit(delegate, '0 5 31 2 *')
-
-  // Allows triggering this build against pull requests.
-  common_job_properties.enablePhraseTriggeringFromPullRequest(
-    delegate,
-    'Apache Gearpump Runner ValidatesRunner Tests',
-    'Run Gearpump ValidatesRunner')
-
-  // Maven goals for this job.
-  goals('-B -e clean verify -am -pl runners/gearpump -DforkCount=0 -DvalidatesRunnerPipelineOptions=\'[ "--runner=TestGearpumpRunner", "--streaming=false" ]\'')
-}

http://git-wip-us.apache.org/repos/asf/beam/blob/66b20af9/.jenkins/job_beam_PostCommit_Java_ValidatesRunner_Spark.groovy
----------------------------------------------------------------------
diff --git a/.jenkins/job_beam_PostCommit_Java_ValidatesRunner_Spark.groovy b/.jenkins/job_beam_PostCommit_Java_ValidatesRunner_Spark.groovy
deleted file mode 100644
index 9fbc219..0000000
--- a/.jenkins/job_beam_PostCommit_Java_ValidatesRunner_Spark.groovy
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import common_job_properties
-
-// This job runs the suite of ValidatesRunner tests against the Spark runner.
-mavenJob('beam_PostCommit_Java_ValidatesRunner_Spark') {
-  description('Runs the ValidatesRunner suite on the Spark runner.')
-
-  previousNames('beam_PostCommit_Java_RunnableOnService_Spark')
-
-  // Set common parameters.
-  common_job_properties.setTopLevelMainJobProperties(delegate)
-
-  // Set maven parameters.
-  common_job_properties.setMavenConfig(delegate)
-
-  // Sets that this is a PostCommit job.
-  common_job_properties.setPostCommit(delegate)
-
-  // Allows triggering this build against pull requests.
-  common_job_properties.enablePhraseTriggeringFromPullRequest(
-    delegate,
-    'Apache Spark Runner ValidatesRunner Tests',
-    'Run Spark ValidatesRunner')
-
-  // Maven goals for this job.
-  goals('-B -e clean verify -am -pl runners/spark -Pvalidates-runner-tests -Plocal-validates-runner-tests -Dspark.ui.enabled=false')
-}

http://git-wip-us.apache.org/repos/asf/beam/blob/66b20af9/.jenkins/job_beam_PostCommit_Python_Verify.groovy
----------------------------------------------------------------------
diff --git a/.jenkins/job_beam_PostCommit_Python_Verify.groovy b/.jenkins/job_beam_PostCommit_Python_Verify.groovy
deleted file mode 100644
index 28cf77e..0000000
--- a/.jenkins/job_beam_PostCommit_Python_Verify.groovy
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import common_job_properties
-
-// This job defines the Python postcommit tests.
-job('beam_PostCommit_Python_Verify') {
-  description('Runs postcommit tests on the Python SDK.')
-
-  previousNames('beam_PostCommit_PythonVerify')
-
-  // Set common parameters.
-  common_job_properties.setTopLevelMainJobProperties(delegate)
-
-  // Sets that this is a PostCommit job.
-  common_job_properties.setPostCommit(delegate, '0 3-22/6 * * *')
-
-  // Allows triggering this build against pull requests.
-  common_job_properties.enablePhraseTriggeringFromPullRequest(
-    delegate,
-    'Python SDK PostCommit Tests',
-    'Run Python PostCommit')
-
-  // Allow the test to only run on particular nodes
-  // TODO(BEAM-1817): Remove once the tests can run on all nodes
-  parameters {
-      nodeParam('TEST_HOST') {
-          description('select test host as either beam1, 2 or 3')
-          defaultNodes(['beam3'])
-          allowedNodes(['beam1', 'beam2', 'beam3'])
-          trigger('multiSelectionDisallowed')
-          eligibility('IgnoreOfflineNodeEligibility')
-      }
-  }
-
-  // Execute shell command to test Python SDK.
-  steps {
-    shell('bash sdks/python/run_postcommit.sh')
-  }
-}

http://git-wip-us.apache.org/repos/asf/beam/blob/66b20af9/.jenkins/job_beam_PreCommit_Java_MavenInstall.groovy
----------------------------------------------------------------------
diff --git a/.jenkins/job_beam_PreCommit_Java_MavenInstall.groovy b/.jenkins/job_beam_PreCommit_Java_MavenInstall.groovy
deleted file mode 100644
index 3718551..0000000
--- a/.jenkins/job_beam_PreCommit_Java_MavenInstall.groovy
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import common_job_properties
-
-// This is the Java precommit which runs a maven install, and the current set
-// of precommit tests.
-mavenJob('beam_PreCommit_Java_MavenInstall') {
-  description('Runs an install of the current GitHub Pull Request.')
-
-  previousNames('beam_PreCommit_MavenVerify')
-
-  // Execute concurrent builds if necessary.
-  concurrentBuild()
-
-  // Set common parameters.
-  common_job_properties.setTopLevelMainJobProperties(delegate)
-
-  // Set Maven parameters.
-  common_job_properties.setMavenConfig(delegate)
-
-  // Sets that this is a PreCommit job.
-  common_job_properties.setPreCommit(delegate, 'Maven clean install')
-
-  // Maven goals for this job.
-  goals('-B -e -Prelease,include-runners,jenkins-precommit,direct-runner,dataflow-runner,spark-runner,flink-runner,apex-runner -DrepoToken=$COVERALLS_REPO_TOKEN -DpullRequest=$ghprbPullId help:effective-settings clean install coveralls:report')
-}

http://git-wip-us.apache.org/repos/asf/beam/blob/66b20af9/.jenkins/job_beam_PreCommit_Website_Stage.groovy
----------------------------------------------------------------------
diff --git a/.jenkins/job_beam_PreCommit_Website_Stage.groovy b/.jenkins/job_beam_PreCommit_Website_Stage.groovy
deleted file mode 100644
index 7c64f11..0000000
--- a/.jenkins/job_beam_PreCommit_Website_Stage.groovy
+++ /dev/null
@@ -1,80 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import common_job_properties
-
-// Defines a job.
-job('beam_PreCommit_Website_Stage') {
-  description('Stages the pull requests proposed for the Apache Beam ' +
-              'website to a temporary location to ease reviews.')
-
-  // Set common parameters.
-  common_job_properties.setTopLevelWebsiteJobProperties(delegate)
-
-  // Set pull request build trigger.
-  common_job_properties.setPreCommit(
-      delegate,
-      'Automatic staging of pull requests',
-      '\nJenkins built the site at commit id ${ghprbActualCommit} with ' +
-      'Jekyll and staged it [here](http://apache-beam-website-pull-' +
-      'requests.storage.googleapis.com/${ghprbPullId}/index.html). ' +
-      'Happy reviewing.\n\nNote that any previous site has been deleted. ' +
-      'This staged site will be automatically deleted after its TTL ' +
-      'expires. Push any commit to the pull request branch or re-trigger ' +
-      'the build to get it staged again.')
-
-  steps {
-    // Run the following shell script as a build step.
-    shell '''
-        # Install RVM.
-        gpg --keyserver hkp://keys.gnupg.net --recv-keys \\
-            409B6B1796C275462A1703113804BB82D39DC0E3
-        \\curl -sSL https://get.rvm.io | bash
-        source /home/jenkins/.rvm/scripts/rvm
-
-        # Install Ruby.
-        RUBY_VERSION_NUM=2.3.0
-        rvm install ruby $RUBY_VERSION_NUM --autolibs=read-only
-
-        # Install Bundler gem
-        PATH=~/.gem/ruby/$RUBY_VERSION_NUM/bin:$PATH
-        GEM_PATH=~/.gem/ruby/$RUBY_VERSION_NUM/:$GEM_PATH
-        gem install bundler --user-install
-
-        # Install all needed gems.
-        bundle install --path ~/.gem/
-
-        # Remove current site if it exists.
-        GCS_PATH="gs://apache-beam-website-pull-requests/${ghprbPullId}/"
-        gsutil -m rm -r -f ${GCS_PATH} || true
-
-        # Build the new site with the baseurl specified.
-        rm -fr ./content/
-        bundle exec jekyll build --baseurl=/${ghprbPullId}
-
-        # Install BeautifulSoup HTML Parser for python.
-        pip install --user beautifulsoup4
-
-        # Fix links on staged website.
-        python .jenkins/append_index_html_to_internal_links.py
-
-        # Upload the new site.
-        gsutil -m cp -R ./content/* ${GCS_PATH}
-    '''.stripIndent().trim()
-  }
-}

http://git-wip-us.apache.org/repos/asf/beam/blob/66b20af9/.jenkins/job_beam_PreCommit_Website_Test.groovy
----------------------------------------------------------------------
diff --git a/.jenkins/job_beam_PreCommit_Website_Test.groovy b/.jenkins/job_beam_PreCommit_Website_Test.groovy
deleted file mode 100644
index 421b58a..0000000
--- a/.jenkins/job_beam_PreCommit_Website_Test.groovy
+++ /dev/null
@@ -1,65 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import common_job_properties
-
-// Defines a job.
-job('beam_PreCommit_Website_Test') {
-  description('Runs tests on the pull requests proposed for the Apache Beam ' +
-              'website.')
-
-  // Set common parameters.
-  common_job_properties.setTopLevelWebsiteJobProperties(delegate)
-
-  // Execute concurrent builds. Multiple builds of this project may be executed
-  // in parallel. This is safe because this build does not require exclusive
-  // access to any shared resources.
-  concurrentBuild()
-
-  // Set pull request build trigger.
-  common_job_properties.setPreCommit(
-      delegate,
-      'Test website (dead links, etc.)')
-
-  steps {
-    // Run the following shell script as a build step.
-    shell '''
-        # Install RVM.
-        gpg --keyserver hkp://keys.gnupg.net --recv-keys \\
-            409B6B1796C275462A1703113804BB82D39DC0E3
-        \\curl -sSL https://get.rvm.io | bash
-        source /home/jenkins/.rvm/scripts/rvm
-
-        # Install Ruby.
-        RUBY_VERSION_NUM=2.3.0
-        rvm install ruby $RUBY_VERSION_NUM --autolibs=read-only
-
-        # Install Bundler gem
-        PATH=~/.gem/ruby/$RUBY_VERSION_NUM/bin:$PATH
-        GEM_PATH=~/.gem/ruby/$RUBY_VERSION_NUM/:$GEM_PATH
-        gem install bundler --user-install
-
-        # Install all needed gems.
-        bundle install --path ~/.gem/
-
-        # Build the new site and test it.
-        rm -fr ./content/
-        bundle exec rake test
-    '''.stripIndent().trim()
-  }
-}

http://git-wip-us.apache.org/repos/asf/beam/blob/66b20af9/.jenkins/job_beam_Release_NightlySnapshot.groovy
----------------------------------------------------------------------
diff --git a/.jenkins/job_beam_Release_NightlySnapshot.groovy b/.jenkins/job_beam_Release_NightlySnapshot.groovy
deleted file mode 100644
index f2c3ff0..0000000
--- a/.jenkins/job_beam_Release_NightlySnapshot.groovy
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import common_job_properties
-
-// This is the nightly snapshot build -- we use this to deploy a daily snapshot
-// to https://repository.apache.org/content/groups/snapshots/org/apache/beam.
-// Runs the postsubmit suite before deploying.
-mavenJob('beam_Release_NightlySnapshot') {
-  description('Runs a mvn clean deploy of the nightly snapshot.')
-
-  // Execute concurrent builds if necessary.
-  concurrentBuild()
-
-  // Set common parameters.
-  common_job_properties.setTopLevelMainJobProperties(delegate)
-
-  // Set maven paramaters.
-  common_job_properties.setMavenConfig(delegate)
-
-  // This is a post-commit job that runs once per day, not for every push.
-  common_job_properties.setPostCommit(
-      delegate,
-      '0 7 * * *',
-      false,
-      'dev@beam.apache.org')
-
-  // Maven goals for this job.
-  goals('-B -e clean deploy -P release,dataflow-runner -DskipITs=false -DintegrationTestPipelineOptions=\'[ "--project=apache-beam-testing", "--tempRoot=gs://temp-storage-for-end-to-end-tests", "--runner=org.apache.beam.runners.dataflow.testing.TestDataflowRunner" ]\'')
-}

http://git-wip-us.apache.org/repos/asf/beam/blob/66b20af9/.jenkins/job_seed.groovy
----------------------------------------------------------------------
diff --git a/.jenkins/job_seed.groovy b/.jenkins/job_seed.groovy
deleted file mode 100644
index 7e5272b..0000000
--- a/.jenkins/job_seed.groovy
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import common_job_properties
-
-// Defines the seed job, which creates or updates all other Jenkins projects.
-job('beam_SeedJob') {
-  description('Automatically configures all Apache Beam Jenkins projects based' +
-              ' on Jenkins DSL groovy files checked into the code repository.')
-
-  previousNames('beam_SeedJob_Main')
-
-  // Set common parameters.
-  common_job_properties.setTopLevelMainJobProperties(delegate)
-
-  // This is a post-commit job that runs once per day, not for every push.
-  common_job_properties.setPostCommit(
-      delegate,
-      '0 6 * * *',
-      false,
-      'dev@beam.apache.org')
-
-  // Allows triggering this build against pull requests.
-  common_job_properties.enablePhraseTriggeringFromPullRequest(
-    delegate,
-    'Seed Job',
-    'Run Seed Job')
-
-  steps {
-    dsl {
-      // A list or a glob of other groovy files to process.
-      external('.jenkins/job_*.groovy')
-
-      // If a job is removed from the script, disable it (rather than deleting).
-      removeAction('DISABLE')
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/beam/blob/66b20af9/.test-infra/jenkins/common_job_properties.groovy
----------------------------------------------------------------------
diff --git a/.test-infra/jenkins/common_job_properties.groovy b/.test-infra/jenkins/common_job_properties.groovy
new file mode 100644
index 0000000..ee10281
--- /dev/null
+++ b/.test-infra/jenkins/common_job_properties.groovy
@@ -0,0 +1,261 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Contains functions that help build Jenkins projects. Functions typically set
+// common properties that are shared among all Jenkins projects.
+// Code in this directory should conform to the Groovy style guide.
+//  http://groovy-lang.org/style-guide.html
+class common_job_properties {
+
+  // Sets common top-level job properties for website repository jobs.
+  static void setTopLevelWebsiteJobProperties(context) {
+    setTopLevelJobProperties(context, 'beam-site', 'asf-site', 30)
+  }
+
+  // Sets common top-level job properties for main repository jobs.
+  static void setTopLevelMainJobProperties(context,
+                                           String defaultBranch = 'master',
+                                           int defaultTimeout = 100) {
+    setTopLevelJobProperties(context, 'beam', defaultBranch, defaultTimeout)
+  }
+
+  // Sets common top-level job properties. Accessed through one of the above
+  // methods to protect jobs from internal details of param defaults.
+  private static void setTopLevelJobProperties(context,
+                                               String repositoryName,
+                                               String defaultBranch,
+                                               int defaultTimeout) {
+
+    // GitHub project.
+    context.properties {
+      githubProjectUrl('https://github.com/apache/' + repositoryName + '/')
+    }
+
+    // Set JDK version.
+    context.jdk('JDK 1.8 (latest)')
+
+    // Restrict this project to run only on Jenkins executors dedicated to the
+    // Apache Beam project.
+    context.label('beam')
+
+    // Discard old builds. Build records are only kept up to this number of days.
+    context.logRotator {
+      daysToKeep(14)
+    }
+
+    // Source code management.
+    context.scm {
+      git {
+        remote {
+          url('https://github.com/apache/' + repositoryName + '.git')
+          refspec('+refs/heads/*:refs/remotes/origin/* ' +
+                  '+refs/pull/*:refs/remotes/origin/pr/*')
+        }
+        branch('${sha1}')
+        extensions {
+          cleanAfterCheckout()
+          pruneBranches()
+        }
+      }
+    }
+
+    context.parameters {
+      // This is a recommended setup if you want to run the job manually. The
+      // ${sha1} parameter needs to be provided, and defaults to the main branch.
+      stringParam(
+          'sha1',
+          defaultBranch,
+          'Commit id or refname (eg: origin/pr/9/head) you want to build.')
+    }
+
+    context.wrappers {
+      // Abort the build if it's stuck for more minutes than specified.
+      timeout {
+        absolute(defaultTimeout)
+        abortBuild()
+      }
+
+      // Set SPARK_LOCAL_IP for spark tests.
+      environmentVariables {
+        env('SPARK_LOCAL_IP', '127.0.0.1')
+      }
+      credentialsBinding {
+        string("COVERALLS_REPO_TOKEN", "beam-coveralls-token")
+      }
+    }
+  }
+
+  // Sets the pull request build trigger. Accessed through precommit methods
+  // below to insulate callers from internal parameter defaults.
+  private static void setPullRequestBuildTrigger(context,
+                                                 String commitStatusContext,
+                                                 String successComment = '--none--',
+                                                 String prTriggerPhrase = '') {
+    context.triggers {
+      githubPullRequest {
+        admins(['asfbot'])
+        useGitHubHooks()
+        orgWhitelist(['apache'])
+        allowMembersOfWhitelistedOrgsAsAdmin()
+        permitAll()
+        // prTriggerPhrase is the argument which gets set when we want to allow
+        // post-commit builds to run against pending pull requests. This block
+        // overrides the default trigger phrase with the new one. Setting this
+        // will disable automatic invocation of this build; the phrase will be
+        // required to start it.
+        if (prTriggerPhrase) {
+          triggerPhrase(prTriggerPhrase)
+          onlyTriggerPhrase()
+        }
+
+        extensions {
+          commitStatus {
+            // This is the name that will show up in the GitHub pull request UI
+            // for this Jenkins project.
+            delegate.context("Jenkins: " + commitStatusContext)
+          }
+
+          /*
+            This section is disabled, because of jenkinsci/ghprb-plugin#417 issue.
+            For the time being, an equivalent configure section below is added.
+
+          // Comment messages after build completes.
+          buildStatus {
+            completedStatus('SUCCESS', successComment)
+            completedStatus('FAILURE', '--none--')
+            completedStatus('ERROR', '--none--')
+          }
+          */
+        }
+      }
+    }
+
+    // Comment messages after build completes.
+    context.configure {
+      def messages = it / triggers / 'org.jenkinsci.plugins.ghprb.GhprbTrigger' / extensions / 'org.jenkinsci.plugins.ghprb.extensions.comments.GhprbBuildStatus' / messages
+      messages << 'org.jenkinsci.plugins.ghprb.extensions.comments.GhprbBuildResultMessage' {
+        message(successComment)
+        result('SUCCESS')
+      }
+      messages << 'org.jenkinsci.plugins.ghprb.extensions.comments.GhprbBuildResultMessage' {
+        message('--none--')
+        result('ERROR')
+      }
+      messages << 'org.jenkinsci.plugins.ghprb.extensions.comments.GhprbBuildResultMessage' {
+        message('--none--')
+        result('FAILURE')
+      }
+    }
+  }
+
+  // Sets common config for Maven jobs.
+  static void setMavenConfig(context) {
+    context.mavenInstallation('Maven 3.3.3')
+    context.mavenOpts('-Dorg.slf4j.simpleLogger.showDateTime=true')
+    context.mavenOpts('-Dorg.slf4j.simpleLogger.dateTimeFormat=yyyy-MM-dd\\\'T\\\'HH:mm:ss.SSS')
+    // The -XX:+TieredCompilation -XX:TieredStopAtLevel=1 JVM options enable
+    // tiered compilation to make the JVM startup times faster during the tests.
+    context.mavenOpts('-XX:+TieredCompilation')
+    context.mavenOpts('-XX:TieredStopAtLevel=1')
+    context.rootPOM('pom.xml')
+    // Use a repository local to the workspace for better isolation of jobs.
+    context.localRepository(LocalRepositoryLocation.LOCAL_TO_WORKSPACE)
+    // Disable archiving the built artifacts by default, as this is slow and flaky.
+    // We can usually recreate them easily, and we can also opt-in individual jobs
+    // to artifact archiving.
+    context.archivingDisabled(true)
+  }
+
+  // Sets common config for PreCommit jobs.
+  static void setPreCommit(context,
+                           String commitStatusName,
+                           String successComment = '--none--') {
+    // Set pull request build trigger.
+    setPullRequestBuildTrigger(context, commitStatusName, successComment)
+  }
+
+  // Enable triggering postcommit runs against pull requests. Users can comment the trigger phrase
+  // specified in the postcommit job and have the job run against their PR to run
+  // tests not in the presubmit suite for additional confidence.
+  static void enablePhraseTriggeringFromPullRequest(context,
+                                                    String commitStatusName,
+                                                    String prTriggerPhrase) {
+    setPullRequestBuildTrigger(
+      context,
+      commitStatusName,
+      '--none--',
+      prTriggerPhrase)
+  }
+
+  // Sets common config for PostCommit jobs.
+  static void setPostCommit(context,
+                            String buildSchedule = '0 */6 * * *',
+                            boolean triggerEveryPush = true,
+                            String notifyAddress = 'commits@beam.apache.org',
+                            boolean emailIndividuals = true) {
+    // Set build triggers
+    context.triggers {
+      // By default runs every 6 hours.
+      cron(buildSchedule)
+      if (triggerEveryPush) {
+        githubPush()
+      }
+    }
+
+    context.publishers {
+      // Notify an email address for each failed build (defaults to commits@).
+      mailer(notifyAddress, false, emailIndividuals)
+    }
+  }
+
+  // Configures the argument list for performance tests, adding the standard
+  // performance test job arguments.
+  private static def genPerformanceArgs(def argMap) {
+    def standard_args = [
+      project: 'apache-beam-testing',
+      dpb_log_level: 'INFO',
+      maven_binary: '/home/jenkins/tools/maven/latest/bin/mvn',
+      bigquery_table: 'beam_performance.pkb_results',
+      // Publishes results with official tag, for use in dashboards.
+      official: 'true'
+    ]
+    // Note: in case of key collision, keys present in ArgMap win.
+    def joined_args = standard_args.plus(argMap)
+    def argList = []
+    joined_args.each({
+        // FYI: Replacement only works with double quotes.
+        key, value -> argList.add("--$key=$value")
+    })
+    return argList.join(' ')
+  }
+
+  // Adds the standard performance test job steps.
+  static def buildPerformanceTest(def context, def argMap) {
+    def pkbArgs = genPerformanceArgs(argMap)
+    context.steps {
+        // Clean up environment.
+        shell('rm -rf PerfKitBenchmarker')
+        // Clone appropriate perfkit branch
+        shell('git clone https://github.com/GoogleCloudPlatform/PerfKitBenchmarker.git')
+        // Install job requirements.
+        shell('pip install --user -r PerfKitBenchmarker/requirements.txt')
+        // Launch performance test.
+        shell("python PerfKitBenchmarker/pkb.py $pkbArgs")
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/beam/blob/66b20af9/.test-infra/jenkins/job_beam_PerformanceTests_Dataflow.groovy
----------------------------------------------------------------------
diff --git a/.test-infra/jenkins/job_beam_PerformanceTests_Dataflow.groovy b/.test-infra/jenkins/job_beam_PerformanceTests_Dataflow.groovy
new file mode 100644
index 0000000..51c73f3
--- /dev/null
+++ b/.test-infra/jenkins/job_beam_PerformanceTests_Dataflow.groovy
@@ -0,0 +1,43 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import common_job_properties
+
+// This job runs the Beam performance tests on PerfKit Benchmarker.
+job('beam_PerformanceTests_Dataflow'){
+    // Set default Beam job properties.
+    common_job_properties.setTopLevelMainJobProperties(delegate)
+
+    // Run job in postcommit every 6 hours, don't trigger every push, and
+    // don't email individual committers.
+    common_job_properties.setPostCommit(
+        delegate,
+        '0 */6 * * *',
+        false,
+        'commits@beam.apache.org',
+        false)
+
+    def argMap = [
+      benchmarks: 'dpb_wordcount_benchmark',
+      dpb_dataflow_staging_location: 'gs://temp-storage-for-perf-tests/staging',
+      dpb_wordcount_input: 'dataflow-samples/shakespeare/kinglear.txt',
+      config_override: 'dpb_wordcount_benchmark.dpb_service.service_type=dataflow'
+    ]
+
+    common_job_properties.buildPerformanceTest(delegate, argMap)
+}

http://git-wip-us.apache.org/repos/asf/beam/blob/66b20af9/.test-infra/jenkins/job_beam_PerformanceTests_JDBC.groovy
----------------------------------------------------------------------
diff --git a/.test-infra/jenkins/job_beam_PerformanceTests_JDBC.groovy b/.test-infra/jenkins/job_beam_PerformanceTests_JDBC.groovy
new file mode 100644
index 0000000..8e581c2
--- /dev/null
+++ b/.test-infra/jenkins/job_beam_PerformanceTests_JDBC.groovy
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import common_job_properties
+
+// This job runs the Beam performance tests on PerfKit Benchmarker.
+job('beam_PerformanceTests_JDBC'){
+    // Set default Beam job properties.
+    common_job_properties.setTopLevelMainJobProperties(delegate)
+
+    // Run job in postcommit every 6 hours, don't trigger every push, and
+    // don't email individual committers.
+    common_job_properties.setPostCommit(
+        delegate,
+        '0 */6 * * *',
+        false,
+        'commits@beam.apache.org',
+        false)
+
+    def pipelineArgs = [
+        tempRoot: 'gs://temp-storage-for-end-to-end-tests',
+        project: 'apache-beam-testing',
+        postgresServerName: '10.36.0.11',
+        postgresUsername: 'postgres',
+        postgresDatabaseName: 'postgres',
+        postgresPassword: 'uuinkks',
+        postgresSsl: 'false'
+    ]
+    def pipelineArgList = []
+    pipelineArgs.each({
+        key, value -> pipelineArgList.add("--$key=$value")
+    })
+    def pipelineArgsJoined = pipelineArgList.join(',')
+
+    def argMap = [
+      benchmarks: 'beam_integration_benchmark',
+      beam_it_module: 'sdks/java/io/jdbc',
+      beam_it_args: pipelineArgsJoined,
+      beam_it_class: 'org.apache.beam.sdk.io.jdbc.JdbcIOIT',
+      // Profile is located in $BEAM_ROOT/sdks/java/io/pom.xml.
+      beam_it_profile: 'io-it'
+    ]
+
+    common_job_properties.buildPerformanceTest(delegate, argMap)
+}

http://git-wip-us.apache.org/repos/asf/beam/blob/66b20af9/.test-infra/jenkins/job_beam_PerformanceTests_Spark.groovy
----------------------------------------------------------------------
diff --git a/.test-infra/jenkins/job_beam_PerformanceTests_Spark.groovy b/.test-infra/jenkins/job_beam_PerformanceTests_Spark.groovy
new file mode 100644
index 0000000..ba719bf
--- /dev/null
+++ b/.test-infra/jenkins/job_beam_PerformanceTests_Spark.groovy
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import common_job_properties
+
+// This job runs the Beam performance tests on PerfKit Benchmarker.
+job('beam_PerformanceTests_Spark'){
+    // Set default Beam job properties.
+    common_job_properties.setTopLevelMainJobProperties(delegate)
+
+    // Run job in postcommit every 6 hours, don't trigger every push, and
+    // don't email individual committers.
+    common_job_properties.setPostCommit(
+        delegate,
+        '0 */6 * * *',
+        false,
+        'commits@beam.apache.org',
+        false)
+
+    def argMap = [
+      benchmarks: 'dpb_wordcount_benchmark',
+      // There are currently problems uploading to Dataproc, so we use a file
+      // already present on the machines as input.
+      dpb_wordcount_input: '/etc/hosts',
+      config_override: 'dpb_wordcount_benchmark.dpb_service.service_type=dataproc'
+    ]
+
+    common_job_properties.buildPerformanceTest(delegate, argMap)
+}

http://git-wip-us.apache.org/repos/asf/beam/blob/66b20af9/.test-infra/jenkins/job_beam_PostCommit_Java_MavenInstall.groovy
----------------------------------------------------------------------
diff --git a/.test-infra/jenkins/job_beam_PostCommit_Java_MavenInstall.groovy b/.test-infra/jenkins/job_beam_PostCommit_Java_MavenInstall.groovy
new file mode 100644
index 0000000..a288a84
--- /dev/null
+++ b/.test-infra/jenkins/job_beam_PostCommit_Java_MavenInstall.groovy
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import common_job_properties
+
+// This job runs the Java postcommit tests, including the suite of integration
+// tests.
+mavenJob('beam_PostCommit_Java_MavenInstall') {
+  description('Runs postcommit tests on the Java SDK.')
+
+  previousNames('beam_PostCommit_MavenVerify')
+
+  // Execute concurrent builds if necessary.
+  concurrentBuild()
+
+  // Set common parameters.
+  common_job_properties.setTopLevelMainJobProperties(delegate)
+
+  // Set maven parameters.
+  common_job_properties.setMavenConfig(delegate)
+
+  // Sets that this is a PostCommit job.
+  common_job_properties.setPostCommit(delegate)
+
+  // Maven goals for this job.
+  goals('-B -e -P release,dataflow-runner clean install coveralls:report -DrepoToken=$COVERALLS_REPO_TOKEN -DskipITs=false -DintegrationTestPipelineOptions=\'[ "--project=apache-beam-testing", "--tempRoot=gs://temp-storage-for-end-to-end-tests", "--runner=org.apache.beam.runners.dataflow.testing.TestDataflowRunner" ]\'')
+}

http://git-wip-us.apache.org/repos/asf/beam/blob/66b20af9/.test-infra/jenkins/job_beam_PostCommit_Java_ValidatesRunner_Apex.groovy
----------------------------------------------------------------------
diff --git a/.test-infra/jenkins/job_beam_PostCommit_Java_ValidatesRunner_Apex.groovy b/.test-infra/jenkins/job_beam_PostCommit_Java_ValidatesRunner_Apex.groovy
new file mode 100644
index 0000000..c16a1e2
--- /dev/null
+++ b/.test-infra/jenkins/job_beam_PostCommit_Java_ValidatesRunner_Apex.groovy
@@ -0,0 +1,48 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import common_job_properties
+
+// This job runs the suite of ValidatesRunner tests against the Apex runner.
+mavenJob('beam_PostCommit_Java_ValidatesRunner_Apex') {
+  description('Runs the ValidatesRunner suite on the Apex runner.')
+  previousNames('beam_PostCommit_Java_RunnableOnService_Apex')
+
+  // Set common parameters.
+  common_job_properties.setTopLevelMainJobProperties(delegate)
+
+  // Set maven parameters.
+  common_job_properties.setMavenConfig(delegate)
+
+  // Sets that this is a PostCommit job.
+  common_job_properties.setPostCommit(delegate)
+
+  // Allows triggering this build against pull requests.
+  common_job_properties.enablePhraseTriggeringFromPullRequest(
+    delegate,
+    'Apache Apex Runner ValidatesRunner Tests',
+    'Run Apex ValidatesRunner')
+
+  // Maven goals for this job.
+  goals('''clean verify --projects runners/apex \
+      --also-make \
+      --batch-mode \
+      --errors \
+      --activate-profiles validates-runner-tests \
+      --activate-profiles local-validates-runner-tests''')
+}

http://git-wip-us.apache.org/repos/asf/beam/blob/66b20af9/.test-infra/jenkins/job_beam_PostCommit_Java_ValidatesRunner_Dataflow.groovy
----------------------------------------------------------------------
diff --git a/.test-infra/jenkins/job_beam_PostCommit_Java_ValidatesRunner_Dataflow.groovy b/.test-infra/jenkins/job_beam_PostCommit_Java_ValidatesRunner_Dataflow.groovy
new file mode 100644
index 0000000..33235ff
--- /dev/null
+++ b/.test-infra/jenkins/job_beam_PostCommit_Java_ValidatesRunner_Dataflow.groovy
@@ -0,0 +1,45 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import common_job_properties
+
+// This job runs the suite of ValidatesRunner tests against the Dataflow
+// runner.
+mavenJob('beam_PostCommit_Java_ValidatesRunner_Dataflow') {
+  description('Runs the ValidatesRunner suite on the Dataflow runner.')
+  previousNames('beam_PostCommit_Java_RunnableOnService_Dataflow')
+
+
+  // Set common parameters.
+  common_job_properties.setTopLevelMainJobProperties(delegate, 'master', 120)
+
+  // Set maven parameters.
+  common_job_properties.setMavenConfig(delegate)
+
+  // Sets that this is a PostCommit job.
+  common_job_properties.setPostCommit(delegate)
+
+  // Allows triggering this build against pull requests.
+  common_job_properties.enablePhraseTriggeringFromPullRequest(
+    delegate,
+    'Google Cloud Dataflow Runner ValidatesRunner Tests',
+    'Run Dataflow ValidatesRunner')
+
+  // Maven goals for this job.
+  goals('-B -e clean verify -am -pl runners/google-cloud-dataflow-java -DforkCount=0 -DvalidatesRunnerPipelineOptions=\'[ "--runner=org.apache.beam.runners.dataflow.testing.TestDataflowRunner", "--project=apache-beam-testing", "--tempRoot=gs://temp-storage-for-validates-runner-tests/" ]\'')
+}

http://git-wip-us.apache.org/repos/asf/beam/blob/66b20af9/.test-infra/jenkins/job_beam_PostCommit_Java_ValidatesRunner_Flink.groovy
----------------------------------------------------------------------
diff --git a/.test-infra/jenkins/job_beam_PostCommit_Java_ValidatesRunner_Flink.groovy b/.test-infra/jenkins/job_beam_PostCommit_Java_ValidatesRunner_Flink.groovy
new file mode 100644
index 0000000..411106d
--- /dev/null
+++ b/.test-infra/jenkins/job_beam_PostCommit_Java_ValidatesRunner_Flink.groovy
@@ -0,0 +1,43 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import common_job_properties
+
+// This job runs the suite of ValidatesRunner tests against the Flink runner.
+mavenJob('beam_PostCommit_Java_ValidatesRunner_Flink') {
+  description('Runs the ValidatesRunner suite on the Flink runner.')
+  previousNames('beam_PostCommit_Java_RunnableOnService_Flink')
+
+  // Set common parameters.
+  common_job_properties.setTopLevelMainJobProperties(delegate)
+
+  // Set maven parameters.
+  common_job_properties.setMavenConfig(delegate)
+
+  // Sets that this is a PostCommit job.
+  common_job_properties.setPostCommit(delegate)
+
+  // Allows triggering this build against pull requests.
+  common_job_properties.enablePhraseTriggeringFromPullRequest(
+    delegate,
+    'Apache Flink Runner ValidatesRunner Tests',
+    'Run Flink ValidatesRunner')
+
+  // Maven goals for this job.
+  goals('-B -e clean verify -am -pl runners/flink/runner -Plocal-validates-runner-tests -Pvalidates-runner-tests')
+}

http://git-wip-us.apache.org/repos/asf/beam/blob/66b20af9/.test-infra/jenkins/job_beam_PostCommit_Java_ValidatesRunner_Gearpump.groovy
----------------------------------------------------------------------
diff --git a/.test-infra/jenkins/job_beam_PostCommit_Java_ValidatesRunner_Gearpump.groovy b/.test-infra/jenkins/job_beam_PostCommit_Java_ValidatesRunner_Gearpump.groovy
new file mode 100644
index 0000000..1348a19
--- /dev/null
+++ b/.test-infra/jenkins/job_beam_PostCommit_Java_ValidatesRunner_Gearpump.groovy
@@ -0,0 +1,49 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import common_job_properties
+
+// This job runs the suite of ValidatesRunner tests against the Gearpump
+// runner.
+mavenJob('beam_PostCommit_Java_ValidatesRunner_Gearpump') {
+  description('Runs the ValidatesRunner suite on the Gearpump runner.')
+
+  previousNames('beam_PostCommit_Java_RunnableOnService_Gearpump')
+
+  // Set common parameters.
+  common_job_properties.setTopLevelMainJobProperties(
+      delegate,
+      'gearpump-runner')
+
+  // Set maven parameters.
+  common_job_properties.setMavenConfig(delegate)
+
+  // Sets that this is a PostCommit job.
+  // 0 5 31 2 * will run on Feb 31 (i.e. never) according to job properties.
+  // In post-commit this job triggers only on SCM changes.
+  common_job_properties.setPostCommit(delegate, '0 5 31 2 *')
+
+  // Allows triggering this build against pull requests.
+  common_job_properties.enablePhraseTriggeringFromPullRequest(
+    delegate,
+    'Apache Gearpump Runner ValidatesRunner Tests',
+    'Run Gearpump ValidatesRunner')
+
+  // Maven goals for this job.
+  goals('-B -e clean verify -am -pl runners/gearpump -DforkCount=0 -DvalidatesRunnerPipelineOptions=\'[ "--runner=TestGearpumpRunner", "--streaming=false" ]\'')
+}

http://git-wip-us.apache.org/repos/asf/beam/blob/66b20af9/.test-infra/jenkins/job_beam_PostCommit_Java_ValidatesRunner_Spark.groovy
----------------------------------------------------------------------
diff --git a/.test-infra/jenkins/job_beam_PostCommit_Java_ValidatesRunner_Spark.groovy b/.test-infra/jenkins/job_beam_PostCommit_Java_ValidatesRunner_Spark.groovy
new file mode 100644
index 0000000..9fbc219
--- /dev/null
+++ b/.test-infra/jenkins/job_beam_PostCommit_Java_ValidatesRunner_Spark.groovy
@@ -0,0 +1,44 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import common_job_properties
+
+// This job runs the suite of ValidatesRunner tests against the Spark runner.
+mavenJob('beam_PostCommit_Java_ValidatesRunner_Spark') {
+  description('Runs the ValidatesRunner suite on the Spark runner.')
+
+  previousNames('beam_PostCommit_Java_RunnableOnService_Spark')
+
+  // Set common parameters.
+  common_job_properties.setTopLevelMainJobProperties(delegate)
+
+  // Set maven parameters.
+  common_job_properties.setMavenConfig(delegate)
+
+  // Sets that this is a PostCommit job.
+  common_job_properties.setPostCommit(delegate)
+
+  // Allows triggering this build against pull requests.
+  common_job_properties.enablePhraseTriggeringFromPullRequest(
+    delegate,
+    'Apache Spark Runner ValidatesRunner Tests',
+    'Run Spark ValidatesRunner')
+
+  // Maven goals for this job.
+  goals('-B -e clean verify -am -pl runners/spark -Pvalidates-runner-tests -Plocal-validates-runner-tests -Dspark.ui.enabled=false')
+}

http://git-wip-us.apache.org/repos/asf/beam/blob/66b20af9/.test-infra/jenkins/job_beam_PostCommit_Python_Verify.groovy
----------------------------------------------------------------------
diff --git a/.test-infra/jenkins/job_beam_PostCommit_Python_Verify.groovy b/.test-infra/jenkins/job_beam_PostCommit_Python_Verify.groovy
new file mode 100644
index 0000000..28cf77e
--- /dev/null
+++ b/.test-infra/jenkins/job_beam_PostCommit_Python_Verify.groovy
@@ -0,0 +1,55 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import common_job_properties
+
+// This job defines the Python postcommit tests.
+job('beam_PostCommit_Python_Verify') {
+  description('Runs postcommit tests on the Python SDK.')
+
+  previousNames('beam_PostCommit_PythonVerify')
+
+  // Set common parameters.
+  common_job_properties.setTopLevelMainJobProperties(delegate)
+
+  // Sets that this is a PostCommit job.
+  common_job_properties.setPostCommit(delegate, '0 3-22/6 * * *')
+
+  // Allows triggering this build against pull requests.
+  common_job_properties.enablePhraseTriggeringFromPullRequest(
+    delegate,
+    'Python SDK PostCommit Tests',
+    'Run Python PostCommit')
+
+  // Allow the test to only run on particular nodes
+  // TODO(BEAM-1817): Remove once the tests can run on all nodes
+  parameters {
+      nodeParam('TEST_HOST') {
+          description('select test host as either beam1, 2 or 3')
+          defaultNodes(['beam3'])
+          allowedNodes(['beam1', 'beam2', 'beam3'])
+          trigger('multiSelectionDisallowed')
+          eligibility('IgnoreOfflineNodeEligibility')
+      }
+  }
+
+  // Execute shell command to test Python SDK.
+  steps {
+    shell('bash sdks/python/run_postcommit.sh')
+  }
+}

http://git-wip-us.apache.org/repos/asf/beam/blob/66b20af9/.test-infra/jenkins/job_beam_PreCommit_Java_MavenInstall.groovy
----------------------------------------------------------------------
diff --git a/.test-infra/jenkins/job_beam_PreCommit_Java_MavenInstall.groovy b/.test-infra/jenkins/job_beam_PreCommit_Java_MavenInstall.groovy
new file mode 100644
index 0000000..3718551
--- /dev/null
+++ b/.test-infra/jenkins/job_beam_PreCommit_Java_MavenInstall.groovy
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import common_job_properties
+
+// This is the Java precommit which runs a maven install, and the current set
+// of precommit tests.
+mavenJob('beam_PreCommit_Java_MavenInstall') {
+  description('Runs an install of the current GitHub Pull Request.')
+
+  previousNames('beam_PreCommit_MavenVerify')
+
+  // Execute concurrent builds if necessary.
+  concurrentBuild()
+
+  // Set common parameters.
+  common_job_properties.setTopLevelMainJobProperties(delegate)
+
+  // Set Maven parameters.
+  common_job_properties.setMavenConfig(delegate)
+
+  // Sets that this is a PreCommit job.
+  common_job_properties.setPreCommit(delegate, 'Maven clean install')
+
+  // Maven goals for this job.
+  goals('-B -e -Prelease,include-runners,jenkins-precommit,direct-runner,dataflow-runner,spark-runner,flink-runner,apex-runner -DrepoToken=$COVERALLS_REPO_TOKEN -DpullRequest=$ghprbPullId help:effective-settings clean install coveralls:report')
+}