You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@beam.apache.org by tg...@apache.org on 2017/07/20 20:35:06 UTC
[3/3] beam git commit: Add maven support for invoking perfkit
benchmarker to run IO ITs
Add maven support for invoking perfkit benchmarker to run IO ITs
Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/4192ac6c
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/4192ac6c
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/4192ac6c
Branch: refs/heads/master
Commit: 4192ac6c72765e8c55480eec66c499ef0798ecf3
Parents: afeba37
Author: Stephen Sisk <si...@google.com>
Authored: Wed Jun 14 09:57:35 2017 -0700
Committer: Thomas Groh <tg...@google.com>
Committed: Thu Jul 20 13:34:49 2017 -0700
----------------------------------------------------------------------
.../kubernetes/postgres/pkb-config-local.yml | 34 +++++
.test-infra/kubernetes/postgres/pkb-config.yml | 32 +++++
runners/google-cloud-dataflow-java/pom.xml | 23 +++
sdks/java/io/google-cloud-platform/pom.xml | 91 ++++++++++++
sdks/java/io/jdbc/pom.xml | 139 +++++++++++++++++++
sdks/java/io/pom.xml | 36 +++++
6 files changed, 355 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/beam/blob/4192ac6c/.test-infra/kubernetes/postgres/pkb-config-local.yml
----------------------------------------------------------------------
diff --git a/.test-infra/kubernetes/postgres/pkb-config-local.yml b/.test-infra/kubernetes/postgres/pkb-config-local.yml
new file mode 100644
index 0000000..1bac0c4
--- /dev/null
+++ b/.test-infra/kubernetes/postgres/pkb-config-local.yml
@@ -0,0 +1,34 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# This file is a pkb benchmark configuration file, used when running the IO ITs
+# that use this data store. It allows users to run tests when they are on a
+# separate network from the kubernetes cluster by reading the postgres IP
+# address from the LoadBalancer service.
+#
+# This file defines pipeline options to pass to beam, as well as how to derive
+# the values for those pipeline options from kubernetes (where appropriate.)
+
+static_pipeline_options:
+ - postgresUsername: postgres
+ - postgresPassword: uuinkks
+ - postgresDatabaseName: postgres
+ - postgresSsl: false
+dynamic_pipeline_options:
+ - name: postgresServerName
+ type: LoadBalancerIp
+ serviceName: postgres-for-dev
http://git-wip-us.apache.org/repos/asf/beam/blob/4192ac6c/.test-infra/kubernetes/postgres/pkb-config.yml
----------------------------------------------------------------------
diff --git a/.test-infra/kubernetes/postgres/pkb-config.yml b/.test-infra/kubernetes/postgres/pkb-config.yml
new file mode 100644
index 0000000..b943b17
--- /dev/null
+++ b/.test-infra/kubernetes/postgres/pkb-config.yml
@@ -0,0 +1,32 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# This file is a pkb benchmark configuration file, used when running the IO ITs
+# that use this data store.
+#
+# This file defines pipeline options to pass to beam, as well as how to derive
+# the values for those pipeline options from kubernetes (where appropriate.)
+
+static_pipeline_options:
+ - postgresUsername: postgres
+ - postgresPassword: uuinkks
+ - postgresDatabaseName: postgres
+ - postgresSsl: false
+dynamic_pipeline_options:
+ - name: postgresServerName
+ type: NodePortIp
+ podLabel: name=postgres
http://git-wip-us.apache.org/repos/asf/beam/blob/4192ac6c/runners/google-cloud-dataflow-java/pom.xml
----------------------------------------------------------------------
diff --git a/runners/google-cloud-dataflow-java/pom.xml b/runners/google-cloud-dataflow-java/pom.xml
index c8d63ac..1181b79 100644
--- a/runners/google-cloud-dataflow-java/pom.xml
+++ b/runners/google-cloud-dataflow-java/pom.xml
@@ -122,6 +122,29 @@
</plugins>
</build>
</profile>
+
+ <!-- this profile enables IO IT benchmarking by disabling unit tests -->
+ <profile>
+ <id>io-it</id>
+ <activation>
+ <property><name>io-it</name></property>
+ </activation>
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-surefire-plugin</artifactId>
+ <version>${surefire-plugin.version}</version>
+ <configuration>
+ <skipTests>true</skipTests>
+ </configuration>
+ </plugin>
+ </plugins>
+ </build>
+ <properties>
+ <skipITs>false</skipITs>
+ </properties>
+ </profile>
</profiles>
<build>
http://git-wip-us.apache.org/repos/asf/beam/blob/4192ac6c/sdks/java/io/google-cloud-platform/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/pom.xml b/sdks/java/io/google-cloud-platform/pom.xml
index a1495f2..adb7e32 100644
--- a/sdks/java/io/google-cloud-platform/pom.xml
+++ b/sdks/java/io/google-cloud-platform/pom.xml
@@ -60,6 +60,97 @@
</plugins>
</build>
+ <profiles>
+ <!-- This profile invokes PerfKitBenchmarker, which does benchmarking of
+ the IO ITs. The arguments passed to it allow it to invoke mvn again
+ with the desired benchmark.
+
+ To invoke this, run:
+ mvn verify -Dio-it-suite -pl sdks/java/io/google-cloud-platform \
+ -DpkbLocation="path-to-pkb.py" \
+ -DintegrationTestPipelineOptions='["-tempRoot=gs://bucket/staging", "-project=your-project-id"]' \
+ -DgcpIoItClass=[your favorite IO's IT]
+ -->
+ <profile>
+ <id>io-it-suite</id>
+ <properties>
+ <!-- This is based on the location of the current pom relative to the
+ root. See discussion in BEAM-2460. -->
+ <beamRootProjectDir>
+ ${project.parent.parent.parent.parent.basedir}
+ </beamRootProjectDir>
+ </properties>
+ <activation><property><name>io-it-suite</name></property></activation>
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>org.codehaus.gmaven</groupId>
+ <artifactId>groovy-maven-plugin</artifactId>
+ <version>${groovy-maven-plugin.version}</version>
+ <executions>
+ <execution>
+ <id>find-supported-python-for-compile</id>
+ <phase>initialize</phase>
+ <goals>
+ <goal>execute</goal>
+ </goals>
+ <configuration>
+ <source>${beamRootProjectDir}/sdks/python/findSupportedPython.groovy</source>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
+ <plugin>
+ <groupId>org.codehaus.mojo</groupId>
+ <artifactId>exec-maven-plugin</artifactId>
+ <version>${maven-exec-plugin.version}</version>
+ <executions>
+ <execution>
+ <phase>verify</phase>
+ <goals>
+ <goal>exec</goal>
+ </goals>
+ </execution>
+ </executions>
+ <configuration>
+ <executable>${python.interpreter.bin}</executable>
+ <arguments>
+ <argument>${pkbLocation}</argument>
+ <argument>-benchmarks=beam_integration_benchmark</argument>
+ <argument>-beam_it_profile=io-it</argument>
+ <argument>-beam_location=${beamRootProjectDir}</argument>
+ <argument>-beam_prebuilt=true</argument>
+ <argument>-beam_sdk=java</argument>
+ <argument>-kubeconfig=${kubeconfig}</argument>
+ <argument>-kubectl=${kubectl}</argument>
+ <!-- runner overrides, controlled via forceDirectRunner -->
+ <argument>${pkbBeamRunnerProfile}</argument>
+ <argument>${pkbBeamRunnerOption}</argument>
+ <!-- specific to this IO -->
+ <argument>-beam_it_module=runners/google-cloud-dataflow-java</argument>
+ <!-- Most IOs have only one IT so this can be hard coded, but
+ since the GCP IO dir contains multiple IOs, we allow the
+ user to specify which particular one they want to run. -->
+ <argument>-beam_it_class=${gcpIoItClass}</argument>
+ <!-- arguments typically defined by user -->
+ <argument>-beam_it_options=${integrationTestPipelineOptions}</argument>
+ </arguments>
+ </configuration>
+ </plugin>
+
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-surefire-plugin</artifactId>
+ <version>${surefire-plugin.version}</version>
+ <configuration>
+ <skipTests>true</skipTests>
+ </configuration>
+ </plugin>
+ </plugins>
+ </build>
+ </profile>
+ </profiles>
+
<dependencies>
<dependency>
<groupId>org.apache.beam</groupId>
http://git-wip-us.apache.org/repos/asf/beam/blob/4192ac6c/sdks/java/io/jdbc/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/io/jdbc/pom.xml b/sdks/java/io/jdbc/pom.xml
index e5f4d7e..3e8ba57 100644
--- a/sdks/java/io/jdbc/pom.xml
+++ b/sdks/java/io/jdbc/pom.xml
@@ -65,6 +65,145 @@
</dependencies>
</profile>
+ <!--
+ This profile invokes PerfKitBenchmarker, which does benchmarking of
+ the IO ITs. The arguments passed to it allow it to invoke mvn again
+ with the desired benchmark.
+
+ To invoke this, run:
+
+ mvn verify -Dio-it-suite -pl sdks/java/io/jdbc \
+ -DpkbLocation="path-to-pkb.py" \
+ -DintegrationTestPipelineOptions='["-tempRoot=gs://bucket/staging"]'
+ -->
+ <profile>
+ <id>io-it-suite</id>
+ <activation>
+ <property><name>io-it-suite</name></property>
+ </activation>
+ <properties>
+ <!-- This is based on the location of the current pom relative to the root.
+ See discussion in BEAM-2460. -->
+ <beamRootProjectDir>${project.parent.parent.parent.parent.basedir}</beamRootProjectDir>
+ </properties>
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>org.codehaus.gmaven</groupId>
+ <artifactId>groovy-maven-plugin</artifactId>
+ <version>${groovy-maven-plugin.version}</version>
+ <executions>
+ <execution>
+ <id>find-supported-python-for-compile</id>
+ <phase>initialize</phase>
+ <goals>
+ <goal>execute</goal>
+ </goals>
+ <configuration>
+ <source>${beamRootProjectDir}/sdks/python/findSupportedPython.groovy</source>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
+ <plugin>
+ <groupId>org.codehaus.mojo</groupId>
+ <artifactId>exec-maven-plugin</artifactId>
+ <version>${maven-exec-plugin.version}</version>
+ <executions>
+ <execution>
+ <phase>verify</phase>
+ <goals>
+ <goal>exec</goal>
+ </goals>
+ </execution>
+ </executions>
+ <configuration>
+ <executable>${python.interpreter.bin}</executable>
+ <arguments>
+ <argument>${pkbLocation}</argument>
+ <argument>-benchmarks=beam_integration_benchmark</argument>
+ <argument>-beam_it_profile=io-it</argument>
+ <argument>-beam_location=${beamRootProjectDir}</argument>
+ <argument>-beam_prebuilt=true</argument>
+ <argument>-beam_sdk=java</argument>
+ <argument>-kubeconfig=${kubeconfig}</argument>
+ <argument>-kubectl=${kubectl}</argument>
+ <!-- runner overrides, controlled via forceDirectRunner -->
+ <argument>${pkbBeamRunnerProfile}</argument>
+ <argument>${pkbBeamRunnerOption}</argument>
+ <!-- specific to this IO -->
+ <argument>-beam_options_config_file=${beamRootProjectDir}/.test-infra/kubernetes/postgres/pkb-config.yml</argument>
+ <argument>-beam_kubernetes_scripts=${beamRootProjectDir}/.test-infra/kubernetes/postgres/postgres.yml</argument>
+ <argument>-beam_it_module=sdks/java/io/jdbc</argument>
+ <argument>-beam_it_class=org.apache.beam.sdk.io.jdbc.JdbcIOIT</argument>
+ <!-- arguments typically defined by user -->
+ <argument>-beam_it_options=${integrationTestPipelineOptions}</argument>
+ </arguments>
+ </configuration>
+ </plugin>
+
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-surefire-plugin</artifactId>
+ <version>${surefire-plugin.version}</version>
+ <configuration>
+ <skipTests>true</skipTests>
+ </configuration>
+ </plugin>
+ </plugins>
+ </build>
+ </profile>
+
+ <!--
+ io-it-suite-local overrides part of io-it-suite (activate both), allowing
+ users to run tests when they are on a separate network from the kubernetes
+ cluster by creating a LoadBalancer service.
+ -->
+ <profile>
+ <id>io-it-suite-local</id>
+ <activation><property><name>io-it-suite-local</name></property></activation>
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>org.codehaus.mojo</groupId>
+ <artifactId>exec-maven-plugin</artifactId>
+ <version>${maven-exec-plugin.version}</version>
+ <executions>
+ <execution>
+ <phase>verify</phase>
+ <goals>
+ <goal>exec</goal>
+ </goals>
+ </execution>
+ </executions>
+ <configuration>
+ <executable>${python.interpreter.bin}</executable>
+ <arguments>
+ <argument>${pkbLocation}</argument>
+ <argument>-benchmarks=beam_integration_benchmark</argument>
+ <argument>-beam_it_profile=io-it</argument>
+ <argument>-beam_location=${beamRootProjectDir}</argument>
+ <argument>-beam_prebuilt=true</argument>
+ <argument>-beam_sdk=java</argument>
+ <argument>-kubeconfig=${kubeconfig}</argument>
+ <argument>-kubectl=${kubectl}</argument>
+ <!-- runner overrides, controlled via forceDirectRunner -->
+ <argument>${pkbBeamRunnerProfile}</argument>
+ <argument>${pkbBeamRunnerOption}</argument>
+ <!-- specific to this IO -->
+ <argument>-beam_options_config_file=${beamRootProjectDir}/.test-infra/kubernetes/postgres/pkb-config-local.yml</argument>
+ <argument>-beam_kubernetes_scripts=${beamRootProjectDir}/.test-infra/kubernetes/postgres/postgres.yml,${beamRootProjectDir}/.test-infra/kubernetes/postgres/postgres-service-for-local-dev.yml</argument>
+ <argument>-beam_it_module=sdks/java/io/jdbc</argument>
+ <argument>-beam_it_class=org.apache.beam.sdk.io.jdbc.JdbcIOIT</argument>
+ <!-- arguments typically defined by user -->
+ <argument>-beam_it_options=${integrationTestPipelineOptions}</argument>
+ </arguments>
+ </configuration>
+ </plugin>
+ </plugins>
+ </build>
+ </profile>
+
<!-- Include the Google Cloud Dataflow runner -P dataflow-runner -->
<profile>
<id>dataflow-runner</id>
http://git-wip-us.apache.org/repos/asf/beam/blob/4192ac6c/sdks/java/io/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/io/pom.xml b/sdks/java/io/pom.xml
index b7909fa..e9aa65f 100644
--- a/sdks/java/io/pom.xml
+++ b/sdks/java/io/pom.xml
@@ -32,6 +32,17 @@
<description>Beam SDK Java IO provides different connectivity components
(sources and sinks) to consume and produce data from systems.</description>
+ <properties>
+ <!-- standard binary for kubectl -->
+ <kubectl>kubectl</kubectl>
+ <!-- the standard location for the kubernetes config file -->
+ <kubeconfig>${user.home}/.kube/config</kubeconfig>
+ <!-- Necessary to make sure that integration with perfkit in io-it-suite works -->
+ <integrationTestPipelineOptions></integrationTestPipelineOptions>
+ <pkbBeamRunnerProfile></pkbBeamRunnerProfile>
+ <pkbBeamRunnerOption></pkbBeamRunnerOption>
+ </properties>
+
<modules>
<module>amqp</module>
<module>cassandra</module>
@@ -81,11 +92,36 @@
</execution>
</executions>
</plugin>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-surefire-plugin</artifactId>
+ <version>${surefire-plugin.version}</version>
+ <configuration>
+ <skipTests>true</skipTests>
+ </configuration>
+ </plugin>
</plugins>
</build>
<properties>
<skipITs>false</skipITs>
</properties>
</profile>
+
+ <!-- this profile is for use with io-it-suite -->
+ <profile>
+ <id>forceDirectRunner</id>
+ <activation>
+ <property>
+ <name>forceDirectRunner</name>
+ </property>
+ </activation>
+ <properties>
+ <!-- These intentionally have a hanging equals sign so that an empty
+ string is passed to pkb. Passing "" instead would cause e.g. -P"" to
+ get added to the mvn command line. -->
+ <pkbBeamRunnerProfile>-beam_runner_profile=</pkbBeamRunnerProfile>
+ <pkbBeamRunnerOption>-beam_runner_option=</pkbBeamRunnerOption>
+ </properties>
+ </profile>
</profiles>
</project>