You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@beam.apache.org by ch...@apache.org on 2018/01/25 02:18:30 UTC
[beam] branch master updated: [BEAM-3060] Support for Perfkit
execution of file-based-io-tests on HDFS cluster. (#4401)
This is an automated email from the ASF dual-hosted git repository.
chamikara pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git
The following commit(s) were added to refs/heads/master by this push:
new ad82ac1 [BEAM-3060] Support for Perfkit execution of file-based-io-tests on HDFS cluster. (#4401)
ad82ac1 is described below
commit ad82ac140161cd5d88c972c88a1c8168cf26ff1c
Author: Kamil Szewczyk <sz...@gmail.com>
AuthorDate: Thu Jan 25 03:18:27 2018 +0100
[BEAM-3060] Support for Perfkit execution of file-based-io-tests on HDFS cluster. (#4401)
* Support for Perfkit execution of file-based-io-tests on HDFS cluster.
* Added kubernetes scripts and pkb-config.yml to jenkins job.
* HDFS profile description.
---
.../hadoop/SmallITCluster/pkb-config.yml | 40 ++++++++
sdks/java/io/file-based-io-tests/pom.xml | 104 +++++++++++++++++++++
2 files changed, 144 insertions(+)
diff --git a/.test-infra/kubernetes/hadoop/SmallITCluster/pkb-config.yml b/.test-infra/kubernetes/hadoop/SmallITCluster/pkb-config.yml
new file mode 100644
index 0000000..72f458a
--- /dev/null
+++ b/.test-infra/kubernetes/hadoop/SmallITCluster/pkb-config.yml
@@ -0,0 +1,40 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# This file is a pkb benchmark configuration file, used when running the IO ITs
+# that use this data store. It allows users to run tests when they are on a
+# separate network from the kubernetes cluster by reading the hadoop namenode IP
+# address from the LoadBalancer service.
+#
+# When running Perfkit with DirectRunner - format pattern must additionally contain
+# dfs.client.use.datanode.hostname set to true:
+# format: '[{\"fs.defaultFS\":\"hdfs://{{LoadBalancerIp}}:9000\",\"dfs.replication\":1,\"dfs.client.use.datanode.hostname\":\"true\" }]'
+# and /etc/hosts should be modified with an entry containing:
+# LoadBalancerIp HadoopMasterPodName
+# otherwise hdfs client won't be able to reach datanode.
+# The filenamePrefix option is used by file-based-io-tests.
+
+static_pipeline_options:
+dynamic_pipeline_options:
+ - name: hdfsConfiguration
+ format: '[{\"fs.defaultFS\":\"hdfs://{{LoadBalancerIp}}:9000\",\"dfs.replication\":1}]'
+ type: LoadBalancerIp
+ serviceName: hadoop-external
+ - name: filenamePrefix
+ format: 'hdfs://{{LoadBalancerIp}}:9000/TEXTIO_IT_'
+ type: LoadBalancerIp
+ serviceName: hadoop-external
diff --git a/sdks/java/io/file-based-io-tests/pom.xml b/sdks/java/io/file-based-io-tests/pom.xml
index bd04104..23c1b31 100644
--- a/sdks/java/io/file-based-io-tests/pom.xml
+++ b/sdks/java/io/file-based-io-tests/pom.xml
@@ -133,6 +133,110 @@
</arguments>
</configuration>
</plugin>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-surefire-plugin</artifactId>
+ <version>${surefire-plugin.version}</version>
+ <configuration>
+ <skipTests>true</skipTests>
+ </configuration>
+ </plugin>
+ </plugins>
+ </build>
+ </profile>
+
+ <!--
+ This profile invokes PerfKitBenchmarker, which does benchmarking of
+ the IO ITs with HDFS filesystem. The arguments passed to it allow it
+ to invoke mvn again with the desired benchmark.
+
+ To invoke this, run:
+
+ mvn verify -Dio-it-suite-hdfs-small -pl sdks/java/io/file-based-io-tests \
+ -DpkbLocation="path-to-pkb.py" \
+ -DintegrationTestPipelineOptions='["––numberOfRecords=100000", \
+ "––tempRoot=gs://bucket-name/"]' \
+ -DpkbExtraProperties='["filesystem=hdfs"]' \
+ -DfileBasedIoItClass=file-based IO IT class, eg. org.apache.beam.sdk.io.text.TextIOIT
+
+ For DirectRunner, pass the -DforceDirectRunner=true argument and see
+ .test-infra/kubernetes/hadoop/SmallITCluster/pkb-config.yml for the necessary modifications.
+ The line containing the beam_kubernetes_scripts argument must also be commented out, because
+ with DirectRunner the test infrastructure has to be created manually using the provided scripts.
+
+ For other runners please check doc in BEAM-3060 and https://beam.apache.org/documentation/io/testing/
+ -->
+ <profile>
+ <id>io-it-hdfs-small</id>
+ <activation>
+ <property><name>io-it-suite-hdfs-small</name></property>
+ </activation>
+ <properties>
+ <!-- This is based on the location of the current pom relative to the root
+ See discussion in BEAM-2460 -->
+ <beamRootProjectDir>${project.parent.parent.parent.parent.basedir}</beamRootProjectDir>
+ </properties>
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>org.codehaus.gmaven</groupId>
+ <artifactId>groovy-maven-plugin</artifactId>
+ <version>${groovy-maven-plugin.version}</version>
+ <executions>
+ <execution>
+ <id>find-supported-python-for-compile</id>
+ <phase>initialize</phase>
+ <goals>
+ <goal>execute</goal>
+ </goals>
+ <configuration>
+ <source>${beamRootProjectDir}/sdks/python/findSupportedPython.groovy</source>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
+
+ <plugin>
+ <groupId>org.codehaus.mojo</groupId>
+ <artifactId>exec-maven-plugin</artifactId>
+ <version>${maven-exec-plugin.version}</version>
+ <executions>
+ <execution>
+ <phase>verify</phase>
+ <goals>
+ <goal>exec</goal>
+ </goals>
+ </execution>
+ </executions>
+ <configuration>
+ <executable>${python.interpreter.bin}</executable>
+ <arguments>
+ <argument>${pkbLocation}</argument>
+ <argument>-benchmarks=beam_integration_benchmark</argument>
+ <argument>-beam_it_profile=io-it</argument>
+ <argument>-beam_location=${beamRootProjectDir}</argument>
+ <argument>-beam_prebuilt=true</argument>
+ <argument>-beam_sdk=java</argument>
+ <argument>-kubeconfig=${kubeconfig}</argument>
+ <argument>-kubectl=${kubectl}</argument>
+ <!-- runner overrides, controlled via forceDirectRunner -->
+ <argument>${pkbBeamRunnerProfile}</argument>
+ <argument>${pkbBeamRunnerOption}</argument>
+ <!-- specific to this IO -->
+ <argument>-beam_it_module=sdks/java/io/file-based-io-tests</argument>
+ <argument>-beam_it_class=${fileBasedIoItClass}</argument>
+ <!-- arguments typically defined by user -->
+ <argument>-beam_it_options=${integrationTestPipelineOptions}</argument>
+ <argument>-beam_options_config_file=${beamRootProjectDir}/.test-infra/kubernetes/hadoop/SmallITCluster/pkb-config.yml</argument>
+ <argument>-beam_kubernetes_scripts=${beamRootProjectDir}/.test-infra/kubernetes/hadoop/SmallITCluster/hdfs-single-datanode-cluster.yml,${beamRootProjectDir}/.test-infra/kubernetes/hadoop/SmallITCluster/hdfs-single-datanode-cluster-for-local-dev.yml</argument>
+ <!--
+ Optional array of key=value items that pkb passes to the
+ target mvn command, e.g. -DpkbExtraProperties='["filesystem=hdfs"]'
+ -->
+ <argument>-beam_extra_mvn_properties=${pkbExtraProperties}</argument>
+ </arguments>
+ </configuration>
+ </plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
--
To stop receiving notification emails like this one, please contact
chamikara@apache.org.