You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@beam.apache.org by ch...@apache.org on 2018/01/25 02:18:30 UTC

[beam] branch master updated: [BEAM-3060] Support for Perfkit execution of file-based-io-tests on HDFS cluster. (#4401)

This is an automated email from the ASF dual-hosted git repository.

chamikara pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
     new ad82ac1  [BEAM-3060] Support for Perfkit execution of file-based-io-tests on HDFS cluster. (#4401)
ad82ac1 is described below

commit ad82ac140161cd5d88c972c88a1c8168cf26ff1c
Author: Kamil Szewczyk <sz...@gmail.com>
AuthorDate: Thu Jan 25 03:18:27 2018 +0100

    [BEAM-3060] Support for Perfkit execution of file-based-io-tests on HDFS cluster. (#4401)
    
    * Support for Perfkit execution of file-based-io-tests on HDFS cluster.
    
    * Added kubernetes scripts and pkp-config.yml to jenkins job.
    
    * HDFS profile description.
---
 .../hadoop/SmallITCluster/pkb-config.yml           |  40 ++++++++
 sdks/java/io/file-based-io-tests/pom.xml           | 104 +++++++++++++++++++++
 2 files changed, 144 insertions(+)

diff --git a/.test-infra/kubernetes/hadoop/SmallITCluster/pkb-config.yml b/.test-infra/kubernetes/hadoop/SmallITCluster/pkb-config.yml
new file mode 100644
index 0000000..72f458a
--- /dev/null
+++ b/.test-infra/kubernetes/hadoop/SmallITCluster/pkb-config.yml
@@ -0,0 +1,40 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# This file is a pkb benchmark configuration file, used when running the IO ITs
+# that use this data store. It allows users to run tests when they are on a
+# separate network from the kubernetes cluster by reading the hadoop namenode IP
+# address from the LoadBalancer service.
+#
+# When running Perfkit with the DirectRunner, the hdfsConfiguration format must
+# additionally set dfs.client.use.datanode.hostname to true:
+#   format: '[{\"fs.defaultFS\":\"hdfs://{{LoadBalancerIp}}:9000\",\"dfs.replication\":1,\"dfs.client.use.datanode.hostname\":\"true\" }]'
+# and /etc/hosts must be modified with an entry of the form:
+#   LoadBalancerIp HadoopMasterPodName
+# otherwise the HDFS client won't be able to reach the datanode.
+# The filenamePrefix option is used by the file-based-io-tests.
+
+static_pipeline_options:
+dynamic_pipeline_options:
+  - name: hdfsConfiguration
+    format: '[{\"fs.defaultFS\":\"hdfs://{{LoadBalancerIp}}:9000\",\"dfs.replication\":1}]'
+    type: LoadBalancerIp
+    serviceName: hadoop-external
+  - name: filenamePrefix
+    format: 'hdfs://{{LoadBalancerIp}}:9000/TEXTIO_IT_'
+    type: LoadBalancerIp
+    serviceName: hadoop-external
diff --git a/sdks/java/io/file-based-io-tests/pom.xml b/sdks/java/io/file-based-io-tests/pom.xml
index bd04104..23c1b31 100644
--- a/sdks/java/io/file-based-io-tests/pom.xml
+++ b/sdks/java/io/file-based-io-tests/pom.xml
@@ -133,6 +133,110 @@
                             </arguments>
                         </configuration>
                     </plugin>
+                    <plugin>
+                        <groupId>org.apache.maven.plugins</groupId>
+                        <artifactId>maven-surefire-plugin</artifactId>
+                        <version>${surefire-plugin.version}</version>
+                        <configuration>
+                            <skipTests>true</skipTests>
+                        </configuration>
+                    </plugin>
+                </plugins>
+            </build>
+        </profile>
+
+        <!--
+            This profile invokes PerfKitBenchmarker, which does benchmarking of
+            the IO ITs with HDFS filesystem. The arguments passed to it allow it
+            to invoke mvn again with the desired benchmark.
+
+            To invoke this, run (type two hyphens wherever "&ndash;&ndash;" appears):
+
+            mvn verify -Dio-it-suite-hdfs-small -pl sdks/java/io/file-based-io-tests \
+                -DpkbLocation="path-to-pkb.py" \
+                -DintegrationTestPipelineOptions='["&ndash;&ndash;numberOfRecords=100000", \
+                "&ndash;&ndash;tempRoot=gs://bucket-name/"]' \
+                -DpkbExtraProperties='["filesystem=hdfs"]' \
+                -DfileBasedIoItClass=file-based IO IT class, eg. org.apache.beam.sdk.io.text.TextIOIT
+
+            For DirectRunner, please use the -DforceDirectRunner=true argument and check
+            .test-infra/kubernetes/hadoop/SmallITCluster/pkb-config.yml for info about necessary modifications.
+            The line containing the beam_kubernetes_scripts argument must be commented out, because the test
+            infrastructure for DirectRunner runs must be created manually using the provided scripts.
+
+            For other runners please check doc in BEAM-3060 and https://beam.apache.org/documentation/io/testing/
+        -->
+        <profile>
+            <id>io-it-hdfs-small</id>
+            <activation>
+                <property><name>io-it-suite-hdfs-small</name></property>
+            </activation>
+            <properties>
+                <!-- This is based on the location of the current pom relative to the root
+                     See discussion in BEAM-2460 -->
+                <beamRootProjectDir>${project.parent.parent.parent.parent.basedir}</beamRootProjectDir>
+            </properties>
+            <build>
+                <plugins>
+                    <plugin>
+                        <groupId>org.codehaus.gmaven</groupId>
+                        <artifactId>groovy-maven-plugin</artifactId>
+                        <version>${groovy-maven-plugin.version}</version>
+                        <executions>
+                            <execution>
+                                <id>find-supported-python-for-compile</id>
+                                <phase>initialize</phase>
+                                <goals>
+                                    <goal>execute</goal>
+                                </goals>
+                                <configuration>
+                                    <source>${beamRootProjectDir}/sdks/python/findSupportedPython.groovy</source>
+                                </configuration>
+                            </execution>
+                        </executions>
+                    </plugin>
+
+                    <plugin>
+                        <groupId>org.codehaus.mojo</groupId>
+                        <artifactId>exec-maven-plugin</artifactId>
+                        <version>${maven-exec-plugin.version}</version>
+                        <executions>
+                            <execution>
+                                <phase>verify</phase>
+                                <goals>
+                                    <goal>exec</goal>
+                                </goals>
+                            </execution>
+                        </executions>
+                        <configuration>
+                            <executable>${python.interpreter.bin}</executable>
+                            <arguments>
+                                <argument>${pkbLocation}</argument>
+                                <argument>-benchmarks=beam_integration_benchmark</argument>
+                                <argument>-beam_it_profile=io-it</argument>
+                                <argument>-beam_location=${beamRootProjectDir}</argument>
+                                <argument>-beam_prebuilt=true</argument>
+                                <argument>-beam_sdk=java</argument>
+                                <argument>-kubeconfig=${kubeconfig}</argument>
+                                <argument>-kubectl=${kubectl}</argument>
+                                <!-- Runner overrides; their values are controlled via forceDirectRunner. -->
+                                <argument>${pkbBeamRunnerProfile}</argument>
+                                <argument>${pkbBeamRunnerOption}</argument>
+                                <!-- Arguments specific to the file-based IO tests. -->
+                                <argument>-beam_it_module=sdks/java/io/file-based-io-tests</argument>
+                                <argument>-beam_it_class=${fileBasedIoItClass}</argument>
+                                <!-- Arguments typically supplied by the user on the mvn command line. -->
+                                <argument>-beam_it_options=${integrationTestPipelineOptions}</argument>
+                                <argument>-beam_options_config_file=${beamRootProjectDir}/.test-infra/kubernetes/hadoop/SmallITCluster/pkb-config.yml</argument>
+                                <argument>-beam_kubernetes_scripts=${beamRootProjectDir}/.test-infra/kubernetes/hadoop/SmallITCluster/hdfs-single-datanode-cluster.yml,${beamRootProjectDir}/.test-infra/kubernetes/hadoop/SmallITCluster/hdfs-single-datanode-cluster-for-local-dev.yml</argument>
+                                <!--
+                                Optional array of key=value items that pkb passes on to the
+                                target mvn command, e.g. -DpkbExtraProperties='["filesystem=hdfs"]'
+                                -->
+                                <argument>-beam_extra_mvn_properties=${pkbExtraProperties}</argument>
+                            </arguments>
+                        </configuration>
+                    </plugin>
 
                     <plugin>
                         <groupId>org.apache.maven.plugins</groupId>

-- 
To stop receiving notification emails like this one, please contact
chamikara@apache.org.