You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@beam.apache.org by ch...@apache.org on 2017/11/20 23:11:35 UTC

[1/2] beam git commit: Dataflow and PerfKit profiles; hash for 100.000.000 lines

Repository: beam
Updated Branches:
  refs/heads/master 699e6d0f1 -> ad3a5a9f5


Dataflow and PerfKit profiles; hash for 100.000.000 lines


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/31a86fa4
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/31a86fa4
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/31a86fa4

Branch: refs/heads/master
Commit: 31a86fa4471fa05a1262cf7a835b3f4375001a8b
Parents: 699e6d0
Author: Dariusz Aniszewski <da...@polidea.com>
Authored: Tue Nov 7 17:25:55 2017 +0100
Committer: chamikara@google.com <ch...@google.com>
Committed: Mon Nov 20 15:11:06 2017 -0800

----------------------------------------------------------------------
 sdks/java/io/file-based-io-tests/pom.xml        | 110 +++++++++++++++++++
 .../org/apache/beam/sdk/io/text/TextIOIT.java   |  10 +-
 2 files changed, 118 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/31a86fa4/sdks/java/io/file-based-io-tests/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/io/file-based-io-tests/pom.xml b/sdks/java/io/file-based-io-tests/pom.xml
index ae7527c..6c3a7e3 100644
--- a/sdks/java/io/file-based-io-tests/pom.xml
+++ b/sdks/java/io/file-based-io-tests/pom.xml
@@ -31,6 +31,116 @@
     <name>Apache Beam :: SDKs :: Java :: IO :: File-based-io-tests</name>
     <description>Integration tests for reading/writing using file-based sources/sinks.</description>
 
+    <profiles>
+        <!-- Include the Google Cloud Dataflow runner activated by -DintegrationTestRunner=dataflow -->
+        <profile>
+            <id>dataflow-runner</id>
+            <activation>
+                <property>
+                    <name>integrationTestRunner</name>
+                    <value>dataflow</value>
+                </property>
+            </activation>
+            <dependencies>
+                <dependency>
+                    <groupId>org.apache.beam</groupId>
+                    <artifactId>beam-runners-google-cloud-dataflow-java</artifactId>
+                    <scope>runtime</scope>
+                </dependency>
+            </dependencies>
+        </profile>
+
+        <!--
+            This profile invokes PerfKitBenchmarker, which does benchmarking of
+            the IO ITs. The arguments passed to it allow it to invoke mvn again
+            with the desired benchmark.
+
+            To invoke this, run:
+
+            mvn verify -Dio-it-suite -pl sdks/java/io/file-based-io-tests \
+                -DpkbLocation="path-to-pkb.py" \
+                -DintegrationTestPipelineOptions='["&ndash;&ndash;numberOfRecords=100000"]' \
+                -DfileBasedIoItClass=file-based IO IT class, e.g. org.apache.beam.sdk.io.text.TextIOIT
+
+            For DirectRunner, please use the -DforceDirectRunner=true argument.
+            For other runners, please check the documentation in BEAM-3060 and https://beam.apache.org/documentation/io/testing/
+        -->
+        <profile>
+            <id>io-it-suite</id>
+            <activation>
+                <property><name>io-it-suite</name></property>
+            </activation>
+            <properties>
+                <!-- This is based on the location of the current pom relative to the root
+                     See discussion in BEAM-2460 -->
+                <beamRootProjectDir>${project.parent.parent.parent.parent.basedir}</beamRootProjectDir>
+            </properties>
+            <build>
+                <plugins>
+                    <plugin>
+                        <groupId>org.codehaus.gmaven</groupId>
+                        <artifactId>groovy-maven-plugin</artifactId>
+                        <version>${groovy-maven-plugin.version}</version>
+                        <executions>
+                            <execution>
+                                <id>find-supported-python-for-compile</id>
+                                <phase>initialize</phase>
+                                <goals>
+                                    <goal>execute</goal>
+                                </goals>
+                                <configuration>
+                                    <source>${beamRootProjectDir}/sdks/python/findSupportedPython.groovy</source>
+                                </configuration>
+                            </execution>
+                        </executions>
+                    </plugin>
+
+                    <plugin>
+                        <groupId>org.codehaus.mojo</groupId>
+                        <artifactId>exec-maven-plugin</artifactId>
+                        <version>${maven-exec-plugin.version}</version>
+                        <executions>
+                            <execution>
+                                <phase>verify</phase>
+                                <goals>
+                                    <goal>exec</goal>
+                                </goals>
+                            </execution>
+                        </executions>
+                        <configuration>
+                            <executable>${python.interpreter.bin}</executable>
+                            <arguments>
+                                <argument>${pkbLocation}</argument>
+                                <argument>-benchmarks=beam_integration_benchmark</argument>
+                                <argument>-beam_it_profile=io-it</argument>
+                                <argument>-beam_location=${beamRootProjectDir}</argument>
+                                <argument>-beam_prebuilt=true</argument>
+                                <argument>-beam_sdk=java</argument>
+                                <!-- runner overrides, controlled via forceDirectRunner -->
+                                <argument>${pkbBeamRunnerProfile}</argument>
+                                <argument>${pkbBeamRunnerOption}</argument>
+                                <!-- specific to this IO -->
+                                <argument>-beam_it_module=sdks/java/io/file-based-io-tests</argument>
+                                <argument>-beam_it_class=${fileBasedIoItClass}</argument>
+                                <!-- arguments typically defined by user -->
+                                <argument>-beam_it_options=${integrationTestPipelineOptions}</argument>
+                            </arguments>
+                        </configuration>
+                    </plugin>
+
+                    <plugin>
+                        <groupId>org.apache.maven.plugins</groupId>
+                        <artifactId>maven-surefire-plugin</artifactId>
+                        <version>${surefire-plugin.version}</version>
+                        <configuration>
+                            <skipTests>true</skipTests>
+                        </configuration>
+                    </plugin>
+                </plugins>
+            </build>
+        </profile>
+    </profiles>
+
     <dependencies>
         <dependency>
             <groupId>org.apache.beam</groupId>

http://git-wip-us.apache.org/repos/asf/beam/blob/31a86fa4/sdks/java/io/file-based-io-tests/src/test/java/org/apache/beam/sdk/io/text/TextIOIT.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/file-based-io-tests/src/test/java/org/apache/beam/sdk/io/text/TextIOIT.java b/sdks/java/io/file-based-io-tests/src/test/java/org/apache/beam/sdk/io/text/TextIOIT.java
index ecab1d8..d741f95 100644
--- a/sdks/java/io/file-based-io-tests/src/test/java/org/apache/beam/sdk/io/text/TextIOIT.java
+++ b/sdks/java/io/file-based-io-tests/src/test/java/org/apache/beam/sdk/io/text/TextIOIT.java
@@ -54,11 +54,16 @@ import org.junit.runners.JUnit4;
  *
  * <p>Run this test using the command below. Pass in connection information via PipelineOptions:
  * <pre>
- *  mvn -e -Pio-it verify -pl sdks/java/io/text -DintegrationTestPipelineOptions='[
+ *  mvn -e -Pio-it verify -pl sdks/java/io/file-based-io-tests
+ *  -Dit.test=org.apache.beam.sdk.io.text.TextIOIT
+ *  -DintegrationTestPipelineOptions='[
  *  "--numberOfRecords=100000",
  *  "--filenamePrefix=TEXTIOIT"
  *  ]'
  * </pre>
+ * </p>
+ * <p>Please see 'sdks/java/io/file-based-io-tests/pom.xml' for instructions on
+ * running this test using the Beam performance testing framework.</p>
  * */
 @RunWith(JUnit4.class)
 public class TextIOIT {
@@ -107,7 +112,8 @@ public class TextIOIT {
   private static String getExpectedHashForLineCount(Long lineCount) {
     Map<Long, String> expectedHashes = ImmutableMap.of(
         100_000L, "4c8bb3b99dcc59459b20fefba400d446",
-        1_000_000L, "9796db06e7a7960f974d5a91164afff1"
+        1_000_000L, "9796db06e7a7960f974d5a91164afff1",
+        100_000_000L, "6ce05f456e2fdc846ded2abd0ec1de95"
     );
 
     String hash = expectedHashes.get(lineCount);


[2/2] beam git commit: This closes #4120

Posted by ch...@apache.org.
This closes #4120


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/ad3a5a9f
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/ad3a5a9f
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/ad3a5a9f

Branch: refs/heads/master
Commit: ad3a5a9f58282b79abe9d037acadf359e12d2717
Parents: 699e6d0 31a86fa
Author: chamikara@google.com <ch...@google.com>
Authored: Mon Nov 20 15:11:20 2017 -0800
Committer: chamikara@google.com <ch...@google.com>
Committed: Mon Nov 20 15:11:20 2017 -0800

----------------------------------------------------------------------
 sdks/java/io/file-based-io-tests/pom.xml        | 110 +++++++++++++++++++
 .../org/apache/beam/sdk/io/text/TextIOIT.java   |  10 +-
 2 files changed, 118 insertions(+), 2 deletions(-)
----------------------------------------------------------------------