Posted to commits@beam.apache.org by ta...@apache.org on 2017/07/13 03:06:13 UTC

[01/50] [abbrv] beam git commit: Make modules that depend on Hadoop and Spark use the same version property

Repository: beam
Updated Branches:
  refs/heads/DSL_SQL d89d1ee1a -> ec494f675


Make modules that depend on Hadoop and Spark use the same version property
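
Concretely, the parent pom below now defines a single hadoop.version (2.7.3) and
spark.version (1.6.3) property together with matching <dependencyManagement>
entries, so the child modules can drop their per-module <version> elements and
their divergent local versions (for example Hadoop 2.2.0 in the Spark runner)
and inherit the shared ones.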


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/3294d4b7
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/3294d4b7
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/3294d4b7

Branch: refs/heads/DSL_SQL
Commit: 3294d4b7ccb49da1affc761eec010557267ee6ad
Parents: 4d30484
Author: Ismaël Mejía <ie...@apache.org>
Authored: Sun Jun 4 22:55:05 2017 +0200
Committer: Tyler Akidau <ta...@apache.org>
Committed: Wed Jul 12 20:00:58 2017 -0700

----------------------------------------------------------------------
 examples/java/pom.xml                           | 18 ++++--
 examples/java8/pom.xml                          | 18 ++++--
 pom.xml                                         | 65 +++++++++++++++++++-
 runners/apex/pom.xml                            |  2 +-
 runners/spark/pom.xml                           |  7 ---
 sdks/java/extensions/sorter/pom.xml             |  6 --
 sdks/java/io/hadoop-file-system/pom.xml         | 31 ----------
 sdks/java/io/hadoop/jdk1.8-tests/pom.xml        |  2 -
 sdks/java/io/hbase/pom.xml                      |  9 ++-
 sdks/java/io/hcatalog/pom.xml                   |  6 +-
 sdks/java/io/jdbc/pom.xml                       |  2 -
 sdks/java/io/pom.xml                            | 31 ----------
 sdks/java/javadoc/pom.xml                       |  2 -
 .../main/resources/archetype-resources/pom.xml  |  1 -
 .../main/resources/archetype-resources/pom.xml  |  1 -
 15 files changed, 98 insertions(+), 103 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/3294d4b7/examples/java/pom.xml
----------------------------------------------------------------------
diff --git a/examples/java/pom.xml b/examples/java/pom.xml
index 701e4fe..7ae4e6a 100644
--- a/examples/java/pom.xml
+++ b/examples/java/pom.xml
@@ -34,10 +34,6 @@
 
   <packaging>jar</packaging>
 
-  <properties>
-    <spark.version>1.6.2</spark.version>
-  </properties>
-
   <profiles>
 
     <!--
@@ -66,6 +62,12 @@
           <groupId>org.apache.beam</groupId>
           <artifactId>beam-runners-apex</artifactId>
           <scope>runtime</scope>
+          <exclusions>
+            <exclusion>
+              <groupId>javax.servlet</groupId>
+              <artifactId>servlet-api</artifactId>
+            </exclusion>
+          </exclusions>
         </dependency>
         <!--
           Apex depends on httpclient version 4.3.5, project has a transitive dependency to httpclient 4.0.1 from
@@ -95,6 +97,12 @@
           <groupId>org.apache.beam</groupId>
           <artifactId>beam-runners-flink_2.10</artifactId>
           <scope>runtime</scope>
+          <exclusions>
+            <exclusion>
+              <groupId>javax.servlet</groupId>
+              <artifactId>servlet-api</artifactId>
+            </exclusion>
+          </exclusions>
         </dependency>
       </dependencies>
     </profile>
@@ -116,13 +124,11 @@
         <dependency>
           <groupId>org.apache.spark</groupId>
           <artifactId>spark-streaming_2.10</artifactId>
-          <version>${spark.version}</version>
           <scope>runtime</scope>
         </dependency>
         <dependency>
           <groupId>org.apache.spark</groupId>
           <artifactId>spark-core_2.10</artifactId>
-          <version>${spark.version}</version>
           <scope>runtime</scope>
           <exclusions>
             <exclusion>

http://git-wip-us.apache.org/repos/asf/beam/blob/3294d4b7/examples/java8/pom.xml
----------------------------------------------------------------------
diff --git a/examples/java8/pom.xml b/examples/java8/pom.xml
index 56295a4..a0ce708 100644
--- a/examples/java8/pom.xml
+++ b/examples/java8/pom.xml
@@ -35,10 +35,6 @@
 
   <packaging>jar</packaging>
 
-  <properties>
-    <spark.version>1.6.2</spark.version>
-  </properties>
-
   <profiles>
     <!--
       The direct runner is available by default.
@@ -66,6 +62,12 @@
           <groupId>org.apache.beam</groupId>
           <artifactId>beam-runners-apex</artifactId>
           <scope>runtime</scope>
+          <exclusions>
+            <exclusion>
+              <groupId>javax.servlet</groupId>
+              <artifactId>servlet-api</artifactId>
+            </exclusion>
+          </exclusions>
         </dependency>
         <!--
           Apex depends on httpclient version 4.3.5, project has a transitive dependency to httpclient 4.0.1 from
@@ -95,6 +97,12 @@
           <groupId>org.apache.beam</groupId>
           <artifactId>beam-runners-flink_2.10</artifactId>
           <scope>runtime</scope>
+          <exclusions>
+            <exclusion>
+              <groupId>javax.servlet</groupId>
+              <artifactId>servlet-api</artifactId>
+            </exclusion>
+          </exclusions>
         </dependency>
       </dependencies>
     </profile>
@@ -116,13 +124,11 @@
         <dependency>
           <groupId>org.apache.spark</groupId>
           <artifactId>spark-streaming_2.10</artifactId>
-          <version>${spark.version}</version>
           <scope>runtime</scope>
         </dependency>
         <dependency>
           <groupId>org.apache.spark</groupId>
           <artifactId>spark-core_2.10</artifactId>
-          <version>${spark.version}</version>
           <scope>runtime</scope>
           <exclusions>
             <exclusion>

http://git-wip-us.apache.org/repos/asf/beam/blob/3294d4b7/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 4cdb30f..be3fe20 100644
--- a/pom.xml
+++ b/pom.xml
@@ -127,6 +127,12 @@
     <guava.version>20.0</guava.version>
     <grpc.version>1.2.0</grpc.version>
     <grpc-google-common-protos.version>0.1.9</grpc-google-common-protos.version>
+    <!--
+      This is the version of Hadoop used to compile the modules that depend on Hadoop.
+      These dependencies are defined with a provided scope.
+      Users must supply their own Hadoop version at runtime.
+    -->
+    <hadoop.version>2.7.3</hadoop.version>
     <hamcrest.version>1.3</hamcrest.version>
     <jackson.version>2.8.9</jackson.version>
     <findbugs.version>3.0.1</findbugs.version>
@@ -139,7 +145,7 @@
     <pubsub.version>v1-rev10-1.22.0</pubsub.version>
     <slf4j.version>1.7.14</slf4j.version>
     <spanner.version>0.20.0-beta</spanner.version>
-    <spark.version>1.6.2</spark.version>
+    <spark.version>1.6.3</spark.version>
     <spring.version>4.3.5.RELEASE</spring.version>
     <stax2.version>3.1.4</stax2.version>
     <storage.version>v1-rev71-1.22.0</storage.version>
@@ -1076,6 +1082,42 @@
         <version>${snappy-java.version}</version>
       </dependency>
 
+      <dependency>
+        <groupId>org.apache.hadoop</groupId>
+        <artifactId>hadoop-client</artifactId>
+        <version>${hadoop.version}</version>
+      </dependency>
+
+      <dependency>
+        <groupId>org.apache.hadoop</groupId>
+        <artifactId>hadoop-common</artifactId>
+        <version>${hadoop.version}</version>
+      </dependency>
+
+      <dependency>
+        <groupId>org.apache.hadoop</groupId>
+        <artifactId>hadoop-mapreduce-client-core</artifactId>
+        <version>${hadoop.version}</version>
+      </dependency>
+
+      <dependency>
+        <groupId>org.apache.spark</groupId>
+        <artifactId>spark-core_2.10</artifactId>
+        <version>${spark.version}</version>
+      </dependency>
+
+      <dependency>
+        <groupId>org.apache.spark</groupId>
+        <artifactId>spark-streaming_2.10</artifactId>
+        <version>${spark.version}</version>
+      </dependency>
+
+      <dependency>
+        <groupId>org.apache.spark</groupId>
+        <artifactId>spark-network-common_2.10</artifactId>
+        <version>${spark.version}</version>
+      </dependency>
+
       <!-- Testing -->
 
       <dependency>
@@ -1145,6 +1187,27 @@
         <scope>test</scope>
       </dependency>
 
+      <dependency>
+        <groupId>org.apache.hadoop</groupId>
+        <artifactId>hadoop-minicluster</artifactId>
+        <version>${hadoop.version}</version>
+        <scope>test</scope>
+      </dependency>
+
+      <dependency>
+        <groupId>org.apache.hadoop</groupId>
+        <artifactId>hadoop-hdfs</artifactId>
+        <version>${hadoop.version}</version>
+        <scope>test</scope>
+      </dependency>
+
+      <dependency>
+        <groupId>org.apache.hadoop</groupId>
+        <artifactId>hadoop-hdfs</artifactId>
+        <version>${hadoop.version}</version>
+        <classifier>tests</classifier>
+        <scope>test</scope>
+      </dependency>
     </dependencies>
   </dependencyManagement>
 

http://git-wip-us.apache.org/repos/asf/beam/blob/3294d4b7/runners/apex/pom.xml
----------------------------------------------------------------------
diff --git a/runners/apex/pom.xml b/runners/apex/pom.xml
index 2c54654..88ff0f2 100644
--- a/runners/apex/pom.xml
+++ b/runners/apex/pom.xml
@@ -261,7 +261,7 @@
                 <ignoredUsedUndeclaredDependency>com.esotericsoftware.kryo:kryo::${apex.kryo.version}</ignoredUsedUndeclaredDependency>
                 <ignoredUsedUndeclaredDependency>com.datatorrent:netlet::1.3.0</ignoredUsedUndeclaredDependency>
                 <ignoredUsedUndeclaredDependency>org.slf4j:slf4j-api:jar:1.7.14</ignoredUsedUndeclaredDependency>
-                <ignoredUsedUndeclaredDependency>org.apache.hadoop:hadoop-common:jar:2.6.0</ignoredUsedUndeclaredDependency>
+                <ignoredUsedUndeclaredDependency>org.apache.hadoop:hadoop-common:jar:${hadoop.version}</ignoredUsedUndeclaredDependency>
                 <ignoredUsedUndeclaredDependency>joda-time:joda-time:jar:2.4</ignoredUsedUndeclaredDependency>
                 <ignoredUsedUndeclaredDependency>com.google.guava:guava:jar:20.0</ignoredUsedUndeclaredDependency>
               </ignoredUsedUndeclaredDependencies>

http://git-wip-us.apache.org/repos/asf/beam/blob/3294d4b7/runners/spark/pom.xml
----------------------------------------------------------------------
diff --git a/runners/spark/pom.xml b/runners/spark/pom.xml
index ee72dd9..1d93427 100644
--- a/runners/spark/pom.xml
+++ b/runners/spark/pom.xml
@@ -34,8 +34,6 @@
   <properties>
     <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
     <project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
-    <spark.version>1.6.3</spark.version>
-    <hadoop.version>2.2.0</hadoop.version>
     <kafka.version>0.9.0.1</kafka.version>
     <jackson.version>2.4.4</jackson.version>
     <dropwizard.metrics.version>3.1.2</dropwizard.metrics.version>
@@ -135,31 +133,26 @@
     <dependency>
       <groupId>org.apache.spark</groupId>
       <artifactId>spark-core_2.10</artifactId>
-      <version>${spark.version}</version>
       <scope>provided</scope>
     </dependency>
     <dependency>
       <groupId>org.apache.spark</groupId>
       <artifactId>spark-streaming_2.10</artifactId>
-      <version>${spark.version}</version>
       <scope>provided</scope>
     </dependency>
     <dependency>
       <groupId>org.apache.spark</groupId>
       <artifactId>spark-network-common_2.10</artifactId>
-      <version>${spark.version}</version>
       <scope>provided</scope>
     </dependency>
     <dependency>
       <groupId>org.apache.hadoop</groupId>
       <artifactId>hadoop-common</artifactId>
-      <version>${hadoop.version}</version>
       <scope>provided</scope>
     </dependency>
     <dependency>
       <groupId>org.apache.hadoop</groupId>
       <artifactId>hadoop-mapreduce-client-core</artifactId>
-      <version>${hadoop.version}</version>
       <scope>provided</scope>
     </dependency>
     <dependency>

http://git-wip-us.apache.org/repos/asf/beam/blob/3294d4b7/sdks/java/extensions/sorter/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/extensions/sorter/pom.xml b/sdks/java/extensions/sorter/pom.xml
index 9d25f9d..ac61f76 100644
--- a/sdks/java/extensions/sorter/pom.xml
+++ b/sdks/java/extensions/sorter/pom.xml
@@ -29,10 +29,6 @@
   <artifactId>beam-sdks-java-extensions-sorter</artifactId>
   <name>Apache Beam :: SDKs :: Java :: Extensions :: Sorter</name>
 
-  <properties>
-    <hadoop.version>2.7.1</hadoop.version>
-  </properties>
-
   <dependencies>
     <dependency>
       <groupId>org.apache.beam</groupId>
@@ -42,14 +38,12 @@
     <dependency>
       <groupId>org.apache.hadoop</groupId>
       <artifactId>hadoop-mapreduce-client-core</artifactId>
-      <version>${hadoop.version}</version>
       <scope>provided</scope>
     </dependency>
     
     <dependency>
       <groupId>org.apache.hadoop</groupId>
       <artifactId>hadoop-common</artifactId>
-      <version>${hadoop.version}</version>
       <scope>provided</scope>
     </dependency>
     

http://git-wip-us.apache.org/repos/asf/beam/blob/3294d4b7/sdks/java/io/hadoop-file-system/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/io/hadoop-file-system/pom.xml b/sdks/java/io/hadoop-file-system/pom.xml
index db5a1db..a54977e 100644
--- a/sdks/java/io/hadoop-file-system/pom.xml
+++ b/sdks/java/io/hadoop-file-system/pom.xml
@@ -44,37 +44,6 @@
     </plugins>
   </build>
 
-  <properties>
-    <!--
-      This is the version of Hadoop used to compile the hadoop-common module.
-      This dependency is defined with a provided scope.
-      Users must supply their own Hadoop version at runtime.
-    -->
-    <hadoop.version>2.7.3</hadoop.version>
-  </properties>
-
-  <dependencyManagement>
-    <!--
-       We define dependencies here instead of sdks/java/io because
-       of a version mismatch between this Hadoop version and other
-       Hadoop versions declared in other io submodules.
-    -->
-    <dependencies>
-      <dependency>
-        <groupId>org.apache.hadoop</groupId>
-        <artifactId>hadoop-hdfs</artifactId>
-        <classifier>tests</classifier>
-        <version>${hadoop.version}</version>
-      </dependency>
-
-      <dependency>
-        <groupId>org.apache.hadoop</groupId>
-        <artifactId>hadoop-minicluster</artifactId>
-        <version>${hadoop.version}</version>
-      </dependency>
-    </dependencies>
-  </dependencyManagement>
-
   <dependencies>
     <dependency>
       <groupId>org.apache.beam</groupId>

http://git-wip-us.apache.org/repos/asf/beam/blob/3294d4b7/sdks/java/io/hadoop/jdk1.8-tests/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/io/hadoop/jdk1.8-tests/pom.xml b/sdks/java/io/hadoop/jdk1.8-tests/pom.xml
index 9f84e88..baaa982 100644
--- a/sdks/java/io/hadoop/jdk1.8-tests/pom.xml
+++ b/sdks/java/io/hadoop/jdk1.8-tests/pom.xml
@@ -108,13 +108,11 @@
         <dependency>
           <groupId>org.apache.spark</groupId>
           <artifactId>spark-streaming_2.10</artifactId>
-          <version>${spark.version}</version>
           <scope>runtime</scope>
         </dependency>
         <dependency>
           <groupId>org.apache.spark</groupId>
           <artifactId>spark-core_2.10</artifactId>
-          <version>${spark.version}</version>
           <scope>runtime</scope>
           <exclusions>
             <exclusion>

http://git-wip-us.apache.org/repos/asf/beam/blob/3294d4b7/sdks/java/io/hbase/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/io/hbase/pom.xml b/sdks/java/io/hbase/pom.xml
index 4d9d600..9d5e2aa 100644
--- a/sdks/java/io/hbase/pom.xml
+++ b/sdks/java/io/hbase/pom.xml
@@ -32,7 +32,6 @@
 
   <properties>
     <hbase.version>1.2.6</hbase.version>
-    <hbase.hadoop.version>2.5.1</hbase.hadoop.version>
   </properties>
 
   <build>
@@ -109,14 +108,18 @@
     <dependency>
       <groupId>org.apache.hadoop</groupId>
       <artifactId>hadoop-minicluster</artifactId>
-      <version>${hbase.hadoop.version}</version>
+      <scope>test</scope>
+    </dependency>
+
+    <dependency>
+      <groupId>org.apache.hadoop</groupId>
+      <artifactId>hadoop-hdfs</artifactId>
       <scope>test</scope>
     </dependency>
 
     <dependency>
       <groupId>org.apache.hadoop</groupId>
       <artifactId>hadoop-common</artifactId>
-      <version>${hbase.hadoop.version}</version>
       <scope>test</scope>
     </dependency>
 

http://git-wip-us.apache.org/repos/asf/beam/blob/3294d4b7/sdks/java/io/hcatalog/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/io/hcatalog/pom.xml b/sdks/java/io/hcatalog/pom.xml
index 19b62a5..8af740d 100644
--- a/sdks/java/io/hcatalog/pom.xml
+++ b/sdks/java/io/hcatalog/pom.xml
@@ -39,14 +39,14 @@
     <plugins>
       <plugin>
         <groupId>org.apache.maven.plugins</groupId>
-        <artifactId>maven-surefire-plugin</artifactId>
+        <artifactId>maven-shade-plugin</artifactId>
         <configuration>
-          <redirectTestOutputToFile>true</redirectTestOutputToFile>
+          <createDependencyReducedPom>false</createDependencyReducedPom>
         </configuration>
       </plugin>
     </plugins>
   </build>
-  
+
   <dependencies>
     <dependency>
       <groupId>org.apache.beam</groupId>

http://git-wip-us.apache.org/repos/asf/beam/blob/3294d4b7/sdks/java/io/jdbc/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/io/jdbc/pom.xml b/sdks/java/io/jdbc/pom.xml
index 17c26a0..45ec06c 100644
--- a/sdks/java/io/jdbc/pom.xml
+++ b/sdks/java/io/jdbc/pom.xml
@@ -49,13 +49,11 @@
         <dependency>
           <groupId>org.apache.spark</groupId>
           <artifactId>spark-streaming_2.10</artifactId>
-          <version>${spark.version}</version>
           <scope>runtime</scope>
         </dependency>
         <dependency>
           <groupId>org.apache.spark</groupId>
           <artifactId>spark-core_2.10</artifactId>
-          <version>${spark.version}</version>
           <scope>runtime</scope>
           <exclusions>
             <exclusion>

http://git-wip-us.apache.org/repos/asf/beam/blob/3294d4b7/sdks/java/io/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/io/pom.xml b/sdks/java/io/pom.xml
index e5db41b..458dfaf 100644
--- a/sdks/java/io/pom.xml
+++ b/sdks/java/io/pom.xml
@@ -32,37 +32,6 @@
   <description>Beam SDK Java IO provides different connectivity components
   (sources and sinks) to consume and produce data from systems.</description>
 
-  <properties>
-    <!--
-      This is the version of Hadoop used to compile the hadoop-common module.
-      This dependency is defined with a provided scope.
-      Users must supply their own Hadoop version at runtime.
-    -->
-    <hadoop.version>2.7.3</hadoop.version>
-  </properties>
-
-  <dependencyManagement>
-    <dependencies>
-      <dependency>
-        <groupId>org.apache.hadoop</groupId>
-        <artifactId>hadoop-client</artifactId>
-        <version>${hadoop.version}</version>
-      </dependency>
-
-      <dependency>
-        <groupId>org.apache.hadoop</groupId>
-        <artifactId>hadoop-common</artifactId>
-        <version>${hadoop.version}</version>
-      </dependency>
-
-      <dependency>
-        <groupId>org.apache.hadoop</groupId>
-        <artifactId>hadoop-mapreduce-client-core</artifactId>
-        <version>${hadoop.version}</version>
-      </dependency>
-    </dependencies>
-  </dependencyManagement>
-
   <modules>
     <module>amqp</module>
     <module>cassandra</module>

http://git-wip-us.apache.org/repos/asf/beam/blob/3294d4b7/sdks/java/javadoc/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/javadoc/pom.xml b/sdks/java/javadoc/pom.xml
index 54dae3a..08d5ec6 100644
--- a/sdks/java/javadoc/pom.xml
+++ b/sdks/java/javadoc/pom.xml
@@ -196,13 +196,11 @@
     <dependency>
       <groupId>org.apache.spark</groupId>
       <artifactId>spark-core_2.10</artifactId>
-      <version>${spark.version}</version>
     </dependency>
 
     <dependency>
       <groupId>org.apache.spark</groupId>
       <artifactId>spark-streaming_2.10</artifactId>
-      <version>${spark.version}</version>
     </dependency>
   </dependencies>
 

http://git-wip-us.apache.org/repos/asf/beam/blob/3294d4b7/sdks/java/maven-archetypes/examples-java8/src/main/resources/archetype-resources/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/maven-archetypes/examples-java8/src/main/resources/archetype-resources/pom.xml b/sdks/java/maven-archetypes/examples-java8/src/main/resources/archetype-resources/pom.xml
index af4fbd3..4517861 100644
--- a/sdks/java/maven-archetypes/examples-java8/src/main/resources/archetype-resources/pom.xml
+++ b/sdks/java/maven-archetypes/examples-java8/src/main/resources/archetype-resources/pom.xml
@@ -242,7 +242,6 @@
         <dependency>
           <groupId>org.apache.spark</groupId>
           <artifactId>spark-streaming_2.10</artifactId>
-          <version>${spark.version}</version>
           <scope>runtime</scope>
           <exclusions>
             <exclusion>

http://git-wip-us.apache.org/repos/asf/beam/blob/3294d4b7/sdks/java/maven-archetypes/examples/src/main/resources/archetype-resources/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/maven-archetypes/examples/src/main/resources/archetype-resources/pom.xml b/sdks/java/maven-archetypes/examples/src/main/resources/archetype-resources/pom.xml
index b8b9c9f..d039ddb 100644
--- a/sdks/java/maven-archetypes/examples/src/main/resources/archetype-resources/pom.xml
+++ b/sdks/java/maven-archetypes/examples/src/main/resources/archetype-resources/pom.xml
@@ -241,7 +241,6 @@
         <dependency>
           <groupId>org.apache.spark</groupId>
           <artifactId>spark-streaming_2.10</artifactId>
-          <version>${spark.version}</version>
           <scope>runtime</scope>
           <exclusions>
             <exclusion>


[29/50] [abbrv] beam git commit: BEAM-2575 ApexRunner doesn't emit watermarks for additional outputs

Posted by ta...@apache.org.
BEAM-2575 ApexRunner doesn't emit watermarks for additional outputs
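
A minimal sketch of the kind of pipeline affected, assuming a hypothetical
SplitFn DoFn and an input PCollection<String> named words; with additional
outputs wired up via withOutputTags, the ApexRunner previously forwarded
watermarks only to the main output port:

  TupleTag<String> mainOutput = new TupleTag<String>() {};
  TupleTag<String> additionalOutput = new TupleTag<String>() {};
  PCollectionTuple outputs = words.apply(
      ParDo.of(new SplitFn(mainOutput, additionalOutput)) // hypothetical DoFn
          .withOutputTags(mainOutput, TupleTagList.of(additionalOutput)));
  // Without the fix, anything consuming outputs.get(additionalOutput) saw no
  // watermark advance on the ApexRunner, so event-time triggers could stall.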


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/e014db6b
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/e014db6b
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/e014db6b

Branch: refs/heads/DSL_SQL
Commit: e014db6b7af00b49467389854c63ef693819ec1f
Parents: eee0c9c
Author: Thomas Weise <th...@apache.org>
Authored: Sun Jul 9 11:57:43 2017 -0700
Committer: Tyler Akidau <ta...@apache.org>
Committed: Wed Jul 12 20:01:01 2017 -0700

----------------------------------------------------------------------
 .../operators/ApexParDoOperator.java            | 21 +++++++++++++-------
 .../runners/apex/examples/WordCountTest.java    |  8 ++++++--
 2 files changed, 20 insertions(+), 9 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/e014db6b/runners/apex/src/main/java/org/apache/beam/runners/apex/translation/operators/ApexParDoOperator.java
----------------------------------------------------------------------
diff --git a/runners/apex/src/main/java/org/apache/beam/runners/apex/translation/operators/ApexParDoOperator.java b/runners/apex/src/main/java/org/apache/beam/runners/apex/translation/operators/ApexParDoOperator.java
index 809ca2a..c3cbab2 100644
--- a/runners/apex/src/main/java/org/apache/beam/runners/apex/translation/operators/ApexParDoOperator.java
+++ b/runners/apex/src/main/java/org/apache/beam/runners/apex/translation/operators/ApexParDoOperator.java
@@ -359,10 +359,7 @@ public class ApexParDoOperator<InputT, OutputT> extends BaseOperator implements
       }
     }
     if (sideInputs.isEmpty()) {
-      if (traceTuples) {
-        LOG.debug("\nemitting watermark {}\n", mark);
-      }
-      output.emit(mark);
+      outputWatermark(mark);
       return;
     }
 
@@ -370,10 +367,20 @@ public class ApexParDoOperator<InputT, OutputT> extends BaseOperator implements
         Math.min(pushedBackWatermark.get(), currentInputWatermark);
     if (potentialOutputWatermark > currentOutputWatermark) {
       currentOutputWatermark = potentialOutputWatermark;
-      if (traceTuples) {
-        LOG.debug("\nemitting watermark {}\n", currentOutputWatermark);
+      outputWatermark(ApexStreamTuple.WatermarkTuple.of(currentOutputWatermark));
+    }
+  }
+
+  private void outputWatermark(ApexStreamTuple.WatermarkTuple<?> mark) {
+    if (traceTuples) {
+      LOG.debug("\nemitting {}\n", mark);
+    }
+    output.emit(mark);
+    if (!additionalOutputPortMapping.isEmpty()) {
+      for (DefaultOutputPort<ApexStreamTuple<?>> additionalOutput :
+          additionalOutputPortMapping.values()) {
+        additionalOutput.emit(mark);
       }
-      output.emit(ApexStreamTuple.WatermarkTuple.of(currentOutputWatermark));
     }
   }
 

http://git-wip-us.apache.org/repos/asf/beam/blob/e014db6b/runners/apex/src/test/java/org/apache/beam/runners/apex/examples/WordCountTest.java
----------------------------------------------------------------------
diff --git a/runners/apex/src/test/java/org/apache/beam/runners/apex/examples/WordCountTest.java b/runners/apex/src/test/java/org/apache/beam/runners/apex/examples/WordCountTest.java
index e76096e..ba75746 100644
--- a/runners/apex/src/test/java/org/apache/beam/runners/apex/examples/WordCountTest.java
+++ b/runners/apex/src/test/java/org/apache/beam/runners/apex/examples/WordCountTest.java
@@ -123,11 +123,15 @@ public class WordCountTest {
     options.setInputFile(new File(inputFile).getAbsolutePath());
     String outputFilePrefix = "target/wordcountresult.txt";
     options.setOutput(outputFilePrefix);
-    WordCountTest.main(TestPipeline.convertToArgs(options));
 
     File outFile1 = new File(outputFilePrefix + "-00000-of-00002");
     File outFile2 = new File(outputFilePrefix + "-00001-of-00002");
-    Assert.assertTrue(outFile1.exists() && outFile2.exists());
+    Assert.assertTrue(!outFile1.exists() || outFile1.delete());
+    Assert.assertTrue(!outFile2.exists() || outFile2.delete());
+
+    WordCountTest.main(TestPipeline.convertToArgs(options));
+
+    Assert.assertTrue("result files exist", outFile1.exists() && outFile2.exists());
     HashSet<String> results = new HashSet<>();
     results.addAll(FileUtils.readLines(outFile1));
     results.addAll(FileUtils.readLines(outFile2));


[14/50] [abbrv] beam git commit: Fix bad merge

Posted by ta...@apache.org.
Fix bad merge


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/02774b98
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/02774b98
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/02774b98

Branch: refs/heads/DSL_SQL
Commit: 02774b98e6e9f708f5563d235e262c115e595066
Parents: 5e0f258
Author: Kenneth Knowles <kl...@google.com>
Authored: Thu Jul 6 21:45:39 2017 -0700
Committer: Tyler Akidau <ta...@apache.org>
Committed: Wed Jul 12 20:01:00 2017 -0700

----------------------------------------------------------------------
 .../java/org/apache/beam/runners/dataflow/TransformTranslator.java  | 1 +
 1 file changed, 1 insertion(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/02774b98/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/TransformTranslator.java
----------------------------------------------------------------------
diff --git a/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/TransformTranslator.java b/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/TransformTranslator.java
index 7f61b6c..06ed1e0 100644
--- a/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/TransformTranslator.java
+++ b/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/TransformTranslator.java
@@ -22,6 +22,7 @@ import java.util.Map;
 import org.apache.beam.runners.dataflow.options.DataflowPipelineOptions;
 import org.apache.beam.runners.dataflow.util.OutputReference;
 import org.apache.beam.sdk.Pipeline;
+import org.apache.beam.sdk.annotations.Internal;
 import org.apache.beam.sdk.coders.Coder;
 import org.apache.beam.sdk.runners.AppliedPTransform;
 import org.apache.beam.sdk.transforms.PTransform;


[44/50] [abbrv] beam git commit: Use URNs, not Java classes, in immutability enforcements

Posted by ta...@apache.org.
Use URNs, not Java classes, in immutability enforcements
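
The new lookup pattern, condensed from the diff below; one URN entry now covers
both ParDo.SingleOutput and ParDo.MultiOutput, which previously needed separate
class-keyed entries:

  String urn = PTransformTranslation.urnForTransform(transform.getTransform());
  Collection<ModelEnforcementFactory> enforcements =
      MoreObjects.firstNonNull(
          transformEnforcements.get(urn),
          Collections.<ModelEnforcementFactory>emptyList());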


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/6a61f154
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/6a61f154
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/6a61f154

Branch: refs/heads/DSL_SQL
Commit: 6a61f154bdbc02c4d84053e13d68158e065e8372
Parents: 8ae2a79
Author: Kenneth Knowles <kl...@google.com>
Authored: Mon Jul 10 15:25:11 2017 -0700
Committer: Tyler Akidau <ta...@apache.org>
Committed: Wed Jul 12 20:01:02 2017 -0700

----------------------------------------------------------------------
 .../beam/runners/direct/DirectRunner.java       | 21 ++++++++------------
 .../direct/ExecutorServiceParallelExecutor.java | 16 ++++++---------
 2 files changed, 14 insertions(+), 23 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/6a61f154/runners/direct-java/src/main/java/org/apache/beam/runners/direct/DirectRunner.java
----------------------------------------------------------------------
diff --git a/runners/direct-java/src/main/java/org/apache/beam/runners/direct/DirectRunner.java b/runners/direct-java/src/main/java/org/apache/beam/runners/direct/DirectRunner.java
index 7a221c4..4621224 100644
--- a/runners/direct-java/src/main/java/org/apache/beam/runners/direct/DirectRunner.java
+++ b/runners/direct-java/src/main/java/org/apache/beam/runners/direct/DirectRunner.java
@@ -38,14 +38,11 @@ import org.apache.beam.sdk.Pipeline;
 import org.apache.beam.sdk.Pipeline.PipelineExecutionException;
 import org.apache.beam.sdk.PipelineResult;
 import org.apache.beam.sdk.PipelineRunner;
-import org.apache.beam.sdk.io.Read;
 import org.apache.beam.sdk.metrics.MetricResults;
 import org.apache.beam.sdk.metrics.MetricsEnvironment;
 import org.apache.beam.sdk.options.PipelineOptions;
 import org.apache.beam.sdk.runners.PTransformOverride;
 import org.apache.beam.sdk.transforms.PTransform;
-import org.apache.beam.sdk.transforms.ParDo;
-import org.apache.beam.sdk.transforms.ParDo.MultiOutput;
 import org.apache.beam.sdk.util.UserCodeException;
 import org.apache.beam.sdk.values.PCollection;
 import org.joda.time.Duration;
@@ -72,16 +69,17 @@ public class DirectRunner extends PipelineRunner<DirectPipelineResult> {
     IMMUTABILITY {
       @Override
       public boolean appliesTo(PCollection<?> collection, DirectGraph graph) {
-        return CONTAINS_UDF.contains(graph.getProducer(collection).getTransform().getClass());
+        return CONTAINS_UDF.contains(
+            PTransformTranslation.urnForTransform(graph.getProducer(collection).getTransform()));
       }
     };
 
     /**
      * The set of {@link PTransform PTransforms} that execute a UDF. Useful for some enforcements.
      */
-    private static final Set<Class<? extends PTransform>> CONTAINS_UDF =
+    private static final Set<String> CONTAINS_UDF =
         ImmutableSet.of(
-            Read.Bounded.class, Read.Unbounded.class, ParDo.SingleOutput.class, MultiOutput.class);
+            PTransformTranslation.READ_TRANSFORM_URN, PTransformTranslation.PAR_DO_TRANSFORM_URN);
 
     public abstract boolean appliesTo(PCollection<?> collection, DirectGraph graph);
 
@@ -110,22 +108,19 @@ public class DirectRunner extends PipelineRunner<DirectPipelineResult> {
       return bundleFactory;
     }
 
-    @SuppressWarnings("rawtypes")
-    private static Map<Class<? extends PTransform>, Collection<ModelEnforcementFactory>>
+    private static Map<String, Collection<ModelEnforcementFactory>>
         defaultModelEnforcements(Set<Enforcement> enabledEnforcements) {
-      ImmutableMap.Builder<Class<? extends PTransform>, Collection<ModelEnforcementFactory>>
-          enforcements = ImmutableMap.builder();
+      ImmutableMap.Builder<String, Collection<ModelEnforcementFactory>> enforcements =
+          ImmutableMap.builder();
       ImmutableList.Builder<ModelEnforcementFactory> enabledParDoEnforcements =
           ImmutableList.builder();
       if (enabledEnforcements.contains(Enforcement.IMMUTABILITY)) {
         enabledParDoEnforcements.add(ImmutabilityEnforcementFactory.create());
       }
       Collection<ModelEnforcementFactory> parDoEnforcements = enabledParDoEnforcements.build();
-      enforcements.put(ParDo.SingleOutput.class, parDoEnforcements);
-      enforcements.put(MultiOutput.class, parDoEnforcements);
+      enforcements.put(PTransformTranslation.PAR_DO_TRANSFORM_URN, parDoEnforcements);
       return enforcements.build();
     }
-
   }
 
   ////////////////////////////////////////////////////////////////////////////////////////////////

http://git-wip-us.apache.org/repos/asf/beam/blob/6a61f154/runners/direct-java/src/main/java/org/apache/beam/runners/direct/ExecutorServiceParallelExecutor.java
----------------------------------------------------------------------
diff --git a/runners/direct-java/src/main/java/org/apache/beam/runners/direct/ExecutorServiceParallelExecutor.java b/runners/direct-java/src/main/java/org/apache/beam/runners/direct/ExecutorServiceParallelExecutor.java
index 2f4d1f6..75e2562 100644
--- a/runners/direct-java/src/main/java/org/apache/beam/runners/direct/ExecutorServiceParallelExecutor.java
+++ b/runners/direct-java/src/main/java/org/apache/beam/runners/direct/ExecutorServiceParallelExecutor.java
@@ -49,11 +49,11 @@ import javax.annotation.Nullable;
 import org.apache.beam.runners.core.KeyedWorkItem;
 import org.apache.beam.runners.core.KeyedWorkItems;
 import org.apache.beam.runners.core.TimerInternals.TimerData;
+import org.apache.beam.runners.core.construction.PTransformTranslation;
 import org.apache.beam.runners.direct.WatermarkManager.FiredTimers;
 import org.apache.beam.sdk.Pipeline;
 import org.apache.beam.sdk.PipelineResult.State;
 import org.apache.beam.sdk.runners.AppliedPTransform;
-import org.apache.beam.sdk.transforms.PTransform;
 import org.apache.beam.sdk.util.UserCodeException;
 import org.apache.beam.sdk.util.WindowedValue;
 import org.apache.beam.sdk.values.PCollection;
@@ -77,9 +77,7 @@ final class ExecutorServiceParallelExecutor implements PipelineExecutor {
   private final DirectGraph graph;
   private final RootProviderRegistry rootProviderRegistry;
   private final TransformEvaluatorRegistry registry;
-  @SuppressWarnings("rawtypes")
-  private final Map<Class<? extends PTransform>, Collection<ModelEnforcementFactory>>
-      transformEnforcements;
+  private final Map<String, Collection<ModelEnforcementFactory>> transformEnforcements;
 
   private final EvaluationContext evaluationContext;
 
@@ -112,9 +110,7 @@ final class ExecutorServiceParallelExecutor implements PipelineExecutor {
       DirectGraph graph,
       RootProviderRegistry rootProviderRegistry,
       TransformEvaluatorRegistry registry,
-      @SuppressWarnings("rawtypes")
-          Map<Class<? extends PTransform>, Collection<ModelEnforcementFactory>>
-              transformEnforcements,
+      Map<String, Collection<ModelEnforcementFactory>> transformEnforcements,
       EvaluationContext context) {
     return new ExecutorServiceParallelExecutor(
         targetParallelism,
@@ -130,8 +126,7 @@ final class ExecutorServiceParallelExecutor implements PipelineExecutor {
       DirectGraph graph,
       RootProviderRegistry rootProviderRegistry,
       TransformEvaluatorRegistry registry,
-      @SuppressWarnings("rawtypes")
-      Map<Class<? extends PTransform>, Collection<ModelEnforcementFactory>> transformEnforcements,
+      Map<String, Collection<ModelEnforcementFactory>> transformEnforcements,
       EvaluationContext context) {
     this.targetParallelism = targetParallelism;
     // Don't use Daemon threads for workers. The Pipeline should continue to execute even if there
@@ -237,7 +232,8 @@ final class ExecutorServiceParallelExecutor implements PipelineExecutor {
 
     Collection<ModelEnforcementFactory> enforcements =
         MoreObjects.firstNonNull(
-            transformEnforcements.get(transform.getTransform().getClass()),
+            transformEnforcements.get(
+                PTransformTranslation.urnForTransform(transform.getTransform())),
             Collections.<ModelEnforcementFactory>emptyList());
 
     TransformExecutor<T> callable =


[38/50] [abbrv] beam git commit: Add client-side throttling.

Posted by ta...@apache.org.
Add client-side throttling.

The approach used is as described in
https://landing.google.com/sre/book/chapters/handling-overload.html#client-side-throttling-a7sYUg.
By backing off individual workers in response to high error rates, we relieve
pressure on the Datastore service, increasing the chance that the workload can
complete successfully.

The exported cumulativeThrottledSeconds could also be used as an autoscaling
signal in future.
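
A minimal usage sketch, condensed from the DatastoreV1 changes below (sendRpc()
is a hypothetical stand-in for the actual Datastore commit call):

  AdaptiveThrottler throttler = new AdaptiveThrottler(120000, 10000, 1.25);
  long now = System.currentTimeMillis();
  if (throttler.throttleRequest(now)) {
    // Locally rejected: sleep/back off instead of calling the overloaded service.
  } else {
    sendRpc();                         // hypothetical RPC; skip the next line on failure
    throttler.successfulRequest(now);  // count only successes toward the ratio
  }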


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/f1defd14
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/f1defd14
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/f1defd14

Branch: refs/heads/DSL_SQL
Commit: f1defd14c943d65e946cda081fe22a872ce6ce07
Parents: 7925a66
Author: Colin Phipps <fi...@google.com>
Authored: Mon Jun 26 13:34:19 2017 +0000
Committer: Tyler Akidau <ta...@apache.org>
Committed: Wed Jul 12 20:01:02 2017 -0700

----------------------------------------------------------------------
 .../sdk/io/gcp/datastore/AdaptiveThrottler.java | 103 +++++++++++++++++
 .../beam/sdk/io/gcp/datastore/DatastoreV1.java  |  25 ++++-
 .../io/gcp/datastore/AdaptiveThrottlerTest.java | 111 +++++++++++++++++++
 3 files changed, 238 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/f1defd14/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/datastore/AdaptiveThrottler.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/datastore/AdaptiveThrottler.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/datastore/AdaptiveThrottler.java
new file mode 100644
index 0000000..ce6ebe6
--- /dev/null
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/datastore/AdaptiveThrottler.java
@@ -0,0 +1,103 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.beam.sdk.io.gcp.datastore;
+
+import com.google.common.annotations.VisibleForTesting;
+import java.util.Random;
+import org.apache.beam.sdk.transforms.Sum;
+import org.apache.beam.sdk.util.MovingFunction;
+
+
+/**
+ * An implementation of client-side adaptive throttling. See
+ * https://landing.google.com/sre/book/chapters/handling-overload.html#client-side-throttling-a7sYUg
+ * for a full discussion of the use case and algorithm applied.
+ */
+class AdaptiveThrottler {
+  private final MovingFunction successfulRequests;
+  private final MovingFunction allRequests;
+
+  /** The target ratio between requests sent and successful requests. This is "K" in the formula in
+   * https://landing.google.com/sre/book/chapters/handling-overload.html */
+  private final double overloadRatio;
+
+  /** The target minimum number of requests per samplePeriodMs, even if no requests succeed. Must be
+   * greater than 0, else we could throttle to zero. Because every decision is probabilistic, there
+   * is no guarantee that the request rate in any given interval will not be zero. (This is the +1
+   * from the formula in https://landing.google.com/sre/book/chapters/handling-overload.html.) */
+  private static final double MIN_REQUESTS = 1;
+  private final Random random;
+
+  /**
+   * @param samplePeriodMs the time window to keep of request history to inform throttling
+   * decisions.
+   * @param sampleUpdateMs the length of buckets within this time window.
+   * @param overloadRatio the target ratio between requests sent and successful requests. You should
+   * always set this to more than 1, otherwise the client would never try to send more requests than
+   * succeeded in the past - so it could never recover from temporary setbacks.
+   */
+  public AdaptiveThrottler(long samplePeriodMs, long sampleUpdateMs,
+      double overloadRatio) {
+    this(samplePeriodMs, sampleUpdateMs, overloadRatio, new Random());
+  }
+
+  @VisibleForTesting
+  AdaptiveThrottler(long samplePeriodMs, long sampleUpdateMs,
+      double overloadRatio, Random random) {
+    allRequests =
+        new MovingFunction(samplePeriodMs, sampleUpdateMs,
+        1 /* numSignificantBuckets */, 1 /* numSignificantSamples */, Sum.ofLongs());
+    successfulRequests =
+        new MovingFunction(samplePeriodMs, sampleUpdateMs,
+        1 /* numSignificantBuckets */, 1 /* numSignificantSamples */, Sum.ofLongs());
+    this.overloadRatio = overloadRatio;
+    this.random = random;
+  }
+
+  @VisibleForTesting
+  double throttlingProbability(long nowMsSinceEpoch) {
+    if (!allRequests.isSignificant()) {
+      return 0;
+    }
+    long allRequestsNow = allRequests.get(nowMsSinceEpoch);
+    long successfulRequestsNow = successfulRequests.get(nowMsSinceEpoch);
+    return Math.max(0,
+        (allRequestsNow - overloadRatio * successfulRequestsNow) / (allRequestsNow + MIN_REQUESTS));
+  }
+
+  /**
+   * Call this before sending a request to the remote service; if this returns true, drop the
+   * request (treating it as a failure or trying it again at a later time).
+   */
+  public boolean throttleRequest(long nowMsSinceEpoch) {
+    double delayProbability = throttlingProbability(nowMsSinceEpoch);
+    // Note that we increment the count of all requests here, even if we return true - so even if we
+    // tell the client not to send a request at all, it still counts as a failed request.
+    allRequests.add(nowMsSinceEpoch, 1);
+
+    return (random.nextDouble() < delayProbability);
+  }
+
+  /**
+   * Call this after {@link #throttleRequest} if your request was successful.
+   */
+  public void successfulRequest(long nowMsSinceEpoch) {
+    successfulRequests.add(nowMsSinceEpoch, 1);
+  }
+}
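
For intuition about throttlingProbability above: with the overload ratio of 1.25
that DatastoreV1 uses below, a sample window holding 100 requests of which 40
succeeded yields max(0, (100 - 1.25 * 40) / (100 + 1)) = 50/101, roughly 0.50,
so about half of new requests are rejected locally until the success rate
recovers.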

http://git-wip-us.apache.org/repos/asf/beam/blob/f1defd14/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/datastore/DatastoreV1.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/datastore/DatastoreV1.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/datastore/DatastoreV1.java
index e67f4b2..5f65428 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/datastore/DatastoreV1.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/datastore/DatastoreV1.java
@@ -71,6 +71,8 @@ import org.apache.beam.sdk.annotations.Experimental;
 import org.apache.beam.sdk.annotations.Experimental.Kind;
 import org.apache.beam.sdk.coders.SerializableCoder;
 import org.apache.beam.sdk.extensions.gcp.options.GcpOptions;
+import org.apache.beam.sdk.metrics.Counter;
+import org.apache.beam.sdk.metrics.Metrics;
 import org.apache.beam.sdk.options.PipelineOptions;
 import org.apache.beam.sdk.options.ValueProvider;
 import org.apache.beam.sdk.options.ValueProvider.StaticValueProvider;
@@ -1209,6 +1211,13 @@ public class DatastoreV1 {
     private final List<Mutation> mutations = new ArrayList<>();
     private int mutationsSize = 0;  // Accumulated size of protos in mutations.
     private WriteBatcher writeBatcher;
+    private transient AdaptiveThrottler throttler;
+    private final Counter throttledSeconds =
+      Metrics.counter(DatastoreWriterFn.class, "cumulativeThrottlingSeconds");
+    private final Counter rpcErrors =
+      Metrics.counter(DatastoreWriterFn.class, "datastoreRpcErrors");
+    private final Counter rpcSuccesses =
+      Metrics.counter(DatastoreWriterFn.class, "datastoreRpcSuccesses");
 
     private static final int MAX_RETRIES = 5;
     private static final FluentBackoff BUNDLE_WRITE_BACKOFF =
@@ -1237,6 +1246,10 @@ public class DatastoreV1 {
     public void startBundle(StartBundleContext c) {
       datastore = datastoreFactory.getDatastore(c.getPipelineOptions(), projectId.get(), localhost);
       writeBatcher.start();
+      if (throttler == null) {
+        // Initialize throttler at first use, because it is not serializable.
+        throttler = new AdaptiveThrottler(120000, 10000, 1.25);
+      }
     }
 
     @ProcessElement
@@ -1284,11 +1297,20 @@ public class DatastoreV1 {
         commitRequest.setMode(CommitRequest.Mode.NON_TRANSACTIONAL);
         long startTime = System.currentTimeMillis(), endTime;
 
+        if (throttler.throttleRequest(startTime)) {
+          LOG.info("Delaying request due to previous failures");
+          throttledSeconds.inc(WriteBatcherImpl.DATASTORE_BATCH_TARGET_LATENCY_MS / 1000);
+          sleeper.sleep(WriteBatcherImpl.DATASTORE_BATCH_TARGET_LATENCY_MS);
+          continue;
+        }
+
         try {
           datastore.commit(commitRequest.build());
           endTime = System.currentTimeMillis();
 
           writeBatcher.addRequestLatency(endTime, endTime - startTime, mutations.size());
+          throttler.successfulRequest(startTime);
+          rpcSuccesses.inc();
 
           // Break if the commit threw no exception.
           break;
@@ -1300,11 +1322,12 @@ public class DatastoreV1 {
             endTime = System.currentTimeMillis();
             writeBatcher.addRequestLatency(endTime, endTime - startTime, mutations.size());
           }
-
           // Only log the code and message for potentially-transient errors. The entire exception
           // will be propagated upon the last retry.
           LOG.error("Error writing batch of {} mutations to Datastore ({}): {}", mutations.size(),
               exception.getCode(), exception.getMessage());
+          rpcErrors.inc();
+
           if (!BackOffUtils.next(sleeper, backoff)) {
             LOG.error("Aborting after {} retries.", MAX_RETRIES);
             throw exception;

http://git-wip-us.apache.org/repos/asf/beam/blob/f1defd14/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/datastore/AdaptiveThrottlerTest.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/datastore/AdaptiveThrottlerTest.java b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/datastore/AdaptiveThrottlerTest.java
new file mode 100644
index 0000000..c12cf55
--- /dev/null
+++ b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/datastore/AdaptiveThrottlerTest.java
@@ -0,0 +1,111 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.sdk.io.gcp.datastore;
+
+import static org.hamcrest.Matchers.closeTo;
+import static org.hamcrest.Matchers.equalTo;
+import static org.hamcrest.Matchers.greaterThan;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertThat;
+
+import java.util.Random;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+import org.mockito.Mockito;
+
+/**
+ * Tests for {@link AdaptiveThrottler}.
+ */
+@RunWith(JUnit4.class)
+public class AdaptiveThrottlerTest {
+
+  static final long START_TIME_MS = 0;
+  static final long SAMPLE_PERIOD_MS = 60000;
+  static final long SAMPLE_BUCKET_MS = 1000;
+  static final double OVERLOAD_RATIO = 2;
+
+  /** Returns a throttler configured with the standard parameters above. */
+  AdaptiveThrottler getThrottler() {
+    return new AdaptiveThrottler(SAMPLE_PERIOD_MS, SAMPLE_BUCKET_MS, OVERLOAD_RATIO);
+  }
+
+  @Test
+  public void testNoInitialThrottling() throws Exception {
+    AdaptiveThrottler throttler = getThrottler();
+    assertThat(throttler.throttlingProbability(START_TIME_MS), equalTo(0.0));
+    assertThat("first request is not throttled",
+        throttler.throttleRequest(START_TIME_MS), equalTo(false));
+  }
+
+  @Test
+  public void testNoThrottlingIfNoErrors() throws Exception {
+    AdaptiveThrottler throttler = getThrottler();
+    long t = START_TIME_MS;
+    for (; t < START_TIME_MS + 20; t++) {
+      assertFalse(throttler.throttleRequest(t));
+      throttler.successfulRequest(t);
+    }
+    assertThat(throttler.throttlingProbability(t), equalTo(0.0));
+  }
+
+  @Test
+  public void testNoThrottlingAfterErrorsExpire() throws Exception {
+    AdaptiveThrottler throttler = getThrottler();
+    long t = START_TIME_MS;
+    for (; t < START_TIME_MS + SAMPLE_PERIOD_MS; t++) {
+      throttler.throttleRequest(t);
+      // and no successfulRequest.
+    }
+    assertThat("check that we set up a non-zero probability of throttling",
+        throttler.throttlingProbability(t), greaterThan(0.0));
+    for (; t < START_TIME_MS + 2 * SAMPLE_PERIOD_MS; t++) {
+      throttler.throttleRequest(t);
+      throttler.successfulRequest(t);
+    }
+    assertThat(throttler.throttlingProbability(t), equalTo(0.0));
+  }
+
+  @Test
+  public void testThrottlingAfterErrors() throws Exception {
+    Random mockRandom = Mockito.mock(Random.class);
+    Mockito.when(mockRandom.nextDouble()).thenReturn(
+        0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9,
+        0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9);
+    AdaptiveThrottler throttler = new AdaptiveThrottler(
+        SAMPLE_PERIOD_MS, SAMPLE_BUCKET_MS, OVERLOAD_RATIO, mockRandom);
+    for (int i = 0; i < 20; i++) {
+      boolean throttled = throttler.throttleRequest(START_TIME_MS + i);
+      // 1/3rd of requests succeeding.
+      if (i % 3 == 1) {
+        throttler.successfulRequest(START_TIME_MS + i);
+      }
+
+      // Once we have some history in place, check what throttling happens.
+      if (i >= 10) {
+        // Expect 1/3rd of requests to be throttled. (So 1/3rd throttled, 1/3rd succeeding, 1/3rd
+        // tried and failing).
+        assertThat(String.format("for i=%d", i),
+            throttler.throttlingProbability(START_TIME_MS + i), closeTo(0.33, /*error=*/ 0.1));
+        // Requests 10..13 should be throttled, 14..19 not throttled, given the mocked random
+        // numbers that we fed to the throttler.
+        assertThat(String.format("for i=%d", i), throttled, equalTo(i < 14));
+      }
+    }
+  }
+}


[37/50] [abbrv] beam git commit: Rehydrate PCollections

Posted by ta...@apache.org.
Rehydrate PCollections
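
Condensed from the new test below: a PCollection is encoded to its Runner API
proto and then rehydrated into a fresh pipeline, preserving coder, windowing
strategy, and boundedness:

  SdkComponents components = SdkComponents.create();
  RunnerApi.PCollection proto = PCollectionTranslation.toProto(testCollection, components);
  PCollection<?> rehydrated = PCollectionTranslation.fromProto(
      Pipeline.create(), proto, components.toComponents());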


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/1f17b8a2
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/1f17b8a2
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/1f17b8a2

Branch: refs/heads/DSL_SQL
Commit: 1f17b8a2bbd5068c8fd3374731d96f57d31433dc
Parents: 4c336e8
Author: Kenneth Knowles <kl...@google.com>
Authored: Thu Jul 6 09:24:22 2017 -0700
Committer: Tyler Akidau <ta...@apache.org>
Committed: Wed Jul 12 20:01:01 2017 -0700

----------------------------------------------------------------------
 .../construction/PCollectionTranslation.java    | 16 ++++++++++++++
 .../PCollectionTranslationTest.java             | 22 ++++++++++++++++++++
 2 files changed, 38 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/1f17b8a2/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/PCollectionTranslation.java
----------------------------------------------------------------------
diff --git a/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/PCollectionTranslation.java b/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/PCollectionTranslation.java
index 968966f..52526bb 100644
--- a/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/PCollectionTranslation.java
+++ b/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/PCollectionTranslation.java
@@ -20,6 +20,7 @@ package org.apache.beam.runners.core.construction;
 
 import com.google.protobuf.InvalidProtocolBufferException;
 import java.io.IOException;
+import org.apache.beam.sdk.Pipeline;
 import org.apache.beam.sdk.coders.Coder;
 import org.apache.beam.sdk.common.runner.v1.RunnerApi;
 import org.apache.beam.sdk.values.PCollection;
@@ -47,6 +48,21 @@ public class PCollectionTranslation {
         .build();
   }
 
+  public static PCollection<?> fromProto(
+      Pipeline pipeline, RunnerApi.PCollection pCollection, RunnerApi.Components components)
+      throws IOException {
+    return PCollection.createPrimitiveOutputInternal(
+            pipeline,
+            WindowingStrategyTranslation.fromProto(
+                components.getWindowingStrategiesOrThrow(pCollection.getWindowingStrategyId()),
+                components),
+            fromProto(pCollection.getIsBounded()))
+        .setCoder(
+            (Coder)
+                CoderTranslation.fromProto(
+                    components.getCodersOrThrow(pCollection.getCoderId()), components));
+  }
+
   public static IsBounded isBounded(RunnerApi.PCollection pCollection) {
     return fromProto(pCollection.getIsBounded());
   }

http://git-wip-us.apache.org/repos/asf/beam/blob/1f17b8a2/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/PCollectionTranslationTest.java
----------------------------------------------------------------------
diff --git a/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/PCollectionTranslationTest.java b/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/PCollectionTranslationTest.java
index 3b94220..5c45487 100644
--- a/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/PCollectionTranslationTest.java
+++ b/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/PCollectionTranslationTest.java
@@ -113,6 +113,28 @@ public class PCollectionTranslationTest {
 
   @Test
   public void testEncodeDecodeCycle() throws Exception {
+    // Encode
+    SdkComponents sdkComponents = SdkComponents.create();
+    RunnerApi.PCollection protoCollection =
+        PCollectionTranslation.toProto(testCollection, sdkComponents);
+    RunnerApi.Components protoComponents = sdkComponents.toComponents();
+
+    // Decode
+    Pipeline pipeline = Pipeline.create();
+    PCollection<?> decodedCollection =
+        PCollectionTranslation.fromProto(pipeline, protoCollection, protoComponents);
+
+    // Verify
+    assertThat(decodedCollection.getCoder(), Matchers.<Coder<?>>equalTo(testCollection.getCoder()));
+    assertThat(
+        decodedCollection.getWindowingStrategy(),
+        Matchers.<WindowingStrategy<?, ?>>equalTo(
+            testCollection.getWindowingStrategy().fixDefaults()));
+    assertThat(decodedCollection.isBounded(), equalTo(testCollection.isBounded()));
+  }
+
+  @Test
+  public void testEncodeDecodeFields() throws Exception {
     SdkComponents sdkComponents = SdkComponents.create();
     RunnerApi.PCollection protoCollection = PCollectionTranslation
         .toProto(testCollection, sdkComponents);


[08/50] [abbrv] beam git commit: [maven-release-plugin] prepare for next development iteration

Posted by ta...@apache.org.
[maven-release-plugin] prepare for next development iteration


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/cd157519
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/cd157519
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/cd157519

Branch: refs/heads/DSL_SQL
Commit: cd1575191ba9fc05f485e3db985bebc2dd30b5b3
Parents: 89531a8
Author: Jean-Baptiste Onofré <jb...@apache.org>
Authored: Wed Jul 5 16:47:38 2017 +0200
Committer: Tyler Akidau <ta...@apache.org>
Committed: Wed Jul 12 20:00:59 2017 -0700

----------------------------------------------------------------------
 examples/java/pom.xml                                   | 2 +-
 examples/java8/pom.xml                                  | 2 +-
 examples/pom.xml                                        | 2 +-
 pom.xml                                                 | 4 ++--
 runners/apex/pom.xml                                    | 2 +-
 runners/core-construction-java/pom.xml                  | 2 +-
 runners/core-java/pom.xml                               | 2 +-
 runners/direct-java/pom.xml                             | 2 +-
 runners/flink/pom.xml                                   | 2 +-
 runners/google-cloud-dataflow-java/pom.xml              | 2 +-
 runners/pom.xml                                         | 2 +-
 runners/spark/pom.xml                                   | 2 +-
 sdks/common/fn-api/pom.xml                              | 2 +-
 sdks/common/pom.xml                                     | 2 +-
 sdks/common/runner-api/pom.xml                          | 2 +-
 sdks/java/build-tools/pom.xml                           | 2 +-
 sdks/java/core/pom.xml                                  | 2 +-
 sdks/java/extensions/google-cloud-platform-core/pom.xml | 2 +-
 sdks/java/extensions/jackson/pom.xml                    | 2 +-
 sdks/java/extensions/join-library/pom.xml               | 2 +-
 sdks/java/extensions/pom.xml                            | 2 +-
 sdks/java/extensions/protobuf/pom.xml                   | 2 +-
 sdks/java/extensions/sorter/pom.xml                     | 2 +-
 sdks/java/harness/pom.xml                               | 2 +-
 sdks/java/io/amqp/pom.xml                               | 2 +-
 sdks/java/io/cassandra/pom.xml                          | 2 +-
 sdks/java/io/common/pom.xml                             | 2 +-
 sdks/java/io/elasticsearch/pom.xml                      | 2 +-
 sdks/java/io/google-cloud-platform/pom.xml              | 2 +-
 sdks/java/io/hadoop-common/pom.xml                      | 2 +-
 sdks/java/io/hadoop-file-system/pom.xml                 | 2 +-
 sdks/java/io/hadoop/input-format/pom.xml                | 2 +-
 sdks/java/io/hadoop/jdk1.8-tests/pom.xml                | 2 +-
 sdks/java/io/hadoop/pom.xml                             | 2 +-
 sdks/java/io/hbase/pom.xml                              | 2 +-
 sdks/java/io/hcatalog/pom.xml                           | 2 +-
 sdks/java/io/jdbc/pom.xml                               | 2 +-
 sdks/java/io/jms/pom.xml                                | 2 +-
 sdks/java/io/kafka/pom.xml                              | 2 +-
 sdks/java/io/kinesis/pom.xml                            | 2 +-
 sdks/java/io/mongodb/pom.xml                            | 2 +-
 sdks/java/io/mqtt/pom.xml                               | 2 +-
 sdks/java/io/pom.xml                                    | 2 +-
 sdks/java/io/xml/pom.xml                                | 2 +-
 sdks/java/java8tests/pom.xml                            | 2 +-
 sdks/java/javadoc/pom.xml                               | 2 +-
 sdks/java/maven-archetypes/examples-java8/pom.xml       | 2 +-
 sdks/java/maven-archetypes/examples/pom.xml             | 2 +-
 sdks/java/maven-archetypes/pom.xml                      | 2 +-
 sdks/java/maven-archetypes/starter/pom.xml              | 2 +-
 sdks/java/pom.xml                                       | 2 +-
 sdks/pom.xml                                            | 2 +-
 sdks/python/pom.xml                                     | 2 +-
 53 files changed, 54 insertions(+), 54 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/cd157519/examples/java/pom.xml
----------------------------------------------------------------------
diff --git a/examples/java/pom.xml b/examples/java/pom.xml
index 7ae4e6a..ae64a79 100644
--- a/examples/java/pom.xml
+++ b/examples/java/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.beam</groupId>
     <artifactId>beam-examples-parent</artifactId>
-    <version>2.1.0-SNAPSHOT</version>
+    <version>2.2.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/beam/blob/cd157519/examples/java8/pom.xml
----------------------------------------------------------------------
diff --git a/examples/java8/pom.xml b/examples/java8/pom.xml
index a0ce708..6fd29a4 100644
--- a/examples/java8/pom.xml
+++ b/examples/java8/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.beam</groupId>
     <artifactId>beam-examples-parent</artifactId>
-    <version>2.1.0-SNAPSHOT</version>
+    <version>2.2.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/beam/blob/cd157519/examples/pom.xml
----------------------------------------------------------------------
diff --git a/examples/pom.xml b/examples/pom.xml
index a7e61dd..51f4c35 100644
--- a/examples/pom.xml
+++ b/examples/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.beam</groupId>
     <artifactId>beam-parent</artifactId>
-    <version>2.1.0-SNAPSHOT</version>
+    <version>2.2.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/beam/blob/cd157519/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index a5930ca..f5d4815 100644
--- a/pom.xml
+++ b/pom.xml
@@ -34,7 +34,7 @@
   <url>http://beam.apache.org/</url>
   <inceptionYear>2016</inceptionYear>
 
-  <version>2.1.0-SNAPSHOT</version>
+  <version>2.2.0-SNAPSHOT</version>
 
   <licenses>
     <license>
@@ -48,7 +48,7 @@
     <connection>scm:git:https://git-wip-us.apache.org/repos/asf/beam.git</connection>
     <developerConnection>scm:git:https://git-wip-us.apache.org/repos/asf/beam.git</developerConnection>
     <url>https://git-wip-us.apache.org/repos/asf?p=beam.git;a=summary</url>
-    <tag>release-2.1.0</tag>
+    <tag>HEAD</tag>
   </scm>
 
   <issueManagement>

http://git-wip-us.apache.org/repos/asf/beam/blob/cd157519/runners/apex/pom.xml
----------------------------------------------------------------------
diff --git a/runners/apex/pom.xml b/runners/apex/pom.xml
index 20f2d28..fd5aafb 100644
--- a/runners/apex/pom.xml
+++ b/runners/apex/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.beam</groupId>
     <artifactId>beam-runners-parent</artifactId>
-    <version>2.1.0-SNAPSHOT</version>
+    <version>2.2.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/beam/blob/cd157519/runners/core-construction-java/pom.xml
----------------------------------------------------------------------
diff --git a/runners/core-construction-java/pom.xml b/runners/core-construction-java/pom.xml
index 67951e9..b85b5f5 100644
--- a/runners/core-construction-java/pom.xml
+++ b/runners/core-construction-java/pom.xml
@@ -24,7 +24,7 @@
   <parent>
     <artifactId>beam-runners-parent</artifactId>
     <groupId>org.apache.beam</groupId>
-    <version>2.1.0-SNAPSHOT</version>
+    <version>2.2.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/beam/blob/cd157519/runners/core-java/pom.xml
----------------------------------------------------------------------
diff --git a/runners/core-java/pom.xml b/runners/core-java/pom.xml
index c3a8d25..8c8e599 100644
--- a/runners/core-java/pom.xml
+++ b/runners/core-java/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.beam</groupId>
     <artifactId>beam-runners-parent</artifactId>
-    <version>2.1.0-SNAPSHOT</version>
+    <version>2.2.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/beam/blob/cd157519/runners/direct-java/pom.xml
----------------------------------------------------------------------
diff --git a/runners/direct-java/pom.xml b/runners/direct-java/pom.xml
index 5b5aec2..0e1f73a 100644
--- a/runners/direct-java/pom.xml
+++ b/runners/direct-java/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.beam</groupId>
     <artifactId>beam-runners-parent</artifactId>
-    <version>2.1.0-SNAPSHOT</version>
+    <version>2.2.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/beam/blob/cd157519/runners/flink/pom.xml
----------------------------------------------------------------------
diff --git a/runners/flink/pom.xml b/runners/flink/pom.xml
index 339aa8e..c063a2d 100644
--- a/runners/flink/pom.xml
+++ b/runners/flink/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.beam</groupId>
     <artifactId>beam-runners-parent</artifactId>
-    <version>2.1.0-SNAPSHOT</version>
+    <version>2.2.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/beam/blob/cd157519/runners/google-cloud-dataflow-java/pom.xml
----------------------------------------------------------------------
diff --git a/runners/google-cloud-dataflow-java/pom.xml b/runners/google-cloud-dataflow-java/pom.xml
index 2ba163b..91908cd 100644
--- a/runners/google-cloud-dataflow-java/pom.xml
+++ b/runners/google-cloud-dataflow-java/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.beam</groupId>
     <artifactId>beam-runners-parent</artifactId>
-    <version>2.1.0-SNAPSHOT</version>
+    <version>2.2.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/beam/blob/cd157519/runners/pom.xml
----------------------------------------------------------------------
diff --git a/runners/pom.xml b/runners/pom.xml
index 38aada8..b00ba9c 100644
--- a/runners/pom.xml
+++ b/runners/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.beam</groupId>
     <artifactId>beam-parent</artifactId>
-    <version>2.1.0-SNAPSHOT</version>
+    <version>2.2.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/beam/blob/cd157519/runners/spark/pom.xml
----------------------------------------------------------------------
diff --git a/runners/spark/pom.xml b/runners/spark/pom.xml
index 8a69496..7f70204 100644
--- a/runners/spark/pom.xml
+++ b/runners/spark/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.beam</groupId>
     <artifactId>beam-runners-parent</artifactId>
-    <version>2.1.0-SNAPSHOT</version>
+    <version>2.2.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/beam/blob/cd157519/sdks/common/fn-api/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/common/fn-api/pom.xml b/sdks/common/fn-api/pom.xml
index 77a9ba5..6810667 100644
--- a/sdks/common/fn-api/pom.xml
+++ b/sdks/common/fn-api/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.beam</groupId>
     <artifactId>beam-sdks-common-parent</artifactId>
-    <version>2.1.0-SNAPSHOT</version>
+    <version>2.2.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/beam/blob/cd157519/sdks/common/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/common/pom.xml b/sdks/common/pom.xml
index c621ed5..40eefa7 100644
--- a/sdks/common/pom.xml
+++ b/sdks/common/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.beam</groupId>
     <artifactId>beam-sdks-parent</artifactId>
-    <version>2.1.0-SNAPSHOT</version>
+    <version>2.2.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/beam/blob/cd157519/sdks/common/runner-api/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/common/runner-api/pom.xml b/sdks/common/runner-api/pom.xml
index f5536a7..8bc4123 100644
--- a/sdks/common/runner-api/pom.xml
+++ b/sdks/common/runner-api/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.beam</groupId>
     <artifactId>beam-sdks-common-parent</artifactId>
-    <version>2.1.0-SNAPSHOT</version>
+    <version>2.2.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/beam/blob/cd157519/sdks/java/build-tools/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/build-tools/pom.xml b/sdks/java/build-tools/pom.xml
index 5a2c498..d7d25f6 100644
--- a/sdks/java/build-tools/pom.xml
+++ b/sdks/java/build-tools/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.beam</groupId>
     <artifactId>beam-parent</artifactId>
-    <version>2.1.0-SNAPSHOT</version>
+    <version>2.2.0-SNAPSHOT</version>
     <relativePath>../../../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/beam/blob/cd157519/sdks/java/core/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/core/pom.xml b/sdks/java/core/pom.xml
index 11b68e6..3f12dc4 100644
--- a/sdks/java/core/pom.xml
+++ b/sdks/java/core/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.beam</groupId>
     <artifactId>beam-sdks-java-parent</artifactId>
-    <version>2.1.0-SNAPSHOT</version>
+    <version>2.2.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/beam/blob/cd157519/sdks/java/extensions/google-cloud-platform-core/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/extensions/google-cloud-platform-core/pom.xml b/sdks/java/extensions/google-cloud-platform-core/pom.xml
index e4e951b..7d54990 100644
--- a/sdks/java/extensions/google-cloud-platform-core/pom.xml
+++ b/sdks/java/extensions/google-cloud-platform-core/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.beam</groupId>
     <artifactId>beam-sdks-java-extensions-parent</artifactId>
-    <version>2.1.0-SNAPSHOT</version>
+    <version>2.2.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/beam/blob/cd157519/sdks/java/extensions/jackson/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/extensions/jackson/pom.xml b/sdks/java/extensions/jackson/pom.xml
index 4b09c11..7fd38e0 100644
--- a/sdks/java/extensions/jackson/pom.xml
+++ b/sdks/java/extensions/jackson/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.beam</groupId>
     <artifactId>beam-sdks-java-extensions-parent</artifactId>
-    <version>2.1.0-SNAPSHOT</version>
+    <version>2.2.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/beam/blob/cd157519/sdks/java/extensions/join-library/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/extensions/join-library/pom.xml b/sdks/java/extensions/join-library/pom.xml
index 556ec40..ea24b75 100644
--- a/sdks/java/extensions/join-library/pom.xml
+++ b/sdks/java/extensions/join-library/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.beam</groupId>
     <artifactId>beam-sdks-java-extensions-parent</artifactId>
-    <version>2.1.0-SNAPSHOT</version>
+    <version>2.2.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/beam/blob/cd157519/sdks/java/extensions/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/extensions/pom.xml b/sdks/java/extensions/pom.xml
index 3d63626..1222476 100644
--- a/sdks/java/extensions/pom.xml
+++ b/sdks/java/extensions/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.beam</groupId>
     <artifactId>beam-sdks-java-parent</artifactId>
-    <version>2.1.0-SNAPSHOT</version>
+    <version>2.2.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/beam/blob/cd157519/sdks/java/extensions/protobuf/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/extensions/protobuf/pom.xml b/sdks/java/extensions/protobuf/pom.xml
index ae909ab..63855f8 100644
--- a/sdks/java/extensions/protobuf/pom.xml
+++ b/sdks/java/extensions/protobuf/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.beam</groupId>
     <artifactId>beam-sdks-java-extensions-parent</artifactId>
-    <version>2.1.0-SNAPSHOT</version>
+    <version>2.2.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/beam/blob/cd157519/sdks/java/extensions/sorter/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/extensions/sorter/pom.xml b/sdks/java/extensions/sorter/pom.xml
index ac61f76..395c73f 100644
--- a/sdks/java/extensions/sorter/pom.xml
+++ b/sdks/java/extensions/sorter/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.beam</groupId>
     <artifactId>beam-sdks-java-extensions-parent</artifactId>
-    <version>2.1.0-SNAPSHOT</version>
+    <version>2.2.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/beam/blob/cd157519/sdks/java/harness/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/harness/pom.xml b/sdks/java/harness/pom.xml
index a35481d..9cfadc2 100644
--- a/sdks/java/harness/pom.xml
+++ b/sdks/java/harness/pom.xml
@@ -23,7 +23,7 @@
   <parent>
     <groupId>org.apache.beam</groupId>
     <artifactId>beam-sdks-java-parent</artifactId>
-    <version>2.1.0-SNAPSHOT</version>
+    <version>2.2.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/beam/blob/cd157519/sdks/java/io/amqp/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/io/amqp/pom.xml b/sdks/java/io/amqp/pom.xml
index 45b295d..8da9448 100644
--- a/sdks/java/io/amqp/pom.xml
+++ b/sdks/java/io/amqp/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.beam</groupId>
     <artifactId>beam-sdks-java-io-parent</artifactId>
-    <version>2.1.0-SNAPSHOT</version>
+    <version>2.2.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/beam/blob/cd157519/sdks/java/io/cassandra/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/io/cassandra/pom.xml b/sdks/java/io/cassandra/pom.xml
index 8249f57..c74477e 100644
--- a/sdks/java/io/cassandra/pom.xml
+++ b/sdks/java/io/cassandra/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.beam</groupId>
     <artifactId>beam-sdks-java-io-parent</artifactId>
-    <version>2.1.0-SNAPSHOT</version>
+    <version>2.2.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/beam/blob/cd157519/sdks/java/io/common/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/io/common/pom.xml b/sdks/java/io/common/pom.xml
index f7525fd..df0d94b 100644
--- a/sdks/java/io/common/pom.xml
+++ b/sdks/java/io/common/pom.xml
@@ -22,7 +22,7 @@
     <parent>
         <groupId>org.apache.beam</groupId>
         <artifactId>beam-sdks-java-io-parent</artifactId>
-        <version>2.1.0-SNAPSHOT</version>
+        <version>2.2.0-SNAPSHOT</version>
         <relativePath>../pom.xml</relativePath>
     </parent>
 

http://git-wip-us.apache.org/repos/asf/beam/blob/cd157519/sdks/java/io/elasticsearch/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/io/elasticsearch/pom.xml b/sdks/java/io/elasticsearch/pom.xml
index c8e308c..e0a7f21 100644
--- a/sdks/java/io/elasticsearch/pom.xml
+++ b/sdks/java/io/elasticsearch/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.beam</groupId>
     <artifactId>beam-sdks-java-io-parent</artifactId>
-    <version>2.1.0-SNAPSHOT</version>
+    <version>2.2.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/beam/blob/cd157519/sdks/java/io/google-cloud-platform/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/pom.xml b/sdks/java/io/google-cloud-platform/pom.xml
index 09a430a..a1495f2 100644
--- a/sdks/java/io/google-cloud-platform/pom.xml
+++ b/sdks/java/io/google-cloud-platform/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.beam</groupId>
     <artifactId>beam-sdks-java-io-parent</artifactId>
-    <version>2.1.0-SNAPSHOT</version>
+    <version>2.2.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/beam/blob/cd157519/sdks/java/io/hadoop-common/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/io/hadoop-common/pom.xml b/sdks/java/io/hadoop-common/pom.xml
index 8749243..4bcbcd7 100644
--- a/sdks/java/io/hadoop-common/pom.xml
+++ b/sdks/java/io/hadoop-common/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.beam</groupId>
     <artifactId>beam-sdks-java-io-parent</artifactId>
-    <version>2.1.0-SNAPSHOT</version>
+    <version>2.2.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/beam/blob/cd157519/sdks/java/io/hadoop-file-system/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/io/hadoop-file-system/pom.xml b/sdks/java/io/hadoop-file-system/pom.xml
index a54977e..a9c2e57 100644
--- a/sdks/java/io/hadoop-file-system/pom.xml
+++ b/sdks/java/io/hadoop-file-system/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.beam</groupId>
     <artifactId>beam-sdks-java-io-parent</artifactId>
-    <version>2.1.0-SNAPSHOT</version>
+    <version>2.2.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/beam/blob/cd157519/sdks/java/io/hadoop/input-format/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/io/hadoop/input-format/pom.xml b/sdks/java/io/hadoop/input-format/pom.xml
index 06f9f11..0953119 100644
--- a/sdks/java/io/hadoop/input-format/pom.xml
+++ b/sdks/java/io/hadoop/input-format/pom.xml
@@ -20,7 +20,7 @@
   <parent>
     <groupId>org.apache.beam</groupId>
     <artifactId>beam-sdks-java-io-hadoop-parent</artifactId>
-    <version>2.1.0-SNAPSHOT</version>
+    <version>2.2.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
   <artifactId>beam-sdks-java-io-hadoop-input-format</artifactId>

http://git-wip-us.apache.org/repos/asf/beam/blob/cd157519/sdks/java/io/hadoop/jdk1.8-tests/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/io/hadoop/jdk1.8-tests/pom.xml b/sdks/java/io/hadoop/jdk1.8-tests/pom.xml
index baaa982..12944f4 100644
--- a/sdks/java/io/hadoop/jdk1.8-tests/pom.xml
+++ b/sdks/java/io/hadoop/jdk1.8-tests/pom.xml
@@ -26,7 +26,7 @@
   <parent>
     <groupId>org.apache.beam</groupId>
     <artifactId>beam-sdks-java-io-hadoop-parent</artifactId>
-    <version>2.1.0-SNAPSHOT</version>
+    <version>2.2.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
   <artifactId>beam-sdks-java-io-hadoop-jdk1.8-tests</artifactId>

http://git-wip-us.apache.org/repos/asf/beam/blob/cd157519/sdks/java/io/hadoop/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/io/hadoop/pom.xml b/sdks/java/io/hadoop/pom.xml
index a1c7a2e..bc3569d 100644
--- a/sdks/java/io/hadoop/pom.xml
+++ b/sdks/java/io/hadoop/pom.xml
@@ -20,7 +20,7 @@
   <parent>
     <groupId>org.apache.beam</groupId>
     <artifactId>beam-sdks-java-io-parent</artifactId>
-    <version>2.1.0-SNAPSHOT</version>
+    <version>2.2.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
   <packaging>pom</packaging>

http://git-wip-us.apache.org/repos/asf/beam/blob/cd157519/sdks/java/io/hbase/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/io/hbase/pom.xml b/sdks/java/io/hbase/pom.xml
index 40ac8df..40f516a 100644
--- a/sdks/java/io/hbase/pom.xml
+++ b/sdks/java/io/hbase/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.beam</groupId>
     <artifactId>beam-sdks-java-io-parent</artifactId>
-    <version>2.1.0-SNAPSHOT</version>
+    <version>2.2.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/beam/blob/cd157519/sdks/java/io/hcatalog/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/io/hcatalog/pom.xml b/sdks/java/io/hcatalog/pom.xml
index a31ff86..2aa661e 100644
--- a/sdks/java/io/hcatalog/pom.xml
+++ b/sdks/java/io/hcatalog/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.beam</groupId>
     <artifactId>beam-sdks-java-io-parent</artifactId>
-    <version>2.1.0-SNAPSHOT</version>
+    <version>2.2.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/beam/blob/cd157519/sdks/java/io/jdbc/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/io/jdbc/pom.xml b/sdks/java/io/jdbc/pom.xml
index 45ec06c..050fc6a 100644
--- a/sdks/java/io/jdbc/pom.xml
+++ b/sdks/java/io/jdbc/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.beam</groupId>
     <artifactId>beam-sdks-java-io-parent</artifactId>
-    <version>2.1.0-SNAPSHOT</version>
+    <version>2.2.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/beam/blob/cd157519/sdks/java/io/jms/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/io/jms/pom.xml b/sdks/java/io/jms/pom.xml
index 58009a1..c2074af 100644
--- a/sdks/java/io/jms/pom.xml
+++ b/sdks/java/io/jms/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.beam</groupId>
     <artifactId>beam-sdks-java-io-parent</artifactId>
-    <version>2.1.0-SNAPSHOT</version>
+    <version>2.2.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/beam/blob/cd157519/sdks/java/io/kafka/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/io/kafka/pom.xml b/sdks/java/io/kafka/pom.xml
index 29350cc..1256c46 100644
--- a/sdks/java/io/kafka/pom.xml
+++ b/sdks/java/io/kafka/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.beam</groupId>
     <artifactId>beam-sdks-java-io-parent</artifactId>
-    <version>2.1.0-SNAPSHOT</version>
+    <version>2.2.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/beam/blob/cd157519/sdks/java/io/kinesis/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/io/kinesis/pom.xml b/sdks/java/io/kinesis/pom.xml
index cb7064b..46d5e26 100644
--- a/sdks/java/io/kinesis/pom.xml
+++ b/sdks/java/io/kinesis/pom.xml
@@ -21,7 +21,7 @@
   <parent>
     <groupId>org.apache.beam</groupId>
     <artifactId>beam-sdks-java-io-parent</artifactId>
-    <version>2.1.0-SNAPSHOT</version>
+    <version>2.2.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/beam/blob/cd157519/sdks/java/io/mongodb/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/io/mongodb/pom.xml b/sdks/java/io/mongodb/pom.xml
index 912e20c..d93cc41 100644
--- a/sdks/java/io/mongodb/pom.xml
+++ b/sdks/java/io/mongodb/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.beam</groupId>
     <artifactId>beam-sdks-java-io-parent</artifactId>
-    <version>2.1.0-SNAPSHOT</version>
+    <version>2.2.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/beam/blob/cd157519/sdks/java/io/mqtt/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/io/mqtt/pom.xml b/sdks/java/io/mqtt/pom.xml
index baaf771..9fa1dc0 100644
--- a/sdks/java/io/mqtt/pom.xml
+++ b/sdks/java/io/mqtt/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.beam</groupId>
     <artifactId>beam-sdks-java-io-parent</artifactId>
-    <version>2.1.0-SNAPSHOT</version>
+    <version>2.2.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/beam/blob/cd157519/sdks/java/io/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/io/pom.xml b/sdks/java/io/pom.xml
index 458dfaf..b7909fa 100644
--- a/sdks/java/io/pom.xml
+++ b/sdks/java/io/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.beam</groupId>
     <artifactId>beam-sdks-java-parent</artifactId>
-    <version>2.1.0-SNAPSHOT</version>
+    <version>2.2.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/beam/blob/cd157519/sdks/java/io/xml/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/io/xml/pom.xml b/sdks/java/io/xml/pom.xml
index cf7dd33..7b5804e 100644
--- a/sdks/java/io/xml/pom.xml
+++ b/sdks/java/io/xml/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.beam</groupId>
     <artifactId>beam-sdks-java-io-parent</artifactId>
-    <version>2.1.0-SNAPSHOT</version>
+    <version>2.2.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/beam/blob/cd157519/sdks/java/java8tests/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/java8tests/pom.xml b/sdks/java/java8tests/pom.xml
index b90a757..2378014 100644
--- a/sdks/java/java8tests/pom.xml
+++ b/sdks/java/java8tests/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.beam</groupId>
     <artifactId>beam-sdks-java-parent</artifactId>
-    <version>2.1.0-SNAPSHOT</version>
+    <version>2.2.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/beam/blob/cd157519/sdks/java/javadoc/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/javadoc/pom.xml b/sdks/java/javadoc/pom.xml
index 08d5ec6..ddb92cf 100644
--- a/sdks/java/javadoc/pom.xml
+++ b/sdks/java/javadoc/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.beam</groupId>
     <artifactId>beam-parent</artifactId>
-    <version>2.1.0-SNAPSHOT</version>
+    <version>2.2.0-SNAPSHOT</version>
     <relativePath>../../../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/beam/blob/cd157519/sdks/java/maven-archetypes/examples-java8/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/maven-archetypes/examples-java8/pom.xml b/sdks/java/maven-archetypes/examples-java8/pom.xml
index b57644d..b60a695 100644
--- a/sdks/java/maven-archetypes/examples-java8/pom.xml
+++ b/sdks/java/maven-archetypes/examples-java8/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.beam</groupId>
     <artifactId>beam-sdks-java-maven-archetypes-parent</artifactId>
-    <version>2.1.0-SNAPSHOT</version>
+    <version>2.2.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/beam/blob/cd157519/sdks/java/maven-archetypes/examples/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/maven-archetypes/examples/pom.xml b/sdks/java/maven-archetypes/examples/pom.xml
index c1378cb..2a02039 100644
--- a/sdks/java/maven-archetypes/examples/pom.xml
+++ b/sdks/java/maven-archetypes/examples/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.beam</groupId>
     <artifactId>beam-sdks-java-maven-archetypes-parent</artifactId>
-    <version>2.1.0-SNAPSHOT</version>
+    <version>2.2.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/beam/blob/cd157519/sdks/java/maven-archetypes/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/maven-archetypes/pom.xml b/sdks/java/maven-archetypes/pom.xml
index b7fe274..d676b31 100644
--- a/sdks/java/maven-archetypes/pom.xml
+++ b/sdks/java/maven-archetypes/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.beam</groupId>
     <artifactId>beam-sdks-java-parent</artifactId>
-    <version>2.1.0-SNAPSHOT</version>
+    <version>2.2.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/beam/blob/cd157519/sdks/java/maven-archetypes/starter/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/maven-archetypes/starter/pom.xml b/sdks/java/maven-archetypes/starter/pom.xml
index 06b41c8..8024b52 100644
--- a/sdks/java/maven-archetypes/starter/pom.xml
+++ b/sdks/java/maven-archetypes/starter/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.beam</groupId>
     <artifactId>beam-sdks-java-maven-archetypes-parent</artifactId>
-    <version>2.1.0-SNAPSHOT</version>
+    <version>2.2.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/beam/blob/cd157519/sdks/java/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/pom.xml b/sdks/java/pom.xml
index 250c85a..3144193 100644
--- a/sdks/java/pom.xml
+++ b/sdks/java/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.beam</groupId>
     <artifactId>beam-sdks-parent</artifactId>
-    <version>2.1.0-SNAPSHOT</version>
+    <version>2.2.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/beam/blob/cd157519/sdks/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/pom.xml b/sdks/pom.xml
index 27b9610..aec8762 100644
--- a/sdks/pom.xml
+++ b/sdks/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.beam</groupId>
     <artifactId>beam-parent</artifactId>
-    <version>2.1.0-SNAPSHOT</version>
+    <version>2.2.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 

http://git-wip-us.apache.org/repos/asf/beam/blob/cd157519/sdks/python/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/python/pom.xml b/sdks/python/pom.xml
index 1295654..1077689 100644
--- a/sdks/python/pom.xml
+++ b/sdks/python/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.beam</groupId>
     <artifactId>beam-sdks-parent</artifactId>
-    <version>2.1.0-SNAPSHOT</version>
+    <version>2.2.0-SNAPSHOT</version>
     <relativePath>../pom.xml</relativePath>
   </parent>
 


[39/50] [abbrv] beam git commit: Fix null checks in TransformHierarchy

Posted by ta...@apache.org.
Fix null checks in TransformHierarchy


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/83f31e94
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/83f31e94
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/83f31e94

Branch: refs/heads/DSL_SQL
Commit: 83f31e942b5e106b21bc922d016c5840bf0b0a3a
Parents: fc06b79
Author: Kenneth Knowles <kl...@google.com>
Authored: Mon Jun 12 15:12:18 2017 -0700
Committer: Tyler Akidau <ta...@apache.org>
Committed: Wed Jul 12 20:01:02 2017 -0700

----------------------------------------------------------------------
 .../org/apache/beam/sdk/runners/TransformHierarchy.java   | 10 +---------
 1 file changed, 1 insertion(+), 9 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/83f31e94/sdks/java/core/src/main/java/org/apache/beam/sdk/runners/TransformHierarchy.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/runners/TransformHierarchy.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/runners/TransformHierarchy.java
index 6f1ee94..d8ff59e 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/runners/TransformHierarchy.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/runners/TransformHierarchy.java
@@ -145,14 +145,6 @@ public class TransformHierarchy {
       Node producerNode = getProducer(inputValue);
       PInput input = producerInput.remove(inputValue);
       inputValue.finishSpecifying(input, producerNode.getTransform());
-      checkState(
-          producers.get(inputValue) != null,
-          "Producer unknown for input %s",
-          inputValue);
-      checkState(
-          producers.get(inputValue) != null,
-          "Producer unknown for input %s",
-          inputValue);
     }
   }
 
@@ -201,7 +193,7 @@ public class TransformHierarchy {
   }
 
   Node getProducer(PValue produced) {
-    return producers.get(produced);
+    return checkNotNull(producers.get(produced), "No producer found for %s", produced);
   }
 
   public Set<PValue> visit(PipelineVisitor visitor) {
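
The behavioral change, sketched below. This assumes checkNotNull is Guava's
Preconditions.checkNotNull (consistent with the checkState calls removed
above); the value name is illustrative:

    // Before: a PValue with no registered producer returned null here and
    // only failed later, at the call site, with an opaque NPE.
    // After: the lookup fails fast with a descriptive message.
    Node node = hierarchy.getProducer(orphanValue);
    // throws NullPointerException with message
    // "No producer found for <orphanValue>" when no producer is registered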


[07/50] [abbrv] beam git commit: Update Python SDK version

Posted by ta...@apache.org.
Update Python SDK version


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/a32db077
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/a32db077
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/a32db077

Branch: refs/heads/DSL_SQL
Commit: a32db07733f84f3c395d1447ac3db66d04e68c4f
Parents: cd15751
Author: Jean-Baptiste Onofré <jb...@apache.org>
Authored: Wed Jul 5 16:52:48 2017 +0200
Committer: Tyler Akidau <ta...@apache.org>
Committed: Wed Jul 12 20:00:59 2017 -0700

----------------------------------------------------------------------
 sdks/python/apache_beam/version.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/a32db077/sdks/python/apache_beam/version.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/version.py b/sdks/python/apache_beam/version.py
index ae92a23..8b0a430 100644
--- a/sdks/python/apache_beam/version.py
+++ b/sdks/python/apache_beam/version.py
@@ -18,4 +18,4 @@
 """Apache Beam SDK version information and utilities."""
 
 
-__version__ = '2.1.0.dev'
+__version__ = '2.2.0.dev'


[34/50] [abbrv] beam git commit: Adds DynamicDestinations support to FileBasedSink

Posted by ta...@apache.org.
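
The diff below threads a user type and a destination type through TextSink and
WriteFiles. As a minimal sketch of the resulting call shape (MySink and
formatFunction are placeholders, in the style of the javadoc example retained
in the diff; the two-argument to() matches the new signature):

    p.apply(WriteFiles.to(new MySink(...), formatFunction)
                      .withNumShards(3));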
http://git-wip-us.apache.org/repos/asf/beam/blob/4c336e84/sdks/java/core/src/main/java/org/apache/beam/sdk/io/TextSink.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/TextSink.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/TextSink.java
index 511d697..b57b28c 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/TextSink.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/TextSink.java
@@ -34,27 +34,29 @@ import org.apache.beam.sdk.util.MimeTypes;
  * '\n'} represented in {@code UTF-8} format as the record separator. Each record (including the
  * last) is terminated.
  */
-class TextSink extends FileBasedSink<String> {
+class TextSink<UserT, DestinationT> extends FileBasedSink<String, DestinationT> {
   @Nullable private final String header;
   @Nullable private final String footer;
 
   TextSink(
       ValueProvider<ResourceId> baseOutputFilename,
-      FilenamePolicy filenamePolicy,
+      DynamicDestinations<UserT, DestinationT> dynamicDestinations,
       @Nullable String header,
       @Nullable String footer,
       WritableByteChannelFactory writableByteChannelFactory) {
-    super(baseOutputFilename, filenamePolicy, writableByteChannelFactory);
+    super(baseOutputFilename, dynamicDestinations, writableByteChannelFactory);
     this.header = header;
     this.footer = footer;
   }
+
   @Override
-  public WriteOperation<String> createWriteOperation() {
-    return new TextWriteOperation(this, header, footer);
+  public WriteOperation<String, DestinationT> createWriteOperation() {
+    return new TextWriteOperation<>(this, header, footer);
   }
 
   /** A {@link WriteOperation WriteOperation} for text files. */
-  private static class TextWriteOperation extends WriteOperation<String> {
+  private static class TextWriteOperation<DestinationT>
+      extends WriteOperation<String, DestinationT> {
     @Nullable private final String header;
     @Nullable private final String footer;
 
@@ -65,20 +67,20 @@ class TextSink extends FileBasedSink<String> {
     }
 
     @Override
-    public Writer<String> createWriter() throws Exception {
-      return new TextWriter(this, header, footer);
+    public Writer<String, DestinationT> createWriter() throws Exception {
+      return new TextWriter<>(this, header, footer);
     }
   }
 
   /** A {@link Writer Writer} for text files. */
-  private static class TextWriter extends Writer<String> {
+  private static class TextWriter<DestinationT> extends Writer<String, DestinationT> {
     private static final String NEWLINE = "\n";
     @Nullable private final String header;
     @Nullable private final String footer;
     private OutputStreamWriter out;
 
     public TextWriter(
-        WriteOperation<String> writeOperation,
+        WriteOperation<String, DestinationT> writeOperation,
         @Nullable String header,
         @Nullable String footer) {
       super(writeOperation, MimeTypes.TEXT);

http://git-wip-us.apache.org/repos/asf/beam/blob/4c336e84/sdks/java/core/src/main/java/org/apache/beam/sdk/io/WriteFiles.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/WriteFiles.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/WriteFiles.java
index a220eab..7013044 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/WriteFiles.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/WriteFiles.java
@@ -20,9 +20,12 @@ package org.apache.beam.sdk.io;
 import static com.google.common.base.Preconditions.checkArgument;
 import static com.google.common.base.Preconditions.checkNotNull;
 
+import com.google.common.base.Objects;
 import com.google.common.collect.ImmutableList;
 import com.google.common.collect.Lists;
 import com.google.common.collect.Maps;
+import com.google.common.hash.Hashing;
+import java.io.IOException;
 import java.util.List;
 import java.util.Map;
 import java.util.UUID;
@@ -30,8 +33,11 @@ import java.util.concurrent.ThreadLocalRandom;
 import javax.annotation.Nullable;
 import org.apache.beam.sdk.Pipeline;
 import org.apache.beam.sdk.annotations.Experimental;
+import org.apache.beam.sdk.coders.CannotProvideCoderException;
 import org.apache.beam.sdk.coders.Coder;
+import org.apache.beam.sdk.coders.Coder.NonDeterministicException;
 import org.apache.beam.sdk.coders.KvCoder;
+import org.apache.beam.sdk.coders.ShardedKeyCoder;
 import org.apache.beam.sdk.coders.VarIntCoder;
 import org.apache.beam.sdk.coders.VoidCoder;
 import org.apache.beam.sdk.io.FileBasedSink.FileResult;
@@ -47,6 +53,7 @@ import org.apache.beam.sdk.transforms.Flatten;
 import org.apache.beam.sdk.transforms.GroupByKey;
 import org.apache.beam.sdk.transforms.PTransform;
 import org.apache.beam.sdk.transforms.ParDo;
+import org.apache.beam.sdk.transforms.SerializableFunction;
 import org.apache.beam.sdk.transforms.View;
 import org.apache.beam.sdk.transforms.WithKeys;
 import org.apache.beam.sdk.transforms.display.DisplayData;
@@ -55,6 +62,7 @@ import org.apache.beam.sdk.transforms.windowing.DefaultTrigger;
 import org.apache.beam.sdk.transforms.windowing.GlobalWindows;
 import org.apache.beam.sdk.transforms.windowing.PaneInfo;
 import org.apache.beam.sdk.transforms.windowing.Window;
+import org.apache.beam.sdk.util.CoderUtils;
 import org.apache.beam.sdk.values.KV;
 import org.apache.beam.sdk.values.PCollection;
 import org.apache.beam.sdk.values.PCollection.IsBounded;
@@ -62,6 +70,7 @@ import org.apache.beam.sdk.values.PCollectionList;
 import org.apache.beam.sdk.values.PCollectionTuple;
 import org.apache.beam.sdk.values.PCollectionView;
 import org.apache.beam.sdk.values.PDone;
+import org.apache.beam.sdk.values.ShardedKey;
 import org.apache.beam.sdk.values.TupleTag;
 import org.apache.beam.sdk.values.TupleTagList;
 import org.slf4j.Logger;
@@ -72,13 +81,12 @@ import org.slf4j.LoggerFactory;
  * global initialization of a sink, followed by a parallel write, and ends with a sequential
  * finalization of the write. The output of a write is {@link PDone}.
  *
- * <p>By default, every bundle in the input {@link PCollection} will be processed by a
- * {@link WriteOperation}, so the number of output
- * will vary based on runner behavior, though at least 1 output will always be produced. The
- * exact parallelism of the write stage can be controlled using {@link WriteFiles#withNumShards},
- * typically used to control how many files are produced or to globally limit the number of
- * workers connecting to an external service. However, this option can often hurt performance: it
- * adds an additional {@link GroupByKey} to the pipeline.
+ * <p>By default, every bundle in the input {@link PCollection} will be processed by a {@link
+ * WriteOperation}, so the number of outputs will vary based on runner behavior, though at least 1
+ * output will always be produced. The exact parallelism of the write stage can be controlled using
+ * {@link WriteFiles#withNumShards}, typically used to control how many files are produced or to
+ * globally limit the number of workers connecting to an external service. However, this option can
+ * often hurt performance: it adds an additional {@link GroupByKey} to the pipeline.
  *
  * <p>Example usage with runner-determined sharding:
  *
@@ -89,7 +97,8 @@ import org.slf4j.LoggerFactory;
  * <pre>{@code p.apply(WriteFiles.to(new MySink(...)).withNumShards(3));}</pre>
  */
 @Experimental(Experimental.Kind.SOURCE_SINK)
-public class WriteFiles<T> extends PTransform<PCollection<T>, PDone> {
+public class WriteFiles<UserT, DestinationT, OutputT>
+    extends PTransform<PCollection<UserT>, PDone> {
   private static final Logger LOG = LoggerFactory.getLogger(WriteFiles.class);
 
   // The maximum number of file writers to keep open in a single bundle at a time, since file
@@ -105,12 +114,12 @@ public class WriteFiles<T> extends PTransform<PCollection<T>, PDone> {
   private static final int SPILLED_RECORD_SHARDING_FACTOR = 10;
 
   static final int UNKNOWN_SHARDNUM = -1;
-  private FileBasedSink<T> sink;
-  private WriteOperation<T> writeOperation;
+  private FileBasedSink<OutputT, DestinationT> sink;
+  private SerializableFunction<UserT, OutputT> formatFunction;
+  private WriteOperation<OutputT, DestinationT> writeOperation;
   // This allows the number of shards to be dynamically computed based on the input
   // PCollection.
-  @Nullable
-  private final PTransform<PCollection<T>, PCollectionView<Integer>> computeNumShards;
+  @Nullable private final PTransform<PCollection<UserT>, PCollectionView<Integer>> computeNumShards;
   // We don't use a side input for static sharding, as we want this value to be updatable
   // when a pipeline is updated.
   @Nullable
@@ -122,19 +131,28 @@ public class WriteFiles<T> extends PTransform<PCollection<T>, PDone> {
    * Creates a {@link WriteFiles} transform that writes to the given {@link FileBasedSink}, letting
    * the runner control how many different shards are produced.
    */
-  public static <T> WriteFiles<T> to(FileBasedSink<T> sink) {
+  public static <UserT, DestinationT, OutputT> WriteFiles<UserT, DestinationT, OutputT> to(
+      FileBasedSink<OutputT, DestinationT> sink,
+      SerializableFunction<UserT, OutputT> formatFunction) {
     checkNotNull(sink, "sink");
-    return new WriteFiles<>(sink, null /* runner-determined sharding */, null,
-        false, DEFAULT_MAX_NUM_WRITERS_PER_BUNDLE);
+    return new WriteFiles<>(
+        sink,
+        formatFunction,
+        null /* runner-determined sharding */,
+        null,
+        false,
+        DEFAULT_MAX_NUM_WRITERS_PER_BUNDLE);
   }
 
   private WriteFiles(
-      FileBasedSink<T> sink,
-      @Nullable PTransform<PCollection<T>, PCollectionView<Integer>> computeNumShards,
+      FileBasedSink<OutputT, DestinationT> sink,
+      SerializableFunction<UserT, OutputT> formatFunction,
+      @Nullable PTransform<PCollection<UserT>, PCollectionView<Integer>> computeNumShards,
       @Nullable ValueProvider<Integer> numShardsProvider,
       boolean windowedWrites,
       int maxNumWritersPerBundle) {
     this.sink = sink;
+    this.formatFunction = checkNotNull(formatFunction);
     this.computeNumShards = computeNumShards;
     this.numShardsProvider = numShardsProvider;
     this.windowedWrites = windowedWrites;
@@ -142,7 +160,7 @@ public class WriteFiles<T> extends PTransform<PCollection<T>, PDone> {
   }
 
   @Override
-  public PDone expand(PCollection<T> input) {
+  public PDone expand(PCollection<UserT> input) {
     if (input.isBounded() == IsBounded.UNBOUNDED) {
       checkArgument(windowedWrites,
           "Must use windowed writes when applying %s to an unbounded PCollection",
@@ -181,13 +199,16 @@ public class WriteFiles<T> extends PTransform<PCollection<T>, PDone> {
     }
   }
 
-  /**
-   * Returns the {@link FileBasedSink} associated with this PTransform.
-   */
-  public FileBasedSink<T> getSink() {
+  /** Returns the {@link FileBasedSink} associated with this PTransform. */
+  public FileBasedSink<OutputT, DestinationT> getSink() {
     return sink;
   }
 
+  /** Returns the format function that maps the user type to the record written to files. */
+  public SerializableFunction<UserT, OutputT> getFormatFunction() {
+    return formatFunction;
+  }
+
   /**
    * Returns whether or not to perform windowed writes.
    */
@@ -202,7 +223,7 @@ public class WriteFiles<T> extends PTransform<PCollection<T>, PDone> {
    * #withRunnerDeterminedSharding()}.
    */
   @Nullable
-  public PTransform<PCollection<T>, PCollectionView<Integer>> getSharding() {
+  public PTransform<PCollection<UserT>, PCollectionView<Integer>> getSharding() {
     return computeNumShards;
   }
 
@@ -220,7 +241,7 @@ public class WriteFiles<T> extends PTransform<PCollection<T>, PDone> {
    * <p>A value less than or equal to 0 will be equivalent to the default behavior of
    * runner-determined sharding.
    */
-  public WriteFiles<T> withNumShards(int numShards) {
+  public WriteFiles<UserT, DestinationT, OutputT> withNumShards(int numShards) {
     if (numShards > 0) {
       return withNumShards(StaticValueProvider.of(numShards));
     }
@@ -234,16 +255,26 @@ public class WriteFiles<T> extends PTransform<PCollection<T>, PDone> {
    * <p>This option should be used sparingly as it can hurt performance. See {@link WriteFiles} for
    * more information.
    */
-  public WriteFiles<T> withNumShards(ValueProvider<Integer> numShardsProvider) {
-    return new WriteFiles<>(sink, null, numShardsProvider, windowedWrites,
+  public WriteFiles<UserT, DestinationT, OutputT> withNumShards(
+      ValueProvider<Integer> numShardsProvider) {
+    return new WriteFiles<>(
+        sink,
+        formatFunction,
+        computeNumShards,
+        numShardsProvider,
+        windowedWrites,
         maxNumWritersPerBundle);
   }
 
-  /**
-   * Set the maximum number of writers created in a bundle before spilling to shuffle.
-   */
-  public WriteFiles<T> withMaxNumWritersPerBundle(int maxNumWritersPerBundle) {
-    return new WriteFiles<>(sink, null, numShardsProvider, windowedWrites,
+  /** Set the maximum number of writers created in a bundle before spilling to shuffle. */
+  public WriteFiles<UserT, DestinationT, OutputT> withMaxNumWritersPerBundle(
+      int maxNumWritersPerBundle) {
+    return new WriteFiles<>(
+        sink,
+        formatFunction,
+        computeNumShards,
+        numShardsProvider,
+        windowedWrites,
         maxNumWritersPerBundle);
   }
 
@@ -254,97 +285,167 @@ public class WriteFiles<T> extends PTransform<PCollection<T>, PDone> {
    * <p>This option should be used sparingly as it can hurt performance. See {@link WriteFiles} for
    * more information.
    */
-  public WriteFiles<T> withSharding(PTransform<PCollection<T>, PCollectionView<Integer>> sharding) {
+  public WriteFiles<UserT, DestinationT, OutputT> withSharding(
+      PTransform<PCollection<UserT>, PCollectionView<Integer>> sharding) {
     checkNotNull(
         sharding, "Cannot provide null sharding. Use withRunnerDeterminedSharding() instead");
-    return new WriteFiles<>(sink, sharding, null, windowedWrites, maxNumWritersPerBundle);
+    return new WriteFiles<>(
+        sink, formatFunction, sharding, null, windowedWrites, maxNumWritersPerBundle);
   }
 
   /**
    * Returns a new {@link WriteFiles} that will write to the current {@link FileBasedSink} with
    * runner-determined sharding.
    */
-  public WriteFiles<T> withRunnerDeterminedSharding() {
-    return new WriteFiles<>(sink, null, null, windowedWrites, maxNumWritersPerBundle);
+  public WriteFiles<UserT, DestinationT, OutputT> withRunnerDeterminedSharding() {
+    return new WriteFiles<>(
+        sink, formatFunction, null, null, windowedWrites, maxNumWritersPerBundle);
   }
 
   /**
   * Returns a new {@link WriteFiles} that preserves windowing on its input.
    *
-   * <p>If this option is not specified, windowing and triggering are replaced by
-   * {@link GlobalWindows} and {@link DefaultTrigger}.
+   * <p>If this option is not specified, windowing and triggering are replaced by {@link
+   * GlobalWindows} and {@link DefaultTrigger}.
    *
-   * <p>If there is no data for a window, no output shards will be generated for that window.
-   * If a window triggers multiple times, then more than a single output shard might be
-   * generated multiple times; it's up to the sink implementation to keep these output shards
-   * unique.
+   * <p>If there is no data for a window, no output shards will be generated for that window. If a
+   * window triggers multiple times, then more than a single output shard might be generated
+   * multiple times; it's up to the sink implementation to keep these output shards unique.
    *
-   * <p>This option can only be used if {@link #withNumShards(int)} is also set to a
-   * positive value.
+   * <p>This option can only be used if {@link #withNumShards(int)} is also set to a positive value.
    */
-  public WriteFiles<T> withWindowedWrites() {
-    return new WriteFiles<>(sink, computeNumShards, numShardsProvider, true,
-        maxNumWritersPerBundle);
+  public WriteFiles<UserT, DestinationT, OutputT> withWindowedWrites() {
+    return new WriteFiles<>(
+        sink, formatFunction, computeNumShards, numShardsProvider, true, maxNumWritersPerBundle);
+  }
+
+  private static class WriterKey<DestinationT> {
+    private final BoundedWindow window;
+    private final PaneInfo paneInfo;
+    private final DestinationT destination;
+
+    WriterKey(BoundedWindow window, PaneInfo paneInfo, DestinationT destination) {
+      this.window = window;
+      this.paneInfo = paneInfo;
+      this.destination = destination;
+    }
+
+    @Override
+    public boolean equals(Object o) {
+      if (!(o instanceof WriterKey)) {
+        return false;
+      }
+      WriterKey other = (WriterKey) o;
+      return Objects.equal(window, other.window)
+          && Objects.equal(paneInfo, other.paneInfo)
+          && Objects.equal(destination, other.destination);
+    }
+
+    @Override
+    public int hashCode() {
+      return Objects.hashCode(window, paneInfo, destination);
+    }
+  }
+
+  // Hash the destination into a value that can be used as a key in a GBK. Since Java's
+  // hashCode isn't guaranteed to be stable across machines, we instead serialize the destination
+  // and use murmur3_32 to hash it. We enforce that destinationCoder must be deterministic, so
+  // this can be used as a key.
+  private static <DestinationT> int hashDestination(
+      DestinationT destination, Coder<DestinationT> destinationCoder) throws IOException {
+    return Hashing.murmur3_32()
+        .hashBytes(CoderUtils.encodeToByteArray(destinationCoder, destination))
+        .asInt();
   }
 
   /**
-   * Writes all the elements in a bundle using a {@link Writer} produced by the
-   * {@link WriteOperation} associated with the {@link FileBasedSink} with windowed writes enabled.
+   * Writes all the elements in a bundle using a {@link Writer} produced by the {@link
+   * WriteOperation} associated with the {@link FileBasedSink}.
    */
-  private class WriteWindowedBundles extends DoFn<T, FileResult> {
-    private final TupleTag<KV<Integer, T>> unwrittedRecordsTag;
-    private Map<KV<BoundedWindow, PaneInfo>, Writer<T>> windowedWriters;
-    int spilledShardNum = UNKNOWN_SHARDNUM;
-
-    WriteWindowedBundles(TupleTag<KV<Integer, T>> unwrittedRecordsTag) {
-      this.unwrittedRecordsTag = unwrittedRecordsTag;
+  private class WriteBundles extends DoFn<UserT, FileResult<DestinationT>> {
+    private final TupleTag<KV<ShardedKey<Integer>, UserT>> unwrittenRecordsTag;
+    private final Coder<DestinationT> destinationCoder;
+    private final boolean windowedWrites;
+
+    private Map<WriterKey<DestinationT>, Writer<OutputT, DestinationT>> writers;
+    private int spilledShardNum = UNKNOWN_SHARDNUM;
+
+    WriteBundles(
+        boolean windowedWrites,
+        TupleTag<KV<ShardedKey<Integer>, UserT>> unwrittenRecordsTag,
+        Coder<DestinationT> destinationCoder) {
+      this.windowedWrites = windowedWrites;
+      this.unwrittenRecordsTag = unwrittenRecordsTag;
+      this.destinationCoder = destinationCoder;
     }
 
     @StartBundle
     public void startBundle(StartBundleContext c) {
       // Reset state in case of reuse. We need to make sure that each bundle gets unique writers.
-      windowedWriters = Maps.newHashMap();
+      writers = Maps.newHashMap();
     }
 
     @ProcessElement
     public void processElement(ProcessContext c, BoundedWindow window) throws Exception {
       PaneInfo paneInfo = c.pane();
-      Writer<T> writer;
       // If we are doing windowed writes, we need to ensure that we have separate files for
-      // data in different windows/panes.
-      KV<BoundedWindow, PaneInfo> key = KV.of(window, paneInfo);
-      writer = windowedWriters.get(key);
+      // data in different windows/panes. Similarly, for dynamic destinations, we make sure that
+      // different destinations go to different writers.
+      // In the case of unwindowed writes, the window and the pane will always be the same, and
+      // the map will only have a single element.
+      DestinationT destination = sink.getDynamicDestinations().getDestination(c.element());
+      WriterKey<DestinationT> key = new WriterKey<>(window, c.pane(), destination);
+      Writer<OutputT, DestinationT> writer = writers.get(key);
       if (writer == null) {
-        if (windowedWriters.size() <= maxNumWritersPerBundle) {
+        if (writers.size() <= maxNumWritersPerBundle) {
           String uuid = UUID.randomUUID().toString();
           LOG.info(
-              "Opening writer {} for write operation {}, window {} pane {}",
+              "Opening writer {} for write operation {}, window {} pane {} destination {}",
               uuid,
               writeOperation,
               window,
-              paneInfo);
+              paneInfo,
+              destination);
           writer = writeOperation.createWriter();
-          writer.openWindowed(uuid, window, paneInfo, UNKNOWN_SHARDNUM);
-          windowedWriters.put(key, writer);
+          if (windowedWrites) {
+            writer.openWindowed(uuid, window, paneInfo, UNKNOWN_SHARDNUM, destination);
+          } else {
+            writer.openUnwindowed(uuid, UNKNOWN_SHARDNUM, destination);
+          }
+          writers.put(key, writer);
           LOG.debug("Done opening writer");
         } else {
           if (spilledShardNum == UNKNOWN_SHARDNUM) {
+            // Cache the random value so we only call ThreadLocalRandom once per DoFn instance.
             spilledShardNum = ThreadLocalRandom.current().nextInt(SPILLED_RECORD_SHARDING_FACTOR);
           } else {
             spilledShardNum = (spilledShardNum + 1) % SPILLED_RECORD_SHARDING_FACTOR;
           }
-          c.output(unwrittedRecordsTag, KV.of(spilledShardNum, c.element()));
+          c.output(
+              unwrittenRecordsTag,
+              KV.of(
+                  ShardedKey.of(hashDestination(destination, destinationCoder), spilledShardNum),
+                  c.element()));
           return;
         }
       }
-      writeOrClose(writer, c.element());
+      writeOrClose(writer, formatFunction.apply(c.element()));
     }
 
     @FinishBundle
     public void finishBundle(FinishBundleContext c) throws Exception {
-      for (Map.Entry<KV<BoundedWindow, PaneInfo>, Writer<T>> entry : windowedWriters.entrySet()) {
-        FileResult result = entry.getValue().close();
-        BoundedWindow window = entry.getKey().getKey();
+      for (Map.Entry<WriterKey<DestinationT>, Writer<OutputT, DestinationT>> entry :
+          writers.entrySet()) {
+        Writer<OutputT, DestinationT> writer = entry.getValue();
+        FileResult<DestinationT> result;
+        try {
+          result = writer.close();
+        } catch (Exception e) {
+          // If anything goes wrong, make sure to delete the temporary file.
+          writer.cleanup();
+          throw e;
+        }
+        BoundedWindow window = entry.getKey().window;
         c.output(result, window.maxTimestamp(), window);
       }
     }
@@ -355,90 +456,62 @@ public class WriteFiles<T> extends PTransform<PCollection<T>, PDone> {
     }
   }
 
-  /**
-   * Writes all the elements in a bundle using a {@link Writer} produced by the
-   * {@link WriteOperation} associated with the {@link FileBasedSink} with windowed writes disabled.
-   */
-  private class WriteUnwindowedBundles extends DoFn<T, FileResult> {
-    // Writer that will write the records in this bundle. Lazily
-    // initialized in processElement.
-    private Writer<T> writer = null;
-    private BoundedWindow window = null;
-
-    @StartBundle
-    public void startBundle(StartBundleContext c) {
-      // Reset state in case of reuse. We need to make sure that each bundle gets unique writers.
-      writer = null;
-    }
-
-    @ProcessElement
-    public void processElement(ProcessContext c, BoundedWindow window) throws Exception {
-      // Cache a single writer for the bundle.
-      if (writer == null) {
-        LOG.info("Opening writer for write operation {}", writeOperation);
-        writer = writeOperation.createWriter();
-        writer.openUnwindowed(UUID.randomUUID().toString(), UNKNOWN_SHARDNUM);
-        LOG.debug("Done opening writer");
-      }
-      this.window = window;
-      writeOrClose(this.writer, c.element());
-    }
+  enum ShardAssignment { ASSIGN_IN_FINALIZE, ASSIGN_WHEN_WRITING }
 
-    @FinishBundle
-    public void finishBundle(FinishBundleContext c) throws Exception {
-      if (writer == null) {
-        return;
-      }
-      FileResult result = writer.close();
-      c.output(result, window.maxTimestamp(), window);
-    }
-
-    @Override
-    public void populateDisplayData(DisplayData.Builder builder) {
-      builder.delegate(WriteFiles.this);
-    }
-  }
-
-  enum ShardAssignment { ASSIGN_IN_FINALIZE, ASSIGN_WHEN_WRITING };
-
-  /**
-   * Like {@link WriteWindowedBundles} and {@link WriteUnwindowedBundles}, but where the elements
-   * for each shard have been collected into a single iterable.
+  /**
+   * Like {@link WriteBundles}, but where the elements for each shard have been collected into a
+   * single iterable.
    */
-  private class WriteShardedBundles extends DoFn<KV<Integer, Iterable<T>>, FileResult> {
+  private class WriteShardedBundles
+      extends DoFn<KV<ShardedKey<Integer>, Iterable<UserT>>, FileResult<DestinationT>> {
     ShardAssignment shardNumberAssignment;
     WriteShardedBundles(ShardAssignment shardNumberAssignment) {
       this.shardNumberAssignment = shardNumberAssignment;
     }
+
     @ProcessElement
     public void processElement(ProcessContext c, BoundedWindow window) throws Exception {
-      // In a sharded write, single input element represents one shard. We can open and close
-      // the writer in each call to processElement.
-      LOG.info("Opening writer for write operation {}", writeOperation);
-      Writer<T> writer = writeOperation.createWriter();
-      if (windowedWrites) {
-        int shardNumber = shardNumberAssignment == ShardAssignment.ASSIGN_WHEN_WRITING
-            ? c.element().getKey() : UNKNOWN_SHARDNUM;
-        writer.openWindowed(UUID.randomUUID().toString(), window, c.pane(), shardNumber);
-      } else {
-        writer.openUnwindowed(UUID.randomUUID().toString(), UNKNOWN_SHARDNUM);
-      }
-      LOG.debug("Done opening writer");
-
-      try {
-        for (T t : c.element().getValue()) {
-          writeOrClose(writer, t);
+      // Since we key by a 32-bit hash of the destination, there might be multiple destinations
+      // in this iterable. The number of destinations is generally very small (1000s or less), so
+      // there will rarely be hash collisions.
+      Map<DestinationT, Writer<OutputT, DestinationT>> writers = Maps.newHashMap();
+      for (UserT input : c.element().getValue()) {
+        DestinationT destination = sink.getDynamicDestinations().getDestination(input);
+        Writer<OutputT, DestinationT> writer = writers.get(destination);
+        if (writer == null) {
+          LOG.debug("Opening writer for write operation {}", writeOperation);
+          writer = writeOperation.createWriter();
+          if (windowedWrites) {
+            int shardNumber =
+                shardNumberAssignment == ShardAssignment.ASSIGN_WHEN_WRITING
+                    ? c.element().getKey().getShardNumber()
+                    : UNKNOWN_SHARDNUM;
+            writer.openWindowed(
+                UUID.randomUUID().toString(), window, c.pane(), shardNumber, destination);
+          } else {
+            writer.openUnwindowed(UUID.randomUUID().toString(), UNKNOWN_SHARDNUM, destination);
+          }
+          LOG.debug("Done opening writer");
+          writers.put(destination, writer);
+        }
+        writeOrClose(writer, formatFunction.apply(input));
         }
 
-        // Close the writer; if this throws let the error propagate.
-        FileResult result = writer.close();
-        c.output(result);
-      } catch (Exception e) {
-        // If anything goes wrong, make sure to delete the temporary file.
-        writer.cleanup();
-        throw e;
+      // Close all writers.
+      for (Map.Entry<DestinationT, Writer<OutputT, DestinationT>> entry : writers.entrySet()) {
+        Writer<OutputT, DestinationT> writer = entry.getValue();
+        FileResult<DestinationT> result;
+        try {
+          // Close the writer; if this throws let the error propagate.
+          result = writer.close();
+          c.output(result);
+        } catch (Exception e) {
+          // If anything goes wrong, make sure to delete the temporary file.
+          writer.cleanup();
+          throw e;
+        }
+      }
       }
-    }
 
     @Override
     public void populateDisplayData(DisplayData.Builder builder) {
@@ -446,12 +519,15 @@ public class WriteFiles<T> extends PTransform<PCollection<T>, PDone> {
     }
   }
 
-  private static <T> void writeOrClose(Writer<T> writer, T t) throws Exception {
+  private static <OutputT, DestinationT> void writeOrClose(
+      Writer<OutputT, DestinationT> writer, OutputT t) throws Exception {
     try {
       writer.write(t);
     } catch (Exception e) {
       try {
         writer.close();
+        // If anything goes wrong, make sure to delete the temporary file.
+        writer.cleanup();
       } catch (Exception closeException) {
         if (closeException instanceof InterruptedException) {
           // Do not silently ignore interrupted state.
@@ -464,20 +540,25 @@ public class WriteFiles<T> extends PTransform<PCollection<T>, PDone> {
     }
   }
 
-  private static class ApplyShardingKey<T> extends DoFn<T, KV<Integer, T>> {
+  private class ApplyShardingKey extends DoFn<UserT, KV<ShardedKey<Integer>, UserT>> {
     private final PCollectionView<Integer> numShardsView;
     private final ValueProvider<Integer> numShardsProvider;
+    private final Coder<DestinationT> destinationCoder;
+
     private int shardNumber;
 
-    ApplyShardingKey(PCollectionView<Integer> numShardsView,
-                     ValueProvider<Integer> numShardsProvider) {
+    ApplyShardingKey(
+        PCollectionView<Integer> numShardsView,
+        ValueProvider<Integer> numShardsProvider,
+        Coder<DestinationT> destinationCoder) {
+      this.destinationCoder = destinationCoder;
       this.numShardsView = numShardsView;
       this.numShardsProvider = numShardsProvider;
       shardNumber = UNKNOWN_SHARDNUM;
     }
 
     @ProcessElement
-    public void processElement(ProcessContext context) {
+    public void processElement(ProcessContext context) throws IOException {
       final int shardCount;
       if (numShardsView != null) {
         shardCount = context.sideInput(numShardsView);
@@ -497,86 +578,110 @@ public class WriteFiles<T> extends PTransform<PCollection<T>, PDone> {
       } else {
         shardNumber = (shardNumber + 1) % shardCount;
       }
-      context.output(KV.of(shardNumber, context.element()));
+      // We avoid using destination itself as a sharding key, because destination is often large.
+      // e.g. when using {@link DefaultFilenamePolicy}, the destination contains the entire path
+      // to the file. Often most of the path is constant across all destinations, just the path
+      // suffix is appended by the destination function. Instead we key by a 32-bit hash (carefully
+      // chosen to be guaranteed stable), and call getDestination again in the next ParDo to resolve
+      // the destinations. This does mean that multiple destinations might end up on the same shard,
+      // however the number of collisions should be small, so there's no need to worry about memory
+      // issues.
+      DestinationT destination = sink.getDynamicDestinations().getDestination(context.element());
+      context.output(
+          KV.of(
+              ShardedKey.of(hashDestination(destination, destinationCoder), shardNumber),
+              context.element()));
     }
   }
 
   /**
    * A write is performed as a sequence of three {@link ParDo}s.
    *
-   * <p>This singleton collection containing the WriteOperation is then used as a side
-   * input to a ParDo over the PCollection of elements to write. In this bundle-writing phase,
-   * {@link WriteOperation#createWriter} is called to obtain a {@link Writer}.
-   * {@link Writer#open} and {@link Writer#close} are called in
-   * {@link DoFn.StartBundle} and {@link DoFn.FinishBundle}, respectively, and
-   * {@link Writer#write} method is called for every element in the bundle. The output
-   * of this ParDo is a PCollection of <i>writer result</i> objects (see {@link FileBasedSink}
-   * for a description of writer results)-one for each bundle.
+   * <p>This singleton collection containing the WriteOperation is then used as a side input to a
+   * ParDo over the PCollection of elements to write. In this bundle-writing phase, {@link
+   * WriteOperation#createWriter} is called to obtain a {@link Writer}. {@link Writer#open} and
+   * {@link Writer#close} are called in {@link DoFn.StartBundle} and {@link DoFn.FinishBundle},
+   * respectively, and {@link Writer#write} method is called for every element in the bundle. The
+   * output of this ParDo is a PCollection of <i>writer result</i> objects (see {@link
+   * FileBasedSink} for a description of writer results), one for each bundle.
    *
    * <p>The final do-once ParDo uses a singleton collection as input and the collection of writer
-   * results as a side-input. In this ParDo, {@link WriteOperation#finalize} is called
-   * to finalize the write.
+   * results as a side-input. In this ParDo, {@link WriteOperation#finalize} is called to finalize
+   * the write.
    *
-   * <p>If the write of any element in the PCollection fails, {@link Writer#close} will be
-   * called before the exception that caused the write to fail is propagated and the write result
-   * will be discarded.
+   * <p>If the write of any element in the PCollection fails, {@link Writer#close} will be called
+   * before the exception that caused the write to fail is propagated and the write result will be
+   * discarded.
    *
    * <p>Since the {@link WriteOperation} is serialized after the initialization ParDo and
    * deserialized in the bundle-writing and finalization phases, any state change to the
-   * WriteOperation object that occurs during initialization is visible in the latter
-   * phases. However, the WriteOperation is not serialized after the bundle-writing
-   * phase. This is why implementations should guarantee that
-   * {@link WriteOperation#createWriter} does not mutate WriteOperation).
+   * WriteOperation object that occurs during initialization is visible in the latter phases.
+   * However, the WriteOperation is not serialized after the bundle-writing phase. This is why
+   * implementations should guarantee that {@link WriteOperation#createWriter} does not mutate
+   * the WriteOperation.
    */
-  private PDone createWrite(PCollection<T> input) {
+  private PDone createWrite(PCollection<UserT> input) {
     Pipeline p = input.getPipeline();
 
     if (!windowedWrites) {
       // Re-window the data into the global window and remove any existing triggers.
       input =
           input.apply(
-              Window.<T>into(new GlobalWindows())
+              Window.<UserT>into(new GlobalWindows())
                   .triggering(DefaultTrigger.of())
                   .discardingFiredPanes());
     }
 
-
     // Perform the per-bundle writes as a ParDo on the input PCollection (with the
     // WriteOperation as a side input) and collect the results of the writes in a
     // PCollection. There is a dependency between this ParDo and the first (the
     // WriteOperation PCollection as a side input), so this will happen after the
     // initial ParDo.
-    PCollection<FileResult> results;
+    PCollection<FileResult<DestinationT>> results;
     final PCollectionView<Integer> numShardsView;
     @SuppressWarnings("unchecked")
     Coder<BoundedWindow> shardedWindowCoder =
         (Coder<BoundedWindow>) input.getWindowingStrategy().getWindowFn().windowCoder();
+    final Coder<DestinationT> destinationCoder;
+    try {
+      destinationCoder =
+          sink.getDynamicDestinations()
+              .getDestinationCoderWithDefault(input.getPipeline().getCoderRegistry());
+      destinationCoder.verifyDeterministic();
+    } catch (CannotProvideCoderException | NonDeterministicException e) {
+      throw new RuntimeException(e);
+    }
+
     if (computeNumShards == null && numShardsProvider == null) {
       numShardsView = null;
-      if (windowedWrites) {
-        TupleTag<FileResult> writtenRecordsTag = new TupleTag<>("writtenRecordsTag");
-        TupleTag<KV<Integer, T>> unwrittedRecordsTag = new TupleTag<>("unwrittenRecordsTag");
-        PCollectionTuple writeTuple = input.apply("WriteWindowedBundles", ParDo.of(
-            new WriteWindowedBundles(unwrittedRecordsTag))
-            .withOutputTags(writtenRecordsTag, TupleTagList.of(unwrittedRecordsTag)));
-        PCollection<FileResult> writtenBundleFiles = writeTuple.get(writtenRecordsTag)
-            .setCoder(FileResultCoder.of(shardedWindowCoder));
-        // Any "spilled" elements are written using WriteShardedBundles. Assign shard numbers in
-        // finalize to stay consistent with what WriteWindowedBundles does.
-        PCollection<FileResult> writtenGroupedFiles =
-            writeTuple
-                .get(unwrittedRecordsTag)
-                .setCoder(KvCoder.of(VarIntCoder.of(), input.getCoder()))
-                .apply("GroupUnwritten", GroupByKey.<Integer, T>create())
-                .apply("WriteUnwritten", ParDo.of(
-                    new WriteShardedBundles(ShardAssignment.ASSIGN_IN_FINALIZE)))
-                .setCoder(FileResultCoder.of(shardedWindowCoder));
-        results = PCollectionList.of(writtenBundleFiles).and(writtenGroupedFiles)
-            .apply(Flatten.<FileResult>pCollections());
-      } else {
-        results =
-            input.apply("WriteUnwindowedBundles", ParDo.of(new WriteUnwindowedBundles()));
-      }
+      TupleTag<FileResult<DestinationT>> writtenRecordsTag = new TupleTag<>("writtenRecordsTag");
+      TupleTag<KV<ShardedKey<Integer>, UserT>> unwrittenRecordsTag =
+          new TupleTag<>("unwrittenRecordsTag");
+      String writeName = windowedWrites ? "WriteWindowedBundles" : "WriteBundles";
+      PCollectionTuple writeTuple =
+          input.apply(
+              writeName,
+              ParDo.of(new WriteBundles(windowedWrites, unwrittenRecordsTag, destinationCoder))
+                  .withOutputTags(writtenRecordsTag, TupleTagList.of(unwrittenRecordsTag)));
+      PCollection<FileResult<DestinationT>> writtenBundleFiles =
+          writeTuple
+              .get(writtenRecordsTag)
+              .setCoder(FileResultCoder.of(shardedWindowCoder, destinationCoder));
+      // Any "spilled" elements are written using WriteShardedBundles. Assign shard numbers in
+      // finalize to stay consistent with what WriteBundles does.
+      PCollection<FileResult<DestinationT>> writtenGroupedFiles =
+          writeTuple
+              .get(unwrittenRecordsTag)
+              .setCoder(KvCoder.of(ShardedKeyCoder.of(VarIntCoder.of()), input.getCoder()))
+              .apply("GroupUnwritten", GroupByKey.<ShardedKey<Integer>, UserT>create())
+              .apply(
+                  "WriteUnwritten",
+                  ParDo.of(new WriteShardedBundles(ShardAssignment.ASSIGN_IN_FINALIZE)))
+              .setCoder(FileResultCoder.of(shardedWindowCoder, destinationCoder));
+      results =
+          PCollectionList.of(writtenBundleFiles)
+              .and(writtenGroupedFiles)
+              .apply(Flatten.<FileResult<DestinationT>>pCollections());
     } else {
       List<PCollectionView<?>> sideInputs = Lists.newArrayList();
       if (computeNumShards != null) {
@@ -585,23 +690,31 @@ public class WriteFiles<T> extends PTransform<PCollection<T>, PDone> {
       } else {
         numShardsView = null;
       }
-
-      PCollection<KV<Integer, Iterable<T>>> sharded =
+      PCollection<KV<ShardedKey<Integer>, Iterable<UserT>>> sharded =
           input
-              .apply("ApplyShardLabel", ParDo.of(
-                  new ApplyShardingKey<T>(numShardsView,
-                      (numShardsView != null) ? null : numShardsProvider))
-                  .withSideInputs(sideInputs))
-              .apply("GroupIntoShards", GroupByKey.<Integer, T>create());
+              .apply(
+                  "ApplyShardLabel",
+                  ParDo.of(
+                          new ApplyShardingKey(
+                              numShardsView,
+                              (numShardsView != null) ? null : numShardsProvider,
+                              destinationCoder))
+                      .withSideInputs(sideInputs))
+              .setCoder(KvCoder.of(ShardedKeyCoder.of(VarIntCoder.of()), input.getCoder()))
+              .apply("GroupIntoShards", GroupByKey.<ShardedKey<Integer>, UserT>create());
+      shardedWindowCoder =
+          (Coder<BoundedWindow>) sharded.getWindowingStrategy().getWindowFn().windowCoder();
       // Since this path might be used by streaming runners processing triggers, it's important
       // to assign shard numbers here so that they are deterministic. The ASSIGN_IN_FINALIZE
       // strategy works by sorting all FileResult objects and assigning them numbers, which is not
       // guaranteed to work well when processing triggers - if the finalize step retries it might
       // see a different Iterable of FileResult objects, and it will assign different shard numbers.
-      results = sharded.apply("WriteShardedBundles",
-          ParDo.of(new WriteShardedBundles(ShardAssignment.ASSIGN_WHEN_WRITING)));
+      results =
+          sharded.apply(
+              "WriteShardedBundles",
+              ParDo.of(new WriteShardedBundles(ShardAssignment.ASSIGN_WHEN_WRITING)));
     }
-    results.setCoder(FileResultCoder.of(shardedWindowCoder));
+    results.setCoder(FileResultCoder.of(shardedWindowCoder, destinationCoder));
 
     if (windowedWrites) {
       // When processing streaming windowed writes, results will arrive multiple times. This
@@ -609,26 +722,31 @@ public class WriteFiles<T> extends PTransform<PCollection<T>, PDone> {
       // as new data arriving into a side input does not trigger the listening DoFn. Instead
       // we aggregate the result set using a singleton GroupByKey, so the DoFn will be triggered
       // whenever new data arrives.
-      PCollection<KV<Void, FileResult>> keyedResults =
-          results.apply("AttachSingletonKey", WithKeys.<Void, FileResult>of((Void) null));
-      keyedResults.setCoder(KvCoder.of(VoidCoder.of(),
-          FileResultCoder.of(shardedWindowCoder)));
+      PCollection<KV<Void, FileResult<DestinationT>>> keyedResults =
+          results.apply(
+              "AttachSingletonKey", WithKeys.<Void, FileResult<DestinationT>>of((Void) null));
+      keyedResults.setCoder(
+          KvCoder.of(VoidCoder.of(), FileResultCoder.of(shardedWindowCoder, destinationCoder)));
 
       // Is the continuation trigger sufficient?
       keyedResults
-          .apply("FinalizeGroupByKey", GroupByKey.<Void, FileResult>create())
-          .apply("Finalize", ParDo.of(new DoFn<KV<Void, Iterable<FileResult>>, Integer>() {
-            @ProcessElement
-            public void processElement(ProcessContext c) throws Exception {
-              LOG.info("Finalizing write operation {}.", writeOperation);
-              List<FileResult> results = Lists.newArrayList(c.element().getValue());
-              writeOperation.finalize(results);
-              LOG.debug("Done finalizing write operation");
-            }
-          }));
+          .apply("FinalizeGroupByKey", GroupByKey.<Void, FileResult<DestinationT>>create())
+          .apply(
+              "Finalize",
+              ParDo.of(
+                  new DoFn<KV<Void, Iterable<FileResult<DestinationT>>>, Integer>() {
+                    @ProcessElement
+                    public void processElement(ProcessContext c) throws Exception {
+                      LOG.info("Finalizing write operation {}.", writeOperation);
+                      List<FileResult<DestinationT>> results =
+                          Lists.newArrayList(c.element().getValue());
+                      writeOperation.finalize(results);
+                      LOG.debug("Done finalizing write operation");
+                    }
+                  }));
     } else {
-      final PCollectionView<Iterable<FileResult>> resultsView =
-          results.apply(View.<FileResult>asIterable());
+      final PCollectionView<Iterable<FileResult<DestinationT>>> resultsView =
+          results.apply(View.<FileResult<DestinationT>>asIterable());
       ImmutableList.Builder<PCollectionView<?>> sideInputs =
           ImmutableList.<PCollectionView<?>>builder().add(resultsView);
       if (numShardsView != null) {
@@ -644,41 +762,53 @@ public class WriteFiles<T> extends PTransform<PCollection<T>, PDone> {
       // set numShards, then all shards will be written out as empty files. For this reason we
       // use a side input here.
       PCollection<Void> singletonCollection = p.apply(Create.of((Void) null));
-      singletonCollection
-          .apply("Finalize", ParDo.of(new DoFn<Void, Integer>() {
-            @ProcessElement
-            public void processElement(ProcessContext c) throws Exception {
-              LOG.info("Finalizing write operation {}.", writeOperation);
-              List<FileResult> results = Lists.newArrayList(c.sideInput(resultsView));
-              LOG.debug("Side input initialized to finalize write operation {}.", writeOperation);
-
-              // We must always output at least 1 shard, and honor user-specified numShards if
-              // set.
-              int minShardsNeeded;
-              if (numShardsView != null) {
-                minShardsNeeded = c.sideInput(numShardsView);
-              } else if (numShardsProvider != null) {
-                minShardsNeeded = numShardsProvider.get();
-              } else {
-                minShardsNeeded = 1;
-              }
-              int extraShardsNeeded = minShardsNeeded - results.size();
-              if (extraShardsNeeded > 0) {
-                LOG.info(
-                    "Creating {} empty output shards in addition to {} written for a total of {}.",
-                    extraShardsNeeded, results.size(), minShardsNeeded);
-                for (int i = 0; i < extraShardsNeeded; ++i) {
-                  Writer<T> writer = writeOperation.createWriter();
-                  writer.openUnwindowed(UUID.randomUUID().toString(), UNKNOWN_SHARDNUM);
-                  FileResult emptyWrite = writer.close();
-                  results.add(emptyWrite);
-                }
-                LOG.debug("Done creating extra shards.");
-              }
-              writeOperation.finalize(results);
-              LOG.debug("Done finalizing write operation {}", writeOperation);
-            }
-          }).withSideInputs(sideInputs.build()));
+      singletonCollection.apply(
+          "Finalize",
+          ParDo.of(
+                  new DoFn<Void, Integer>() {
+                    @ProcessElement
+                    public void processElement(ProcessContext c) throws Exception {
+                      LOG.info("Finalizing write operation {}.", writeOperation);
+                      List<FileResult<DestinationT>> results =
+                          Lists.newArrayList(c.sideInput(resultsView));
+                      LOG.debug(
+                          "Side input initialized to finalize write operation {}.", writeOperation);
+
+                      // We must always output at least 1 shard, and honor
+                      // user-specified numShards if set.
+                      int minShardsNeeded;
+                      if (numShardsView != null) {
+                        minShardsNeeded = c.sideInput(numShardsView);
+                      } else if (numShardsProvider != null) {
+                        minShardsNeeded = numShardsProvider.get();
+                      } else {
+                        minShardsNeeded = 1;
+                      }
+                      int extraShardsNeeded = minShardsNeeded - results.size();
+                      if (extraShardsNeeded > 0) {
+                        LOG.info(
+                            "Creating {} empty output shards in addition to {} written "
+                                + "for a total of {}.",
+                            extraShardsNeeded,
+                            results.size(),
+                            minShardsNeeded);
+                        for (int i = 0; i < extraShardsNeeded; ++i) {
+                          Writer<OutputT, DestinationT> writer = writeOperation.createWriter();
+                          writer.openUnwindowed(
+                              UUID.randomUUID().toString(),
+                              UNKNOWN_SHARDNUM,
+                              sink.getDynamicDestinations().getDefaultDestination());
+                          FileResult<DestinationT> emptyWrite = writer.close();
+                          results.add(emptyWrite);
+                        }
+                        LOG.debug("Done creating extra shards.");
+                      }
+                      writeOperation.finalize(results);
+                      LOG.debug("Done finalizing write operation {}", writeOperation);
+                    }
+                  })
+              .withSideInputs(sideInputs.build()));
     }
     return PDone.in(input.getPipeline());
   }
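
For context, the spill and sharded-write paths above key records by a murmur3_32 hash of the coder-encoded destination rather than by Java's hashCode(), which is not guaranteed stable across JVMs. A minimal standalone sketch of that step, assuming Guava's Hashing and Beam's CoderUtils, with StringUtf8Coder standing in for a real destination coder:

    import com.google.common.hash.Hashing;
    import org.apache.beam.sdk.coders.Coder;
    import org.apache.beam.sdk.coders.StringUtf8Coder;
    import org.apache.beam.sdk.util.CoderUtils;

    class DestinationHashSketch {
      // Encode the destination with a deterministic coder, then hash the
      // bytes; the result is stable across machines and safe as a GBK key.
      static <DestinationT> int hashDestination(
          DestinationT destination, Coder<DestinationT> coder) throws Exception {
        return Hashing.murmur3_32()
            .hashBytes(CoderUtils.encodeToByteArray(coder, destination))
            .asInt();
      }

      public static void main(String[] args) throws Exception {
        // The same destination always maps to the same shard key.
        int h1 = hashDestination("logs/part-", StringUtf8Coder.of());
        int h2 = hashDestination("logs/part-", StringUtf8Coder.of());
        System.out.println(h1 == h2);  // true
      }
    }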

http://git-wip-us.apache.org/repos/asf/beam/blob/4c336e84/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/SerializableFunctions.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/SerializableFunctions.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/SerializableFunctions.java
new file mode 100644
index 0000000..d057d81
--- /dev/null
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/SerializableFunctions.java
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.beam.sdk.transforms;
+
+/** Useful {@link SerializableFunction} overrides. */
+public class SerializableFunctions {
+  private static class Identity<T> implements SerializableFunction<T, T> {
+    @Override
+    public T apply(T input) {
+      return input;
+    }
+  }
+
+  private static class Constant<InT, OutT> implements SerializableFunction<InT, OutT> {
+    OutT value;
+
+    Constant(OutT value) {
+      this.value = value;
+    }
+
+    @Override
+    public OutT apply(InT input) {
+      return value;
+    }
+  }
+
+  public static <T> SerializableFunction<T, T> identity() {
+    return new Identity<>();
+  }
+
+  public static <InT, OutT> SerializableFunction<InT, OutT> constant(OutT value) {
+    return new Constant<>(value);
+  }
+}
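
A quick usage sketch for the new helpers (values are hypothetical): identity() is the natural format function when the user type and the written type coincide, and constant() is handy for tests.

    import org.apache.beam.sdk.transforms.SerializableFunction;
    import org.apache.beam.sdk.transforms.SerializableFunctions;

    class SerializableFunctionsSketch {
      public static void main(String[] args) {
        // identity() passes its input through unchanged; constant() ignores
        // the input and always returns the captured value.
        SerializableFunction<String, String> passThrough = SerializableFunctions.identity();
        SerializableFunction<Long, String> always = SerializableFunctions.constant("fixed");
        System.out.println(passThrough.apply("hello"));  // hello
        System.out.println(always.apply(42L));           // fixed
      }
    }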

http://git-wip-us.apache.org/repos/asf/beam/blob/4c336e84/sdks/java/core/src/main/java/org/apache/beam/sdk/values/ShardedKey.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/values/ShardedKey.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/values/ShardedKey.java
new file mode 100644
index 0000000..e56af13
--- /dev/null
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/values/ShardedKey.java
@@ -0,0 +1,65 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.beam.sdk.values;
+
+import java.io.Serializable;
+import java.util.Objects;
+
+/** A key and a shard number. */
+public class ShardedKey<K> implements Serializable {
+  private static final long serialVersionUID = 1L;
+  private final K key;
+  private final int shardNumber;
+
+  public static <K> ShardedKey<K> of(K key, int shardNumber) {
+    return new ShardedKey<>(key, shardNumber);
+  }
+
+  private ShardedKey(K key, int shardNumber) {
+    this.key = key;
+    this.shardNumber = shardNumber;
+  }
+
+  public K getKey() {
+    return key;
+  }
+
+  public int getShardNumber() {
+    return shardNumber;
+  }
+
+  @Override
+  public String toString() {
+    return "key: " + key + " shard: " + shardNumber;
+  }
+
+  @Override
+  public boolean equals(Object o) {
+    if (!(o instanceof ShardedKey)) {
+      return false;
+    }
+    ShardedKey<K> other = (ShardedKey<K>) o;
+    return Objects.equals(key, other.key) && Objects.equals(shardNumber, other.shardNumber);
+  }
+
+  @Override
+  public int hashCode() {
+    return Objects.hash(key, shardNumber);
+  }
+}
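
A small sketch of how WriteFiles uses this type (values hypothetical): the key part is the stable destination hash and the shard number spreads records across writers ahead of the GroupByKey.

    import org.apache.beam.sdk.values.KV;
    import org.apache.beam.sdk.values.ShardedKey;

    class ShardedKeySketch {
      public static void main(String[] args) {
        int destinationHash = 42;  // hypothetical hash of a destination
        ShardedKey<Integer> key = ShardedKey.of(destinationHash, 3);
        // Records with the same destination hash and shard number land in
        // the same group when grouped by this key.
        KV<ShardedKey<Integer>, String> record = KV.of(key, "some-element");
        System.out.println(record.getKey());  // key: 42 shard: 3
      }
    }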

http://git-wip-us.apache.org/repos/asf/beam/blob/4c336e84/sdks/java/core/src/test/java/org/apache/beam/sdk/io/AvroIOTest.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/io/AvroIOTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/io/AvroIOTest.java
index 6d01d32..260e47a 100644
--- a/sdks/java/core/src/test/java/org/apache/beam/sdk/io/AvroIOTest.java
+++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/io/AvroIOTest.java
@@ -54,10 +54,11 @@ import org.apache.avro.reflect.ReflectData;
 import org.apache.avro.reflect.ReflectDatumReader;
 import org.apache.beam.sdk.coders.AvroCoder;
 import org.apache.beam.sdk.coders.DefaultCoder;
-import org.apache.beam.sdk.coders.StringUtf8Coder;
 import org.apache.beam.sdk.io.FileBasedSink.FilenamePolicy;
+import org.apache.beam.sdk.io.FileBasedSink.OutputFileHints;
 import org.apache.beam.sdk.io.fs.ResolveOptions.StandardResolveOptions;
 import org.apache.beam.sdk.io.fs.ResourceId;
+import org.apache.beam.sdk.options.ValueProvider.StaticValueProvider;
 import org.apache.beam.sdk.testing.NeedsRunner;
 import org.apache.beam.sdk.testing.PAssert;
 import org.apache.beam.sdk.testing.TestPipeline;
@@ -276,37 +277,42 @@ public class AvroIOTest {
   }
 
   private static class WindowedFilenamePolicy extends FilenamePolicy {
-    final String outputFilePrefix;
+    final ResourceId outputFilePrefix;
 
-    WindowedFilenamePolicy(String outputFilePrefix) {
+    WindowedFilenamePolicy(ResourceId outputFilePrefix) {
       this.outputFilePrefix = outputFilePrefix;
     }
 
     @Override
-    public ResourceId windowedFilename(
-        ResourceId outputDirectory, WindowedContext input, String extension) {
-      String filename = String.format(
-          "%s-%s-%s-of-%s-pane-%s%s%s",
-          outputFilePrefix,
-          input.getWindow(),
-          input.getShardNumber(),
-          input.getNumShards() - 1,
-          input.getPaneInfo().getIndex(),
-          input.getPaneInfo().isLast() ? "-final" : "",
-          extension);
-      return outputDirectory.resolve(filename, StandardResolveOptions.RESOLVE_FILE);
+    public ResourceId windowedFilename(WindowedContext input, OutputFileHints outputFileHints) {
+      String filenamePrefix =
+          outputFilePrefix.isDirectory() ? "" : firstNonNull(outputFilePrefix.getFilename(), "");
+
+      String filename =
+          String.format(
+              "%s-%s-%s-of-%s-pane-%s%s%s",
+              filenamePrefix,
+              input.getWindow(),
+              input.getShardNumber(),
+              input.getNumShards() - 1,
+              input.getPaneInfo().getIndex(),
+              input.getPaneInfo().isLast() ? "-final" : "",
+              outputFileHints.getSuggestedFilenameSuffix());
+      return outputFilePrefix
+          .getCurrentDirectory()
+          .resolve(filename, StandardResolveOptions.RESOLVE_FILE);
     }
 
     @Override
-    public ResourceId unwindowedFilename(
-        ResourceId outputDirectory, Context input, String extension) {
+    public ResourceId unwindowedFilename(Context input, OutputFileHints outputFileHints) {
       throw new UnsupportedOperationException("Expecting windowed outputs only");
     }
 
     @Override
     public void populateDisplayData(DisplayData.Builder builder) {
-      builder.add(DisplayData.item("fileNamePrefix", outputFilePrefix)
-          .withLabel("File Name Prefix"));
+      builder.add(
+          DisplayData.item("fileNamePrefix", outputFilePrefix.toString())
+              .withLabel("File Name Prefix"));
     }
   }
 
@@ -359,15 +365,18 @@ public class AvroIOTest {
         Arrays.copyOfRange(secondWindowArray, 1, secondWindowArray.length))
         .advanceWatermarkToInfinity();
 
-    FilenamePolicy policy = new WindowedFilenamePolicy(baseFilename);
+    FilenamePolicy policy =
+        new WindowedFilenamePolicy(FileBasedSink.convertToFileResourceIfPossible(baseFilename));
     windowedAvroWritePipeline
         .apply(values)
         .apply(Window.<GenericClass>into(FixedWindows.of(Duration.standardMinutes(1))))
-        .apply(AvroIO.write(GenericClass.class)
-            .to(baseFilename)
-            .withFilenamePolicy(policy)
-            .withWindowedWrites()
-            .withNumShards(2));
+        .apply(
+            AvroIO.write(GenericClass.class)
+                .to(policy)
+                .withTempDirectory(
+                    StaticValueProvider.of(FileSystems.matchNewResource(baseDir.toString(), true)))
+                .withWindowedWrites()
+                .withNumShards(2));
     windowedAvroWritePipeline.run();
 
     // Validate that the data written matches the expected elements in the expected order
@@ -494,13 +503,14 @@ public class AvroIOTest {
       expectedFiles.add(
           new File(
               DefaultFilenamePolicy.constructName(
-                  outputFilePrefix,
-                  shardNameTemplate,
-                  "" /* no suffix */,
-                  i,
-                  numShards,
-                  null,
-                  null)));
+                      FileBasedSink.convertToFileResourceIfPossible(outputFilePrefix),
+                      shardNameTemplate,
+                      "" /* no suffix */,
+                      i,
+                      numShards,
+                      null,
+                      null)
+                  .toString()));
     }
 
     List<String> actualElements = new ArrayList<>();
@@ -572,15 +582,4 @@ public class AvroIOTest {
     assertThat(displayData, hasDisplayItem("numShards", 100));
     assertThat(displayData, hasDisplayItem("codec", CodecFactory.snappyCodec().toString()));
   }
-
-  @Test
-  public void testWindowedWriteRequiresFilenamePolicy() {
-    PCollection<String> emptyInput = p.apply(Create.empty(StringUtf8Coder.of()));
-    AvroIO.Write write = AvroIO.write(String.class).to("/tmp/some/file").withWindowedWrites();
-
-    expectedException.expect(IllegalStateException.class);
-    expectedException.expectMessage(
-        "When using windowed writes, a filename policy must be set via withFilenamePolicy()");
-    emptyInput.apply(write);
-  }
 }
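
The reworked policy above also shows the new ResourceId-based prefix handling. A condensed sketch of that resolution logic (paths hypothetical; the helper methods are the ones the updated test already calls):

    import org.apache.beam.sdk.io.FileBasedSink;
    import org.apache.beam.sdk.io.fs.ResolveOptions.StandardResolveOptions;
    import org.apache.beam.sdk.io.fs.ResourceId;

    class WindowedFilenameSketch {
      public static void main(String[] args) {
        ResourceId prefix = FileBasedSink.convertToFileResourceIfPossible("/tmp/out/results");
        // Non-directory prefixes contribute their filename; the shard name
        // is then resolved against the prefix's directory.
        String filenamePrefix = prefix.isDirectory() ? "" : prefix.getFilename();
        ResourceId shardFile =
            prefix
                .getCurrentDirectory()
                .resolve(
                    filenamePrefix + "-w0-0-of-1-pane-0.avro",
                    StandardResolveOptions.RESOLVE_FILE);
        System.out.println(shardFile);  // e.g. /tmp/out/results-w0-0-of-1-pane-0.avro
      }
    }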

http://git-wip-us.apache.org/repos/asf/beam/blob/4c336e84/sdks/java/core/src/test/java/org/apache/beam/sdk/io/DefaultFilenamePolicyTest.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/io/DefaultFilenamePolicyTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/io/DefaultFilenamePolicyTest.java
index 217420c..9dc6d33 100644
--- a/sdks/java/core/src/test/java/org/apache/beam/sdk/io/DefaultFilenamePolicyTest.java
+++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/io/DefaultFilenamePolicyTest.java
@@ -17,9 +17,9 @@
  */
 package org.apache.beam.sdk.io;
 
-import static org.apache.beam.sdk.io.DefaultFilenamePolicy.constructName;
 import static org.junit.Assert.assertEquals;
 
+import org.apache.beam.sdk.io.fs.ResourceId;
 import org.junit.Test;
 import org.junit.runner.RunWith;
 import org.junit.runners.JUnit4;
@@ -30,69 +30,108 @@ import org.junit.runners.JUnit4;
 @RunWith(JUnit4.class)
 public class DefaultFilenamePolicyTest {
 
+  private static String constructName(
+      String baseFilename,
+      String shardTemplate,
+      String suffix,
+      int shardNum,
+      int numShards,
+      String paneStr,
+      String windowStr) {
+    ResourceId constructed =
+        DefaultFilenamePolicy.constructName(
+            FileSystems.matchNewResource(baseFilename, false),
+            shardTemplate,
+            suffix,
+            shardNum,
+            numShards,
+            paneStr,
+            windowStr);
+    return constructed.toString();
+  }
+
   @Test
   public void testConstructName() {
-    assertEquals("output-001-of-123.txt",
-        constructName("output", "-SSS-of-NNN", ".txt", 1, 123, null, null));
+    assertEquals(
+        "/path/to/output-001-of-123.txt",
+        constructName("/path/to/output", "-SSS-of-NNN", ".txt", 1, 123, null, null));
 
-    assertEquals("out.txt/part-00042",
-        constructName("out.txt", "/part-SSSSS", "", 42, 100, null, null));
+    assertEquals(
+        "/path/to/out.txt/part-00042",
+        constructName("/path/to/out.txt", "/part-SSSSS", "", 42, 100, null, null));
 
-    assertEquals("out.txt",
-        constructName("ou", "t.t", "xt", 1, 1, null, null));
+    assertEquals("/path/to/out.txt", constructName("/path/to/ou", "t.t", "xt", 1, 1, null, null));
 
-    assertEquals("out0102shard.txt",
-        constructName("out", "SSNNshard", ".txt", 1, 2, null, null));
+    assertEquals(
+        "/path/to/out0102shard.txt",
+        constructName("/path/to/out", "SSNNshard", ".txt", 1, 2, null, null));
 
-    assertEquals("out-2/1.part-1-of-2.txt",
-        constructName("out", "-N/S.part-S-of-N", ".txt", 1, 2, null, null));
+    assertEquals(
+        "/path/to/out-2/1.part-1-of-2.txt",
+        constructName("/path/to/out", "-N/S.part-S-of-N", ".txt", 1, 2, null, null));
   }
 
   @Test
   public void testConstructNameWithLargeShardCount() {
-    assertEquals("out-100-of-5000.txt",
-        constructName("out", "-SS-of-NN", ".txt", 100, 5000, null, null));
+    assertEquals(
+        "/out-100-of-5000.txt", constructName("/out", "-SS-of-NN", ".txt", 100, 5000, null, null));
   }
 
   @Test
   public void testConstructWindowedName() {
-    assertEquals("output-001-of-123.txt",
-        constructName("output", "-SSS-of-NNN", ".txt", 1, 123, null, null));
-
-    assertEquals("output-001-of-123-PPP-W.txt",
-        constructName("output", "-SSS-of-NNN-PPP-W", ".txt", 1, 123, null, null));
-
-    assertEquals("out.txt/part-00042-myPaneStr-myWindowStr",
-        constructName("out.txt", "/part-SSSSS-P-W", "", 42, 100, "myPaneStr",
-            "myWindowStr"));
-
-    assertEquals("out.txt", constructName("ou", "t.t", "xt", 1, 1, "myPaneStr2",
-        "anotherWindowStr"));
-
-    assertEquals("out0102shard-oneMoreWindowStr-anotherPaneStr.txt",
-        constructName("out", "SSNNshard-W-P", ".txt", 1, 2, "anotherPaneStr",
-            "oneMoreWindowStr"));
-
-    assertEquals("out-2/1.part-1-of-2-slidingWindow1-myPaneStr3-windowslidingWindow1-"
-        + "panemyPaneStr3.txt",
-        constructName("out", "-N/S.part-S-of-N-W-P-windowW-paneP", ".txt", 1, 2, "myPaneStr3",
-        "slidingWindow1"));
+    assertEquals(
+        "/path/to/output-001-of-123.txt",
+        constructName("/path/to/output", "-SSS-of-NNN", ".txt", 1, 123, null, null));
+
+    assertEquals(
+        "/path/to/output-001-of-123-PPP-W.txt",
+        constructName("/path/to/output", "-SSS-of-NNN-PPP-W", ".txt", 1, 123, null, null));
+
+    assertEquals(
+        "/path/to/out" + ".txt/part-00042-myPaneStr-myWindowStr",
+        constructName(
+            "/path/to/out.txt", "/part-SSSSS-P-W", "", 42, 100, "myPaneStr", "myWindowStr"));
+
+    assertEquals(
+        "/path/to/out.txt",
+        constructName("/path/to/ou", "t.t", "xt", 1, 1, "myPaneStr2", "anotherWindowStr"));
+
+    assertEquals(
+        "/path/to/out0102shard-oneMoreWindowStr-anotherPaneStr.txt",
+        constructName(
+            "/path/to/out", "SSNNshard-W-P", ".txt", 1, 2, "anotherPaneStr", "oneMoreWindowStr"));
+
+    assertEquals(
+        "/out-2/1.part-1-of-2-slidingWindow1-myPaneStr3-windowslidingWindow1-"
+            + "panemyPaneStr3.txt",
+        constructName(
+            "/out",
+            "-N/S.part-S-of-N-W-P-windowW-paneP",
+            ".txt",
+            1,
+            2,
+            "myPaneStr3",
+            "slidingWindow1"));
 
     // test first/last pane
-    assertEquals("out.txt/part-00042-myWindowStr-pane-11-true-false",
-        constructName("out.txt", "/part-SSSSS-W-P", "", 42, 100, "pane-11-true-false",
-            "myWindowStr"));
-
-    assertEquals("out.txt", constructName("ou", "t.t", "xt", 1, 1, "pane",
-        "anotherWindowStr"));
-
-    assertEquals("out0102shard-oneMoreWindowStr-pane--1-false-false-pane--1-false-false.txt",
-        constructName("out", "SSNNshard-W-P-P", ".txt", 1, 2, "pane--1-false-false",
-            "oneMoreWindowStr"));
-
-    assertEquals("out-2/1.part-1-of-2-sWindow1-winsWindow1-ppaneL.txt",
-        constructName("out",
-        "-N/S.part-S-of-N-W-winW-pP", ".txt", 1, 2, "paneL", "sWindow1"));
+    assertEquals(
+        "/out.txt/part-00042-myWindowStr-pane-11-true-false",
+        constructName(
+            "/out.txt", "/part-SSSSS-W-P", "", 42, 100, "pane-11-true-false", "myWindowStr"));
+
+    assertEquals(
+        "/path/to/out.txt",
+        constructName("/path/to/ou", "t.t", "xt", 1, 1, "pane", "anotherWindowStr"));
+
+    assertEquals(
+        "/out0102shard-oneMoreWindowStr-pane--1-false-false-pane--1-false-false.txt",
+        constructName(
+            "/out", "SSNNshard-W-P-P", ".txt", 1, 2, "pane--1-false-false", "oneMoreWindowStr"));
+
+    assertEquals(
+        "/path/to/out-2/1.part-1-of-2-sWindow1-winsWindow1-ppaneL.txt",
+        constructName(
+            "/path/to/out", "-N/S.part-S-of-N-W-winW-pP", ".txt", 1, 2, "paneL", "sWindow1"));
   }
 
 }
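
Taken together, the assertions above pin down the shard template grammar: runs of S expand to the zero-padded shard number, runs of N to the zero-padded shard count, and W and P to the window and pane strings for windowed writes. For example, via the test's local constructName helper:

    constructName("/path/to/output", "-SSS-of-NNN", ".txt", 1, 123, null, null);
    // -> "/path/to/output-001-of-123.txt"
    constructName("/path/to/out", "SSNNshard-W-P", ".txt", 1, 2, "pane1", "win1");
    // -> "/path/to/out0102shard-win1-pane1.txt"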

http://git-wip-us.apache.org/repos/asf/beam/blob/4c336e84/sdks/java/core/src/test/java/org/apache/beam/sdk/io/DrunkWritableByteChannelFactory.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/io/DrunkWritableByteChannelFactory.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/io/DrunkWritableByteChannelFactory.java
index 6615a2e..a7644b6 100644
--- a/sdks/java/core/src/test/java/org/apache/beam/sdk/io/DrunkWritableByteChannelFactory.java
+++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/io/DrunkWritableByteChannelFactory.java
@@ -39,7 +39,7 @@ public class DrunkWritableByteChannelFactory implements WritableByteChannelFacto
   }
 
   @Override
-  public String getFilenameSuffix() {
+  public String getSuggestedFilenameSuffix() {
     return ".drunk";
   }
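
The rename tracks the new OutputFileHints wording: the suffix is a suggestion a filename policy may apply, not a guarantee. A small sketch using the built-in factories (assumption: FileBasedSink.CompressionType still implements WritableByteChannelFactory, as the test hunks below suggest by passing CompressionType.UNCOMPRESSED as hints):

    import org.apache.beam.sdk.io.FileBasedSink;
    import org.apache.beam.sdk.io.FileBasedSink.CompressionType;

    class SuffixSketch {
      public static void main(String[] args) {
        FileBasedSink.WritableByteChannelFactory gzip = CompressionType.GZIP;
        // Expected to print ".gz", the suggested suffix for gzip output.
        System.out.println(gzip.getSuggestedFilenameSuffix());
      }
    }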
 

http://git-wip-us.apache.org/repos/asf/beam/blob/4c336e84/sdks/java/core/src/test/java/org/apache/beam/sdk/io/FileBasedSinkTest.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/io/FileBasedSinkTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/io/FileBasedSinkTest.java
index caad759..755bb59 100644
--- a/sdks/java/core/src/test/java/org/apache/beam/sdk/io/FileBasedSinkTest.java
+++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/io/FileBasedSinkTest.java
@@ -103,7 +103,7 @@ public class FileBasedSinkTest {
 
     SimpleSink.SimpleWriter writer =
         buildWriteOperationWithTempDir(getBaseTempDirectory()).createWriter();
-    writer.openUnwindowed(testUid, -1);
+    writer.openUnwindowed(testUid, -1, null);
     for (String value : values) {
       writer.write(value);
     }
@@ -198,23 +198,27 @@ public class FileBasedSinkTest {
       throws Exception {
     int numFiles = temporaryFiles.size();
 
-    List<FileResult> fileResults = new ArrayList<>();
+    List<FileResult<Void>> fileResults = new ArrayList<>();
     // Create temporary output bundles and output File objects.
     for (int i = 0; i < numFiles; i++) {
       fileResults.add(
-          new FileResult(
+          new FileResult<Void>(
               LocalResources.fromFile(temporaryFiles.get(i), false),
               WriteFiles.UNKNOWN_SHARDNUM,
               null,
+              null,
               null));
     }
 
     writeOp.finalize(fileResults);
 
-    ResourceId outputDirectory = writeOp.getSink().getBaseOutputDirectoryProvider().get();
     for (int i = 0; i < numFiles; i++) {
-      ResourceId outputFilename = writeOp.getSink().getFilenamePolicy()
-          .unwindowedFilename(outputDirectory, new Context(i, numFiles), "");
+      ResourceId outputFilename =
+          writeOp
+              .getSink()
+              .getDynamicDestinations()
+              .getFilenamePolicy(null)
+              .unwindowedFilename(new Context(i, numFiles), CompressionType.UNCOMPRESSED);
       assertTrue(new File(outputFilename.toString()).exists());
       assertFalse(temporaryFiles.get(i).exists());
     }
@@ -231,11 +235,12 @@ public class FileBasedSinkTest {
   private void testRemoveTemporaryFiles(int numFiles, ResourceId tempDirectory)
       throws Exception {
     String prefix = "file";
-    SimpleSink sink =
-        new SimpleSink(getBaseOutputDirectory(), prefix, "", "");
+    SimpleSink<Void> sink =
+        SimpleSink.makeSimpleSink(
+            getBaseOutputDirectory(), prefix, "", "", CompressionType.UNCOMPRESSED);
 
-    WriteOperation<String> writeOp =
-        new SimpleSink.SimpleWriteOperation(sink, tempDirectory);
+    WriteOperation<String, Void> writeOp =
+        new SimpleSink.SimpleWriteOperation<>(sink, tempDirectory);
 
     List<File> temporaryFiles = new ArrayList<>();
     List<File> outputFiles = new ArrayList<>();
@@ -272,8 +277,6 @@ public class FileBasedSinkTest {
   @Test
   public void testCopyToOutputFiles() throws Exception {
     SimpleSink.SimpleWriteOperation writeOp = buildWriteOperation();
-    ResourceId outputDirectory = writeOp.getSink().getBaseOutputDirectoryProvider().get();
-
     List<String> inputFilenames = Arrays.asList("input-1", "input-2", "input-3");
     List<String> inputContents = Arrays.asList("1", "2", "3");
     List<String> expectedOutputFilenames = Arrays.asList(
@@ -292,9 +295,14 @@ public class FileBasedSinkTest {
       File inputTmpFile = tmpFolder.newFile(inputFilenames.get(i));
       List<String> lines = Collections.singletonList(inputContents.get(i));
       writeFile(lines, inputTmpFile);
-      inputFilePaths.put(LocalResources.fromFile(inputTmpFile, false),
-          writeOp.getSink().getFilenamePolicy()
-              .unwindowedFilename(outputDirectory, new Context(i, inputFilenames.size()), ""));
+      inputFilePaths.put(
+          LocalResources.fromFile(inputTmpFile, false),
+          writeOp
+              .getSink()
+              .getDynamicDestinations()
+              .getFilenamePolicy(null)
+              .unwindowedFilename(
+                  new Context(i, inputFilenames.size()), CompressionType.UNCOMPRESSED));
     }
 
     // Copy input files to output files.
@@ -311,7 +319,8 @@ public class FileBasedSinkTest {
       ResourceId outputDirectory, FilenamePolicy policy, int numFiles) {
     List<ResourceId> filenames = new ArrayList<>();
     for (int i = 0; i < numFiles; i++) {
-      filenames.add(policy.unwindowedFilename(outputDirectory, new Context(i, numFiles), ""));
+      filenames.add(
+          policy.unwindowedFilename(new Context(i, numFiles), CompressionType.UNCOMPRESSED));
     }
     return filenames;
   }
@@ -326,8 +335,10 @@ public class FileBasedSinkTest {
     List<ResourceId> actual;
     ResourceId root = getBaseOutputDirectory();
 
-    SimpleSink sink = new SimpleSink(root, "file", ".SSSSS.of.NNNNN", ".test");
-    FilenamePolicy policy = sink.getFilenamePolicy();
+    SimpleSink<Void> sink =
+        SimpleSink.makeSimpleSink(
+            root, "file", ".SSSSS.of.NNNNN", ".test", CompressionType.UNCOMPRESSED);
+    FilenamePolicy policy = sink.getDynamicDestinations().getFilenamePolicy(null);
 
     expected = Arrays.asList(
         root.resolve("file.00000.of.00003.test", StandardResolveOptions.RESOLVE_FILE),
@@ -352,8 +363,9 @@ public class FileBasedSinkTest {
   @Test
   public void testCollidingOutputFilenames() throws IOException {
     ResourceId root = getBaseOutputDirectory();
-    SimpleSink sink = new SimpleSink(root, "file", "-NN", "test");
-    SimpleSink.SimpleWriteOperation writeOp = new SimpleSink.SimpleWriteOperation(sink);
+    SimpleSink<Void> sink =
+        SimpleSink.makeSimpleSink(root, "file", "-NN", "test", CompressionType.UNCOMPRESSED);
+    SimpleSink.SimpleWriteOperation<Void> writeOp = new SimpleSink.SimpleWriteOperation<>(sink);
 
     ResourceId temp1 = root.resolve("temp1", StandardResolveOptions.RESOLVE_FILE);
     ResourceId temp2 = root.resolve("temp2", StandardResolveOptions.RESOLVE_FILE);
@@ -361,11 +373,11 @@ public class FileBasedSinkTest {
     ResourceId output = root.resolve("file-03.test", StandardResolveOptions.RESOLVE_FILE);
     // More than one shard does.
     try {
-      Iterable<FileResult> results =
+      Iterable<FileResult<Void>> results =
           Lists.newArrayList(
-              new FileResult(temp1, 1, null, null),
-              new FileResult(temp2, 1, null, null),
-              new FileResult(temp3, 1, null, null));
+              new FileResult<Void>(temp1, 1, null, null, null),
+              new FileResult<Void>(temp2, 1, null, null, null),
+              new FileResult<Void>(temp3, 1, null, null, null));
       writeOp.buildOutputFilenames(results);
       fail("Should have failed.");
     } catch (IllegalStateException exn) {
@@ -379,8 +391,10 @@ public class FileBasedSinkTest {
     List<ResourceId> expected;
     List<ResourceId> actual;
     ResourceId root = getBaseOutputDirectory();
-    SimpleSink sink = new SimpleSink(root, "file", "-SSSSS-of-NNNNN", "");
-    FilenamePolicy policy = sink.getFilenamePolicy();
+    SimpleSink<Void> sink =
+        SimpleSink.makeSimpleSink(
+            root, "file", "-SSSSS-of-NNNNN", "", CompressionType.UNCOMPRESSED);
+    FilenamePolicy policy = sink.getDynamicDestinations().getFilenamePolicy(null);
 
     expected = Arrays.asList(
         root.resolve("file-00000-of-00003", StandardResolveOptions.RESOLVE_FILE),
@@ -486,10 +500,11 @@ public class FileBasedSinkTest {
   public void testFileBasedWriterWithWritableByteChannelFactory() throws Exception {
     final String testUid = "testId";
     ResourceId root = getBaseOutputDirectory();
-    WriteOperation<String> writeOp =
-        new SimpleSink(root, "file", "-SS-of-NN", "txt", new DrunkWritableByteChannelFactory())
+    WriteOperation<String, Void> writeOp =
+        SimpleSink.makeSimpleSink(
+                root, "file", "-SS-of-NN", "txt", new DrunkWritableByteChannelFactory())
             .createWriteOperation();
-    final Writer<String> writer = writeOp.createWriter();
+    final Writer<String, Void> writer = writeOp.createWriter();
     final ResourceId expectedFile =
         writeOp.tempDirectory.get().resolve(testUid, StandardResolveOptions.RESOLVE_FILE);
 
@@ -503,7 +518,7 @@ public class FileBasedSinkTest {
     expected.add("footer");
     expected.add("footer");
 
-    writer.openUnwindowed(testUid, -1);
+    writer.openUnwindowed(testUid, -1, null);
     writer.write("a");
     writer.write("b");
     final FileResult result = writer.close();
@@ -513,20 +528,20 @@ public class FileBasedSinkTest {
   }
 
   /** Build a SimpleSink with default options. */
-  private SimpleSink buildSink() {
-    return new SimpleSink(getBaseOutputDirectory(), "file", "-SS-of-NN", ".test");
+  private SimpleSink<Void> buildSink() {
+    return SimpleSink.makeSimpleSink(
+        getBaseOutputDirectory(), "file", "-SS-of-NN", ".test", CompressionType.UNCOMPRESSED);
   }
 
-  /**
-   * Build a SimpleWriteOperation with default options and the given temporary directory.
-   */
-  private SimpleSink.SimpleWriteOperation buildWriteOperationWithTempDir(ResourceId tempDirectory) {
-    SimpleSink sink = buildSink();
-    return new SimpleSink.SimpleWriteOperation(sink, tempDirectory);
+  /** Build a SimpleWriteOperation with default options and the given temporary directory. */
+  private SimpleSink.SimpleWriteOperation<Void> buildWriteOperationWithTempDir(
+      ResourceId tempDirectory) {
+    SimpleSink<Void> sink = buildSink();
+    return new SimpleSink.SimpleWriteOperation<>(sink, tempDirectory);
   }
 
   /** Build a write operation with the default options for it and its parent sink. */
-  private SimpleSink.SimpleWriteOperation buildWriteOperation() {
+  private SimpleSink.SimpleWriteOperation<Void> buildWriteOperation() {
     return buildSink().createWriteOperation();
   }
 }

http://git-wip-us.apache.org/repos/asf/beam/blob/4c336e84/sdks/java/core/src/test/java/org/apache/beam/sdk/io/SimpleSink.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/io/SimpleSink.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/io/SimpleSink.java
index bdf37f6..9196178 100644
--- a/sdks/java/core/src/test/java/org/apache/beam/sdk/io/SimpleSink.java
+++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/io/SimpleSink.java
@@ -19,37 +19,55 @@ package org.apache.beam.sdk.io;
 
 import java.nio.ByteBuffer;
 import java.nio.channels.WritableByteChannel;
+import org.apache.beam.sdk.io.DefaultFilenamePolicy.Params;
+import org.apache.beam.sdk.io.fs.ResolveOptions.StandardResolveOptions;
 import org.apache.beam.sdk.io.fs.ResourceId;
 import org.apache.beam.sdk.options.ValueProvider.StaticValueProvider;
 import org.apache.beam.sdk.util.MimeTypes;
 
 /**
- * A simple {@link FileBasedSink} that writes {@link String} values as lines with
- * header and footer.
+ * A simple {@link FileBasedSink} that writes {@link String} values as lines with header and footer.
  */
-class SimpleSink extends FileBasedSink<String> {
-  public SimpleSink(ResourceId baseOutputDirectory, String prefix, String template, String suffix) {
-    this(baseOutputDirectory, prefix, template, suffix, CompressionType.UNCOMPRESSED);
+class SimpleSink<DestinationT> extends FileBasedSink<String, DestinationT> {
+  public SimpleSink(
+      ResourceId tempDirectory,
+      DynamicDestinations<String, DestinationT> dynamicDestinations,
+      WritableByteChannelFactory writableByteChannelFactory) {
+    super(StaticValueProvider.of(tempDirectory), dynamicDestinations, writableByteChannelFactory);
   }
 
-  public SimpleSink(ResourceId baseOutputDirectory, String prefix, String template, String suffix,
-                    WritableByteChannelFactory writableByteChannelFactory) {
-    super(
-        StaticValueProvider.of(baseOutputDirectory),
-        new DefaultFilenamePolicy(StaticValueProvider.of(prefix), template, suffix),
-        writableByteChannelFactory);
+  public static SimpleSink<Void> makeSimpleSink(
+      ResourceId tempDirectory, FilenamePolicy filenamePolicy) {
+    return new SimpleSink<>(
+        tempDirectory,
+        DynamicFileDestinations.<String>constant(filenamePolicy),
+        CompressionType.UNCOMPRESSED);
   }
 
-  public SimpleSink(ResourceId baseOutputDirectory, FilenamePolicy filenamePolicy) {
-    super(StaticValueProvider.of(baseOutputDirectory), filenamePolicy);
+  public static SimpleSink<Void> makeSimpleSink(
+      ResourceId baseDirectory,
+      String prefix,
+      String shardTemplate,
+      String suffix,
+      WritableByteChannelFactory writableByteChannelFactory) {
+    DynamicDestinations<String, Void> dynamicDestinations =
+        DynamicFileDestinations.constant(
+            DefaultFilenamePolicy.fromParams(
+                new Params()
+                    .withBaseFilename(
+                        baseDirectory.resolve(prefix, StandardResolveOptions.RESOLVE_FILE))
+                    .withShardTemplate(shardTemplate)
+                    .withSuffix(suffix)));
+    return new SimpleSink<>(baseDirectory, dynamicDestinations, writableByteChannelFactory);
   }
 
   @Override
-  public SimpleWriteOperation createWriteOperation() {
-    return new SimpleWriteOperation(this);
+  public SimpleWriteOperation<DestinationT> createWriteOperation() {
+    return new SimpleWriteOperation<>(this);
   }
 
-  static final class SimpleWriteOperation extends WriteOperation<String> {
+  static final class SimpleWriteOperation<DestinationT>
+      extends WriteOperation<String, DestinationT> {
     public SimpleWriteOperation(SimpleSink sink, ResourceId tempOutputDirectory) {
       super(sink, tempOutputDirectory);
     }
@@ -59,12 +77,12 @@ class SimpleSink extends FileBasedSink<String> {
     }
 
     @Override
-    public SimpleWriter createWriter() throws Exception {
-      return new SimpleWriter(this);
+    public SimpleWriter<DestinationT> createWriter() throws Exception {
+      return new SimpleWriter<>(this);
     }
   }
 
-  static final class SimpleWriter extends Writer<String> {
+  static final class SimpleWriter<DestinationT> extends Writer<String, DestinationT> {
     static final String HEADER = "header";
     static final String FOOTER = "footer";
 

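Taken together, the two diffs above replace SimpleSink's constructors with static makeSimpleSink() factories and route all filename lookups through DynamicDestinations. A minimal sketch of the reworked call pattern, assuming the imports and helpers from FileBasedSinkTest above (the /tmp path and variable names are illustrative only):

    // Build a sink for a single (Void) destination; CompressionType doubles
    // as the WritableByteChannelFactory, as in the tests above.
    ResourceId baseDir =
        LocalResources.fromFile(new File("/tmp/simple-sink-out"), true /* directory */);
    SimpleSink<Void> sink =
        SimpleSink.makeSimpleSink(
            baseDir, "file", "-SS-of-NN", ".test", CompressionType.UNCOMPRESSED);

    // The filename policy is now obtained per destination, not from the sink itself.
    FilenamePolicy policy = sink.getDynamicDestinations().getFilenamePolicy(null);
    ResourceId shard0 =
        policy.unwindowedFilename(new Context(0, 3), CompressionType.UNCOMPRESSED);

    // FileResult gained a destination type parameter and a fifth constructor argument.
    FileResult<Void> result =
        new FileResult<Void>(shard0, WriteFiles.UNKNOWN_SHARDNUM, null, null, null);

Note that the FileResult here wraps the final shard name purely for illustration; in the tests it wraps a temporary file that finalize() later copies to its destination.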

[21/50] [abbrv] beam git commit: Set the type of batch jobs to FNAPI_BATCH when beam_fn_api experiment is specified.

Posted by ta...@apache.org.
Set the type of batch jobs to FNAPI_BATCH when beam_fn_api experiment is specified.


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/b1313ffe
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/b1313ffe
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/b1313ffe

Branch: refs/heads/DSL_SQL
Commit: b1313ffef5bf8a2dd17ee20b6dd77f62d4174659
Parents: 78a39bd
Author: Valentyn Tymofieiev <va...@google.com>
Authored: Fri Jul 7 15:14:56 2017 -0700
Committer: Tyler Akidau <ta...@apache.org>
Committed: Wed Jul 12 20:01:00 2017 -0700

----------------------------------------------------------------------
 .../runners/dataflow/dataflow_runner.py         | 16 ++---------
 .../runners/dataflow/internal/apiclient.py      | 29 ++++++++++++++++++--
 .../runners/dataflow/internal/apiclient_test.py |  5 +---
 3 files changed, 29 insertions(+), 21 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/b1313ffe/sdks/python/apache_beam/runners/dataflow/dataflow_runner.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/runners/dataflow/dataflow_runner.py b/sdks/python/apache_beam/runners/dataflow/dataflow_runner.py
index 57bcc5e..059e139 100644
--- a/sdks/python/apache_beam/runners/dataflow/dataflow_runner.py
+++ b/sdks/python/apache_beam/runners/dataflow/dataflow_runner.py
@@ -46,8 +46,8 @@ from apache_beam.runners.runner import PipelineRunner
 from apache_beam.runners.runner import PipelineState
 from apache_beam.transforms.display import DisplayData
 from apache_beam.typehints import typehints
-from apache_beam.options.pipeline_options import StandardOptions
 from apache_beam.options.pipeline_options import SetupOptions
+from apache_beam.options.pipeline_options import StandardOptions
 from apache_beam.options.pipeline_options import TestOptions
 from apache_beam.utils.plugin import BeamPlugin
 
@@ -65,12 +65,6 @@ class DataflowRunner(PipelineRunner):
   if blocking is set to False.
   """
 
-  # Environment version information. It is passed to the service during a
-  # a job submission and is used by the service to establish what features
-  # are expected by the workers.
-  BATCH_ENVIRONMENT_MAJOR_VERSION = '6'
-  STREAMING_ENVIRONMENT_MAJOR_VERSION = '1'
-
   # A list of PTransformOverride objects to be applied before running a pipeline
   # using DataflowRunner.
   # Currently this only works for overrides where the input and output types do
@@ -268,15 +262,9 @@ class DataflowRunner(PipelineRunner):
     if test_options.dry_run:
       return None
 
-    standard_options = pipeline._options.view_as(StandardOptions)
-    if standard_options.streaming:
-      job_version = DataflowRunner.STREAMING_ENVIRONMENT_MAJOR_VERSION
-    else:
-      job_version = DataflowRunner.BATCH_ENVIRONMENT_MAJOR_VERSION
-
     # Get a Dataflow API client and set its options
     self.dataflow_client = apiclient.DataflowApplicationClient(
-        pipeline._options, job_version)
+        pipeline._options)
 
     # Create the job
     result = DataflowPipelineResult(

http://git-wip-us.apache.org/repos/asf/beam/blob/b1313ffe/sdks/python/apache_beam/runners/dataflow/internal/apiclient.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/runners/dataflow/internal/apiclient.py b/sdks/python/apache_beam/runners/dataflow/internal/apiclient.py
index edac9d7..33dfe19 100644
--- a/sdks/python/apache_beam/runners/dataflow/internal/apiclient.py
+++ b/sdks/python/apache_beam/runners/dataflow/internal/apiclient.py
@@ -49,6 +49,13 @@ from apache_beam.options.pipeline_options import StandardOptions
 from apache_beam.options.pipeline_options import WorkerOptions
 
 
+# Environment version information. It is passed to the service during
+# a job submission and is used by the service to establish what features
+# are expected by the workers.
+_LEGACY_ENVIRONMENT_MAJOR_VERSION = '6'
+_FNAPI_ENVIRONMENT_MAJOR_VERSION = '1'
+
+
 class Step(object):
   """Wrapper for a dataflow Step protobuf."""
 
@@ -146,7 +153,10 @@ class Environment(object):
     if self.standard_options.streaming:
       job_type = 'FNAPI_STREAMING'
     else:
-      job_type = 'PYTHON_BATCH'
+      if _use_fnapi(options):
+        job_type = 'FNAPI_BATCH'
+      else:
+        job_type = 'PYTHON_BATCH'
     self.proto.version.additionalProperties.extend([
         dataflow.Environment.VersionValue.AdditionalProperty(
             key='job_type',
@@ -360,11 +370,16 @@ class Job(object):
 class DataflowApplicationClient(object):
   """A Dataflow API client used by application code to create and query jobs."""
 
-  def __init__(self, options, environment_version):
+  def __init__(self, options):
     """Initializes a Dataflow API client object."""
     self.standard_options = options.view_as(StandardOptions)
     self.google_cloud_options = options.view_as(GoogleCloudOptions)
-    self.environment_version = environment_version
+
+    if _use_fnapi(options):
+      self.environment_version = _FNAPI_ENVIRONMENT_MAJOR_VERSION
+    else:
+      self.environment_version = _LEGACY_ENVIRONMENT_MAJOR_VERSION
+
     if self.google_cloud_options.no_auth:
       credentials = None
     else:
@@ -706,6 +721,14 @@ def translate_mean(accumulator, metric_update):
     metric_update.kind = None
 
 
+def _use_fnapi(pipeline_options):
+  standard_options = pipeline_options.view_as(StandardOptions)
+  debug_options = pipeline_options.view_as(DebugOptions)
+
+  return standard_options.streaming or (
+      debug_options.experiments and 'beam_fn_api' in debug_options.experiments)
+
+
 # To enable a counter on the service, add it to this dictionary.
 metric_translations = {
     cy_combiners.CountCombineFn: ('sum', translate_scalar),

http://git-wip-us.apache.org/repos/asf/beam/blob/b1313ffe/sdks/python/apache_beam/runners/dataflow/internal/apiclient_test.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/runners/dataflow/internal/apiclient_test.py b/sdks/python/apache_beam/runners/dataflow/internal/apiclient_test.py
index 55211f7..407ffcf 100644
--- a/sdks/python/apache_beam/runners/dataflow/internal/apiclient_test.py
+++ b/sdks/python/apache_beam/runners/dataflow/internal/apiclient_test.py
@@ -22,7 +22,6 @@ from mock import Mock
 from apache_beam.metrics.cells import DistributionData
 from apache_beam.options.pipeline_options import PipelineOptions
 
-from apache_beam.runners.dataflow.dataflow_runner import DataflowRunner
 from apache_beam.runners.dataflow.internal.clients import dataflow
 
 # Protect against environments where apitools library is not available.
@@ -40,9 +39,7 @@ class UtilTest(unittest.TestCase):
   @unittest.skip("Enable once BEAM-1080 is fixed.")
   def test_create_application_client(self):
     pipeline_options = PipelineOptions()
-    apiclient.DataflowApplicationClient(
-        pipeline_options,
-        DataflowRunner.BATCH_ENVIRONMENT_MAJOR_VERSION)
+    apiclient.DataflowApplicationClient(pipeline_options)
 
   def test_set_network(self):
     pipeline_options = PipelineOptions(

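The net effect of this commit is that the environment choice becomes a pure function of the pipeline options. A short sketch of how the new _use_fnapi() helper drives the decision (option values are illustrative):

    from apache_beam.options.pipeline_options import PipelineOptions

    # Batch job with the experiment: _use_fnapi() returns True, so
    # Environment emits job_type='FNAPI_BATCH' and DataflowApplicationClient
    # selects _FNAPI_ENVIRONMENT_MAJOR_VERSION ('1').
    fnapi_options = PipelineOptions(['--experiments=beam_fn_api'])

    # Batch job without the experiment (and not streaming): the job stays
    # 'PYTHON_BATCH' on _LEGACY_ENVIRONMENT_MAJOR_VERSION ('6').
    legacy_options = PipelineOptions([])

Streaming jobs take the FN API path unconditionally, as before; only the batch branch is new.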

[19/50] [abbrv] beam git commit: [BEAM-1347] Rename DoFnRunnerFactory to FnApiDoFnRunner.

Posted by ta...@apache.org.
[BEAM-1347] Rename DoFnRunnerFactory to FnApiDoFnRunner.


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/513ccdf1
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/513ccdf1
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/513ccdf1

Branch: refs/heads/DSL_SQL
Commit: 513ccdf1bf7ed96ff43a7e1476e3202dd7eea93d
Parents: f897467
Author: Luke Cwik <lc...@google.com>
Authored: Fri Jun 23 14:31:58 2017 -0700
Committer: Tyler Akidau <ta...@apache.org>
Committed: Wed Jul 12 20:01:00 2017 -0700

----------------------------------------------------------------------
 .../beam/runners/core/DoFnRunnerFactory.java    | 182 ----------------
 .../beam/runners/core/FnApiDoFnRunner.java      | 182 ++++++++++++++++
 .../runners/core/DoFnRunnerFactoryTest.java     | 209 -------------------
 .../beam/runners/core/FnApiDoFnRunnerTest.java  | 209 +++++++++++++++++++
 4 files changed, 391 insertions(+), 391 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/513ccdf1/sdks/java/harness/src/main/java/org/apache/beam/runners/core/DoFnRunnerFactory.java
----------------------------------------------------------------------
diff --git a/sdks/java/harness/src/main/java/org/apache/beam/runners/core/DoFnRunnerFactory.java b/sdks/java/harness/src/main/java/org/apache/beam/runners/core/DoFnRunnerFactory.java
deleted file mode 100644
index 3c0b6eb..0000000
--- a/sdks/java/harness/src/main/java/org/apache/beam/runners/core/DoFnRunnerFactory.java
+++ /dev/null
@@ -1,182 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.beam.runners.core;
-
-import static com.google.common.base.Preconditions.checkArgument;
-
-import com.google.auto.service.AutoService;
-import com.google.common.collect.Collections2;
-import com.google.common.collect.ImmutableMap;
-import com.google.common.collect.ImmutableMultimap;
-import com.google.common.collect.Multimap;
-import com.google.protobuf.ByteString;
-import com.google.protobuf.BytesValue;
-import com.google.protobuf.InvalidProtocolBufferException;
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.HashSet;
-import java.util.Map;
-import java.util.Objects;
-import java.util.function.Consumer;
-import java.util.function.Supplier;
-import org.apache.beam.fn.harness.data.BeamFnDataClient;
-import org.apache.beam.fn.harness.fake.FakeStepContext;
-import org.apache.beam.fn.harness.fn.ThrowingConsumer;
-import org.apache.beam.fn.harness.fn.ThrowingRunnable;
-import org.apache.beam.runners.core.DoFnRunners.OutputManager;
-import org.apache.beam.runners.dataflow.util.DoFnInfo;
-import org.apache.beam.sdk.common.runner.v1.RunnerApi;
-import org.apache.beam.sdk.options.PipelineOptions;
-import org.apache.beam.sdk.transforms.DoFn;
-import org.apache.beam.sdk.util.SerializableUtils;
-import org.apache.beam.sdk.util.WindowedValue;
-import org.apache.beam.sdk.values.TupleTag;
-import org.apache.beam.sdk.values.WindowingStrategy;
-
-/**
- * Classes associated with converting {@link RunnerApi.PTransform}s to {@link DoFnRunner}s.
- *
- * <p>TODO: Move DoFnRunners into SDK harness and merge the methods below into it removing this
- * class.
- */
-public class DoFnRunnerFactory {
-
-  private static final String URN = "urn:org.apache.beam:dofn:java:0.1";
-
-  /** A registrar which provides a factory to handle Java {@link DoFn}s. */
-  @AutoService(PTransformRunnerFactory.Registrar.class)
-  public static class Registrar implements
-      PTransformRunnerFactory.Registrar {
-
-    @Override
-    public Map<String, PTransformRunnerFactory> getPTransformRunnerFactories() {
-      return ImmutableMap.of(URN, new Factory());
-    }
-  }
-
-  /** A factory for {@link DoFnRunner}s. */
-  static class Factory<InputT, OutputT>
-      implements PTransformRunnerFactory<DoFnRunner<InputT, OutputT>> {
-
-    @Override
-    public DoFnRunner<InputT, OutputT> createRunnerForPTransform(
-        PipelineOptions pipelineOptions,
-        BeamFnDataClient beamFnDataClient,
-        String pTransformId,
-        RunnerApi.PTransform pTransform,
-        Supplier<String> processBundleInstructionId,
-        Map<String, RunnerApi.PCollection> pCollections,
-        Map<String, RunnerApi.Coder> coders,
-        Multimap<String, ThrowingConsumer<WindowedValue<?>>> pCollectionIdsToConsumers,
-        Consumer<ThrowingRunnable> addStartFunction,
-        Consumer<ThrowingRunnable> addFinishFunction) {
-
-      // For every output PCollection, create a map from output name to Consumer
-      ImmutableMap.Builder<String, Collection<ThrowingConsumer<WindowedValue<?>>>>
-          outputMapBuilder = ImmutableMap.builder();
-      for (Map.Entry<String, String> entry : pTransform.getOutputsMap().entrySet()) {
-        outputMapBuilder.put(
-            entry.getKey(),
-            pCollectionIdsToConsumers.get(entry.getValue()));
-      }
-      ImmutableMap<String, Collection<ThrowingConsumer<WindowedValue<?>>>> outputMap =
-          outputMapBuilder.build();
-
-      // Get the DoFnInfo from the serialized blob.
-      ByteString serializedFn;
-      try {
-        serializedFn = pTransform.getSpec().getParameter().unpack(BytesValue.class).getValue();
-      } catch (InvalidProtocolBufferException e) {
-        throw new IllegalArgumentException(
-            String.format("Unable to unwrap DoFn %s", pTransform.getSpec()), e);
-      }
-      DoFnInfo<?, ?> doFnInfo =
-          (DoFnInfo<?, ?>)
-              SerializableUtils.deserializeFromByteArray(serializedFn.toByteArray(), "DoFnInfo");
-
-      // Verify that the DoFnInfo tag to output map matches the output map on the PTransform.
-      checkArgument(
-          Objects.equals(
-              new HashSet<>(Collections2.transform(outputMap.keySet(), Long::parseLong)),
-              doFnInfo.getOutputMap().keySet()),
-          "Unexpected mismatch between transform output map %s and DoFnInfo output map %s.",
-          outputMap.keySet(),
-          doFnInfo.getOutputMap());
-
-      ImmutableMultimap.Builder<TupleTag<?>,
-          ThrowingConsumer<WindowedValue<OutputT>>> tagToOutput =
-          ImmutableMultimap.builder();
-      for (Map.Entry<Long, TupleTag<?>> entry : doFnInfo.getOutputMap().entrySet()) {
-        @SuppressWarnings({"unchecked", "rawtypes"})
-        Collection<ThrowingConsumer<WindowedValue<OutputT>>> consumers =
-            (Collection) outputMap.get(Long.toString(entry.getKey()));
-        tagToOutput.putAll(entry.getValue(), consumers);
-      }
-
-      @SuppressWarnings({"unchecked", "rawtypes"})
-      Map<TupleTag<?>, Collection<ThrowingConsumer<WindowedValue<?>>>> tagBasedOutputMap =
-          (Map) tagToOutput.build().asMap();
-
-      OutputManager outputManager =
-          new OutputManager() {
-            Map<TupleTag<?>, Collection<ThrowingConsumer<WindowedValue<?>>>> tupleTagToOutput =
-                tagBasedOutputMap;
-
-            @Override
-            public <T> void output(TupleTag<T> tag, WindowedValue<T> output) {
-              try {
-                Collection<ThrowingConsumer<WindowedValue<?>>> consumers =
-                    tupleTagToOutput.get(tag);
-                if (consumers == null) {
-                    /* This is a normal case, e.g., if a DoFn has output but that output is not
-                     * consumed. Drop the output. */
-                  return;
-                }
-                for (ThrowingConsumer<WindowedValue<?>> consumer : consumers) {
-                  consumer.accept(output);
-                }
-              } catch (Throwable t) {
-                throw new RuntimeException(t);
-              }
-            }
-          };
-
-      @SuppressWarnings({"unchecked", "rawtypes", "deprecation"})
-      DoFnRunner<InputT, OutputT> runner =
-          DoFnRunners.simpleRunner(
-              pipelineOptions,
-              (DoFn) doFnInfo.getDoFn(),
-              NullSideInputReader.empty(), /* TODO */
-              outputManager,
-              (TupleTag) doFnInfo.getOutputMap().get(doFnInfo.getMainOutput()),
-              new ArrayList<>(doFnInfo.getOutputMap().values()),
-              new FakeStepContext(),
-              (WindowingStrategy) doFnInfo.getWindowingStrategy());
-
-      // Register the appropriate handlers.
-      addStartFunction.accept(runner::startBundle);
-      for (String pcollectionId : pTransform.getInputsMap().values()) {
-        pCollectionIdsToConsumers.put(
-            pcollectionId,
-            (ThrowingConsumer) (ThrowingConsumer<WindowedValue<InputT>>) runner::processElement);
-      }
-      addFinishFunction.accept(runner::finishBundle);
-      return runner;
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/beam/blob/513ccdf1/sdks/java/harness/src/main/java/org/apache/beam/runners/core/FnApiDoFnRunner.java
----------------------------------------------------------------------
diff --git a/sdks/java/harness/src/main/java/org/apache/beam/runners/core/FnApiDoFnRunner.java b/sdks/java/harness/src/main/java/org/apache/beam/runners/core/FnApiDoFnRunner.java
new file mode 100644
index 0000000..adf735a
--- /dev/null
+++ b/sdks/java/harness/src/main/java/org/apache/beam/runners/core/FnApiDoFnRunner.java
@@ -0,0 +1,182 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.runners.core;
+
+import static com.google.common.base.Preconditions.checkArgument;
+
+import com.google.auto.service.AutoService;
+import com.google.common.collect.Collections2;
+import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.ImmutableMultimap;
+import com.google.common.collect.Multimap;
+import com.google.protobuf.ByteString;
+import com.google.protobuf.BytesValue;
+import com.google.protobuf.InvalidProtocolBufferException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Objects;
+import java.util.function.Consumer;
+import java.util.function.Supplier;
+import org.apache.beam.fn.harness.data.BeamFnDataClient;
+import org.apache.beam.fn.harness.fake.FakeStepContext;
+import org.apache.beam.fn.harness.fn.ThrowingConsumer;
+import org.apache.beam.fn.harness.fn.ThrowingRunnable;
+import org.apache.beam.runners.core.DoFnRunners.OutputManager;
+import org.apache.beam.runners.dataflow.util.DoFnInfo;
+import org.apache.beam.sdk.common.runner.v1.RunnerApi;
+import org.apache.beam.sdk.options.PipelineOptions;
+import org.apache.beam.sdk.transforms.DoFn;
+import org.apache.beam.sdk.util.SerializableUtils;
+import org.apache.beam.sdk.util.WindowedValue;
+import org.apache.beam.sdk.values.TupleTag;
+import org.apache.beam.sdk.values.WindowingStrategy;
+
+/**
+ * Classes associated with converting {@link RunnerApi.PTransform}s to {@link DoFnRunner}s.
+ *
+ * <p>TODO: Move DoFnRunners into SDK harness and merge the methods below into it, removing
+ * this class.
+ */
+public class FnApiDoFnRunner {
+
+  private static final String URN = "urn:org.apache.beam:dofn:java:0.1";
+
+  /** A registrar which provides a factory to handle Java {@link DoFn}s. */
+  @AutoService(PTransformRunnerFactory.Registrar.class)
+  public static class Registrar implements
+      PTransformRunnerFactory.Registrar {
+
+    @Override
+    public Map<String, PTransformRunnerFactory> getPTransformRunnerFactories() {
+      return ImmutableMap.of(URN, new Factory());
+    }
+  }
+
+  /** A factory for {@link DoFnRunner}s. */
+  static class Factory<InputT, OutputT>
+      implements PTransformRunnerFactory<DoFnRunner<InputT, OutputT>> {
+
+    @Override
+    public DoFnRunner<InputT, OutputT> createRunnerForPTransform(
+        PipelineOptions pipelineOptions,
+        BeamFnDataClient beamFnDataClient,
+        String pTransformId,
+        RunnerApi.PTransform pTransform,
+        Supplier<String> processBundleInstructionId,
+        Map<String, RunnerApi.PCollection> pCollections,
+        Map<String, RunnerApi.Coder> coders,
+        Multimap<String, ThrowingConsumer<WindowedValue<?>>> pCollectionIdsToConsumers,
+        Consumer<ThrowingRunnable> addStartFunction,
+        Consumer<ThrowingRunnable> addFinishFunction) {
+
+      // For every output PCollection, create a map from output name to Consumer
+      ImmutableMap.Builder<String, Collection<ThrowingConsumer<WindowedValue<?>>>>
+          outputMapBuilder = ImmutableMap.builder();
+      for (Map.Entry<String, String> entry : pTransform.getOutputsMap().entrySet()) {
+        outputMapBuilder.put(
+            entry.getKey(),
+            pCollectionIdsToConsumers.get(entry.getValue()));
+      }
+      ImmutableMap<String, Collection<ThrowingConsumer<WindowedValue<?>>>> outputMap =
+          outputMapBuilder.build();
+
+      // Get the DoFnInfo from the serialized blob.
+      ByteString serializedFn;
+      try {
+        serializedFn = pTransform.getSpec().getParameter().unpack(BytesValue.class).getValue();
+      } catch (InvalidProtocolBufferException e) {
+        throw new IllegalArgumentException(
+            String.format("Unable to unwrap DoFn %s", pTransform.getSpec()), e);
+      }
+      DoFnInfo<?, ?> doFnInfo =
+          (DoFnInfo<?, ?>)
+              SerializableUtils.deserializeFromByteArray(serializedFn.toByteArray(), "DoFnInfo");
+
+      // Verify that the DoFnInfo tag to output map matches the output map on the PTransform.
+      checkArgument(
+          Objects.equals(
+              new HashSet<>(Collections2.transform(outputMap.keySet(), Long::parseLong)),
+              doFnInfo.getOutputMap().keySet()),
+          "Unexpected mismatch between transform output map %s and DoFnInfo output map %s.",
+          outputMap.keySet(),
+          doFnInfo.getOutputMap());
+
+      ImmutableMultimap.Builder<TupleTag<?>,
+          ThrowingConsumer<WindowedValue<OutputT>>> tagToOutput =
+          ImmutableMultimap.builder();
+      for (Map.Entry<Long, TupleTag<?>> entry : doFnInfo.getOutputMap().entrySet()) {
+        @SuppressWarnings({"unchecked", "rawtypes"})
+        Collection<ThrowingConsumer<WindowedValue<OutputT>>> consumers =
+            (Collection) outputMap.get(Long.toString(entry.getKey()));
+        tagToOutput.putAll(entry.getValue(), consumers);
+      }
+
+      @SuppressWarnings({"unchecked", "rawtypes"})
+      Map<TupleTag<?>, Collection<ThrowingConsumer<WindowedValue<?>>>> tagBasedOutputMap =
+          (Map) tagToOutput.build().asMap();
+
+      OutputManager outputManager =
+          new OutputManager() {
+            Map<TupleTag<?>, Collection<ThrowingConsumer<WindowedValue<?>>>> tupleTagToOutput =
+                tagBasedOutputMap;
+
+            @Override
+            public <T> void output(TupleTag<T> tag, WindowedValue<T> output) {
+              try {
+                Collection<ThrowingConsumer<WindowedValue<?>>> consumers =
+                    tupleTagToOutput.get(tag);
+                if (consumers == null) {
+                    /* This is a normal case, e.g., if a DoFn has output but that output is not
+                     * consumed. Drop the output. */
+                  return;
+                }
+                for (ThrowingConsumer<WindowedValue<?>> consumer : consumers) {
+                  consumer.accept(output);
+                }
+              } catch (Throwable t) {
+                throw new RuntimeException(t);
+              }
+            }
+          };
+
+      @SuppressWarnings({"unchecked", "rawtypes", "deprecation"})
+      DoFnRunner<InputT, OutputT> runner =
+          DoFnRunners.simpleRunner(
+              pipelineOptions,
+              (DoFn) doFnInfo.getDoFn(),
+              NullSideInputReader.empty(), /* TODO */
+              outputManager,
+              (TupleTag) doFnInfo.getOutputMap().get(doFnInfo.getMainOutput()),
+              new ArrayList<>(doFnInfo.getOutputMap().values()),
+              new FakeStepContext(),
+              (WindowingStrategy) doFnInfo.getWindowingStrategy());
+
+      // Register the appropriate handlers.
+      addStartFunction.accept(runner::startBundle);
+      for (String pcollectionId : pTransform.getInputsMap().values()) {
+        pCollectionIdsToConsumers.put(
+            pcollectionId,
+            (ThrowingConsumer) (ThrowingConsumer<WindowedValue<InputT>>) runner::processElement);
+      }
+      addFinishFunction.accept(runner::finishBundle);
+      return runner;
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/beam/blob/513ccdf1/sdks/java/harness/src/test/java/org/apache/beam/runners/core/DoFnRunnerFactoryTest.java
----------------------------------------------------------------------
diff --git a/sdks/java/harness/src/test/java/org/apache/beam/runners/core/DoFnRunnerFactoryTest.java b/sdks/java/harness/src/test/java/org/apache/beam/runners/core/DoFnRunnerFactoryTest.java
deleted file mode 100644
index 62646ff..0000000
--- a/sdks/java/harness/src/test/java/org/apache/beam/runners/core/DoFnRunnerFactoryTest.java
+++ /dev/null
@@ -1,209 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.beam.runners.core;
-
-import static org.apache.beam.sdk.util.WindowedValue.timestampedValueInGlobalWindow;
-import static org.apache.beam.sdk.util.WindowedValue.valueInGlobalWindow;
-import static org.hamcrest.Matchers.contains;
-import static org.hamcrest.Matchers.containsInAnyOrder;
-import static org.junit.Assert.assertThat;
-import static org.junit.Assert.fail;
-
-import com.fasterxml.jackson.databind.ObjectMapper;
-import com.google.common.base.Suppliers;
-import com.google.common.collect.HashMultimap;
-import com.google.common.collect.ImmutableList;
-import com.google.common.collect.ImmutableMap;
-import com.google.common.collect.Iterables;
-import com.google.common.collect.Multimap;
-import com.google.protobuf.Any;
-import com.google.protobuf.ByteString;
-import com.google.protobuf.BytesValue;
-import com.google.protobuf.Message;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Map;
-import java.util.ServiceLoader;
-import org.apache.beam.fn.harness.fn.ThrowingConsumer;
-import org.apache.beam.fn.harness.fn.ThrowingRunnable;
-import org.apache.beam.runners.core.PTransformRunnerFactory.Registrar;
-import org.apache.beam.runners.dataflow.util.CloudObjects;
-import org.apache.beam.runners.dataflow.util.DoFnInfo;
-import org.apache.beam.sdk.coders.Coder;
-import org.apache.beam.sdk.coders.StringUtf8Coder;
-import org.apache.beam.sdk.common.runner.v1.RunnerApi;
-import org.apache.beam.sdk.options.PipelineOptionsFactory;
-import org.apache.beam.sdk.transforms.DoFn;
-import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
-import org.apache.beam.sdk.transforms.windowing.GlobalWindow;
-import org.apache.beam.sdk.util.SerializableUtils;
-import org.apache.beam.sdk.util.WindowedValue;
-import org.apache.beam.sdk.values.TupleTag;
-import org.apache.beam.sdk.values.WindowingStrategy;
-import org.hamcrest.collection.IsMapContaining;
-import org.junit.Test;
-import org.junit.runner.RunWith;
-import org.junit.runners.JUnit4;
-
-/** Tests for {@link DoFnRunnerFactory}. */
-@RunWith(JUnit4.class)
-public class DoFnRunnerFactoryTest {
-
-  private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
-  private static final Coder<WindowedValue<String>> STRING_CODER =
-      WindowedValue.getFullCoder(StringUtf8Coder.of(), GlobalWindow.Coder.INSTANCE);
-  private static final String STRING_CODER_SPEC_ID = "999L";
-  private static final RunnerApi.Coder STRING_CODER_SPEC;
-  private static final String URN = "urn:org.apache.beam:dofn:java:0.1";
-
-  static {
-    try {
-      STRING_CODER_SPEC = RunnerApi.Coder.newBuilder()
-          .setSpec(RunnerApi.SdkFunctionSpec.newBuilder()
-              .setSpec(RunnerApi.FunctionSpec.newBuilder()
-                  .setParameter(Any.pack(BytesValue.newBuilder().setValue(ByteString.copyFrom(
-                      OBJECT_MAPPER.writeValueAsBytes(CloudObjects.asCloudObject(STRING_CODER))))
-                      .build())))
-              .build())
-          .build();
-    } catch (IOException e) {
-      throw new ExceptionInInitializerError(e);
-    }
-  }
-
-  private static class TestDoFn extends DoFn<String, String> {
-    private static final TupleTag<String> mainOutput = new TupleTag<>("mainOutput");
-    private static final TupleTag<String> additionalOutput = new TupleTag<>("output");
-
-    private BoundedWindow window;
-
-    @ProcessElement
-    public void processElement(ProcessContext context, BoundedWindow window) {
-      context.output("MainOutput" + context.element());
-      context.output(additionalOutput, "AdditionalOutput" + context.element());
-      this.window = window;
-    }
-
-    @FinishBundle
-    public void finishBundle(FinishBundleContext context) {
-      if (window != null) {
-        context.output("FinishBundle", window.maxTimestamp(), window);
-        window = null;
-      }
-    }
-  }
-
-  /**
-   * Create a DoFn that has 3 inputs (inputATarget1, inputATarget2, inputBTarget) and 2 outputs
-   * (mainOutput, output). Validate that inputs are fed to the {@link DoFn} and that outputs
-   * are directed to the correct consumers.
-   */
-  @Test
-  public void testCreatingAndProcessingDoFn() throws Exception {
-    Map<String, Message> fnApiRegistry = ImmutableMap.of(STRING_CODER_SPEC_ID, STRING_CODER_SPEC);
-    String pTransformId = "pTransformId";
-    String mainOutputId = "101";
-    String additionalOutputId = "102";
-
-    DoFnInfo<?, ?> doFnInfo = DoFnInfo.forFn(
-        new TestDoFn(),
-        WindowingStrategy.globalDefault(),
-        ImmutableList.of(),
-        StringUtf8Coder.of(),
-        Long.parseLong(mainOutputId),
-        ImmutableMap.of(
-            Long.parseLong(mainOutputId), TestDoFn.mainOutput,
-            Long.parseLong(additionalOutputId), TestDoFn.additionalOutput));
-    RunnerApi.FunctionSpec functionSpec = RunnerApi.FunctionSpec.newBuilder()
-        .setUrn("urn:org.apache.beam:dofn:java:0.1")
-        .setParameter(Any.pack(BytesValue.newBuilder()
-            .setValue(ByteString.copyFrom(SerializableUtils.serializeToByteArray(doFnInfo)))
-            .build()))
-        .build();
-    RunnerApi.PTransform pTransform = RunnerApi.PTransform.newBuilder()
-        .setSpec(functionSpec)
-        .putInputs("inputA", "inputATarget")
-        .putInputs("inputB", "inputBTarget")
-        .putOutputs(mainOutputId, "mainOutputTarget")
-        .putOutputs(additionalOutputId, "additionalOutputTarget")
-        .build();
-
-    List<WindowedValue<String>> mainOutputValues = new ArrayList<>();
-    List<WindowedValue<String>> additionalOutputValues = new ArrayList<>();
-    Multimap<String, ThrowingConsumer<WindowedValue<?>>> consumers = HashMultimap.create();
-    consumers.put("mainOutputTarget",
-        (ThrowingConsumer) (ThrowingConsumer<WindowedValue<String>>) mainOutputValues::add);
-    consumers.put("additionalOutputTarget",
-        (ThrowingConsumer) (ThrowingConsumer<WindowedValue<String>>) additionalOutputValues::add);
-    List<ThrowingRunnable> startFunctions = new ArrayList<>();
-    List<ThrowingRunnable> finishFunctions = new ArrayList<>();
-
-    new DoFnRunnerFactory.Factory<>().createRunnerForPTransform(
-        PipelineOptionsFactory.create(),
-        null /* beamFnDataClient */,
-        pTransformId,
-        pTransform,
-        Suppliers.ofInstance("57L")::get,
-        ImmutableMap.of(),
-        ImmutableMap.of(),
-        consumers,
-        startFunctions::add,
-        finishFunctions::add);
-
-    Iterables.getOnlyElement(startFunctions).run();
-    mainOutputValues.clear();
-
-    assertThat(consumers.keySet(), containsInAnyOrder(
-        "inputATarget", "inputBTarget", "mainOutputTarget", "additionalOutputTarget"));
-
-    Iterables.getOnlyElement(consumers.get("inputATarget")).accept(valueInGlobalWindow("A1"));
-    Iterables.getOnlyElement(consumers.get("inputATarget")).accept(valueInGlobalWindow("A2"));
-    Iterables.getOnlyElement(consumers.get("inputATarget")).accept(valueInGlobalWindow("B"));
-    assertThat(mainOutputValues, contains(
-        valueInGlobalWindow("MainOutputA1"),
-        valueInGlobalWindow("MainOutputA2"),
-        valueInGlobalWindow("MainOutputB")));
-    assertThat(additionalOutputValues, contains(
-        valueInGlobalWindow("AdditionalOutputA1"),
-        valueInGlobalWindow("AdditionalOutputA2"),
-        valueInGlobalWindow("AdditionalOutputB")));
-    mainOutputValues.clear();
-    additionalOutputValues.clear();
-
-    Iterables.getOnlyElement(finishFunctions).run();
-    assertThat(
-        mainOutputValues,
-        contains(
-            timestampedValueInGlobalWindow("FinishBundle", GlobalWindow.INSTANCE.maxTimestamp())));
-    mainOutputValues.clear();
-  }
-
-  @Test
-  public void testRegistration() {
-    for (Registrar registrar :
-        ServiceLoader.load(Registrar.class)) {
-      if (registrar instanceof DoFnRunnerFactory.Registrar) {
-        assertThat(registrar.getPTransformRunnerFactories(), IsMapContaining.hasKey(URN));
-        return;
-      }
-    }
-    fail("Expected registrar not found.");
-  }
-}

http://git-wip-us.apache.org/repos/asf/beam/blob/513ccdf1/sdks/java/harness/src/test/java/org/apache/beam/runners/core/FnApiDoFnRunnerTest.java
----------------------------------------------------------------------
diff --git a/sdks/java/harness/src/test/java/org/apache/beam/runners/core/FnApiDoFnRunnerTest.java b/sdks/java/harness/src/test/java/org/apache/beam/runners/core/FnApiDoFnRunnerTest.java
new file mode 100644
index 0000000..ae5cbac
--- /dev/null
+++ b/sdks/java/harness/src/test/java/org/apache/beam/runners/core/FnApiDoFnRunnerTest.java
@@ -0,0 +1,209 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.beam.runners.core;
+
+import static org.apache.beam.sdk.util.WindowedValue.timestampedValueInGlobalWindow;
+import static org.apache.beam.sdk.util.WindowedValue.valueInGlobalWindow;
+import static org.hamcrest.Matchers.contains;
+import static org.hamcrest.Matchers.containsInAnyOrder;
+import static org.junit.Assert.assertThat;
+import static org.junit.Assert.fail;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.google.common.base.Suppliers;
+import com.google.common.collect.HashMultimap;
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.Iterables;
+import com.google.common.collect.Multimap;
+import com.google.protobuf.Any;
+import com.google.protobuf.ByteString;
+import com.google.protobuf.BytesValue;
+import com.google.protobuf.Message;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.ServiceLoader;
+import org.apache.beam.fn.harness.fn.ThrowingConsumer;
+import org.apache.beam.fn.harness.fn.ThrowingRunnable;
+import org.apache.beam.runners.core.PTransformRunnerFactory.Registrar;
+import org.apache.beam.runners.dataflow.util.CloudObjects;
+import org.apache.beam.runners.dataflow.util.DoFnInfo;
+import org.apache.beam.sdk.coders.Coder;
+import org.apache.beam.sdk.coders.StringUtf8Coder;
+import org.apache.beam.sdk.common.runner.v1.RunnerApi;
+import org.apache.beam.sdk.options.PipelineOptionsFactory;
+import org.apache.beam.sdk.transforms.DoFn;
+import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
+import org.apache.beam.sdk.transforms.windowing.GlobalWindow;
+import org.apache.beam.sdk.util.SerializableUtils;
+import org.apache.beam.sdk.util.WindowedValue;
+import org.apache.beam.sdk.values.TupleTag;
+import org.apache.beam.sdk.values.WindowingStrategy;
+import org.hamcrest.collection.IsMapContaining;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+/** Tests for {@link FnApiDoFnRunner}. */
+@RunWith(JUnit4.class)
+public class FnApiDoFnRunnerTest {
+
+  private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
+  private static final Coder<WindowedValue<String>> STRING_CODER =
+      WindowedValue.getFullCoder(StringUtf8Coder.of(), GlobalWindow.Coder.INSTANCE);
+  private static final String STRING_CODER_SPEC_ID = "999L";
+  private static final RunnerApi.Coder STRING_CODER_SPEC;
+  private static final String URN = "urn:org.apache.beam:dofn:java:0.1";
+
+  static {
+    try {
+      STRING_CODER_SPEC = RunnerApi.Coder.newBuilder()
+          .setSpec(RunnerApi.SdkFunctionSpec.newBuilder()
+              .setSpec(RunnerApi.FunctionSpec.newBuilder()
+                  .setParameter(Any.pack(BytesValue.newBuilder().setValue(ByteString.copyFrom(
+                      OBJECT_MAPPER.writeValueAsBytes(CloudObjects.asCloudObject(STRING_CODER))))
+                      .build())))
+              .build())
+          .build();
+    } catch (IOException e) {
+      throw new ExceptionInInitializerError(e);
+    }
+  }
+
+  private static class TestDoFn extends DoFn<String, String> {
+    private static final TupleTag<String> mainOutput = new TupleTag<>("mainOutput");
+    private static final TupleTag<String> additionalOutput = new TupleTag<>("output");
+
+    private BoundedWindow window;
+
+    @ProcessElement
+    public void processElement(ProcessContext context, BoundedWindow window) {
+      context.output("MainOutput" + context.element());
+      context.output(additionalOutput, "AdditionalOutput" + context.element());
+      this.window = window;
+    }
+
+    @FinishBundle
+    public void finishBundle(FinishBundleContext context) {
+      if (window != null) {
+        context.output("FinishBundle", window.maxTimestamp(), window);
+        window = null;
+      }
+    }
+  }
+
+  /**
+   * Create a DoFn that has 3 inputs (inputATarget1, inputATarget2, inputBTarget) and 2 outputs
+   * (mainOutput, output). Validate that inputs are fed to the {@link DoFn} and that outputs
+   * are directed to the correct consumers.
+   */
+  @Test
+  public void testCreatingAndProcessingDoFn() throws Exception {
+    Map<String, Message> fnApiRegistry = ImmutableMap.of(STRING_CODER_SPEC_ID, STRING_CODER_SPEC);
+    String pTransformId = "pTransformId";
+    String mainOutputId = "101";
+    String additionalOutputId = "102";
+
+    DoFnInfo<?, ?> doFnInfo = DoFnInfo.forFn(
+        new TestDoFn(),
+        WindowingStrategy.globalDefault(),
+        ImmutableList.of(),
+        StringUtf8Coder.of(),
+        Long.parseLong(mainOutputId),
+        ImmutableMap.of(
+            Long.parseLong(mainOutputId), TestDoFn.mainOutput,
+            Long.parseLong(additionalOutputId), TestDoFn.additionalOutput));
+    RunnerApi.FunctionSpec functionSpec = RunnerApi.FunctionSpec.newBuilder()
+        .setUrn("urn:org.apache.beam:dofn:java:0.1")
+        .setParameter(Any.pack(BytesValue.newBuilder()
+            .setValue(ByteString.copyFrom(SerializableUtils.serializeToByteArray(doFnInfo)))
+            .build()))
+        .build();
+    RunnerApi.PTransform pTransform = RunnerApi.PTransform.newBuilder()
+        .setSpec(functionSpec)
+        .putInputs("inputA", "inputATarget")
+        .putInputs("inputB", "inputBTarget")
+        .putOutputs(mainOutputId, "mainOutputTarget")
+        .putOutputs(additionalOutputId, "additionalOutputTarget")
+        .build();
+
+    List<WindowedValue<String>> mainOutputValues = new ArrayList<>();
+    List<WindowedValue<String>> additionalOutputValues = new ArrayList<>();
+    Multimap<String, ThrowingConsumer<WindowedValue<?>>> consumers = HashMultimap.create();
+    consumers.put("mainOutputTarget",
+        (ThrowingConsumer) (ThrowingConsumer<WindowedValue<String>>) mainOutputValues::add);
+    consumers.put("additionalOutputTarget",
+        (ThrowingConsumer) (ThrowingConsumer<WindowedValue<String>>) additionalOutputValues::add);
+    List<ThrowingRunnable> startFunctions = new ArrayList<>();
+    List<ThrowingRunnable> finishFunctions = new ArrayList<>();
+
+    new FnApiDoFnRunner.Factory<>().createRunnerForPTransform(
+        PipelineOptionsFactory.create(),
+        null /* beamFnDataClient */,
+        pTransformId,
+        pTransform,
+        Suppliers.ofInstance("57L")::get,
+        ImmutableMap.of(),
+        ImmutableMap.of(),
+        consumers,
+        startFunctions::add,
+        finishFunctions::add);
+
+    Iterables.getOnlyElement(startFunctions).run();
+    mainOutputValues.clear();
+
+    assertThat(consumers.keySet(), containsInAnyOrder(
+        "inputATarget", "inputBTarget", "mainOutputTarget", "additionalOutputTarget"));
+
+    Iterables.getOnlyElement(consumers.get("inputATarget")).accept(valueInGlobalWindow("A1"));
+    Iterables.getOnlyElement(consumers.get("inputATarget")).accept(valueInGlobalWindow("A2"));
+    Iterables.getOnlyElement(consumers.get("inputATarget")).accept(valueInGlobalWindow("B"));
+    assertThat(mainOutputValues, contains(
+        valueInGlobalWindow("MainOutputA1"),
+        valueInGlobalWindow("MainOutputA2"),
+        valueInGlobalWindow("MainOutputB")));
+    assertThat(additionalOutputValues, contains(
+        valueInGlobalWindow("AdditionalOutputA1"),
+        valueInGlobalWindow("AdditionalOutputA2"),
+        valueInGlobalWindow("AdditionalOutputB")));
+    mainOutputValues.clear();
+    additionalOutputValues.clear();
+
+    Iterables.getOnlyElement(finishFunctions).run();
+    assertThat(
+        mainOutputValues,
+        contains(
+            timestampedValueInGlobalWindow("FinishBundle", GlobalWindow.INSTANCE.maxTimestamp())));
+    mainOutputValues.clear();
+  }
+
+  @Test
+  public void testRegistration() {
+    for (Registrar registrar :
+        ServiceLoader.load(Registrar.class)) {
+      if (registrar instanceof FnApiDoFnRunner.Registrar) {
+        assertThat(registrar.getPTransformRunnerFactories(), IsMapContaining.hasKey(URN));
+        return;
+      }
+    }
+    fail("Expected registrar not found.");
+  }
+}

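The rename is content-preserving (182 lines out, 182 lines in), so the wire contract is unchanged: the factory is still published through the AutoService registrar under the same URN. A lookup sketch mirroring testRegistration above (error handling omitted; assumes the imports used in FnApiDoFnRunnerTest):

    // Resolve the DoFn runner factory by URN via the service loader.
    PTransformRunnerFactory factory = null;
    for (PTransformRunnerFactory.Registrar registrar
        : ServiceLoader.load(PTransformRunnerFactory.Registrar.class)) {
      factory = registrar.getPTransformRunnerFactories()
          .get("urn:org.apache.beam:dofn:java:0.1");
      if (factory != null) {
        break;
      }
    }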

[05/50] [abbrv] beam git commit: [maven-release-plugin] prepare branch release-2.1.0

Posted by ta...@apache.org.
[maven-release-plugin] prepare branch release-2.1.0


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/89531a89
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/89531a89
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/89531a89

Branch: refs/heads/DSL_SQL
Commit: 89531a89d4ad7d1516c7102b3fff14331b9276c1
Parents: 967c71a
Author: Jean-Baptiste Onofré <jb...@apache.org>
Authored: Wed Jul 5 16:47:29 2017 +0200
Committer: Tyler Akidau <ta...@apache.org>
Committed: Wed Jul 12 20:00:59 2017 -0700

----------------------------------------------------------------------
 pom.xml                     | 2 +-
 runners/direct-java/pom.xml | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/89531a89/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index be3fe20..a5930ca 100644
--- a/pom.xml
+++ b/pom.xml
@@ -48,7 +48,7 @@
     <connection>scm:git:https://git-wip-us.apache.org/repos/asf/beam.git</connection>
     <developerConnection>scm:git:https://git-wip-us.apache.org/repos/asf/beam.git</developerConnection>
     <url>https://git-wip-us.apache.org/repos/asf?p=beam.git;a=summary</url>
-    <tag>HEAD</tag>
+    <tag>release-2.1.0</tag>
   </scm>
 
   <issueManagement>

http://git-wip-us.apache.org/repos/asf/beam/blob/89531a89/runners/direct-java/pom.xml
----------------------------------------------------------------------
diff --git a/runners/direct-java/pom.xml b/runners/direct-java/pom.xml
index 6346575..5b5aec2 100644
--- a/runners/direct-java/pom.xml
+++ b/runners/direct-java/pom.xml
@@ -117,7 +117,7 @@
                   </relocation>
                 </relocations>
                 <transformers>
-                  <transformer implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer"/>
+                  <transformer implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer" />
                 </transformers>
               </configuration>
             </execution>
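
For context, the two hunks above are the mechanical edits the release plugin makes
when cutting a branch: the scm <tag> flips from HEAD to the branch name, and the
shade transformer element is apparently renormalized as a side effect of the pom
rewrite. A commit like this is typically produced by an invocation along the lines
of (exact flags assumed, not recorded in the commit):

    mvn release:branch -DbranchName=release-2.1.0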


[18/50] [abbrv] beam git commit: Update Dataflow container version to 20170706

Posted by ta...@apache.org.
Update Dataflow container version to 20170706


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/92eec586
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/92eec586
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/92eec586

Branch: refs/heads/DSL_SQL
Commit: 92eec586c966d9cce89539596dd750c757d92316
Parents: 699d59a
Author: Kenneth Knowles <kl...@google.com>
Authored: Thu Jul 6 11:07:38 2017 -0700
Committer: Tyler Akidau <ta...@apache.org>
Committed: Wed Jul 12 20:01:00 2017 -0700

----------------------------------------------------------------------
 runners/google-cloud-dataflow-java/pom.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/92eec586/runners/google-cloud-dataflow-java/pom.xml
----------------------------------------------------------------------
diff --git a/runners/google-cloud-dataflow-java/pom.xml b/runners/google-cloud-dataflow-java/pom.xml
index 91908cd..c8d63ac 100644
--- a/runners/google-cloud-dataflow-java/pom.xml
+++ b/runners/google-cloud-dataflow-java/pom.xml
@@ -33,7 +33,7 @@
   <packaging>jar</packaging>
 
   <properties>
-    <dataflow.container_version>beam-master-20170623</dataflow.container_version>
+    <dataflow.container_version>beam-master-20170706</dataflow.container_version>
     <dataflow.fnapi_environment_major_version>1</dataflow.fnapi_environment_major_version>
     <dataflow.legacy_environment_major_version>6</dataflow.legacy_environment_major_version>
   </properties>


[33/50] [abbrv] beam git commit: Adds DynamicDestinations support to FileBasedSink

Posted by ta...@apache.org.
http://git-wip-us.apache.org/repos/asf/beam/blob/4c336e84/sdks/java/core/src/test/java/org/apache/beam/sdk/io/TextIOTest.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/io/TextIOTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/io/TextIOTest.java
index 9468893..8797ff7 100644
--- a/sdks/java/core/src/test/java/org/apache/beam/sdk/io/TextIOTest.java
+++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/io/TextIOTest.java
@@ -42,7 +42,9 @@ import static org.junit.Assert.assertThat;
 import static org.junit.Assert.assertTrue;
 
 import com.google.common.base.Function;
+import com.google.common.base.Functions;
 import com.google.common.base.Predicate;
+import com.google.common.base.Predicates;
 import com.google.common.collect.FluentIterable;
 import com.google.common.collect.ImmutableList;
 import com.google.common.collect.Iterables;
@@ -69,22 +71,31 @@ import java.util.zip.GZIPOutputStream;
 import java.util.zip.ZipEntry;
 import java.util.zip.ZipOutputStream;
 import javax.annotation.Nullable;
+import org.apache.beam.sdk.coders.AvroCoder;
 import org.apache.beam.sdk.coders.Coder;
+import org.apache.beam.sdk.coders.DefaultCoder;
 import org.apache.beam.sdk.coders.StringUtf8Coder;
 import org.apache.beam.sdk.io.BoundedSource.BoundedReader;
+import org.apache.beam.sdk.io.DefaultFilenamePolicy.Params;
+import org.apache.beam.sdk.io.FileBasedSink.DynamicDestinations;
+import org.apache.beam.sdk.io.FileBasedSink.FilenamePolicy;
 import org.apache.beam.sdk.io.FileBasedSink.WritableByteChannelFactory;
 import org.apache.beam.sdk.io.TextIO.CompressionType;
 import org.apache.beam.sdk.io.fs.MatchResult;
 import org.apache.beam.sdk.io.fs.MatchResult.Metadata;
+import org.apache.beam.sdk.io.fs.ResolveOptions.StandardResolveOptions;
+import org.apache.beam.sdk.io.fs.ResourceId;
 import org.apache.beam.sdk.options.PipelineOptions;
 import org.apache.beam.sdk.options.PipelineOptionsFactory;
 import org.apache.beam.sdk.options.ValueProvider;
+import org.apache.beam.sdk.options.ValueProvider.StaticValueProvider;
 import org.apache.beam.sdk.testing.NeedsRunner;
 import org.apache.beam.sdk.testing.PAssert;
 import org.apache.beam.sdk.testing.SourceTestUtils;
 import org.apache.beam.sdk.testing.TestPipeline;
 import org.apache.beam.sdk.testing.ValidatesRunner;
 import org.apache.beam.sdk.transforms.Create;
+import org.apache.beam.sdk.transforms.SerializableFunction;
 import org.apache.beam.sdk.transforms.display.DisplayData;
 import org.apache.beam.sdk.transforms.display.DisplayDataEvaluator;
 import org.apache.beam.sdk.util.CoderUtils;
@@ -205,7 +216,7 @@ public class TextIOTest {
     });
   }
 
-  private <T> void runTestRead(String[] expected) throws Exception {
+  private void runTestRead(String[] expected) throws Exception {
     File tmpFile = Files.createTempFile(tempFolder, "file", "txt").toFile();
     String filename = tmpFile.getPath();
 
@@ -274,6 +285,213 @@ public class TextIOTest {
         displayData, hasItem(hasDisplayItem(hasValue(startsWith("foobar")))));
   }
 
+  static class TestDynamicDestinations extends DynamicDestinations<String, String> {
+    ResourceId baseDir;
+
+    TestDynamicDestinations(ResourceId baseDir) {
+      this.baseDir = baseDir;
+    }
+
+    @Override
+    public String getDestination(String element) {
+      // Destination is based on first character of string.
+      return element.substring(0, 1);
+    }
+
+    @Override
+    public String getDefaultDestination() {
+      return "";
+    }
+
+    @Nullable
+    @Override
+    public Coder<String> getDestinationCoder() {
+      return StringUtf8Coder.of();
+    }
+
+    @Override
+    public FilenamePolicy getFilenamePolicy(String destination) {
+      return DefaultFilenamePolicy.fromStandardParameters(
+          StaticValueProvider.of(
+              baseDir.resolve("file_" + destination + ".txt", StandardResolveOptions.RESOLVE_FILE)),
+          null,
+          null,
+          false);
+    }
+  }
+
+  class StartsWith implements Predicate<String> {
+    String prefix;
+
+    StartsWith(String prefix) {
+      this.prefix = prefix;
+    }
+
+    @Override
+    public boolean apply(@Nullable String input) {
+      return input.startsWith(prefix);
+    }
+  }
+
+  @Test
+  @Category(NeedsRunner.class)
+  public void testDynamicDestinations() throws Exception {
+    ResourceId baseDir =
+        FileSystems.matchNewResource(
+            Files.createTempDirectory(tempFolder, "testDynamicDestinations").toString(), true);
+
+    List<String> elements = Lists.newArrayList("aaaa", "aaab", "baaa", "baab", "caaa", "caab");
+    PCollection<String> input = p.apply(Create.of(elements).withCoder(StringUtf8Coder.of()));
+    input.apply(
+        TextIO.write()
+            .to(new TestDynamicDestinations(baseDir))
+            .withTempDirectory(FileSystems.matchNewResource(baseDir.toString(), true)));
+    p.run();
+
+    assertOutputFiles(
+        Iterables.toArray(Iterables.filter(elements, new StartsWith("a")), String.class),
+        null,
+        null,
+        0,
+        baseDir.resolve("file_a.txt", StandardResolveOptions.RESOLVE_FILE),
+        DefaultFilenamePolicy.DEFAULT_UNWINDOWED_SHARD_TEMPLATE);
+    assertOutputFiles(
+        Iterables.toArray(Iterables.filter(elements, new StartsWith("b")), String.class),
+        null,
+        null,
+        0,
+        baseDir.resolve("file_b.txt", StandardResolveOptions.RESOLVE_FILE),
+        DefaultFilenamePolicy.DEFAULT_UNWINDOWED_SHARD_TEMPLATE);
+    assertOutputFiles(
+        Iterables.toArray(Iterables.filter(elements, new StartsWith("c")), String.class),
+        null,
+        null,
+        0,
+        baseDir.resolve("file_c.txt", StandardResolveOptions.RESOLVE_FILE),
+        DefaultFilenamePolicy.DEFAULT_UNWINDOWED_SHARD_TEMPLATE);
+  }
+
+  @DefaultCoder(AvroCoder.class)
+  private static class UserWriteType {
+    String destination;
+    String metadata;
+
+    UserWriteType() {
+      this.destination = "";
+      this.metadata = "";
+    }
+
+    UserWriteType(String destination, String metadata) {
+      this.destination = destination;
+      this.metadata = metadata;
+    }
+
+    @Override
+    public String toString() {
+      return String.format("destination: %s metadata : %s", destination, metadata);
+    }
+  }
+
+  private static class SerializeUserWrite implements SerializableFunction<UserWriteType, String> {
+    @Override
+    public String apply(UserWriteType input) {
+      return input.toString();
+    }
+  }
+
+  private static class UserWriteDestination implements SerializableFunction<UserWriteType, Params> {
+    private ResourceId baseDir;
+
+    UserWriteDestination(ResourceId baseDir) {
+      this.baseDir = baseDir;
+    }
+
+    @Override
+    public Params apply(UserWriteType input) {
+      return new Params()
+          .withBaseFilename(
+              baseDir.resolve(
+                  "file_" + input.destination.substring(0, 1) + ".txt",
+                  StandardResolveOptions.RESOLVE_FILE));
+    }
+  }
+
+  private static class ExtractWriteDestination implements Function<UserWriteType, String> {
+    @Override
+    public String apply(@Nullable UserWriteType input) {
+      return input.destination;
+    }
+  }
+
+  @Test
+  @Category(NeedsRunner.class)
+  public void testDynamicDefaultFilenamePolicy() throws Exception {
+    ResourceId baseDir =
+        FileSystems.matchNewResource(
+            Files.createTempDirectory(tempFolder, "testDynamicDestinations").toString(), true);
+
+    List<UserWriteType> elements =
+        Lists.newArrayList(
+            new UserWriteType("aaaa", "first"),
+            new UserWriteType("aaab", "second"),
+            new UserWriteType("baaa", "third"),
+            new UserWriteType("baab", "fourth"),
+            new UserWriteType("caaa", "fifth"),
+            new UserWriteType("caab", "sixth"));
+    PCollection<UserWriteType> input = p.apply(Create.of(elements));
+    input.apply(
+        TextIO.writeCustomType(new SerializeUserWrite())
+            .to(new UserWriteDestination(baseDir), new Params())
+            .withTempDirectory(FileSystems.matchNewResource(baseDir.toString(), true)));
+    p.run();
+
+    String[] aElements =
+        Iterables.toArray(
+            Iterables.transform(
+                Iterables.filter(
+                    elements,
+                    Predicates.compose(new StartsWith("a"), new ExtractWriteDestination())),
+                Functions.toStringFunction()),
+            String.class);
+    String[] bElements =
+        Iterables.toArray(
+            Iterables.transform(
+                Iterables.filter(
+                    elements,
+                    Predicates.compose(new StartsWith("b"), new ExtractWriteDestination())),
+                Functions.toStringFunction()),
+            String.class);
+    String[] cElements =
+        Iterables.toArray(
+            Iterables.transform(
+                Iterables.filter(
+                    elements,
+                    Predicates.compose(new StartsWith("c"), new ExtractWriteDestination())),
+                Functions.toStringFunction()),
+            String.class);
+    assertOutputFiles(
+        aElements,
+        null,
+        null,
+        0,
+        baseDir.resolve("file_a.txt", StandardResolveOptions.RESOLVE_FILE),
+        DefaultFilenamePolicy.DEFAULT_UNWINDOWED_SHARD_TEMPLATE);
+    assertOutputFiles(
+        bElements,
+        null,
+        null,
+        0,
+        baseDir.resolve("file_b.txt", StandardResolveOptions.RESOLVE_FILE),
+        DefaultFilenamePolicy.DEFAULT_UNWINDOWED_SHARD_TEMPLATE);
+    assertOutputFiles(
+        cElements,
+        null,
+        null,
+        0,
+        baseDir.resolve("file_c.txt", StandardResolveOptions.RESOLVE_FILE),
+        DefaultFilenamePolicy.DEFAULT_UNWINDOWED_SHARD_TEMPLATE);
+  }
+
   private void runTestWrite(String[] elems) throws Exception {
     runTestWrite(elems, null, null, 1);
   }
@@ -291,7 +509,8 @@ public class TextIOTest {
       String[] elems, String header, String footer, int numShards) throws Exception {
     String outputName = "file.txt";
     Path baseDir = Files.createTempDirectory(tempFolder, "testwrite");
-    String baseFilename = baseDir.resolve(outputName).toString();
+    ResourceId baseFilename =
+        FileBasedSink.convertToFileResourceIfPossible(baseDir.resolve(outputName).toString());
 
     PCollection<String> input =
         p.apply(Create.of(Arrays.asList(elems)).withCoder(StringUtf8Coder.of()));
@@ -311,8 +530,14 @@ public class TextIOTest {
 
     p.run();
 
-    assertOutputFiles(elems, header, footer, numShards, baseDir, outputName,
-        firstNonNull(write.getShardTemplate(),
+    assertOutputFiles(
+        elems,
+        header,
+        footer,
+        numShards,
+        baseFilename,
+        firstNonNull(
+            write.inner.getShardTemplate(),
             DefaultFilenamePolicy.DEFAULT_UNWINDOWED_SHARD_TEMPLATE));
   }
 
@@ -321,13 +546,12 @@ public class TextIOTest {
       final String header,
       final String footer,
       int numShards,
-      Path rootLocation,
-      String outputName,
+      ResourceId outputPrefix,
       String shardNameTemplate)
       throws Exception {
     List<File> expectedFiles = new ArrayList<>();
     if (numShards == 0) {
-      String pattern = rootLocation.toAbsolutePath().resolve(outputName + "*").toString();
+      String pattern = outputPrefix.toString() + "*";
       List<MatchResult> matches = FileSystems.match(Collections.singletonList(pattern));
       for (Metadata expectedFile : Iterables.getOnlyElement(matches).metadata()) {
         expectedFiles.add(new File(expectedFile.resourceId().toString()));
@@ -336,9 +560,9 @@ public class TextIOTest {
       for (int i = 0; i < numShards; i++) {
         expectedFiles.add(
             new File(
-                rootLocation.toString(),
                 DefaultFilenamePolicy.constructName(
-                    outputName, shardNameTemplate, "", i, numShards, null, null)));
+                        outputPrefix, shardNameTemplate, "", i, numShards, null, null)
+                    .toString()));
       }
     }
 
@@ -456,14 +680,19 @@ public class TextIOTest {
   public void testWriteWithWritableByteChannelFactory() throws Exception {
     Coder<String> coder = StringUtf8Coder.of();
     String outputName = "file.txt";
-    Path baseDir = Files.createTempDirectory(tempFolder, "testwrite");
+    ResourceId baseDir =
+        FileSystems.matchNewResource(
+            Files.createTempDirectory(tempFolder, "testwrite").toString(), true);
 
     PCollection<String> input = p.apply(Create.of(Arrays.asList(LINES2_ARRAY)).withCoder(coder));
 
     final WritableByteChannelFactory writableByteChannelFactory =
         new DrunkWritableByteChannelFactory();
-    TextIO.Write write = TextIO.write().to(baseDir.resolve(outputName).toString())
-        .withoutSharding().withWritableByteChannelFactory(writableByteChannelFactory);
+    TextIO.Write write =
+        TextIO.write()
+            .to(baseDir.resolve(outputName, StandardResolveOptions.RESOLVE_FILE).toString())
+            .withoutSharding()
+            .withWritableByteChannelFactory(writableByteChannelFactory);
     DisplayData displayData = DisplayData.from(write);
     assertThat(displayData, hasDisplayItem("writableByteChannelFactory", "DRUNK"));
 
@@ -476,8 +705,15 @@ public class TextIOTest {
       drunkElems.add(elem);
       drunkElems.add(elem);
     }
-    assertOutputFiles(drunkElems.toArray(new String[0]), null, null, 1, baseDir,
-        outputName + writableByteChannelFactory.getFilenameSuffix(), write.getShardTemplate());
+    assertOutputFiles(
+        drunkElems.toArray(new String[0]),
+        null,
+        null,
+        1,
+        baseDir.resolve(
+            outputName + writableByteChannelFactory.getSuggestedFilenameSuffix(),
+            StandardResolveOptions.RESOLVE_FILE),
+        write.inner.getShardTemplate());
   }
 
   @Test
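
Taken together, the new tests cover the two entry points this commit adds for
dynamic file destinations in TextIO: testDynamicDestinations routes whole records
through a DynamicDestinations subclass, while testDynamicDefaultFilenamePolicy
routes a custom type through a destination function plus default Params. Condensed
from the tests above (baseDir is a ResourceId for the output directory, as in the
tests):

    // Route each record by its first character; one file per destination.
    lines.apply(
        TextIO.write()
            .to(new TestDynamicDestinations(baseDir))
            .withTempDirectory(FileSystems.matchNewResource(baseDir.toString(), true)));

    // Or: format a custom type yourself and derive filename Params per element.
    users.apply(
        TextIO.writeCustomType(new SerializeUserWrite())
            .to(new UserWriteDestination(baseDir), new Params())
            .withTempDirectory(FileSystems.matchNewResource(baseDir.toString(), true)));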

http://git-wip-us.apache.org/repos/asf/beam/blob/4c336e84/sdks/java/core/src/test/java/org/apache/beam/sdk/io/WriteFilesTest.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/io/WriteFilesTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/io/WriteFilesTest.java
index e6a0dcf..55f2a87 100644
--- a/sdks/java/core/src/test/java/org/apache/beam/sdk/io/WriteFilesTest.java
+++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/io/WriteFilesTest.java
@@ -17,6 +17,7 @@
  */
 package org.apache.beam.sdk.io;
 
+import static com.google.common.base.MoreObjects.firstNonNull;
 import static org.apache.beam.sdk.transforms.display.DisplayDataMatchers.hasDisplayItem;
 import static org.apache.beam.sdk.transforms.display.DisplayDataMatchers.includesDisplayDataFor;
 import static org.hamcrest.Matchers.containsInAnyOrder;
@@ -41,7 +42,11 @@ import java.util.List;
 import java.util.concurrent.ThreadLocalRandom;
 import org.apache.beam.sdk.Pipeline;
 import org.apache.beam.sdk.coders.StringUtf8Coder;
+import org.apache.beam.sdk.io.DefaultFilenamePolicy.Params;
+import org.apache.beam.sdk.io.FileBasedSink.CompressionType;
+import org.apache.beam.sdk.io.FileBasedSink.DynamicDestinations;
 import org.apache.beam.sdk.io.FileBasedSink.FilenamePolicy;
+import org.apache.beam.sdk.io.FileBasedSink.OutputFileHints;
 import org.apache.beam.sdk.io.SimpleSink.SimpleWriter;
 import org.apache.beam.sdk.io.fs.MatchResult.Metadata;
 import org.apache.beam.sdk.io.fs.ResolveOptions.StandardResolveOptions;
@@ -58,16 +63,20 @@ import org.apache.beam.sdk.transforms.GroupByKey;
 import org.apache.beam.sdk.transforms.MapElements;
 import org.apache.beam.sdk.transforms.PTransform;
 import org.apache.beam.sdk.transforms.ParDo;
+import org.apache.beam.sdk.transforms.SerializableFunction;
+import org.apache.beam.sdk.transforms.SerializableFunctions;
 import org.apache.beam.sdk.transforms.SimpleFunction;
 import org.apache.beam.sdk.transforms.Top;
 import org.apache.beam.sdk.transforms.View;
 import org.apache.beam.sdk.transforms.display.DisplayData;
+import org.apache.beam.sdk.transforms.display.DisplayData.Builder;
 import org.apache.beam.sdk.transforms.windowing.FixedWindows;
 import org.apache.beam.sdk.transforms.windowing.IntervalWindow;
 import org.apache.beam.sdk.transforms.windowing.Sessions;
 import org.apache.beam.sdk.transforms.windowing.Window;
 import org.apache.beam.sdk.values.KV;
 import org.apache.beam.sdk.values.PCollection;
+import org.apache.beam.sdk.values.PCollection.IsBounded;
 import org.apache.beam.sdk.values.PCollectionView;
 import org.joda.time.Duration;
 import org.joda.time.format.DateTimeFormatter;
@@ -164,7 +173,11 @@ public class WriteFilesTest {
   public void testWrite() throws IOException {
     List<String> inputs = Arrays.asList("Critical canary", "Apprehensive eagle",
         "Intimidating pigeon", "Pedantic gull", "Frisky finch");
-    runWrite(inputs, IDENTITY_MAP, getBaseOutputFilename(), WriteFiles.to(makeSimpleSink()));
+    runWrite(
+        inputs,
+        IDENTITY_MAP,
+        getBaseOutputFilename(),
+        WriteFiles.to(makeSimpleSink(), SerializableFunctions.<String>identity()));
   }
 
   /**
@@ -173,8 +186,11 @@ public class WriteFilesTest {
   @Test
   @Category(NeedsRunner.class)
   public void testEmptyWrite() throws IOException {
-    runWrite(Collections.<String>emptyList(), IDENTITY_MAP, getBaseOutputFilename(),
-        WriteFiles.to(makeSimpleSink()));
+    runWrite(
+        Collections.<String>emptyList(),
+        IDENTITY_MAP,
+        getBaseOutputFilename(),
+        WriteFiles.to(makeSimpleSink(), SerializableFunctions.<String>identity()));
     checkFileContents(getBaseOutputFilename(), Collections.<String>emptyList(),
         Optional.of(1));
   }
@@ -190,7 +206,7 @@ public class WriteFilesTest {
         Arrays.asList("one", "two", "three", "four", "five", "six"),
         IDENTITY_MAP,
         getBaseOutputFilename(),
-        WriteFiles.to(makeSimpleSink()).withNumShards(1));
+        WriteFiles.to(makeSimpleSink(), SerializableFunctions.<String>identity()).withNumShards(1));
   }
 
   private ResourceId getBaseOutputDirectory() {
@@ -198,9 +214,13 @@ public class WriteFilesTest {
         .resolve("output", StandardResolveOptions.RESOLVE_DIRECTORY);
 
   }
-  private SimpleSink makeSimpleSink() {
-    FilenamePolicy filenamePolicy = new PerWindowFiles("file", "simple");
-    return new SimpleSink(getBaseOutputDirectory(), filenamePolicy);
+
+  private SimpleSink<Void> makeSimpleSink() {
+    FilenamePolicy filenamePolicy =
+        new PerWindowFiles(
+            getBaseOutputDirectory().resolve("file", StandardResolveOptions.RESOLVE_FILE),
+            "simple");
+    return SimpleSink.makeSimpleSink(getBaseOutputDirectory(), filenamePolicy);
   }
 
   @Test
@@ -219,8 +239,10 @@ public class WriteFilesTest {
       timestamps.add(i + 1);
     }
 
-    SimpleSink sink = makeSimpleSink();
-    WriteFiles<String> write = WriteFiles.to(sink).withSharding(new LargestInt());
+    SimpleSink<Void> sink = makeSimpleSink();
+    WriteFiles<String, ?, String> write =
+        WriteFiles.to(sink, SerializableFunctions.<String>identity())
+            .withSharding(new LargestInt());
     p.apply(Create.timestamped(inputs, timestamps).withCoder(StringUtf8Coder.of()))
         .apply(IDENTITY_MAP)
         .apply(write);
@@ -241,7 +263,8 @@ public class WriteFilesTest {
         Arrays.asList("one", "two", "three", "four", "five", "six"),
         IDENTITY_MAP,
         getBaseOutputFilename(),
-        WriteFiles.to(makeSimpleSink()).withNumShards(20));
+        WriteFiles.to(makeSimpleSink(), SerializableFunctions.<String>identity())
+            .withNumShards(20));
   }
 
   /**
@@ -251,7 +274,11 @@ public class WriteFilesTest {
   @Category(NeedsRunner.class)
   public void testWriteWithEmptyPCollection() throws IOException {
     List<String> inputs = new ArrayList<>();
-    runWrite(inputs, IDENTITY_MAP, getBaseOutputFilename(), WriteFiles.to(makeSimpleSink()));
+    runWrite(
+        inputs,
+        IDENTITY_MAP,
+        getBaseOutputFilename(),
+        WriteFiles.to(makeSimpleSink(), SerializableFunctions.<String>identity()));
   }
 
   /**
@@ -263,8 +290,10 @@ public class WriteFilesTest {
     List<String> inputs = Arrays.asList("Critical canary", "Apprehensive eagle",
         "Intimidating pigeon", "Pedantic gull", "Frisky finch");
     runWrite(
-        inputs, new WindowAndReshuffle<>(Window.<String>into(FixedWindows.of(Duration.millis(2)))),
-        getBaseOutputFilename(), WriteFiles.to(makeSimpleSink()));
+        inputs,
+        new WindowAndReshuffle<>(Window.<String>into(FixedWindows.of(Duration.millis(2)))),
+        getBaseOutputFilename(),
+        WriteFiles.to(makeSimpleSink(), SerializableFunctions.<String>identity()));
   }
 
   /**
@@ -278,10 +307,9 @@ public class WriteFilesTest {
 
     runWrite(
         inputs,
-        new WindowAndReshuffle<>(
-            Window.<String>into(Sessions.withGapDuration(Duration.millis(1)))),
+        new WindowAndReshuffle<>(Window.<String>into(Sessions.withGapDuration(Duration.millis(1)))),
         getBaseOutputFilename(),
-        WriteFiles.to(makeSimpleSink()));
+        WriteFiles.to(makeSimpleSink(), SerializableFunctions.<String>identity()));
   }
 
   @Test
@@ -292,15 +320,19 @@ public class WriteFilesTest {
       inputs.add("mambo_number_" + i);
     }
     runWrite(
-        inputs, Window.<String>into(FixedWindows.of(Duration.millis(2))),
+        inputs,
+        Window.<String>into(FixedWindows.of(Duration.millis(2))),
         getBaseOutputFilename(),
-        WriteFiles.to(makeSimpleSink()).withMaxNumWritersPerBundle(2).withWindowedWrites());
+        WriteFiles.to(makeSimpleSink(), SerializableFunctions.<String>identity())
+            .withMaxNumWritersPerBundle(2)
+            .withWindowedWrites());
   }
 
   public void testBuildWrite() {
-    SimpleSink sink = makeSimpleSink();
-    WriteFiles<String> write = WriteFiles.to(sink).withNumShards(3);
-    assertThat((SimpleSink) write.getSink(), is(sink));
+    SimpleSink<Void> sink = makeSimpleSink();
+    WriteFiles<String, ?, String> write =
+        WriteFiles.to(sink, SerializableFunctions.<String>identity()).withNumShards(3);
+    assertThat((SimpleSink<Void>) write.getSink(), is(sink));
     PTransform<PCollection<String>, PCollectionView<Integer>> originalSharding =
         write.getSharding();
 
@@ -309,25 +341,37 @@ public class WriteFilesTest {
     assertThat(write.getNumShards().get(), equalTo(3));
     assertThat(write.getSharding(), equalTo(originalSharding));
 
-    WriteFiles<String> write2 = write.withSharding(SHARDING_TRANSFORM);
-    assertThat((SimpleSink) write2.getSink(), is(sink));
+    WriteFiles<String, ?, ?> write2 = write.withSharding(SHARDING_TRANSFORM);
+    assertThat((SimpleSink<Void>) write2.getSink(), is(sink));
     assertThat(write2.getSharding(), equalTo(SHARDING_TRANSFORM));
     // original unchanged
 
-    WriteFiles<String> writeUnsharded = write2.withRunnerDeterminedSharding();
+    WriteFiles<String, ?, ?> writeUnsharded = write2.withRunnerDeterminedSharding();
     assertThat(writeUnsharded.getSharding(), nullValue());
     assertThat(write.getSharding(), equalTo(originalSharding));
   }
 
   @Test
   public void testDisplayData() {
-    SimpleSink sink = new SimpleSink(getBaseOutputDirectory(), "file", "-SS-of-NN", "") {
-      @Override
-      public void populateDisplayData(DisplayData.Builder builder) {
-        builder.add(DisplayData.item("foo", "bar"));
-      }
-    };
-    WriteFiles<String> write = WriteFiles.to(sink);
+    DynamicDestinations<String, Void> dynamicDestinations =
+        DynamicFileDestinations.constant(
+            DefaultFilenamePolicy.fromParams(
+                new Params()
+                    .withBaseFilename(
+                        getBaseOutputDirectory()
+                            .resolve("file", StandardResolveOptions.RESOLVE_FILE))
+                    .withShardTemplate("-SS-of-NN")));
+    SimpleSink<Void> sink =
+        new SimpleSink<Void>(
+            getBaseOutputDirectory(), dynamicDestinations, CompressionType.UNCOMPRESSED) {
+          @Override
+          public void populateDisplayData(DisplayData.Builder builder) {
+            builder.add(DisplayData.item("foo", "bar"));
+          }
+        };
+    WriteFiles<String, ?, String> write =
+        WriteFiles.to(sink, SerializableFunctions.<String>identity());
+
     DisplayData displayData = DisplayData.from(write);
 
     assertThat(displayData, hasDisplayItem("sink", sink.getClass()));
@@ -335,14 +379,145 @@ public class WriteFilesTest {
   }
 
   @Test
+  @Category(NeedsRunner.class)
+  public void testUnboundedNeedsWindowed() {
+    thrown.expect(IllegalArgumentException.class);
+    thrown.expectMessage(
+        "Must use windowed writes when applying WriteFiles to an unbounded PCollection");
+
+    SimpleSink<Void> sink = makeSimpleSink();
+    p.apply(Create.of("foo"))
+        .setIsBoundedInternal(IsBounded.UNBOUNDED)
+        .apply(WriteFiles.to(sink, SerializableFunctions.<String>identity()));
+    p.run();
+  }
+
+  @Test
+  @Category(NeedsRunner.class)
+  public void testUnboundedNeedsSharding() {
+    thrown.expect(IllegalArgumentException.class);
+    thrown.expectMessage(
+        "When applying WriteFiles to an unbounded PCollection, "
+            + "must specify number of output shards explicitly");
+
+    SimpleSink<Void> sink = makeSimpleSink();
+    p.apply(Create.of("foo"))
+        .setIsBoundedInternal(IsBounded.UNBOUNDED)
+        .apply(WriteFiles.to(sink, SerializableFunctions.<String>identity()).withWindowedWrites());
+    p.run();
+  }
+
+  // Test DynamicDestinations class. Expects user values to be string-encoded integers.
+  // Stores the integer mod 5 as the destination, and uses that in the file prefix.
+  static class TestDestinations extends DynamicDestinations<String, Integer> {
+    private ResourceId baseOutputDirectory;
+
+    TestDestinations(ResourceId baseOutputDirectory) {
+      this.baseOutputDirectory = baseOutputDirectory;
+    }
+
+    @Override
+    public Integer getDestination(String element) {
+      return Integer.valueOf(element) % 5;
+    }
+
+    @Override
+    public Integer getDefaultDestination() {
+      return 0;
+    }
+
+    @Override
+    public FilenamePolicy getFilenamePolicy(Integer destination) {
+      return new PerWindowFiles(
+          baseOutputDirectory.resolve("file_" + destination, StandardResolveOptions.RESOLVE_FILE),
+          "simple");
+    }
+
+    @Override
+    public void populateDisplayData(Builder builder) {
+      super.populateDisplayData(builder);
+    }
+  }
+
+  // Test format function. Prepend a string to each record before writing.
+  static class TestDynamicFormatFunction implements SerializableFunction<String, String> {
+    @Override
+    public String apply(String input) {
+      return "record_" + input;
+    }
+  }
+
+  @Test
+  @Category(NeedsRunner.class)
+  public void testDynamicDestinationsBounded() throws Exception {
+    testDynamicDestinationsHelper(true);
+  }
+
+  @Test
+  @Category(NeedsRunner.class)
+  public void testDynamicDestinationsUnbounded() throws Exception {
+    testDynamicDestinationsHelper(false);
+  }
+
+  private void testDynamicDestinationsHelper(boolean bounded) throws IOException {
+    TestDestinations dynamicDestinations = new TestDestinations(getBaseOutputDirectory());
+    SimpleSink<Integer> sink =
+        new SimpleSink<>(
+            getBaseOutputDirectory(), dynamicDestinations, CompressionType.UNCOMPRESSED);
+
+    // Flag to validate that the pipeline options are passed to the Sink.
+    WriteOptions options = TestPipeline.testingPipelineOptions().as(WriteOptions.class);
+    options.setTestFlag("test_value");
+    Pipeline p = TestPipeline.create(options);
+
+    List<String> inputs = Lists.newArrayList("0", "1", "2", "3", "4", "5", "6", "7", "8", "9");
+    // Prepare timestamps for the elements.
+    List<Long> timestamps = new ArrayList<>();
+    for (long i = 0; i < inputs.size(); i++) {
+      timestamps.add(i + 1);
+    }
+
+    WriteFiles<String, Integer, String> writeFiles =
+        WriteFiles.to(sink, new TestDynamicFormatFunction()).withNumShards(1);
+
+    PCollection<String> input = p.apply(Create.timestamped(inputs, timestamps));
+    if (!bounded) {
+      input.setIsBoundedInternal(IsBounded.UNBOUNDED);
+      input = input.apply(Window.<String>into(FixedWindows.of(Duration.standardDays(1))));
+      input.apply(writeFiles.withWindowedWrites());
+    } else {
+      input.apply(writeFiles);
+    }
+    p.run();
+
+    for (int i = 0; i < 5; ++i) {
+      ResourceId base =
+          getBaseOutputDirectory().resolve("file_" + i, StandardResolveOptions.RESOLVE_FILE);
+      List<String> expected = Lists.newArrayList("record_" + i, "record_" + (i + 5));
+      checkFileContents(base.toString(), expected, Optional.of(1));
+    }
+  }
+
+  @Test
   public void testShardedDisplayData() {
-    SimpleSink sink = new SimpleSink(getBaseOutputDirectory(), "file", "-SS-of-NN", "") {
-      @Override
-      public void populateDisplayData(DisplayData.Builder builder) {
-        builder.add(DisplayData.item("foo", "bar"));
-      }
-    };
-    WriteFiles<String> write = WriteFiles.to(sink).withNumShards(1);
+    DynamicDestinations<String, Void> dynamicDestinations =
+        DynamicFileDestinations.constant(
+            DefaultFilenamePolicy.fromParams(
+                new Params()
+                    .withBaseFilename(
+                        getBaseOutputDirectory()
+                            .resolve("file", StandardResolveOptions.RESOLVE_FILE))
+                    .withShardTemplate("-SS-of-NN")));
+    SimpleSink<Void> sink =
+        new SimpleSink<Void>(
+            getBaseOutputDirectory(), dynamicDestinations, CompressionType.UNCOMPRESSED) {
+          @Override
+          public void populateDisplayData(DisplayData.Builder builder) {
+            builder.add(DisplayData.item("foo", "bar"));
+          }
+        };
+    WriteFiles<String, ?, String> write =
+        WriteFiles.to(sink, SerializableFunctions.<String>identity()).withNumShards(1);
     DisplayData displayData = DisplayData.from(write);
     assertThat(displayData, hasDisplayItem("sink", sink.getClass()));
     assertThat(displayData, includesDisplayDataFor("sink", sink));
@@ -351,14 +526,24 @@ public class WriteFilesTest {
 
   @Test
   public void testCustomShardStrategyDisplayData() {
-    SimpleSink sink = new SimpleSink(getBaseOutputDirectory(), "file", "-SS-of-NN", "") {
-      @Override
-      public void populateDisplayData(DisplayData.Builder builder) {
-        builder.add(DisplayData.item("foo", "bar"));
-      }
-    };
-    WriteFiles<String> write =
-        WriteFiles.to(sink)
+    DynamicDestinations<String, Void> dynamicDestinations =
+        DynamicFileDestinations.constant(
+            DefaultFilenamePolicy.fromParams(
+                new Params()
+                    .withBaseFilename(
+                        getBaseOutputDirectory()
+                            .resolve("file", StandardResolveOptions.RESOLVE_FILE))
+                    .withShardTemplate("-SS-of-NN")));
+    SimpleSink<Void> sink =
+        new SimpleSink<Void>(
+            getBaseOutputDirectory(), dynamicDestinations, CompressionType.UNCOMPRESSED) {
+          @Override
+          public void populateDisplayData(DisplayData.Builder builder) {
+            builder.add(DisplayData.item("foo", "bar"));
+          }
+        };
+    WriteFiles<String, ?, String> write =
+        WriteFiles.to(sink, SerializableFunctions.<String>identity())
             .withSharding(
                 new PTransform<PCollection<String>, PCollectionView<Integer>>() {
                   @Override
@@ -383,59 +568,77 @@ public class WriteFilesTest {
    * PCollection are written to the sink.
    */
   private void runWrite(
-      List<String> inputs, PTransform<PCollection<String>, PCollection<String>> transform,
-      String baseName, WriteFiles<String> write) throws IOException {
+      List<String> inputs,
+      PTransform<PCollection<String>, PCollection<String>> transform,
+      String baseName,
+      WriteFiles<String, ?, String> write)
+      throws IOException {
     runShardedWrite(inputs, transform, baseName, write);
   }
 
   private static class PerWindowFiles extends FilenamePolicy {
     private static final DateTimeFormatter FORMATTER = ISODateTimeFormat.hourMinuteSecondMillis();
-    private final String prefix;
+    private final ResourceId baseFilename;
     private final String suffix;
 
-    public PerWindowFiles(String prefix, String suffix) {
-      this.prefix = prefix;
+    public PerWindowFiles(ResourceId baseFilename, String suffix) {
+      this.baseFilename = baseFilename;
       this.suffix = suffix;
     }
 
     public String filenamePrefixForWindow(IntervalWindow window) {
+      String prefix =
+          baseFilename.isDirectory() ? "" : firstNonNull(baseFilename.getFilename(), "");
       return String.format("%s%s-%s",
           prefix, FORMATTER.print(window.start()), FORMATTER.print(window.end()));
     }
 
     @Override
-    public ResourceId windowedFilename(
-        ResourceId outputDirectory, WindowedContext context, String extension) {
+    public ResourceId windowedFilename(WindowedContext context, OutputFileHints outputFileHints) {
       IntervalWindow window = (IntervalWindow) context.getWindow();
-      String filename = String.format(
-          "%s-%s-of-%s%s%s",
-          filenamePrefixForWindow(window), context.getShardNumber(), context.getNumShards(),
-          extension, suffix);
-      return outputDirectory.resolve(filename, StandardResolveOptions.RESOLVE_FILE);
+      String filename =
+          String.format(
+              "%s-%s-of-%s%s%s",
+              filenamePrefixForWindow(window),
+              context.getShardNumber(),
+              context.getNumShards(),
+              outputFileHints.getSuggestedFilenameSuffix(),
+              suffix);
+      return baseFilename
+          .getCurrentDirectory()
+          .resolve(filename, StandardResolveOptions.RESOLVE_FILE);
     }
 
     @Override
-    public ResourceId unwindowedFilename(
-        ResourceId outputDirectory, Context context, String extension) {
-      String filename = String.format(
-          "%s%s-of-%s%s%s",
-          prefix, context.getShardNumber(), context.getNumShards(),
-          extension, suffix);
-      return outputDirectory.resolve(filename, StandardResolveOptions.RESOLVE_FILE);
+    public ResourceId unwindowedFilename(Context context, OutputFileHints outputFileHints) {
+      String prefix =
+          baseFilename.isDirectory() ? "" : firstNonNull(baseFilename.getFilename(), "");
+      String filename =
+          String.format(
+              "%s-%s-of-%s%s%s",
+              prefix,
+              context.getShardNumber(),
+              context.getNumShards(),
+              outputFileHints.getSuggestedFilenameSuffix(),
+              suffix);
+      return baseFilename
+          .getCurrentDirectory()
+          .resolve(filename, StandardResolveOptions.RESOLVE_FILE);
     }
   }
 
   /**
    * Performs a WriteFiles transform with the desired number of shards. Verifies the WriteFiles
    * transform calls the appropriate methods on a test sink in the correct order, as well as
-   * verifies that the elements of a PCollection are written to the sink. If numConfiguredShards
-   * is not null, also verifies that the output number of shards is correct.
+   * verifies that the elements of a PCollection are written to the sink. If numConfiguredShards is
+   * not null, also verifies that the output number of shards is correct.
    */
   private void runShardedWrite(
       List<String> inputs,
       PTransform<PCollection<String>, PCollection<String>> transform,
       String baseName,
-      WriteFiles<String> write) throws IOException {
+      WriteFiles<String, ?, String> write)
+      throws IOException {
     // Flag to validate that the pipeline options are passed to the Sink
     WriteOptions options = TestPipeline.testingPipelineOptions().as(WriteOptions.class);
     options.setTestFlag("test_value");
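
The recurring edit throughout this file is the widened WriteFiles signature:
WriteFiles.to(sink) becomes WriteFiles.to(sink, formatFunction), where the format
function converts the user type to the sink's output type before writing, and the
transform gains a destination type parameter (WriteFiles<UserT, DestinationT,
OutputT>). In the two shapes used above:

    // No conversion: user and output types are both String.
    WriteFiles<String, ?, String> write =
        WriteFiles.to(makeSimpleSink(), SerializableFunctions.<String>identity());

    // With conversion and dynamic destinations, as in testDynamicDestinationsHelper.
    WriteFiles<String, Integer, String> dynamicWrite =
        WriteFiles.to(sink, new TestDynamicFormatFunction()).withNumShards(1);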

http://git-wip-us.apache.org/repos/asf/beam/blob/4c336e84/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BatchLoads.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BatchLoads.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BatchLoads.java
index 4393a63..e46b1d3 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BatchLoads.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BatchLoads.java
@@ -32,6 +32,7 @@ import org.apache.beam.sdk.coders.Coder;
 import org.apache.beam.sdk.coders.KvCoder;
 import org.apache.beam.sdk.coders.ListCoder;
 import org.apache.beam.sdk.coders.NullableCoder;
+import org.apache.beam.sdk.coders.ShardedKeyCoder;
 import org.apache.beam.sdk.coders.StringUtf8Coder;
 import org.apache.beam.sdk.coders.VoidCoder;
 import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.CreateDisposition;
@@ -57,6 +58,7 @@ import org.apache.beam.sdk.values.PCollection;
 import org.apache.beam.sdk.values.PCollectionList;
 import org.apache.beam.sdk.values.PCollectionTuple;
 import org.apache.beam.sdk.values.PCollectionView;
+import org.apache.beam.sdk.values.ShardedKey;
 import org.apache.beam.sdk.values.TupleTag;
 import org.apache.beam.sdk.values.TupleTagList;
 import org.apache.beam.sdk.values.TypeDescriptor;

http://git-wip-us.apache.org/repos/asf/beam/blob/4c336e84/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/DynamicDestinations.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/DynamicDestinations.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/DynamicDestinations.java
index edb1e0d..c5c2462 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/DynamicDestinations.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/DynamicDestinations.java
@@ -23,8 +23,7 @@ import static com.google.common.base.Preconditions.checkArgument;
 import com.google.api.services.bigquery.model.TableSchema;
 import com.google.common.collect.Lists;
 import java.io.Serializable;
-import java.lang.reflect.ParameterizedType;
-import java.lang.reflect.Type;
+import java.lang.reflect.TypeVariable;
 import java.util.List;
 import javax.annotation.Nullable;
 import org.apache.beam.sdk.coders.CannotProvideCoderException;
@@ -32,6 +31,7 @@ import org.apache.beam.sdk.coders.Coder;
 import org.apache.beam.sdk.coders.CoderRegistry;
 import org.apache.beam.sdk.transforms.DoFn;
 import org.apache.beam.sdk.values.PCollectionView;
+import org.apache.beam.sdk.values.TypeDescriptor;
 import org.apache.beam.sdk.values.ValueInSingleWindow;
 
 /**
@@ -158,21 +158,16 @@ public abstract class DynamicDestinations<T, DestinationT> implements Serializab
     }
     // If dynamicDestinations doesn't provide a coder, try to find it in the coder registry.
     // We must first use reflection to figure out what the type parameter is.
-    for (Type superclass = getClass().getGenericSuperclass();
-        superclass != null;
-        superclass = ((Class) superclass).getGenericSuperclass()) {
-      if (superclass instanceof ParameterizedType) {
-        ParameterizedType parameterized = (ParameterizedType) superclass;
-        if (parameterized.getRawType() == DynamicDestinations.class) {
-          // DestinationT is the second parameter.
-          Type parameter = parameterized.getActualTypeArguments()[1];
-          @SuppressWarnings("unchecked")
-          Class<DestinationT> parameterClass = (Class<DestinationT>) parameter;
-          return registry.getCoder(parameterClass);
-        }
-      }
+    TypeDescriptor<?> superDescriptor =
+        TypeDescriptor.of(getClass()).getSupertype(DynamicDestinations.class);
+    if (!superDescriptor.getRawType().equals(DynamicDestinations.class)) {
+      throw new AssertionError(
+          "Couldn't find the DynamicDestinations superclass of " + this.getClass());
     }
-    throw new AssertionError(
-        "Couldn't find the DynamicDestinations superclass of " + this.getClass());
+    TypeVariable typeVariable = superDescriptor.getTypeParameter("DestinationT");
+    @SuppressWarnings("unchecked")
+    TypeDescriptor<DestinationT> descriptor =
+        (TypeDescriptor<DestinationT>) superDescriptor.resolveType(typeVariable);
+    return registry.getCoder(descriptor);
   }
 }
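
The rewritten getDestinationCoder swaps a hand-rolled getGenericSuperclass() walk
for Beam's TypeDescriptor. Besides being shorter, the TypeDescriptor form resolves
DestinationT through intermediate generic subclasses, where the old loop's (Class)
cast would seemingly have thrown a ClassCastException on a parameterized
superclass. The pattern in isolation ("subclass" and "registry" stand in for
getClass() and the CoderRegistry argument in the method above):

    TypeDescriptor<?> superDescriptor =
        TypeDescriptor.of(subclass).getSupertype(DynamicDestinations.class);
    TypeVariable typeVariable = superDescriptor.getTypeParameter("DestinationT");
    TypeDescriptor<?> destinationType = superDescriptor.resolveType(typeVariable);
    Coder<?> destinationCoder = registry.getCoder(destinationType);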

http://git-wip-us.apache.org/repos/asf/beam/blob/4c336e84/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/GenerateShardedTable.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/GenerateShardedTable.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/GenerateShardedTable.java
index 90d41a0..55672ff 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/GenerateShardedTable.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/GenerateShardedTable.java
@@ -23,6 +23,7 @@ import java.util.concurrent.ThreadLocalRandom;
 import org.apache.beam.sdk.transforms.DoFn;
 import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
 import org.apache.beam.sdk.values.KV;
+import org.apache.beam.sdk.values.ShardedKey;
 
 /**
  * Given a write to a specific table, assign that to one of the

http://git-wip-us.apache.org/repos/asf/beam/blob/4c336e84/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/ShardedKey.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/ShardedKey.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/ShardedKey.java
deleted file mode 100644
index c2b739f..0000000
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/ShardedKey.java
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.beam.sdk.io.gcp.bigquery;
-
-import java.io.Serializable;
-import java.util.Objects;
-
-/**
- * A key and a shard number.
- */
-class ShardedKey<K> implements Serializable {
-  private static final long serialVersionUID = 1L;
-  private final K key;
-  private final int shardNumber;
-
-  public static <K> ShardedKey<K> of(K key, int shardNumber) {
-    return new ShardedKey<>(key, shardNumber);
-  }
-
-  ShardedKey(K key, int shardNumber) {
-    this.key = key;
-    this.shardNumber = shardNumber;
-  }
-
-  public K getKey() {
-    return key;
-  }
-
-  public int getShardNumber() {
-    return shardNumber;
-  }
-
-  @Override
-  public String toString() {
-    return "key: " + key + " shard: " + shardNumber;
-  }
-
-  @Override
-  public boolean equals(Object o) {
-    if (!(o instanceof ShardedKey)) {
-      return false;
-    }
-    ShardedKey<K> other = (ShardedKey<K>) o;
-    return Objects.equals(key, other.key) && Objects.equals(shardNumber, other.shardNumber);
-  }
-
-  @Override
-  public int hashCode() {
-    return Objects.hash(key, shardNumber);
-  }
-}

http://git-wip-us.apache.org/repos/asf/beam/blob/4c336e84/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/ShardedKeyCoder.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/ShardedKeyCoder.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/ShardedKeyCoder.java
deleted file mode 100644
index c2b62b7..0000000
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/ShardedKeyCoder.java
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.beam.sdk.io.gcp.bigquery;
-
-import com.google.common.annotations.VisibleForTesting;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
-import java.util.Arrays;
-import java.util.List;
-import org.apache.beam.sdk.coders.Coder;
-import org.apache.beam.sdk.coders.StructuredCoder;
-import org.apache.beam.sdk.coders.VarIntCoder;
-
-
-/**
- * A {@link Coder} for {@link ShardedKey}, using a wrapped key {@link Coder}.
- */
-@VisibleForTesting
-class ShardedKeyCoder<KeyT>
-    extends StructuredCoder<ShardedKey<KeyT>> {
-  public static <KeyT> ShardedKeyCoder<KeyT> of(Coder<KeyT> keyCoder) {
-    return new ShardedKeyCoder<>(keyCoder);
-  }
-
-  private final Coder<KeyT> keyCoder;
-  private final VarIntCoder shardNumberCoder;
-
-  protected ShardedKeyCoder(Coder<KeyT> keyCoder) {
-    this.keyCoder = keyCoder;
-    this.shardNumberCoder = VarIntCoder.of();
-  }
-
-  @Override
-  public List<? extends Coder<?>> getCoderArguments() {
-    return Arrays.asList(keyCoder);
-  }
-
-  @Override
-  public void encode(ShardedKey<KeyT> key, OutputStream outStream)
-      throws IOException {
-    keyCoder.encode(key.getKey(), outStream);
-    shardNumberCoder.encode(key.getShardNumber(), outStream);
-  }
-
-  @Override
-  public ShardedKey<KeyT> decode(InputStream inStream)
-      throws IOException {
-    return new ShardedKey<>(
-        keyCoder.decode(inStream),
-        shardNumberCoder.decode(inStream));
-  }
-
-  @Override
-  public void verifyDeterministic() throws NonDeterministicException {
-    keyCoder.verifyDeterministic();
-  }
-}
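
Both deletions above are moves, not removals: as the import changes in BatchLoads
earlier and in the files below show, ShardedKey now lives in
org.apache.beam.sdk.values and ShardedKeyCoder in org.apache.beam.sdk.coders,
making the sharded-key plumbing general SDK surface instead of a BigQuery-private
detail. Assuming the relocated classes keep the factory methods visible in the
deleted sources, typical wiring looks like:

    // Pair a destination key with a shard number, and build a matching coder.
    ShardedKey<String> key = ShardedKey.of("project:dataset.table", 3);
    Coder<KV<ShardedKey<String>, String>> coder =
        KvCoder.of(ShardedKeyCoder.of(StringUtf8Coder.of()), StringUtf8Coder.of());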

http://git-wip-us.apache.org/repos/asf/beam/blob/4c336e84/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/StreamingWriteFn.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/StreamingWriteFn.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/StreamingWriteFn.java
index 63e5bc1..a210858 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/StreamingWriteFn.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/StreamingWriteFn.java
@@ -33,6 +33,7 @@ import org.apache.beam.sdk.transforms.display.DisplayData;
 import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
 import org.apache.beam.sdk.util.SystemDoFnInternal;
 import org.apache.beam.sdk.values.KV;
+import org.apache.beam.sdk.values.ShardedKey;
 import org.apache.beam.sdk.values.TupleTag;
 import org.apache.beam.sdk.values.ValueInSingleWindow;
 

http://git-wip-us.apache.org/repos/asf/beam/blob/4c336e84/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/StreamingWriteTables.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/StreamingWriteTables.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/StreamingWriteTables.java
index 18b2033..fa5b3ce 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/StreamingWriteTables.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/StreamingWriteTables.java
@@ -19,6 +19,7 @@ package org.apache.beam.sdk.io.gcp.bigquery;
 
 import com.google.api.services.bigquery.model.TableRow;
 import org.apache.beam.sdk.coders.KvCoder;
+import org.apache.beam.sdk.coders.ShardedKeyCoder;
 import org.apache.beam.sdk.coders.StringUtf8Coder;
 import org.apache.beam.sdk.transforms.PTransform;
 import org.apache.beam.sdk.transforms.ParDo;
@@ -29,6 +30,7 @@ import org.apache.beam.sdk.transforms.windowing.Window;
 import org.apache.beam.sdk.values.KV;
 import org.apache.beam.sdk.values.PCollection;
 import org.apache.beam.sdk.values.PCollectionTuple;
+import org.apache.beam.sdk.values.ShardedKey;
 import org.apache.beam.sdk.values.TupleTag;
 import org.apache.beam.sdk.values.TupleTagList;
 

http://git-wip-us.apache.org/repos/asf/beam/blob/4c336e84/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/TagWithUniqueIds.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/TagWithUniqueIds.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/TagWithUniqueIds.java
index cd88222..51b9375 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/TagWithUniqueIds.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/TagWithUniqueIds.java
@@ -26,6 +26,7 @@ import org.apache.beam.sdk.transforms.DoFn;
 import org.apache.beam.sdk.transforms.display.DisplayData;
 import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
 import org.apache.beam.sdk.values.KV;
+import org.apache.beam.sdk.values.ShardedKey;
 
 /**
  * Fn that tags each table row with a unique id and destination table. To avoid calling

http://git-wip-us.apache.org/repos/asf/beam/blob/4c336e84/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/WriteBundlesToFiles.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/WriteBundlesToFiles.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/WriteBundlesToFiles.java
index d68779a..e1ed746 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/WriteBundlesToFiles.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/WriteBundlesToFiles.java
@@ -19,6 +19,7 @@
 package org.apache.beam.sdk.io.gcp.bigquery;
 
 import static com.google.common.base.Preconditions.checkNotNull;
+
 import com.google.api.services.bigquery.model.TableRow;
 import com.google.common.collect.Lists;
 import com.google.common.collect.Maps;
@@ -40,6 +41,7 @@ import org.apache.beam.sdk.transforms.DoFn;
 import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
 import org.apache.beam.sdk.values.KV;
 import org.apache.beam.sdk.values.PCollectionView;
+import org.apache.beam.sdk.values.ShardedKey;
 import org.apache.beam.sdk.values.TupleTag;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;

http://git-wip-us.apache.org/repos/asf/beam/blob/4c336e84/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/WriteGroupedRecordsToFiles.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/WriteGroupedRecordsToFiles.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/WriteGroupedRecordsToFiles.java
index 45dc2a8..887cb93 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/WriteGroupedRecordsToFiles.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/WriteGroupedRecordsToFiles.java
@@ -22,6 +22,7 @@ import com.google.api.services.bigquery.model.TableRow;
 import org.apache.beam.sdk.transforms.DoFn;
 import org.apache.beam.sdk.values.KV;
 import org.apache.beam.sdk.values.PCollectionView;
+import org.apache.beam.sdk.values.ShardedKey;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 

http://git-wip-us.apache.org/repos/asf/beam/blob/4c336e84/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/WritePartition.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/WritePartition.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/WritePartition.java
index acd1132..451d1bd 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/WritePartition.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/WritePartition.java
@@ -26,6 +26,7 @@ import org.apache.beam.sdk.io.gcp.bigquery.WriteBundlesToFiles.Result;
 import org.apache.beam.sdk.transforms.DoFn;
 import org.apache.beam.sdk.values.KV;
 import org.apache.beam.sdk.values.PCollectionView;
+import org.apache.beam.sdk.values.ShardedKey;
 import org.apache.beam.sdk.values.TupleTag;
 
 /**

http://git-wip-us.apache.org/repos/asf/beam/blob/4c336e84/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/WriteTables.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/WriteTables.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/WriteTables.java
index c5494d8..9ed2916 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/WriteTables.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/WriteTables.java
@@ -42,6 +42,7 @@ import org.apache.beam.sdk.io.gcp.bigquery.BigQueryServices.JobService;
 import org.apache.beam.sdk.transforms.DoFn;
 import org.apache.beam.sdk.values.KV;
 import org.apache.beam.sdk.values.PCollectionView;
+import org.apache.beam.sdk.values.ShardedKey;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 

http://git-wip-us.apache.org/repos/asf/beam/blob/4c336e84/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOTest.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOTest.java b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOTest.java
index bfd260a..d31f3a0 100644
--- a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOTest.java
+++ b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOTest.java
@@ -82,6 +82,7 @@ import org.apache.beam.sdk.coders.Coder.Context;
 import org.apache.beam.sdk.coders.CoderException;
 import org.apache.beam.sdk.coders.IterableCoder;
 import org.apache.beam.sdk.coders.KvCoder;
+import org.apache.beam.sdk.coders.ShardedKeyCoder;
 import org.apache.beam.sdk.coders.StringUtf8Coder;
 import org.apache.beam.sdk.coders.VarIntCoder;
 import org.apache.beam.sdk.io.BoundedSource;
@@ -131,6 +132,7 @@ import org.apache.beam.sdk.values.PCollection;
 import org.apache.beam.sdk.values.PCollection.IsBounded;
 import org.apache.beam.sdk.values.PCollectionView;
 import org.apache.beam.sdk.values.PCollectionViews;
+import org.apache.beam.sdk.values.ShardedKey;
 import org.apache.beam.sdk.values.TupleTag;
 import org.apache.beam.sdk.values.TypeDescriptor;
 import org.apache.beam.sdk.values.ValueInSingleWindow;

http://git-wip-us.apache.org/repos/asf/beam/blob/4c336e84/sdks/java/io/xml/src/main/java/org/apache/beam/sdk/io/xml/XmlIO.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/xml/src/main/java/org/apache/beam/sdk/io/xml/XmlIO.java b/sdks/java/io/xml/src/main/java/org/apache/beam/sdk/io/xml/XmlIO.java
index 7255a94..442fba5 100644
--- a/sdks/java/io/xml/src/main/java/org/apache/beam/sdk/io/xml/XmlIO.java
+++ b/sdks/java/io/xml/src/main/java/org/apache/beam/sdk/io/xml/XmlIO.java
@@ -36,6 +36,7 @@ import org.apache.beam.sdk.options.PipelineOptions;
 import org.apache.beam.sdk.options.ValueProvider;
 import org.apache.beam.sdk.options.ValueProvider.StaticValueProvider;
 import org.apache.beam.sdk.transforms.PTransform;
+import org.apache.beam.sdk.transforms.SerializableFunctions;
 import org.apache.beam.sdk.transforms.display.DisplayData;
 import org.apache.beam.sdk.values.PBegin;
 import org.apache.beam.sdk.values.PCollection;
@@ -521,7 +522,8 @@ public class XmlIO {
 
     @Override
     public PDone expand(PCollection<T> input) {
-      return input.apply(org.apache.beam.sdk.io.WriteFiles.to(createSink()));
+      return input.apply(
+          org.apache.beam.sdk.io.WriteFiles.to(createSink(), SerializableFunctions.<T>identity()));
     }
 
     @VisibleForTesting

http://git-wip-us.apache.org/repos/asf/beam/blob/4c336e84/sdks/java/io/xml/src/main/java/org/apache/beam/sdk/io/xml/XmlSink.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/xml/src/main/java/org/apache/beam/sdk/io/xml/XmlSink.java b/sdks/java/io/xml/src/main/java/org/apache/beam/sdk/io/xml/XmlSink.java
index 6ae83f2..74e0bda 100644
--- a/sdks/java/io/xml/src/main/java/org/apache/beam/sdk/io/xml/XmlSink.java
+++ b/sdks/java/io/xml/src/main/java/org/apache/beam/sdk/io/xml/XmlSink.java
@@ -25,6 +25,7 @@ import javax.xml.bind.JAXBContext;
 import javax.xml.bind.Marshaller;
 import org.apache.beam.sdk.coders.StringUtf8Coder;
 import org.apache.beam.sdk.io.DefaultFilenamePolicy;
+import org.apache.beam.sdk.io.DynamicFileDestinations;
 import org.apache.beam.sdk.io.FileBasedSink;
 import org.apache.beam.sdk.io.ShardNameTemplate;
 import org.apache.beam.sdk.io.fs.ResourceId;
@@ -34,18 +35,18 @@ import org.apache.beam.sdk.util.CoderUtils;
 import org.apache.beam.sdk.util.MimeTypes;
 
 /** Implementation of {@link XmlIO#write}. */
-class XmlSink<T> extends FileBasedSink<T> {
+class XmlSink<T> extends FileBasedSink<T, Void> {
   private static final String XML_EXTENSION = ".xml";
 
   private final XmlIO.Write<T> spec;
 
-  private static DefaultFilenamePolicy makeFilenamePolicy(XmlIO.Write<?> spec) {
-    return DefaultFilenamePolicy.constructUsingStandardParameters(
+  private static <T> DefaultFilenamePolicy makeFilenamePolicy(XmlIO.Write<T> spec) {
+    return DefaultFilenamePolicy.fromStandardParameters(
         spec.getFilenamePrefix(), ShardNameTemplate.INDEX_OF_MAX, XML_EXTENSION, false);
   }
 
   XmlSink(XmlIO.Write<T> spec) {
-    super(spec.getFilenamePrefix(), makeFilenamePolicy(spec));
+    super(spec.getFilenamePrefix(), DynamicFileDestinations.constant(makeFilenamePolicy(spec)));
     this.spec = spec;
   }
 
@@ -75,10 +76,8 @@ class XmlSink<T> extends FileBasedSink<T> {
     super.populateDisplayData(builder);
   }
 
-  /**
-   * {@link WriteOperation} for XML {@link FileBasedSink}s.
-   */
-  protected static final class XmlWriteOperation<T> extends WriteOperation<T> {
+  /** {@link WriteOperation} for XML {@link FileBasedSink}s. */
+  protected static final class XmlWriteOperation<T> extends WriteOperation<T, Void> {
     public XmlWriteOperation(XmlSink<T> sink) {
       super(sink);
     }
@@ -112,10 +111,8 @@ class XmlSink<T> extends FileBasedSink<T> {
     }
   }
 
-  /**
-   * A {@link Writer} that can write objects as XML elements.
-   */
-  protected static final class XmlWriter<T> extends Writer<T> {
+  /** A {@link Writer} that can write objects as XML elements. */
+  protected static final class XmlWriter<T> extends Writer<T, Void> {
     final Marshaller marshaller;
     private OutputStream os = null;
 

http://git-wip-us.apache.org/repos/asf/beam/blob/4c336e84/sdks/java/io/xml/src/test/java/org/apache/beam/sdk/io/xml/XmlSinkTest.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/xml/src/test/java/org/apache/beam/sdk/io/xml/XmlSinkTest.java b/sdks/java/io/xml/src/test/java/org/apache/beam/sdk/io/xml/XmlSinkTest.java
index aa0c1c3..d1584dc 100644
--- a/sdks/java/io/xml/src/test/java/org/apache/beam/sdk/io/xml/XmlSinkTest.java
+++ b/sdks/java/io/xml/src/test/java/org/apache/beam/sdk/io/xml/XmlSinkTest.java
@@ -197,8 +197,8 @@ public class XmlSinkTest {
         .withRecordClass(Integer.class);
 
     DisplayData displayData = DisplayData.from(write);
-
-    assertThat(displayData, hasDisplayItem("filenamePattern", "file-SSSSS-of-NNNNN.xml"));
+    assertThat(
+        displayData, hasDisplayItem("filenamePattern", "/path/to/file-SSSSS-of-NNNNN.xml"));
     assertThat(displayData, hasDisplayItem("rootElement", "bird"));
     assertThat(displayData, hasDisplayItem("recordClass", Integer.class));
   }
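
The XmlSink change above is one piece of a broader FileBasedSink migration: sinks
now carry a second type parameter, DestinationT, that selects a destination per
element, and a sink with a single fixed destination plugs in Void together with a
constant destination function (the DynamicFileDestinations.constant call above).
The following is a hedged, plain-Java sketch of the shape of that idea, not the
actual Beam API; every name in it is illustrative:

    // Minimal model (not Beam code) of per-element dynamic destinations.
    public final class DestinationsSketch {

      interface DynamicDestinations<T, DestinationT> {
        DestinationT getDestination(T element);
      }

      // A sink with a single fixed destination uses Void: every element maps
      // to the same (null) destination, mirroring the intent of the
      // DynamicFileDestinations.constant call in the diff above.
      static <T> DynamicDestinations<T, Void> constant() {
        return new DynamicDestinations<T, Void>() {
          @Override
          public Void getDestination(T element) {
            return null;
          }
        };
      }
    }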


[23/50] [abbrv] beam git commit: Process timer firings for a window together

Posted by ta...@apache.org.
Process timer firings for a window together


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/7b4fa891
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/7b4fa891
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/7b4fa891

Branch: refs/heads/DSL_SQL
Commit: 7b4fa8913bdf85f85cbeb2c13b8779db921b2dec
Parents: 951f3ca
Author: Kenneth Knowles <kl...@google.com>
Authored: Thu Jun 22 18:43:39 2017 -0700
Committer: Tyler Akidau <ta...@apache.org>
Committed: Wed Jul 12 20:01:00 2017 -0700

----------------------------------------------------------------------
 .../examples/complete/game/LeaderBoardTest.java |  2 +
 .../beam/runners/core/ReduceFnRunner.java       | 98 +++++++++++++-------
 .../beam/runners/core/ReduceFnRunnerTest.java   | 49 +++++++++-
 3 files changed, 115 insertions(+), 34 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/7b4fa891/examples/java8/src/test/java/org/apache/beam/examples/complete/game/LeaderBoardTest.java
----------------------------------------------------------------------
diff --git a/examples/java8/src/test/java/org/apache/beam/examples/complete/game/LeaderBoardTest.java b/examples/java8/src/test/java/org/apache/beam/examples/complete/game/LeaderBoardTest.java
index 745c210..611e2b3 100644
--- a/examples/java8/src/test/java/org/apache/beam/examples/complete/game/LeaderBoardTest.java
+++ b/examples/java8/src/test/java/org/apache/beam/examples/complete/game/LeaderBoardTest.java
@@ -276,6 +276,8 @@ public class LeaderBoardTest implements Serializable {
         .addElements(event(TestUser.RED_ONE, 4, Duration.standardMinutes(2)),
             event(TestUser.BLUE_TWO, 3, Duration.ZERO),
             event(TestUser.BLUE_ONE, 3, Duration.standardMinutes(3)))
+        // Move the watermark to the end of the window to output on time
+        .advanceWatermarkTo(baseTime.plus(TEAM_WINDOW_DURATION))
         // Move the watermark past the end of the allowed lateness plus the end of the window
         .advanceWatermarkTo(baseTime.plus(ALLOWED_LATENESS)
             .plus(TEAM_WINDOW_DURATION).plus(Duration.standardMinutes(1)))
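
The fix above depends on an ordering idiom in TestStream: advance the watermark
to the end of the window first (firing the ON_TIME pane), then past the allowed
lateness (firing any LATE pane and expiring the window). A minimal,
self-contained sketch of that idiom, with an assumed Long element type and
illustrative timestamps rather than the LeaderBoardTest values:

    import org.apache.beam.sdk.coders.VarLongCoder;
    import org.apache.beam.sdk.testing.TestStream;
    import org.apache.beam.sdk.values.TimestampedValue;
    import org.joda.time.Duration;
    import org.joda.time.Instant;

    public class WatermarkIdiom {
      public static TestStream<Long> stream() {
        Instant base = new Instant(0);
        Duration window = Duration.standardMinutes(20);
        Duration lateness = Duration.standardHours(1);
        return TestStream.create(VarLongCoder.of())
            .addElements(TimestampedValue.of(1L, base))
            // First stop: the end of the window, which fires the ON_TIME pane.
            .advanceWatermarkTo(base.plus(window))
            // Second stop: past the allowed lateness, which expires the window.
            .advanceWatermarkTo(base.plus(window).plus(lateness))
            .advanceWatermarkToInfinity();
      }
    }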

http://git-wip-us.apache.org/repos/asf/beam/blob/7b4fa891/runners/core-java/src/main/java/org/apache/beam/runners/core/ReduceFnRunner.java
----------------------------------------------------------------------
diff --git a/runners/core-java/src/main/java/org/apache/beam/runners/core/ReduceFnRunner.java b/runners/core-java/src/main/java/org/apache/beam/runners/core/ReduceFnRunner.java
index 0632c05..634a2d1 100644
--- a/runners/core-java/src/main/java/org/apache/beam/runners/core/ReduceFnRunner.java
+++ b/runners/core-java/src/main/java/org/apache/beam/runners/core/ReduceFnRunner.java
@@ -29,7 +29,6 @@ import java.util.Collection;
 import java.util.Collections;
 import java.util.HashMap;
 import java.util.HashSet;
-import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
@@ -638,11 +637,9 @@ public class ReduceFnRunner<K, InputT, OutputT, W extends BoundedWindow> {
   }
 
   /**
-   * Enriches TimerData with state necessary for processing a timer as well as
-   * common queries about a timer.
+   * Describes the activation of a window in response to a timer firing.
    */
-  private class EnrichedTimerData {
-    public final Instant timestamp;
+  private class WindowActivation {
     public final ReduceFn<K, InputT, OutputT, W>.Context directContext;
     public final ReduceFn<K, InputT, OutputT, W>.Context renamedContext;
     // If this is an end-of-window timer then we may need to set a garbage collection timer
@@ -653,19 +650,34 @@ public class ReduceFnRunner<K, InputT, OutputT, W extends BoundedWindow> {
     // end-of-window time to be a signal to garbage collect.
     public final boolean isGarbageCollection;
 
-    EnrichedTimerData(
-        TimerData timer,
+    WindowActivation(
         ReduceFn<K, InputT, OutputT, W>.Context directContext,
         ReduceFn<K, InputT, OutputT, W>.Context renamedContext) {
-      this.timestamp = timer.getTimestamp();
       this.directContext = directContext;
       this.renamedContext = renamedContext;
       W window = directContext.window();
-      this.isEndOfWindow = TimeDomain.EVENT_TIME == timer.getDomain()
-          && timer.getTimestamp().equals(window.maxTimestamp());
-      Instant cleanupTime = LateDataUtils.garbageCollectionTime(window, windowingStrategy);
+
+      // The output watermark is before the end of the window if it is either unknown
+      // or it is known to be before it. If it is unknown, that means that there hasn't been
+      // enough data to advance it.
+      boolean outputWatermarkBeforeEOW =
+              timerInternals.currentOutputWatermarkTime() == null
+          || !timerInternals.currentOutputWatermarkTime().isAfter(window.maxTimestamp());
+
+      // The "end of the window" is reached when the local input watermark (for this key) surpasses
+      // it but the local output watermark (also for this key) has not. After data is emitted and
+      // the output watermark hold is released, the output watermark on this key will immediately
+      // exceed the end of the window (otherwise we could see multiple ON_TIME outputs)
+      this.isEndOfWindow =
+          timerInternals.currentInputWatermarkTime().isAfter(window.maxTimestamp())
+              && outputWatermarkBeforeEOW;
+
+      // The "GC time" is reached when the input watermark surpasses the end of the window
+      // plus allowed lateness. After this, the window is expired and expunged.
       this.isGarbageCollection =
-          TimeDomain.EVENT_TIME == timer.getDomain() && !timer.getTimestamp().isBefore(cleanupTime);
+          timerInternals
+              .currentInputWatermarkTime()
+              .isAfter(LateDataUtils.garbageCollectionTime(window, windowingStrategy));
     }
 
     // Has this window had its trigger finish?
@@ -684,9 +696,10 @@ public class ReduceFnRunner<K, InputT, OutputT, W extends BoundedWindow> {
       return;
     }
 
-    // Create a reusable context for each timer and begin prefetching necessary
+    // Create a reusable context for each window and begin prefetching necessary
     // state.
-    List<EnrichedTimerData> enrichedTimers = new LinkedList();
+    Map<BoundedWindow, WindowActivation> windowActivations = new HashMap<>();
+
     for (TimerData timer : timers) {
       checkArgument(timer.getNamespace() instanceof WindowNamespace,
           "Expected timer to be in WindowNamespace, but was in %s", timer.getNamespace());
@@ -694,7 +707,24 @@ public class ReduceFnRunner<K, InputT, OutputT, W extends BoundedWindow> {
         WindowNamespace<W> windowNamespace = (WindowNamespace<W>) timer.getNamespace();
       W window = windowNamespace.getWindow();
 
-      if (TimeDomain.PROCESSING_TIME == timer.getDomain() && windowIsExpired(window)) {
+      WindowTracing.debug("{}: Received timer key:{}; window:{}; data:{} with "
+              + "inputWatermark:{}; outputWatermark:{}",
+          ReduceFnRunner.class.getSimpleName(),
+          key, window, timer,
+          timerInternals.currentInputWatermarkTime(),
+          timerInternals.currentOutputWatermarkTime());
+
+      // Processing time timers for an expired window are ignored, just like elements
+      // that show up too late. Window GC is managed by an event time timer.
+      if (TimeDomain.EVENT_TIME != timer.getDomain() && windowIsExpired(window)) {
+        continue;
+      }
+
+      // How a window is processed is a function only of the current state, not the details
+      // of the timer. This makes us robust to large leaps in processing time and watermark
+      // time, where both EOW and GC timers come in together and we need to GC and emit
+      // the final pane.
+      if (windowActivations.containsKey(window)) {
         continue;
       }
 
@@ -702,11 +732,11 @@ public class ReduceFnRunner<K, InputT, OutputT, W extends BoundedWindow> {
           contextFactory.base(window, StateStyle.DIRECT);
       ReduceFn<K, InputT, OutputT, W>.Context renamedContext =
           contextFactory.base(window, StateStyle.RENAMED);
-      EnrichedTimerData enrichedTimer = new EnrichedTimerData(timer, directContext, renamedContext);
-      enrichedTimers.add(enrichedTimer);
+      WindowActivation windowActivation = new WindowActivation(directContext, renamedContext);
+      windowActivations.put(window, windowActivation);
 
       // Perform prefetching of state to determine if the trigger should fire.
-      if (enrichedTimer.isGarbageCollection) {
+      if (windowActivation.isGarbageCollection) {
         triggerRunner.prefetchIsClosed(directContext.state());
       } else {
         triggerRunner.prefetchShouldFire(directContext.window(), directContext.state());
@@ -714,7 +744,7 @@ public class ReduceFnRunner<K, InputT, OutputT, W extends BoundedWindow> {
     }
 
     // For those windows that are active and open, prefetch the triggering or emitting state.
-    for (EnrichedTimerData timer : enrichedTimers) {
+    for (WindowActivation timer : windowActivations.values()) {
       if (timer.windowIsActiveAndOpen()) {
         ReduceFn<K, InputT, OutputT, W>.Context directContext = timer.directContext;
         if (timer.isGarbageCollection) {
@@ -727,25 +757,27 @@ public class ReduceFnRunner<K, InputT, OutputT, W extends BoundedWindow> {
     }
 
     // Perform processing now that everything is prefetched.
-    for (EnrichedTimerData timer : enrichedTimers) {
-      ReduceFn<K, InputT, OutputT, W>.Context directContext = timer.directContext;
-      ReduceFn<K, InputT, OutputT, W>.Context renamedContext = timer.renamedContext;
+    for (WindowActivation windowActivation : windowActivations.values()) {
+      ReduceFn<K, InputT, OutputT, W>.Context directContext = windowActivation.directContext;
+      ReduceFn<K, InputT, OutputT, W>.Context renamedContext = windowActivation.renamedContext;
 
-      if (timer.isGarbageCollection) {
-        WindowTracing.debug("ReduceFnRunner.onTimer: Cleaning up for key:{}; window:{} at {} with "
-                + "inputWatermark:{}; outputWatermark:{}",
-            key, directContext.window(), timer.timestamp,
+      if (windowActivation.isGarbageCollection) {
+        WindowTracing.debug(
+            "{}: Cleaning up for key:{}; window:{} with inputWatermark:{}; outputWatermark:{}",
+            ReduceFnRunner.class.getSimpleName(),
+            key,
+            directContext.window(),
             timerInternals.currentInputWatermarkTime(),
             timerInternals.currentOutputWatermarkTime());
 
-        boolean windowIsActiveAndOpen = timer.windowIsActiveAndOpen();
+        boolean windowIsActiveAndOpen = windowActivation.windowIsActiveAndOpen();
         if (windowIsActiveAndOpen) {
           // We need to call onTrigger to emit the final pane if required.
           // The final pane *may* be ON_TIME if no prior ON_TIME pane has been emitted,
           // and the watermark has passed the end of the window.
           @Nullable
           Instant newHold = onTrigger(
-              directContext, renamedContext, true /* isFinished */, timer.isEndOfWindow);
+              directContext, renamedContext, true /* isFinished */, windowActivation.isEndOfWindow);
           checkState(newHold == null, "Hold placed at %s despite isFinished being true.", newHold);
         }
 
@@ -753,18 +785,20 @@ public class ReduceFnRunner<K, InputT, OutputT, W extends BoundedWindow> {
         // see elements for it again.
         clearAllState(directContext, renamedContext, windowIsActiveAndOpen);
       } else {
-        WindowTracing.debug("ReduceFnRunner.onTimer: Triggering for key:{}; window:{} at {} with "
+        WindowTracing.debug(
+            "{}.onTimers: Triggering for key:{}; window:{} at {} with "
                 + "inputWatermark:{}; outputWatermark:{}",
-            key, directContext.window(), timer.timestamp,
+            key,
+            directContext.window(),
             timerInternals.currentInputWatermarkTime(),
             timerInternals.currentOutputWatermarkTime());
-        if (timer.windowIsActiveAndOpen()
+        if (windowActivation.windowIsActiveAndOpen()
             && triggerRunner.shouldFire(
                    directContext.window(), directContext.timers(), directContext.state())) {
           emit(directContext, renamedContext);
         }
 
-        if (timer.isEndOfWindow) {
+        if (windowActivation.isEndOfWindow) {
           // If the window strategy trigger includes a watermark trigger then at this point
           // there should be no data holds, either because we'd already cleared them on an
           // earlier onTrigger, or because we just cleared them on the above emit.
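
To make the two predicates introduced above easier to follow outside the diff,
here is a hedged, plain-Java restatement. It is not Beam code: watermarks are
modeled as millisecond longs (the output watermark nullable, as in the comment
above), and allowedLateness stands in for the garbage-collection offset that
LateDataUtils derives from the windowing strategy:

    public final class WindowActivationSketch {

      // End of window: the input watermark passed the window's max timestamp,
      // but the output watermark (if known) has not.
      public static boolean isEndOfWindow(
          long inputWatermark, Long outputWatermark, long windowMaxTimestamp) {
        boolean outputBeforeEow =
            outputWatermark == null || outputWatermark <= windowMaxTimestamp;
        return inputWatermark > windowMaxTimestamp && outputBeforeEow;
      }

      // Garbage collection: the input watermark passed the window's max
      // timestamp plus the allowed lateness.
      public static boolean isGarbageCollection(
          long inputWatermark, long windowMaxTimestamp, long allowedLateness) {
        return inputWatermark > windowMaxTimestamp + allowedLateness;
      }
    }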

http://git-wip-us.apache.org/repos/asf/beam/blob/7b4fa891/runners/core-java/src/test/java/org/apache/beam/runners/core/ReduceFnRunnerTest.java
----------------------------------------------------------------------
diff --git a/runners/core-java/src/test/java/org/apache/beam/runners/core/ReduceFnRunnerTest.java b/runners/core-java/src/test/java/org/apache/beam/runners/core/ReduceFnRunnerTest.java
index 79ee91b..4f13af1 100644
--- a/runners/core-java/src/test/java/org/apache/beam/runners/core/ReduceFnRunnerTest.java
+++ b/runners/core-java/src/test/java/org/apache/beam/runners/core/ReduceFnRunnerTest.java
@@ -55,6 +55,7 @@ import org.apache.beam.sdk.transforms.windowing.AfterPane;
 import org.apache.beam.sdk.transforms.windowing.AfterProcessingTime;
 import org.apache.beam.sdk.transforms.windowing.AfterWatermark;
 import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
+import org.apache.beam.sdk.transforms.windowing.DefaultTrigger;
 import org.apache.beam.sdk.transforms.windowing.FixedWindows;
 import org.apache.beam.sdk.transforms.windowing.GlobalWindow;
 import org.apache.beam.sdk.transforms.windowing.GlobalWindows;
@@ -79,7 +80,6 @@ import org.apache.beam.sdk.values.WindowingStrategy.AccumulationMode;
 import org.joda.time.Duration;
 import org.joda.time.Instant;
 import org.junit.Before;
-import org.junit.Ignore;
 import org.junit.Test;
 import org.junit.runner.RunWith;
 import org.junit.runners.JUnit4;
@@ -246,6 +246,52 @@ public class ReduceFnRunnerTest {
     tester.assertHasOnlyGlobalAndFinishedSetsFor(firstWindow);
   }
 
+  /**
+   * Tests that with the default trigger we will not produce two ON_TIME panes, even
+   * if there are two outputs that are both candidates.
+   */
+  @Test
+  public void testOnlyOneOnTimePane() throws Exception {
+    WindowingStrategy<?, IntervalWindow> strategy =
+        WindowingStrategy.of((WindowFn<?, IntervalWindow>) FixedWindows.of(Duration.millis(10)))
+            .withTrigger(DefaultTrigger.of())
+            .withMode(AccumulationMode.ACCUMULATING_FIRED_PANES)
+            .withAllowedLateness(Duration.millis(100));
+
+    ReduceFnTester<Integer, Integer, IntervalWindow> tester =
+        ReduceFnTester.combining(strategy, Sum.ofIntegers(), VarIntCoder.of());
+
+    tester.advanceInputWatermark(new Instant(0));
+
+    int value1 = 1;
+    int value2 = 3;
+
+    // A single element that should be in the ON_TIME output
+    tester.injectElements(
+        TimestampedValue.of(value1, new Instant(1)));
+
+    // Should fire ON_TIME
+    tester.advanceInputWatermark(new Instant(10));
+
+    // The DefaultTrigger should cause output labeled LATE, even though it does not have to be
+    // labeled as such.
+    tester.injectElements(
+        TimestampedValue.of(value2, new Instant(3)));
+
+    List<WindowedValue<Integer>> output = tester.extractOutput();
+    assertEquals(2, output.size());
+
+    assertThat(output.get(0), WindowMatchers.isWindowedValue(equalTo(value1)));
+    assertThat(output.get(1), WindowMatchers.isWindowedValue(equalTo(value1 + value2)));
+
+    assertThat(
+        output.get(0),
+        WindowMatchers.valueWithPaneInfo(PaneInfo.createPane(true, false, Timing.ON_TIME, 0, 0)));
+    assertThat(
+        output.get(1),
+        WindowMatchers.valueWithPaneInfo(PaneInfo.createPane(false, false, Timing.LATE, 1, 1)));
+  }
+
   @Test
   public void testOnElementCombiningDiscarding() throws Exception {
     // Test basic execution of a trigger using a non-combining window set and discarding mode.
@@ -458,7 +504,6 @@ public class ReduceFnRunnerTest {
    * marked as final.
    */
   @Test
-  @Ignore("https://issues.apache.org/jira/browse/BEAM-2505")
   public void testCombiningAccumulatingEventTime() throws Exception {
     WindowingStrategy<?, IntervalWindow> strategy =
         WindowingStrategy.of((WindowFn<?, IntervalWindow>) FixedWindows.of(Duration.millis(100)))


[04/50] [abbrv] beam git commit: [BEAM-2553] Update Maven exec plugin to 1.6.0 to incorporate messaging improvements

Posted by ta...@apache.org.
[BEAM-2553] Update Maven exec plugin to 1.6.0 to incorporate messaging improvements


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/c73e69af
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/c73e69af
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/c73e69af

Branch: refs/heads/DSL_SQL
Commit: c73e69af7fdaf4d74be990e56df0ef69b84ac7b5
Parents: f2c337c
Author: Luke Cwik <lc...@google.com>
Authored: Wed Jul 5 10:38:44 2017 -0700
Committer: Tyler Akidau <ta...@apache.org>
Committed: Wed Jul 12 20:00:59 2017 -0700

----------------------------------------------------------------------
 pom.xml                                                            | 2 +-
 .../starter/src/test/resources/projects/basic/reference/pom.xml    | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/c73e69af/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index f5d4815..fd01781 100644
--- a/pom.xml
+++ b/pom.xml
@@ -159,7 +159,7 @@
     <failsafe-plugin.version>2.20</failsafe-plugin.version>
     <maven-compiler-plugin.version>3.6.1</maven-compiler-plugin.version>
     <maven-dependency-plugin.version>3.0.1</maven-dependency-plugin.version>
-    <maven-exec-plugin.version>1.4.0</maven-exec-plugin.version>
+    <maven-exec-plugin.version>1.6.0</maven-exec-plugin.version>
     <maven-jar-plugin.version>3.0.2</maven-jar-plugin.version>
     <maven-resources-plugin.version>3.0.2</maven-resources-plugin.version>
     <maven-shade-plugin.version>3.0.0</maven-shade-plugin.version>

http://git-wip-us.apache.org/repos/asf/beam/blob/c73e69af/sdks/java/maven-archetypes/starter/src/test/resources/projects/basic/reference/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/maven-archetypes/starter/src/test/resources/projects/basic/reference/pom.xml b/sdks/java/maven-archetypes/starter/src/test/resources/projects/basic/reference/pom.xml
index 60405e6..6056fb0 100644
--- a/sdks/java/maven-archetypes/starter/src/test/resources/projects/basic/reference/pom.xml
+++ b/sdks/java/maven-archetypes/starter/src/test/resources/projects/basic/reference/pom.xml
@@ -28,7 +28,7 @@
     <beam.version>@project.version@</beam.version>
 
     <maven-compiler-plugin.version>3.6.1</maven-compiler-plugin.version>
-    <maven-exec-plugin.version>1.4.0</maven-exec-plugin.version>
+    <maven-exec-plugin.version>1.6.0</maven-exec-plugin.version>
     <slf4j.version>1.7.14</slf4j.version>
   </properties>
 


[48/50] [abbrv] beam git commit: Reformatting Kinesis IO to comply with official code style

Posted by ta...@apache.org.
http://git-wip-us.apache.org/repos/asf/beam/blob/7925a668/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/SimplifiedKinesisClient.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/SimplifiedKinesisClient.java b/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/SimplifiedKinesisClient.java
index 3e3984a..80c950f 100644
--- a/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/SimplifiedKinesisClient.java
+++ b/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/SimplifiedKinesisClient.java
@@ -17,7 +17,6 @@
  */
 package org.apache.beam.sdk.io.kinesis;
 
-
 import com.amazonaws.AmazonServiceException;
 import com.amazonaws.services.kinesis.AmazonKinesis;
 import com.amazonaws.services.kinesis.clientlibrary.types.UserRecord;
@@ -31,9 +30,11 @@ import com.amazonaws.services.kinesis.model.Shard;
 import com.amazonaws.services.kinesis.model.ShardIteratorType;
 import com.amazonaws.services.kinesis.model.StreamDescription;
 import com.google.common.collect.Lists;
+
 import java.util.Date;
 import java.util.List;
 import java.util.concurrent.Callable;
+
 import org.joda.time.Instant;
 
 /**
@@ -41,117 +42,121 @@ import org.joda.time.Instant;
  * proper error handling.
  */
 class SimplifiedKinesisClient {
-    private final AmazonKinesis kinesis;
 
-    public SimplifiedKinesisClient(AmazonKinesis kinesis) {
-        this.kinesis = kinesis;
-    }
+  private final AmazonKinesis kinesis;
 
-    public static SimplifiedKinesisClient from(KinesisClientProvider provider) {
-        return new SimplifiedKinesisClient(provider.get());
-    }
+  public SimplifiedKinesisClient(AmazonKinesis kinesis) {
+    this.kinesis = kinesis;
+  }
 
-    public String getShardIterator(final String streamName, final String shardId,
-                                   final ShardIteratorType shardIteratorType,
-                                   final String startingSequenceNumber, final Instant timestamp)
-            throws TransientKinesisException {
-        final Date date = timestamp != null ? timestamp.toDate() : null;
-        return wrapExceptions(new Callable<String>() {
-            @Override
-            public String call() throws Exception {
-                return kinesis.getShardIterator(new GetShardIteratorRequest()
-                        .withStreamName(streamName)
-                        .withShardId(shardId)
-                        .withShardIteratorType(shardIteratorType)
-                        .withStartingSequenceNumber(startingSequenceNumber)
-                        .withTimestamp(date)
-                ).getShardIterator();
-            }
-        });
-    }
+  public static SimplifiedKinesisClient from(KinesisClientProvider provider) {
+    return new SimplifiedKinesisClient(provider.get());
+  }
 
-    public List<Shard> listShards(final String streamName) throws TransientKinesisException {
-        return wrapExceptions(new Callable<List<Shard>>() {
-            @Override
-            public List<Shard> call() throws Exception {
-                List<Shard> shards = Lists.newArrayList();
-                String lastShardId = null;
-
-                StreamDescription description;
-                do {
-                    description = kinesis.describeStream(streamName, lastShardId)
-                            .getStreamDescription();
-
-                    shards.addAll(description.getShards());
-                    lastShardId = shards.get(shards.size() - 1).getShardId();
-                } while (description.getHasMoreShards());
-
-                return shards;
-            }
-        });
-    }
+  public String getShardIterator(final String streamName, final String shardId,
+      final ShardIteratorType shardIteratorType,
+      final String startingSequenceNumber, final Instant timestamp)
+      throws TransientKinesisException {
+    final Date date = timestamp != null ? timestamp.toDate() : null;
+    return wrapExceptions(new Callable<String>() {
 
-    /**
-     * Gets records from Kinesis and deaggregates them if needed.
-     *
-     * @return list of deaggregated records
-     * @throws TransientKinesisException - in case of recoverable situation
-     */
-    public GetKinesisRecordsResult getRecords(String shardIterator, String streamName,
-                                              String shardId) throws TransientKinesisException {
-        return getRecords(shardIterator, streamName, shardId, null);
-    }
+      @Override
+      public String call() throws Exception {
+        return kinesis.getShardIterator(new GetShardIteratorRequest()
+            .withStreamName(streamName)
+            .withShardId(shardId)
+            .withShardIteratorType(shardIteratorType)
+            .withStartingSequenceNumber(startingSequenceNumber)
+            .withTimestamp(date)
+        ).getShardIterator();
+      }
+    });
+  }
 
-    /**
-     * Gets records from Kinesis and deaggregates them if needed.
-     *
-     * @return list of deaggregated records
-     * @throws TransientKinesisException - in case of recoverable situation
-     */
-    public GetKinesisRecordsResult getRecords(final String shardIterator, final String streamName,
-                                              final String shardId, final Integer limit)
-            throws
-            TransientKinesisException {
-        return wrapExceptions(new Callable<GetKinesisRecordsResult>() {
-            @Override
-            public GetKinesisRecordsResult call() throws Exception {
-                GetRecordsResult response = kinesis.getRecords(new GetRecordsRequest()
-                        .withShardIterator(shardIterator)
-                        .withLimit(limit));
-                return new GetKinesisRecordsResult(
-                        UserRecord.deaggregate(response.getRecords()),
-                        response.getNextShardIterator(),
-                        streamName, shardId);
-            }
-        });
-    }
+  public List<Shard> listShards(final String streamName) throws TransientKinesisException {
+    return wrapExceptions(new Callable<List<Shard>>() {
+
+      @Override
+      public List<Shard> call() throws Exception {
+        List<Shard> shards = Lists.newArrayList();
+        String lastShardId = null;
+
+        StreamDescription description;
+        do {
+          description = kinesis.describeStream(streamName, lastShardId)
+              .getStreamDescription();
+
+          shards.addAll(description.getShards());
+          lastShardId = shards.get(shards.size() - 1).getShardId();
+        } while (description.getHasMoreShards());
+
+        return shards;
+      }
+    });
+  }
+
+  /**
+   * Gets records from Kinesis and deaggregates them if needed.
+   *
+   * @return list of deaggregated records
+   * @throws TransientKinesisException - in case of a recoverable situation
+   */
+  public GetKinesisRecordsResult getRecords(String shardIterator, String streamName,
+      String shardId) throws TransientKinesisException {
+    return getRecords(shardIterator, streamName, shardId, null);
+  }
+
+  /**
+   * Gets records from Kinesis and deaggregates them if needed.
+   *
+   * @return list of deaggregated records
+   * @throws TransientKinesisException - in case of a recoverable situation
+   */
+  public GetKinesisRecordsResult getRecords(final String shardIterator, final String streamName,
+      final String shardId, final Integer limit)
+      throws TransientKinesisException {
+    return wrapExceptions(new Callable<GetKinesisRecordsResult>() {
+
+      @Override
+      public GetKinesisRecordsResult call() throws Exception {
+        GetRecordsResult response = kinesis.getRecords(new GetRecordsRequest()
+            .withShardIterator(shardIterator)
+            .withLimit(limit));
+        return new GetKinesisRecordsResult(
+            UserRecord.deaggregate(response.getRecords()),
+            response.getNextShardIterator(),
+            streamName, shardId);
+      }
+    });
+  }
 
-    /**
-     * Wraps Amazon specific exceptions into more friendly format.
-     *
-     * @throws TransientKinesisException              - in case of recoverable situation, i.e.
-     *                                  the request rate is too high, Kinesis remote service
-     *                                  failed, network issue, etc.
-     * @throws ExpiredIteratorException - if iterator needs to be refreshed
-     * @throws RuntimeException         - in all other cases
-     */
-    private <T> T wrapExceptions(Callable<T> callable) throws TransientKinesisException {
-        try {
-            return callable.call();
-        } catch (ExpiredIteratorException e) {
-            throw e;
-        } catch (LimitExceededException | ProvisionedThroughputExceededException e) {
-            throw new TransientKinesisException(
-                    "Too many requests to Kinesis. Wait some time and retry.", e);
-        } catch (AmazonServiceException e) {
-            if (e.getErrorType() == AmazonServiceException.ErrorType.Service) {
-                throw new TransientKinesisException(
-                        "Kinesis backend failed. Wait some time and retry.", e);
-            }
-            throw new RuntimeException("Kinesis client side failure", e);
-        } catch (Exception e) {
-            throw new RuntimeException("Unknown kinesis failure, when trying to reach kinesis", e);
-        }
+  /**
+   * Wraps Amazon specific exceptions into more friendly format.
+   *
+   * @throws TransientKinesisException - in case of a recoverable situation, e.g. the request
+   *     rate is too high, the Kinesis remote service failed, or there was a network issue
+   * @throws ExpiredIteratorException - if the iterator needs to be refreshed
+   * @throws RuntimeException - in all other cases
+   */
+  private <T> T wrapExceptions(Callable<T> callable) throws TransientKinesisException {
+    try {
+      return callable.call();
+    } catch (ExpiredIteratorException e) {
+      throw e;
+    } catch (LimitExceededException | ProvisionedThroughputExceededException e) {
+      throw new TransientKinesisException(
+          "Too many requests to Kinesis. Wait some time and retry.", e);
+    } catch (AmazonServiceException e) {
+      if (e.getErrorType() == AmazonServiceException.ErrorType.Service) {
+        throw new TransientKinesisException(
+            "Kinesis backend failed. Wait some time and retry.", e);
+      }
+      throw new RuntimeException("Kinesis client side failure", e);
+    } catch (Exception e) {
+      throw new RuntimeException("Unknown kinesis failure, when trying to reach kinesis", e);
     }
+  }
 
 }
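
The contract documented above (recoverable failures surface as the checked
TransientKinesisException, everything else as RuntimeException) lends itself to
a simple caller-side retry loop. A hedged sketch, with the exception class,
retry count, and backoff all chosen here for illustration rather than taken
from the Beam Kinesis reader:

    public final class TransientRetry {

      // Stand-in for the package-private TransientKinesisException above.
      public static class TransientException extends Exception {
        public TransientException(String message, Throwable cause) {
          super(message, cause);
        }
      }

      public interface Call<T> {
        T run() throws TransientException;
      }

      public static <T> T withRetries(Call<T> call, int maxAttempts)
          throws TransientException, InterruptedException {
        for (int attempt = 1; attempt <= maxAttempts; attempt++) {
          try {
            return call.run();
          } catch (TransientException e) {
            if (attempt == maxAttempts) {
              throw e; // retries exhausted; propagate the transient failure
            }
            Thread.sleep(100L << attempt); // simple exponential backoff
          }
        }
        throw new AssertionError("unreachable");
      }
    }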

http://git-wip-us.apache.org/repos/asf/beam/blob/7925a668/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/StartingPoint.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/StartingPoint.java b/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/StartingPoint.java
index d8842c4..f9298fa 100644
--- a/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/StartingPoint.java
+++ b/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/StartingPoint.java
@@ -17,13 +17,14 @@
  */
 package org.apache.beam.sdk.io.kinesis;
 
-
 import static com.google.common.base.Preconditions.checkNotNull;
 
 import com.amazonaws.services.kinesis.clientlibrary.lib.worker.InitialPositionInStream;
 import com.amazonaws.services.kinesis.model.ShardIteratorType;
+
 import java.io.Serializable;
 import java.util.Objects;
+
 import org.joda.time.Instant;
 
 /**
@@ -32,54 +33,55 @@ import org.joda.time.Instant;
  * in which case the reader will start reading at the specified point in time.
  */
 class StartingPoint implements Serializable {
-    private final InitialPositionInStream position;
-    private final Instant timestamp;
 
-    public StartingPoint(InitialPositionInStream position) {
-        this.position = checkNotNull(position, "position");
-        this.timestamp = null;
-    }
+  private final InitialPositionInStream position;
+  private final Instant timestamp;
 
-    public StartingPoint(Instant timestamp) {
-        this.timestamp = checkNotNull(timestamp, "timestamp");
-        this.position = null;
-    }
+  public StartingPoint(InitialPositionInStream position) {
+    this.position = checkNotNull(position, "position");
+    this.timestamp = null;
+  }
 
-    public InitialPositionInStream getPosition() {
-        return position;
-    }
+  public StartingPoint(Instant timestamp) {
+    this.timestamp = checkNotNull(timestamp, "timestamp");
+    this.position = null;
+  }
 
-    public String getPositionName() {
-        return position != null ? position.name() : ShardIteratorType.AT_TIMESTAMP.name();
-    }
+  public InitialPositionInStream getPosition() {
+    return position;
+  }
 
-    public Instant getTimestamp() {
-        return timestamp != null ? timestamp : null;
-    }
+  public String getPositionName() {
+    return position != null ? position.name() : ShardIteratorType.AT_TIMESTAMP.name();
+  }
 
-    @Override
-    public boolean equals(Object o) {
-        if (this == o) {
-            return true;
-        }
-        if (o == null || getClass() != o.getClass()) {
-            return false;
-        }
-        StartingPoint that = (StartingPoint) o;
-        return position == that.position && Objects.equals(timestamp, that.timestamp);
-    }
+  public Instant getTimestamp() {
+    return timestamp;
+  }
 
-    @Override
-    public int hashCode() {
-        return Objects.hash(position, timestamp);
+  @Override
+  public boolean equals(Object o) {
+    if (this == o) {
+      return true;
     }
+    if (o == null || getClass() != o.getClass()) {
+      return false;
+    }
+    StartingPoint that = (StartingPoint) o;
+    return position == that.position && Objects.equals(timestamp, that.timestamp);
+  }
+
+  @Override
+  public int hashCode() {
+    return Objects.hash(position, timestamp);
+  }
 
-    @Override
-    public String toString() {
-        if (timestamp == null) {
-            return position.toString();
-        } else {
-            return "Starting at timestamp " + timestamp;
-        }
+  @Override
+  public String toString() {
+    if (timestamp == null) {
+      return position.toString();
+    } else {
+      return "Starting at timestamp " + timestamp;
     }
+  }
 }
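
As the javadoc above says, a StartingPoint is either a well-known position in
the stream or an arbitrary point in time. A hedged usage sketch; StartingPoint
is package-private, so this would have to live in org.apache.beam.sdk.io.kinesis
to compile, and the timestamps are illustrative:

    import com.amazonaws.services.kinesis.clientlibrary.lib.worker.InitialPositionInStream;
    import org.joda.time.Duration;
    import org.joda.time.Instant;

    class StartingPointExamples {
      // Start from the newest records in the stream...
      StartingPoint latest = new StartingPoint(InitialPositionInStream.LATEST);
      // ...or from an instant, which maps to ShardIteratorType.AT_TIMESTAMP.
      StartingPoint oneHourAgo =
          new StartingPoint(Instant.now().minus(Duration.standardHours(1)));
    }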

http://git-wip-us.apache.org/repos/asf/beam/blob/7925a668/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/StaticCheckpointGenerator.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/StaticCheckpointGenerator.java b/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/StaticCheckpointGenerator.java
index 22dc973..1ec865d 100644
--- a/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/StaticCheckpointGenerator.java
+++ b/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/StaticCheckpointGenerator.java
@@ -23,20 +23,21 @@ import static com.google.common.base.Preconditions.checkNotNull;
  * Always returns the same instance of checkpoint.
  */
 class StaticCheckpointGenerator implements CheckpointGenerator {
-    private final KinesisReaderCheckpoint checkpoint;
 
-    public StaticCheckpointGenerator(KinesisReaderCheckpoint checkpoint) {
-        checkNotNull(checkpoint, "checkpoint");
-        this.checkpoint = checkpoint;
-    }
+  private final KinesisReaderCheckpoint checkpoint;
 
-    @Override
-    public KinesisReaderCheckpoint generate(SimplifiedKinesisClient client) {
-        return checkpoint;
-    }
+  public StaticCheckpointGenerator(KinesisReaderCheckpoint checkpoint) {
+    checkNotNull(checkpoint, "checkpoint");
+    this.checkpoint = checkpoint;
+  }
 
-    @Override
-    public String toString() {
-        return checkpoint.toString();
-    }
+  @Override
+  public KinesisReaderCheckpoint generate(SimplifiedKinesisClient client) {
+    return checkpoint;
+  }
+
+  @Override
+  public String toString() {
+    return checkpoint.toString();
+  }
 }

http://git-wip-us.apache.org/repos/asf/beam/blob/7925a668/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/TransientKinesisException.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/TransientKinesisException.java b/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/TransientKinesisException.java
index 57ad8a8..68ca0d7 100644
--- a/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/TransientKinesisException.java
+++ b/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/TransientKinesisException.java
@@ -23,7 +23,8 @@ import com.amazonaws.AmazonServiceException;
  * A transient exception thrown by Kinesis.
  */
 class TransientKinesisException extends Exception {
-    public TransientKinesisException(String s, AmazonServiceException e) {
-        super(s, e);
-    }
+
+  public TransientKinesisException(String s, AmazonServiceException e) {
+    super(s, e);
+  }
 }

http://git-wip-us.apache.org/repos/asf/beam/blob/7925a668/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/AmazonKinesisMock.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/AmazonKinesisMock.java b/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/AmazonKinesisMock.java
index 046c9d9..994d6e3 100644
--- a/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/AmazonKinesisMock.java
+++ b/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/AmazonKinesisMock.java
@@ -66,10 +66,12 @@ import com.amazonaws.services.kinesis.model.SplitShardRequest;
 import com.amazonaws.services.kinesis.model.SplitShardResult;
 import com.amazonaws.services.kinesis.model.StreamDescription;
 import com.google.common.base.Function;
+
 import java.io.Serializable;
 import java.nio.ByteBuffer;
 import java.util.List;
 import javax.annotation.Nullable;
+
 import org.apache.commons.lang.builder.EqualsBuilder;
 import org.joda.time.Instant;
 
@@ -78,298 +80,301 @@ import org.joda.time.Instant;
  */
 class AmazonKinesisMock implements AmazonKinesis {
 
-    static class TestData implements Serializable {
-        private final String data;
-        private final Instant arrivalTimestamp;
-        private final String sequenceNumber;
-
-        public TestData(KinesisRecord record) {
-            this(new String(record.getData().array()),
-                    record.getApproximateArrivalTimestamp(),
-                    record.getSequenceNumber());
-        }
-
-        public TestData(String data, Instant arrivalTimestamp, String sequenceNumber) {
-            this.data = data;
-            this.arrivalTimestamp = arrivalTimestamp;
-            this.sequenceNumber = sequenceNumber;
-        }
-
-        public Record convertToRecord() {
-            return new Record().
-                    withApproximateArrivalTimestamp(arrivalTimestamp.toDate()).
-                    withData(ByteBuffer.wrap(data.getBytes())).
-                    withSequenceNumber(sequenceNumber).
-                    withPartitionKey("");
-        }
-
-        @Override
-        public boolean equals(Object obj) {
-            return EqualsBuilder.reflectionEquals(this, obj);
-        }
-
-        @Override
-        public int hashCode() {
-            return reflectionHashCode(this);
-        }
-    }
-
-    static class Provider implements KinesisClientProvider {
-
-        private final List<List<TestData>> shardedData;
-        private final int numberOfRecordsPerGet;
-
-        public Provider(List<List<TestData>> shardedData, int numberOfRecordsPerGet) {
-            this.shardedData = shardedData;
-            this.numberOfRecordsPerGet = numberOfRecordsPerGet;
-        }
-
-        @Override
-        public AmazonKinesis get() {
-            return new AmazonKinesisMock(transform(shardedData,
-                    new Function<List<TestData>, List<Record>>() {
-                        @Override
-                        public List<Record> apply(@Nullable List<TestData> testDatas) {
-                            return transform(testDatas, new Function<TestData, Record>() {
-                                @Override
-                                public Record apply(@Nullable TestData testData) {
-                                    return testData.convertToRecord();
-                                }
-                            });
-                        }
-                    }), numberOfRecordsPerGet);
-        }
-    }
-
-    private final List<List<Record>> shardedData;
-    private final int numberOfRecordsPerGet;
-
-    public AmazonKinesisMock(List<List<Record>> shardedData, int numberOfRecordsPerGet) {
-        this.shardedData = shardedData;
-        this.numberOfRecordsPerGet = numberOfRecordsPerGet;
-    }
-
-    @Override
-    public GetRecordsResult getRecords(GetRecordsRequest getRecordsRequest) {
-        String[] shardIteratorParts = getRecordsRequest.getShardIterator().split(":");
-        int shardId = parseInt(shardIteratorParts[0]);
-        int startingRecord = parseInt(shardIteratorParts[1]);
-        List<Record> shardData = shardedData.get(shardId);
-
-        int toIndex = min(startingRecord + numberOfRecordsPerGet, shardData.size());
-        int fromIndex = min(startingRecord, toIndex);
-        return new GetRecordsResult().
-                withRecords(shardData.subList(fromIndex, toIndex)).
-                withNextShardIterator(String.format("%s:%s", shardId, toIndex));
-    }
-
-    @Override
-    public GetShardIteratorResult getShardIterator(
-            GetShardIteratorRequest getShardIteratorRequest) {
-        ShardIteratorType shardIteratorType = ShardIteratorType.fromValue(
-                getShardIteratorRequest.getShardIteratorType());
-
-        String shardIterator;
-        if (shardIteratorType == ShardIteratorType.TRIM_HORIZON) {
-            shardIterator = String.format("%s:%s", getShardIteratorRequest.getShardId(), 0);
-        } else {
-            throw new RuntimeException("Not implemented");
-        }
-
-        return new GetShardIteratorResult().withShardIterator(shardIterator);
-    }
-
-    @Override
-    public DescribeStreamResult describeStream(String streamName, String exclusiveStartShardId) {
-        int nextShardId = 0;
-        if (exclusiveStartShardId != null) {
-            nextShardId = parseInt(exclusiveStartShardId) + 1;
-        }
-        boolean hasMoreShards = nextShardId + 1 < shardedData.size();
-
-        List<Shard> shards = newArrayList();
-        if (nextShardId < shardedData.size()) {
-            shards.add(new Shard().withShardId(Integer.toString(nextShardId)));
-        }
-
-        return new DescribeStreamResult().withStreamDescription(
-                new StreamDescription().withHasMoreShards(hasMoreShards).withShards(shards)
-        );
-    }
-
-    @Override
-    public void setEndpoint(String endpoint) {
-
-    }
-
-    @Override
-    public void setRegion(Region region) {
-
-    }
-
-    @Override
-    public AddTagsToStreamResult addTagsToStream(AddTagsToStreamRequest addTagsToStreamRequest) {
-        throw new RuntimeException("Not implemented");
-    }
-
-    @Override
-    public CreateStreamResult createStream(CreateStreamRequest createStreamRequest) {
-        throw new RuntimeException("Not implemented");
-    }
-
-    @Override
-    public CreateStreamResult createStream(String streamName, Integer shardCount) {
-        throw new RuntimeException("Not implemented");
-    }
-
-    @Override
-    public DecreaseStreamRetentionPeriodResult decreaseStreamRetentionPeriod(
-            DecreaseStreamRetentionPeriodRequest decreaseStreamRetentionPeriodRequest) {
-        throw new RuntimeException("Not implemented");
-    }
-
-    @Override
-    public DeleteStreamResult deleteStream(DeleteStreamRequest deleteStreamRequest) {
-        throw new RuntimeException("Not implemented");
-    }
-
-    @Override
-    public DeleteStreamResult deleteStream(String streamName) {
-        throw new RuntimeException("Not implemented");
-    }
-
-    @Override
-    public DescribeStreamResult describeStream(DescribeStreamRequest describeStreamRequest) {
-        throw new RuntimeException("Not implemented");
-    }
-
-    @Override
-    public DescribeStreamResult describeStream(String streamName) {
-
-        throw new RuntimeException("Not implemented");
-    }
-
-    @Override
-    public DescribeStreamResult describeStream(String streamName,
-                                               Integer limit, String exclusiveStartShardId) {
-        throw new RuntimeException("Not implemented");
-    }
-
-    @Override
-    public DisableEnhancedMonitoringResult disableEnhancedMonitoring(
-            DisableEnhancedMonitoringRequest disableEnhancedMonitoringRequest) {
-        throw new RuntimeException("Not implemented");
-    }
-
-    @Override
-    public EnableEnhancedMonitoringResult enableEnhancedMonitoring(
-            EnableEnhancedMonitoringRequest enableEnhancedMonitoringRequest) {
-        throw new RuntimeException("Not implemented");
-    }
-
-    @Override
-    public GetShardIteratorResult getShardIterator(String streamName,
-                                                   String shardId,
-                                                   String shardIteratorType) {
-        throw new RuntimeException("Not implemented");
-    }
-
-    @Override
-    public GetShardIteratorResult getShardIterator(String streamName,
-                                                   String shardId,
-                                                   String shardIteratorType,
-                                                   String startingSequenceNumber) {
-        throw new RuntimeException("Not implemented");
-    }
-
-    @Override
-    public IncreaseStreamRetentionPeriodResult increaseStreamRetentionPeriod(
-            IncreaseStreamRetentionPeriodRequest increaseStreamRetentionPeriodRequest) {
-        throw new RuntimeException("Not implemented");
-    }
+  static class TestData implements Serializable {
 
-    @Override
-    public ListStreamsResult listStreams(ListStreamsRequest listStreamsRequest) {
-        throw new RuntimeException("Not implemented");
-    }
+    private final String data;
+    private final Instant arrivalTimestamp;
+    private final String sequenceNumber;
 
-    @Override
-    public ListStreamsResult listStreams() {
-        throw new RuntimeException("Not implemented");
+    public TestData(KinesisRecord record) {
+      this(new String(record.getData().array()),
+          record.getApproximateArrivalTimestamp(),
+          record.getSequenceNumber());
     }
 
-    @Override
-    public ListStreamsResult listStreams(String exclusiveStartStreamName) {
-        throw new RuntimeException("Not implemented");
+    public TestData(String data, Instant arrivalTimestamp, String sequenceNumber) {
+      this.data = data;
+      this.arrivalTimestamp = arrivalTimestamp;
+      this.sequenceNumber = sequenceNumber;
     }
 
-    @Override
-    public ListStreamsResult listStreams(Integer limit, String exclusiveStartStreamName) {
-        throw new RuntimeException("Not implemented");
+    public Record convertToRecord() {
+      return new Record()
+          .withApproximateArrivalTimestamp(arrivalTimestamp.toDate())
+          .withData(ByteBuffer.wrap(data.getBytes()))
+          .withSequenceNumber(sequenceNumber)
+          .withPartitionKey("");
     }
 
     @Override
-    public ListTagsForStreamResult listTagsForStream(
-            ListTagsForStreamRequest listTagsForStreamRequest) {
-        throw new RuntimeException("Not implemented");
+    public boolean equals(Object obj) {
+      return EqualsBuilder.reflectionEquals(this, obj);
     }
 
     @Override
-    public MergeShardsResult mergeShards(MergeShardsRequest mergeShardsRequest) {
-        throw new RuntimeException("Not implemented");
+    public int hashCode() {
+      return reflectionHashCode(this);
     }
+  }
 
-    @Override
-    public MergeShardsResult mergeShards(String streamName,
-                                         String shardToMerge, String adjacentShardToMerge) {
-        throw new RuntimeException("Not implemented");
-    }
+  static class Provider implements KinesisClientProvider {
 
-    @Override
-    public PutRecordResult putRecord(PutRecordRequest putRecordRequest) {
-        throw new RuntimeException("Not implemented");
-    }
+    private final List<List<TestData>> shardedData;
+    private final int numberOfRecordsPerGet;
 
-    @Override
-    public PutRecordResult putRecord(String streamName, ByteBuffer data, String partitionKey) {
-        throw new RuntimeException("Not implemented");
+    public Provider(List<List<TestData>> shardedData, int numberOfRecordsPerGet) {
+      this.shardedData = shardedData;
+      this.numberOfRecordsPerGet = numberOfRecordsPerGet;
     }
 
     @Override
-    public PutRecordResult putRecord(String streamName, ByteBuffer data,
-                                     String partitionKey, String sequenceNumberForOrdering) {
-        throw new RuntimeException("Not implemented");
-    }
+    public AmazonKinesis get() {
+      return new AmazonKinesisMock(transform(shardedData,
+          new Function<List<TestData>, List<Record>>() {
 
-    @Override
-    public PutRecordsResult putRecords(PutRecordsRequest putRecordsRequest) {
-        throw new RuntimeException("Not implemented");
-    }
+            @Override
+            public List<Record> apply(@Nullable List<TestData> testDatas) {
+              return transform(testDatas, new Function<TestData, Record>() {
 
-    @Override
-    public RemoveTagsFromStreamResult removeTagsFromStream(
-            RemoveTagsFromStreamRequest removeTagsFromStreamRequest) {
-        throw new RuntimeException("Not implemented");
+                @Override
+                public Record apply(@Nullable TestData testData) {
+                  return testData.convertToRecord();
+                }
+              });
+            }
+          }), numberOfRecordsPerGet);
     }
+  }
 
-    @Override
-    public SplitShardResult splitShard(SplitShardRequest splitShardRequest) {
-        throw new RuntimeException("Not implemented");
-    }
+  private final List<List<Record>> shardedData;
+  private final int numberOfRecordsPerGet;
 
-    @Override
-    public SplitShardResult splitShard(String streamName,
-                                       String shardToSplit, String newStartingHashKey) {
-        throw new RuntimeException("Not implemented");
-    }
+  public AmazonKinesisMock(List<List<Record>> shardedData, int numberOfRecordsPerGet) {
+    this.shardedData = shardedData;
+    this.numberOfRecordsPerGet = numberOfRecordsPerGet;
+  }
 
-    @Override
-    public void shutdown() {
+  @Override
+  public GetRecordsResult getRecords(GetRecordsRequest getRecordsRequest) {
+    String[] shardIteratorParts = getRecordsRequest.getShardIterator().split(":");
+    int shardId = parseInt(shardIteratorParts[0]);
+    int startingRecord = parseInt(shardIteratorParts[1]);
+    List<Record> shardData = shardedData.get(shardId);
 
-    }
+    int toIndex = min(startingRecord + numberOfRecordsPerGet, shardData.size());
+    int fromIndex = min(startingRecord, toIndex);
+    return new GetRecordsResult().
+        withRecords(shardData.subList(fromIndex, toIndex)).
+        withNextShardIterator(String.format("%s:%s", shardId, toIndex));
+  }
 
-    @Override
-    public ResponseMetadata getCachedResponseMetadata(AmazonWebServiceRequest request) {
-        throw new RuntimeException("Not implemented");
-    }
+  @Override
+  public GetShardIteratorResult getShardIterator(
+      GetShardIteratorRequest getShardIteratorRequest) {
+    ShardIteratorType shardIteratorType = ShardIteratorType.fromValue(
+        getShardIteratorRequest.getShardIteratorType());
+
+    String shardIterator;
+    if (shardIteratorType == ShardIteratorType.TRIM_HORIZON) {
+      shardIterator = String.format("%s:%s", getShardIteratorRequest.getShardId(), 0);
+    } else {
+      throw new RuntimeException("Not implemented");
+    }
+
+    return new GetShardIteratorResult().withShardIterator(shardIterator);
+  }
+
+  @Override
+  public DescribeStreamResult describeStream(String streamName, String exclusiveStartShardId) {
+    int nextShardId = 0;
+    if (exclusiveStartShardId != null) {
+      nextShardId = parseInt(exclusiveStartShardId) + 1;
+    }
+    boolean hasMoreShards = nextShardId + 1 < shardedData.size();
+
+    List<Shard> shards = newArrayList();
+    if (nextShardId < shardedData.size()) {
+      shards.add(new Shard().withShardId(Integer.toString(nextShardId)));
+    }
+
+    return new DescribeStreamResult().withStreamDescription(
+        new StreamDescription().withHasMoreShards(hasMoreShards).withShards(shards)
+    );
+  }
+
+  @Override
+  public void setEndpoint(String endpoint) {
+
+  }
+
+  @Override
+  public void setRegion(Region region) {
+
+  }
+
+  @Override
+  public AddTagsToStreamResult addTagsToStream(AddTagsToStreamRequest addTagsToStreamRequest) {
+    throw new RuntimeException("Not implemented");
+  }
+
+  @Override
+  public CreateStreamResult createStream(CreateStreamRequest createStreamRequest) {
+    throw new RuntimeException("Not implemented");
+  }
+
+  @Override
+  public CreateStreamResult createStream(String streamName, Integer shardCount) {
+    throw new RuntimeException("Not implemented");
+  }
+
+  @Override
+  public DecreaseStreamRetentionPeriodResult decreaseStreamRetentionPeriod(
+      DecreaseStreamRetentionPeriodRequest decreaseStreamRetentionPeriodRequest) {
+    throw new RuntimeException("Not implemented");
+  }
+
+  @Override
+  public DeleteStreamResult deleteStream(DeleteStreamRequest deleteStreamRequest) {
+    throw new RuntimeException("Not implemented");
+  }
+
+  @Override
+  public DeleteStreamResult deleteStream(String streamName) {
+    throw new RuntimeException("Not implemented");
+  }
+
+  @Override
+  public DescribeStreamResult describeStream(DescribeStreamRequest describeStreamRequest) {
+    throw new RuntimeException("Not implemented");
+  }
+
+  @Override
+  public DescribeStreamResult describeStream(String streamName) {
+    throw new RuntimeException("Not implemented");
+  }
+
+  @Override
+  public DescribeStreamResult describeStream(String streamName,
+      Integer limit, String exclusiveStartShardId) {
+    throw new RuntimeException("Not implemented");
+  }
+
+  @Override
+  public DisableEnhancedMonitoringResult disableEnhancedMonitoring(
+      DisableEnhancedMonitoringRequest disableEnhancedMonitoringRequest) {
+    throw new RuntimeException("Not implemented");
+  }
+
+  @Override
+  public EnableEnhancedMonitoringResult enableEnhancedMonitoring(
+      EnableEnhancedMonitoringRequest enableEnhancedMonitoringRequest) {
+    throw new RuntimeException("Not implemented");
+  }
+
+  @Override
+  public GetShardIteratorResult getShardIterator(String streamName,
+      String shardId,
+      String shardIteratorType) {
+    throw new RuntimeException("Not implemented");
+  }
+
+  @Override
+  public GetShardIteratorResult getShardIterator(String streamName,
+      String shardId,
+      String shardIteratorType,
+      String startingSequenceNumber) {
+    throw new RuntimeException("Not implemented");
+  }
+
+  @Override
+  public IncreaseStreamRetentionPeriodResult increaseStreamRetentionPeriod(
+      IncreaseStreamRetentionPeriodRequest increaseStreamRetentionPeriodRequest) {
+    throw new RuntimeException("Not implemented");
+  }
+
+  @Override
+  public ListStreamsResult listStreams(ListStreamsRequest listStreamsRequest) {
+    throw new RuntimeException("Not implemented");
+  }
+
+  @Override
+  public ListStreamsResult listStreams() {
+    throw new RuntimeException("Not implemented");
+  }
+
+  @Override
+  public ListStreamsResult listStreams(String exclusiveStartStreamName) {
+    throw new RuntimeException("Not implemented");
+  }
+
+  @Override
+  public ListStreamsResult listStreams(Integer limit, String exclusiveStartStreamName) {
+    throw new RuntimeException("Not implemented");
+  }
+
+  @Override
+  public ListTagsForStreamResult listTagsForStream(
+      ListTagsForStreamRequest listTagsForStreamRequest) {
+    throw new RuntimeException("Not implemented");
+  }
+
+  @Override
+  public MergeShardsResult mergeShards(MergeShardsRequest mergeShardsRequest) {
+    throw new RuntimeException("Not implemented");
+  }
+
+  @Override
+  public MergeShardsResult mergeShards(String streamName,
+      String shardToMerge, String adjacentShardToMerge) {
+    throw new RuntimeException("Not implemented");
+  }
+
+  @Override
+  public PutRecordResult putRecord(PutRecordRequest putRecordRequest) {
+    throw new RuntimeException("Not implemented");
+  }
+
+  @Override
+  public PutRecordResult putRecord(String streamName, ByteBuffer data, String partitionKey) {
+    throw new RuntimeException("Not implemented");
+  }
+
+  @Override
+  public PutRecordResult putRecord(String streamName, ByteBuffer data,
+      String partitionKey, String sequenceNumberForOrdering) {
+    throw new RuntimeException("Not implemented");
+  }
+
+  @Override
+  public PutRecordsResult putRecords(PutRecordsRequest putRecordsRequest) {
+    throw new RuntimeException("Not implemented");
+  }
+
+  @Override
+  public RemoveTagsFromStreamResult removeTagsFromStream(
+      RemoveTagsFromStreamRequest removeTagsFromStreamRequest) {
+    throw new RuntimeException("Not implemented");
+  }
+
+  @Override
+  public SplitShardResult splitShard(SplitShardRequest splitShardRequest) {
+    throw new RuntimeException("Not implemented");
+  }
+
+  @Override
+  public SplitShardResult splitShard(String streamName,
+      String shardToSplit, String newStartingHashKey) {
+    throw new RuntimeException("Not implemented");
+  }
+
+  @Override
+  public void shutdown() {
+
+  }
+
+  @Override
+  public ResponseMetadata getCachedResponseMetadata(AmazonWebServiceRequest request) {
+    throw new RuntimeException("Not implemented");
+  }
 }
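
A note on the mock above: a shard iterator here is nothing more than a
"shardId:offset" string, so getRecords() pages through a shard in
numberOfRecordsPerGet-sized steps and hands back the next offset as the next
iterator. The same paging arithmetic in isolation (a minimal sketch using
plain strings instead of AWS Record objects; names are illustrative):

    // Needs java.util.List; mirrors the subList/min logic in getRecords() above.
    static void drainShard(java.util.List<String> shardData, int recordsPerGet) {
      int offset = 0;  // what the "shardId:offset" iterator encodes
      while (offset < shardData.size()) {
        int toIndex = Math.min(offset + recordsPerGet, shardData.size());
        System.out.println("batch: " + shardData.subList(offset, toIndex));
        offset = toIndex;  // becomes the "next shard iterator"
      }
    }

    // drainShard(java.util.Arrays.asList("a", "b", "c"), 2) prints [a, b] then [c].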

http://git-wip-us.apache.org/repos/asf/beam/blob/7925a668/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/CustomOptionalTest.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/CustomOptionalTest.java b/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/CustomOptionalTest.java
index 00acffe..0b16bb7 100644
--- a/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/CustomOptionalTest.java
+++ b/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/CustomOptionalTest.java
@@ -18,24 +18,27 @@
 package org.apache.beam.sdk.io.kinesis;
 
 import com.google.common.testing.EqualsTester;
+
 import java.util.NoSuchElementException;
+
 import org.junit.Test;
 
 /**
  * Tests {@link CustomOptional}.
  */
 public class CustomOptionalTest {
-    @Test(expected = NoSuchElementException.class)
-    public void absentThrowsNoSuchElementExceptionOnGet() {
-        CustomOptional.absent().get();
-    }
 
-    @Test
-    public void testEqualsAndHashCode() {
-        new EqualsTester()
-            .addEqualityGroup(CustomOptional.absent(), CustomOptional.absent())
-            .addEqualityGroup(CustomOptional.of(3), CustomOptional.of(3))
-            .addEqualityGroup(CustomOptional.of(11))
-            .addEqualityGroup(CustomOptional.of("3")).testEquals();
-    }
+  @Test(expected = NoSuchElementException.class)
+  public void absentThrowsNoSuchElementExceptionOnGet() {
+    CustomOptional.absent().get();
+  }
+
+  @Test
+  public void testEqualsAndHashCode() {
+    new EqualsTester()
+        .addEqualityGroup(CustomOptional.absent(), CustomOptional.absent())
+        .addEqualityGroup(CustomOptional.of(3), CustomOptional.of(3))
+        .addEqualityGroup(CustomOptional.of(11))
+        .addEqualityGroup(CustomOptional.of("3")).testEquals();
+  }
 }

http://git-wip-us.apache.org/repos/asf/beam/blob/7925a668/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/DynamicCheckpointGeneratorTest.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/DynamicCheckpointGeneratorTest.java b/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/DynamicCheckpointGeneratorTest.java
index c92ac9a..1bb9717 100644
--- a/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/DynamicCheckpointGeneratorTest.java
+++ b/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/DynamicCheckpointGeneratorTest.java
@@ -28,30 +28,29 @@ import org.junit.runner.RunWith;
 import org.mockito.Mock;
 import org.mockito.runners.MockitoJUnitRunner;
 
-
 /***
  */
 @RunWith(MockitoJUnitRunner.class)
 public class DynamicCheckpointGeneratorTest {
 
-    @Mock
-    private SimplifiedKinesisClient kinesisClient;
-    @Mock
-    private Shard shard1, shard2, shard3;
+  @Mock
+  private SimplifiedKinesisClient kinesisClient;
+  @Mock
+  private Shard shard1, shard2, shard3;
 
-    @Test
-    public void shouldMapAllShardsToCheckpoints() throws Exception {
-        given(shard1.getShardId()).willReturn("shard-01");
-        given(shard2.getShardId()).willReturn("shard-02");
-        given(shard3.getShardId()).willReturn("shard-03");
-        given(kinesisClient.listShards("stream")).willReturn(asList(shard1, shard2, shard3));
+  @Test
+  public void shouldMapAllShardsToCheckpoints() throws Exception {
+    given(shard1.getShardId()).willReturn("shard-01");
+    given(shard2.getShardId()).willReturn("shard-02");
+    given(shard3.getShardId()).willReturn("shard-03");
+    given(kinesisClient.listShards("stream")).willReturn(asList(shard1, shard2, shard3));
 
-        StartingPoint startingPoint = new StartingPoint(InitialPositionInStream.LATEST);
-        DynamicCheckpointGenerator underTest = new DynamicCheckpointGenerator("stream",
-                startingPoint);
+    StartingPoint startingPoint = new StartingPoint(InitialPositionInStream.LATEST);
+    DynamicCheckpointGenerator underTest = new DynamicCheckpointGenerator("stream",
+        startingPoint);
 
-        KinesisReaderCheckpoint checkpoint = underTest.generate(kinesisClient);
+    KinesisReaderCheckpoint checkpoint = underTest.generate(kinesisClient);
 
-        assertThat(checkpoint).hasSize(3);
-    }
+    assertThat(checkpoint).hasSize(3);
+  }
 }
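
The generator under test simply asks Kinesis for the shard list and opens one
checkpoint per shard at the configured starting point, which is why three
mocked shards yield a checkpoint of size 3. A presumed shape of generate(),
reconstructed from the calls exercised here (the class body itself is not part
of this diff):

    public KinesisReaderCheckpoint generate(SimplifiedKinesisClient kinesis)
        throws TransientKinesisException {
      List<ShardCheckpoint> checkpoints = newArrayList();
      for (Shard shard : kinesis.listShards(streamName)) {
        // One checkpoint per shard, all anchored at the same starting point.
        checkpoints.add(new ShardCheckpoint(streamName, shard.getShardId(), startingPoint));
      }
      return new KinesisReaderCheckpoint(checkpoints);
    }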

http://git-wip-us.apache.org/repos/asf/beam/blob/7925a668/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/KinesisMockReadTest.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/KinesisMockReadTest.java b/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/KinesisMockReadTest.java
index 567e25f..44ad67d 100644
--- a/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/KinesisMockReadTest.java
+++ b/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/KinesisMockReadTest.java
@@ -21,7 +21,9 @@ import static com.google.common.collect.Lists.newArrayList;
 
 import com.amazonaws.services.kinesis.clientlibrary.lib.worker.InitialPositionInStream;
 import com.google.common.collect.Iterables;
+
 import java.util.List;
+
 import org.apache.beam.sdk.testing.PAssert;
 import org.apache.beam.sdk.testing.TestPipeline;
 import org.apache.beam.sdk.transforms.DoFn;
@@ -36,59 +38,60 @@ import org.junit.Test;
  */
 public class KinesisMockReadTest {
 
-    @Rule
-    public final transient TestPipeline p = TestPipeline.create();
-
-    @Test
-    public void readsDataFromMockKinesis() {
-        int noOfShards = 3;
-        int noOfEventsPerShard = 100;
-        List<List<AmazonKinesisMock.TestData>> testData =
-                provideTestData(noOfShards, noOfEventsPerShard);
-
-        PCollection<AmazonKinesisMock.TestData> result = p
-                .apply(
-                        KinesisIO.read()
-                                .from("stream", InitialPositionInStream.TRIM_HORIZON)
-                                .withClientProvider(new AmazonKinesisMock.Provider(testData, 10))
-                                .withMaxNumRecords(noOfShards * noOfEventsPerShard))
-                .apply(ParDo.of(new KinesisRecordToTestData()));
-        PAssert.that(result).containsInAnyOrder(Iterables.concat(testData));
-        p.run();
-    }
+  @Rule
+  public final transient TestPipeline p = TestPipeline.create();
 
-    private static class KinesisRecordToTestData extends
-            DoFn<KinesisRecord, AmazonKinesisMock.TestData> {
-        @ProcessElement
-        public void processElement(ProcessContext c) throws Exception {
-            c.output(new AmazonKinesisMock.TestData(c.element()));
-        }
-    }
+  @Test
+  public void readsDataFromMockKinesis() {
+    int noOfShards = 3;
+    int noOfEventsPerShard = 100;
+    List<List<AmazonKinesisMock.TestData>> testData =
+        provideTestData(noOfShards, noOfEventsPerShard);
 
-    private List<List<AmazonKinesisMock.TestData>> provideTestData(
-            int noOfShards,
-            int noOfEventsPerShard) {
+    PCollection<AmazonKinesisMock.TestData> result = p
+        .apply(
+            KinesisIO.read()
+                .from("stream", InitialPositionInStream.TRIM_HORIZON)
+                .withClientProvider(new AmazonKinesisMock.Provider(testData, 10))
+                .withMaxNumRecords(noOfShards * noOfEventsPerShard))
+        .apply(ParDo.of(new KinesisRecordToTestData()));
+    PAssert.that(result).containsInAnyOrder(Iterables.concat(testData));
+    p.run();
+  }
 
-        int seqNumber = 0;
+  private static class KinesisRecordToTestData extends
+      DoFn<KinesisRecord, AmazonKinesisMock.TestData> {
 
-        List<List<AmazonKinesisMock.TestData>> shardedData = newArrayList();
-        for (int i = 0; i < noOfShards; ++i) {
-            List<AmazonKinesisMock.TestData> shardData = newArrayList();
-            shardedData.add(shardData);
+    @ProcessElement
+    public void processElement(ProcessContext c) throws Exception {
+      c.output(new AmazonKinesisMock.TestData(c.element()));
+    }
+  }
+
+  private List<List<AmazonKinesisMock.TestData>> provideTestData(
+      int noOfShards,
+      int noOfEventsPerShard) {
 
-            DateTime arrival = DateTime.now();
-            for (int j = 0; j < noOfEventsPerShard; ++j) {
-                arrival = arrival.plusSeconds(1);
+    int seqNumber = 0;
 
-                seqNumber++;
-                shardData.add(new AmazonKinesisMock.TestData(
-                        Integer.toString(seqNumber),
-                        arrival.toInstant(),
-                        Integer.toString(seqNumber))
-                );
-            }
-        }
+    List<List<AmazonKinesisMock.TestData>> shardedData = newArrayList();
+    for (int i = 0; i < noOfShards; ++i) {
+      List<AmazonKinesisMock.TestData> shardData = newArrayList();
+      shardedData.add(shardData);
 
-        return shardedData;
+      DateTime arrival = DateTime.now();
+      for (int j = 0; j < noOfEventsPerShard; ++j) {
+        arrival = arrival.plusSeconds(1);
+
+        seqNumber++;
+        shardData.add(new AmazonKinesisMock.TestData(
+            Integer.toString(seqNumber),
+            arrival.toInstant(),
+            Integer.toString(seqNumber))
+        );
+      }
     }
+
+    return shardedData;
+  }
 }
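
What makes this pipeline terminate is withMaxNumRecords(): it turns the
otherwise unbounded Kinesis source into a bounded read of exactly
noOfShards * noOfEventsPerShard elements, so p.run() can finish and PAssert
can fire. The read configuration in isolation (the stream name and the batch
size of 10 are the test's own choices):

    PCollection<KinesisRecord> records = p.apply(
        KinesisIO.read()
            .from("stream", InitialPositionInStream.TRIM_HORIZON)  // read from stream start
            .withClientProvider(new AmazonKinesisMock.Provider(testData, 10))
            .withMaxNumRecords(300));  // 3 shards * 100 events; bounds the source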

http://git-wip-us.apache.org/repos/asf/beam/blob/7925a668/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/KinesisReaderCheckpointTest.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/KinesisReaderCheckpointTest.java b/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/KinesisReaderCheckpointTest.java
index 8c8da64..1038a47 100644
--- a/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/KinesisReaderCheckpointTest.java
+++ b/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/KinesisReaderCheckpointTest.java
@@ -17,13 +17,14 @@
  */
 package org.apache.beam.sdk.io.kinesis;
 
-
 import static java.util.Arrays.asList;
 import static org.assertj.core.api.Assertions.assertThat;
 
 import com.google.common.collect.Iterables;
+
 import java.util.Iterator;
 import java.util.List;
+
 import org.junit.Before;
 import org.junit.Test;
 import org.junit.runner.RunWith;
@@ -35,33 +36,34 @@ import org.mockito.runners.MockitoJUnitRunner;
  */
 @RunWith(MockitoJUnitRunner.class)
 public class KinesisReaderCheckpointTest {
-    @Mock
-    private ShardCheckpoint a, b, c;
 
-    private KinesisReaderCheckpoint checkpoint;
+  @Mock
+  private ShardCheckpoint a, b, c;
+
+  private KinesisReaderCheckpoint checkpoint;
 
-    @Before
-    public void setUp() {
-        checkpoint = new KinesisReaderCheckpoint(asList(a, b, c));
-    }
+  @Before
+  public void setUp() {
+    checkpoint = new KinesisReaderCheckpoint(asList(a, b, c));
+  }
 
-    @Test
-    public void splitsCheckpointAccordingly() {
-        verifySplitInto(1);
-        verifySplitInto(2);
-        verifySplitInto(3);
-        verifySplitInto(4);
-    }
+  @Test
+  public void splitsCheckpointAccordingly() {
+    verifySplitInto(1);
+    verifySplitInto(2);
+    verifySplitInto(3);
+    verifySplitInto(4);
+  }
 
-    @Test(expected = UnsupportedOperationException.class)
-    public void isImmutable() {
-        Iterator<ShardCheckpoint> iterator = checkpoint.iterator();
-        iterator.remove();
-    }
+  @Test(expected = UnsupportedOperationException.class)
+  public void isImmutable() {
+    Iterator<ShardCheckpoint> iterator = checkpoint.iterator();
+    iterator.remove();
+  }
 
-    private void verifySplitInto(int size) {
-        List<KinesisReaderCheckpoint> split = checkpoint.splitInto(size);
-        assertThat(Iterables.concat(split)).containsOnly(a, b, c);
-        assertThat(split).hasSize(Math.min(size, 3));
-    }
+  private void verifySplitInto(int size) {
+    List<KinesisReaderCheckpoint> split = checkpoint.splitInto(size);
+    assertThat(Iterables.concat(split)).containsOnly(a, b, c);
+    assertThat(split).hasSize(Math.min(size, 3));
+  }
 }
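
splitInto() itself is not shown here, but the assertions pin down its
contract: every shard checkpoint lands in exactly one split, and asking for
more splits than there are shards just yields one split per shard, hence
hasSize(Math.min(size, 3)). One partitioning that satisfies the contract (a
sketch, not necessarily the real implementation):

    // Needs java.util.ArrayList and java.util.List.
    static <T> List<List<T>> splitInto(List<T> items, int desiredSize) {
      int parts = Math.min(desiredSize, items.size());
      List<List<T>> splits = new ArrayList<>();
      for (int i = 0; i < parts; i++) {
        splits.add(new ArrayList<T>());
      }
      for (int i = 0; i < items.size(); i++) {
        splits.get(i % parts).add(items.get(i));  // round-robin assignment
      }
      return splits;
    }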

http://git-wip-us.apache.org/repos/asf/beam/blob/7925a668/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/KinesisReaderIT.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/KinesisReaderIT.java b/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/KinesisReaderIT.java
index 8eb6546..5781033 100644
--- a/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/KinesisReaderIT.java
+++ b/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/KinesisReaderIT.java
@@ -23,6 +23,7 @@ import static java.util.concurrent.Executors.newSingleThreadExecutor;
 import static org.assertj.core.api.Assertions.assertThat;
 
 import com.amazonaws.regions.Regions;
+
 import java.io.IOException;
 import java.nio.charset.StandardCharsets;
 import java.util.List;
@@ -31,6 +32,7 @@ import java.util.concurrent.ExecutionException;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.Future;
 import java.util.concurrent.TimeUnit;
+
 import org.apache.beam.sdk.PipelineResult;
 import org.apache.beam.sdk.options.PipelineOptionsFactory;
 import org.apache.beam.sdk.testing.PAssert;
@@ -50,72 +52,75 @@ import org.junit.Test;
  * You need to provide all {@link KinesisTestOptions} in order to run this.
  */
 public class KinesisReaderIT {
-    private static final long PIPELINE_STARTUP_TIME = TimeUnit.SECONDS.toMillis(10);
-    private ExecutorService singleThreadExecutor = newSingleThreadExecutor();
-
-    @Rule
-    public final transient TestPipeline p = TestPipeline.create();
-
-    @Ignore
-    @Test
-    public void readsDataFromRealKinesisStream()
-            throws IOException, InterruptedException, ExecutionException {
-        KinesisTestOptions options = readKinesisOptions();
-        List<String> testData = prepareTestData(1000);
-
-        Future<?> future = startTestPipeline(testData, options);
-        KinesisUploader.uploadAll(testData, options);
-        future.get();
-    }
 
-    private List<String> prepareTestData(int count) {
-        List<String> data = newArrayList();
-        for (int i = 0; i < count; ++i) {
-            data.add(RandomStringUtils.randomAlphabetic(32));
-        }
-        return data;
-    }
+  private static final long PIPELINE_STARTUP_TIME = TimeUnit.SECONDS.toMillis(10);
+  private ExecutorService singleThreadExecutor = newSingleThreadExecutor();
 
-    private Future<?> startTestPipeline(List<String> testData, KinesisTestOptions options)
-            throws InterruptedException {
-
-        PCollection<String> result = p.
-                apply(KinesisIO.read()
-                        .from(options.getAwsKinesisStream(), Instant.now())
-                        .withClientProvider(options.getAwsAccessKey(), options.getAwsSecretKey(),
-                                Regions.fromName(options.getAwsKinesisRegion()))
-                        .withMaxReadTime(Duration.standardMinutes(3))
-                ).
-                apply(ParDo.of(new RecordDataToString()));
-        PAssert.that(result).containsInAnyOrder(testData);
-
-        Future<?> future = singleThreadExecutor.submit(new Callable<Void>() {
-            @Override
-            public Void call() throws Exception {
-                PipelineResult result = p.run();
-                PipelineResult.State state = result.getState();
-                while (state != PipelineResult.State.DONE && state != PipelineResult.State.FAILED) {
-                    Thread.sleep(1000);
-                    state = result.getState();
-                }
-                assertThat(state).isEqualTo(PipelineResult.State.DONE);
-                return null;
-            }
-        });
-        Thread.sleep(PIPELINE_STARTUP_TIME);
-        return future;
-    }
+  @Rule
+  public final transient TestPipeline p = TestPipeline.create();
+
+  @Ignore
+  @Test
+  public void readsDataFromRealKinesisStream()
+      throws IOException, InterruptedException, ExecutionException {
+    KinesisTestOptions options = readKinesisOptions();
+    List<String> testData = prepareTestData(1000);
 
-    private KinesisTestOptions readKinesisOptions() {
-        PipelineOptionsFactory.register(KinesisTestOptions.class);
-        return TestPipeline.testingPipelineOptions().as(KinesisTestOptions.class);
+    Future<?> future = startTestPipeline(testData, options);
+    KinesisUploader.uploadAll(testData, options);
+    future.get();
+  }
+
+  private List<String> prepareTestData(int count) {
+    List<String> data = newArrayList();
+    for (int i = 0; i < count; ++i) {
+      data.add(RandomStringUtils.randomAlphabetic(32));
     }
+    return data;
+  }
+
+  private Future<?> startTestPipeline(List<String> testData, KinesisTestOptions options)
+      throws InterruptedException {
+
+    PCollection<String> result = p.
+        apply(KinesisIO.read()
+            .from(options.getAwsKinesisStream(), Instant.now())
+            .withClientProvider(options.getAwsAccessKey(), options.getAwsSecretKey(),
+                Regions.fromName(options.getAwsKinesisRegion()))
+            .withMaxReadTime(Duration.standardMinutes(3))
+        ).
+        apply(ParDo.of(new RecordDataToString()));
+    PAssert.that(result).containsInAnyOrder(testData);
+
+    Future<?> future = singleThreadExecutor.submit(new Callable<Void>() {
 
-    private static class RecordDataToString extends DoFn<KinesisRecord, String> {
-        @ProcessElement
-        public void processElement(ProcessContext c) throws Exception {
-            checkNotNull(c.element(), "Null record given");
-            c.output(new String(c.element().getData().array(), StandardCharsets.UTF_8));
+      @Override
+      public Void call() throws Exception {
+        PipelineResult result = p.run();
+        PipelineResult.State state = result.getState();
+        while (state != PipelineResult.State.DONE && state != PipelineResult.State.FAILED) {
+          Thread.sleep(1000);
+          state = result.getState();
         }
+        assertThat(state).isEqualTo(PipelineResult.State.DONE);
+        return null;
+      }
+    });
+    Thread.sleep(PIPELINE_STARTUP_TIME);
+    return future;
+  }
+
+  private KinesisTestOptions readKinesisOptions() {
+    PipelineOptionsFactory.register(KinesisTestOptions.class);
+    return TestPipeline.testingPipelineOptions().as(KinesisTestOptions.class);
+  }
+
+  private static class RecordDataToString extends DoFn<KinesisRecord, String> {
+
+    @ProcessElement
+    public void processElement(ProcessContext c) throws Exception {
+      checkNotNull(c.element(), "Null record given");
+      c.output(new String(c.element().getData().array(), StandardCharsets.UTF_8));
     }
+  }
 }

http://git-wip-us.apache.org/repos/asf/beam/blob/7925a668/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/KinesisReaderTest.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/KinesisReaderTest.java b/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/KinesisReaderTest.java
index 3111029..a26501a 100644
--- a/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/KinesisReaderTest.java
+++ b/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/KinesisReaderTest.java
@@ -23,6 +23,7 @@ import static org.mockito.Mockito.when;
 
 import java.io.IOException;
 import java.util.NoSuchElementException;
+
 import org.junit.Before;
 import org.junit.Test;
 import org.junit.runner.RunWith;
@@ -34,87 +35,88 @@ import org.mockito.runners.MockitoJUnitRunner;
  */
 @RunWith(MockitoJUnitRunner.class)
 public class KinesisReaderTest {
-    @Mock
-    private SimplifiedKinesisClient kinesis;
-    @Mock
-    private CheckpointGenerator generator;
-    @Mock
-    private ShardCheckpoint firstCheckpoint, secondCheckpoint;
-    @Mock
-    private ShardRecordsIterator firstIterator, secondIterator;
-    @Mock
-    private KinesisRecord a, b, c, d;
-
-    private KinesisReader reader;
-
-    @Before
-    public void setUp() throws IOException, TransientKinesisException {
-        when(generator.generate(kinesis)).thenReturn(new KinesisReaderCheckpoint(
-                asList(firstCheckpoint, secondCheckpoint)
-        ));
-        when(firstCheckpoint.getShardRecordsIterator(kinesis)).thenReturn(firstIterator);
-        when(secondCheckpoint.getShardRecordsIterator(kinesis)).thenReturn(secondIterator);
-        when(firstIterator.next()).thenReturn(CustomOptional.<KinesisRecord>absent());
-        when(secondIterator.next()).thenReturn(CustomOptional.<KinesisRecord>absent());
-
-        reader = new KinesisReader(kinesis, generator, null);
-    }
-
-    @Test
-    public void startReturnsFalseIfNoDataAtTheBeginning() throws IOException {
-        assertThat(reader.start()).isFalse();
-    }
-
-    @Test(expected = NoSuchElementException.class)
-    public void throwsNoSuchElementExceptionIfNoData() throws IOException {
-        reader.start();
-        reader.getCurrent();
-    }
-
-    @Test
-    public void startReturnsTrueIfSomeDataAvailable() throws IOException,
-            TransientKinesisException {
-        when(firstIterator.next()).
-                thenReturn(CustomOptional.of(a)).
-                thenReturn(CustomOptional.<KinesisRecord>absent());
-
-        assertThat(reader.start()).isTrue();
-    }
-
-    @Test
-    public void advanceReturnsFalseIfThereIsTransientExceptionInKinesis()
-            throws IOException, TransientKinesisException {
-        reader.start();
-
-        when(firstIterator.next()).thenThrow(TransientKinesisException.class);
-
-        assertThat(reader.advance()).isFalse();
-    }
-
-    @Test
-    public void readsThroughAllDataAvailable() throws IOException, TransientKinesisException {
-        when(firstIterator.next()).
-                thenReturn(CustomOptional.<KinesisRecord>absent()).
-                thenReturn(CustomOptional.of(a)).
-                thenReturn(CustomOptional.<KinesisRecord>absent()).
-                thenReturn(CustomOptional.of(b)).
-                thenReturn(CustomOptional.<KinesisRecord>absent());
-
-        when(secondIterator.next()).
-                thenReturn(CustomOptional.of(c)).
-                thenReturn(CustomOptional.<KinesisRecord>absent()).
-                thenReturn(CustomOptional.of(d)).
-                thenReturn(CustomOptional.<KinesisRecord>absent());
-
-        assertThat(reader.start()).isTrue();
-        assertThat(reader.getCurrent()).isEqualTo(c);
-        assertThat(reader.advance()).isTrue();
-        assertThat(reader.getCurrent()).isEqualTo(a);
-        assertThat(reader.advance()).isTrue();
-        assertThat(reader.getCurrent()).isEqualTo(d);
-        assertThat(reader.advance()).isTrue();
-        assertThat(reader.getCurrent()).isEqualTo(b);
-        assertThat(reader.advance()).isFalse();
-    }
+
+  @Mock
+  private SimplifiedKinesisClient kinesis;
+  @Mock
+  private CheckpointGenerator generator;
+  @Mock
+  private ShardCheckpoint firstCheckpoint, secondCheckpoint;
+  @Mock
+  private ShardRecordsIterator firstIterator, secondIterator;
+  @Mock
+  private KinesisRecord a, b, c, d;
+
+  private KinesisReader reader;
+
+  @Before
+  public void setUp() throws IOException, TransientKinesisException {
+    when(generator.generate(kinesis)).thenReturn(new KinesisReaderCheckpoint(
+        asList(firstCheckpoint, secondCheckpoint)
+    ));
+    when(firstCheckpoint.getShardRecordsIterator(kinesis)).thenReturn(firstIterator);
+    when(secondCheckpoint.getShardRecordsIterator(kinesis)).thenReturn(secondIterator);
+    when(firstIterator.next()).thenReturn(CustomOptional.<KinesisRecord>absent());
+    when(secondIterator.next()).thenReturn(CustomOptional.<KinesisRecord>absent());
+
+    reader = new KinesisReader(kinesis, generator, null);
+  }
+
+  @Test
+  public void startReturnsFalseIfNoDataAtTheBeginning() throws IOException {
+    assertThat(reader.start()).isFalse();
+  }
+
+  @Test(expected = NoSuchElementException.class)
+  public void throwsNoSuchElementExceptionIfNoData() throws IOException {
+    reader.start();
+    reader.getCurrent();
+  }
+
+  @Test
+  public void startReturnsTrueIfSomeDataAvailable() throws IOException,
+      TransientKinesisException {
+    when(firstIterator.next()).
+        thenReturn(CustomOptional.of(a)).
+        thenReturn(CustomOptional.<KinesisRecord>absent());
+
+    assertThat(reader.start()).isTrue();
+  }
+
+  @Test
+  public void advanceReturnsFalseIfThereIsTransientExceptionInKinesis()
+      throws IOException, TransientKinesisException {
+    reader.start();
+
+    when(firstIterator.next()).thenThrow(TransientKinesisException.class);
+
+    assertThat(reader.advance()).isFalse();
+  }
+
+  @Test
+  public void readsThroughAllDataAvailable() throws IOException, TransientKinesisException {
+    when(firstIterator.next()).
+        thenReturn(CustomOptional.<KinesisRecord>absent()).
+        thenReturn(CustomOptional.of(a)).
+        thenReturn(CustomOptional.<KinesisRecord>absent()).
+        thenReturn(CustomOptional.of(b)).
+        thenReturn(CustomOptional.<KinesisRecord>absent());
+
+    when(secondIterator.next()).
+        thenReturn(CustomOptional.of(c)).
+        thenReturn(CustomOptional.<KinesisRecord>absent()).
+        thenReturn(CustomOptional.of(d)).
+        thenReturn(CustomOptional.<KinesisRecord>absent());
+
+    assertThat(reader.start()).isTrue();
+    assertThat(reader.getCurrent()).isEqualTo(c);
+    assertThat(reader.advance()).isTrue();
+    assertThat(reader.getCurrent()).isEqualTo(a);
+    assertThat(reader.advance()).isTrue();
+    assertThat(reader.getCurrent()).isEqualTo(d);
+    assertThat(reader.advance()).isTrue();
+    assertThat(reader.getCurrent()).isEqualTo(b);
+    assertThat(reader.advance()).isFalse();
+  }
 
 }
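
The ordering asserted in readsThroughAllDataAvailable (c, a, d, b rather than
a, b, c, d) is the round-robin across shard iterators: the reader moves to the
next shard before each poll, so records from the two shards interleave. A
compact simulation of that interleaving (illustrative only; the real reader
polls ShardRecordsIterators returning CustomOptional values and skips a shard
that momentarily has nothing):

    // Needs java.util.{ArrayDeque, ArrayList, Arrays, Deque, List}.
    Deque<String> shard1 = new ArrayDeque<>(Arrays.asList("a", "b"));
    Deque<String> shard2 = new ArrayDeque<>(Arrays.asList("c", "d"));
    List<Deque<String>> shards = Arrays.asList(shard1, shard2);
    List<String> seen = new ArrayList<>();
    int current = 0;
    while (!shard1.isEmpty() || !shard2.isEmpty()) {
      current = (current + 1) % shards.size();  // advance first, then poll
      if (!shards.get(current).isEmpty()) {
        seen.add(shards.get(current).poll());
      }
    }
    // seen is now [c, a, d, b], matching the assertions above.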

http://git-wip-us.apache.org/repos/asf/beam/blob/7925a668/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/KinesisRecordCoderTest.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/KinesisRecordCoderTest.java b/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/KinesisRecordCoderTest.java
index 8771c86..c9f01bb 100644
--- a/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/KinesisRecordCoderTest.java
+++ b/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/KinesisRecordCoderTest.java
@@ -18,6 +18,7 @@
 package org.apache.beam.sdk.io.kinesis;
 
 import java.nio.ByteBuffer;
+
 import org.apache.beam.sdk.testing.CoderProperties;
 import org.joda.time.Instant;
 import org.junit.Test;
@@ -26,20 +27,21 @@ import org.junit.Test;
  * Tests {@link KinesisRecordCoder}.
  */
 public class KinesisRecordCoderTest {
-    @Test
-    public void encodingAndDecodingWorks() throws Exception {
-        KinesisRecord record = new KinesisRecord(
-                ByteBuffer.wrap("data".getBytes()),
-                "sequence",
-                128L,
-                "partition",
-                Instant.now(),
-                Instant.now(),
-                "stream",
-                "shard"
-        );
-        CoderProperties.coderDecodeEncodeEqual(
-                new KinesisRecordCoder(), record
-        );
-    }
+
+  @Test
+  public void encodingAndDecodingWorks() throws Exception {
+    KinesisRecord record = new KinesisRecord(
+        ByteBuffer.wrap("data".getBytes()),
+        "sequence",
+        128L,
+        "partition",
+        Instant.now(),
+        Instant.now(),
+        "stream",
+        "shard"
+    );
+    CoderProperties.coderDecodeEncodeEqual(
+        new KinesisRecordCoder(), record
+    );
+  }
 }

http://git-wip-us.apache.org/repos/asf/beam/blob/7925a668/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/KinesisTestOptions.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/KinesisTestOptions.java b/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/KinesisTestOptions.java
index 324de46..76bcb27 100644
--- a/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/KinesisTestOptions.java
+++ b/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/KinesisTestOptions.java
@@ -25,23 +25,28 @@ import org.apache.beam.sdk.testing.TestPipelineOptions;
  * Options for Kinesis integration tests.
  */
 public interface KinesisTestOptions extends TestPipelineOptions {
-    @Description("AWS region where Kinesis stream resided")
-    @Default.String("aws-kinesis-region")
-    String getAwsKinesisRegion();
-    void setAwsKinesisRegion(String value);
-
-    @Description("Kinesis stream name")
-    @Default.String("aws-kinesis-stream")
-    String getAwsKinesisStream();
-    void setAwsKinesisStream(String value);
-
-    @Description("AWS secret key")
-    @Default.String("aws-secret-key")
-    String getAwsSecretKey();
-    void setAwsSecretKey(String value);
-
-    @Description("AWS access key")
-    @Default.String("aws-access-key")
-    String getAwsAccessKey();
-    void setAwsAccessKey(String value);
+
+  @Description("AWS region where Kinesis stream resided")
+  @Default.String("aws-kinesis-region")
+  String getAwsKinesisRegion();
+
+  void setAwsKinesisRegion(String value);
+
+  @Description("Kinesis stream name")
+  @Default.String("aws-kinesis-stream")
+  String getAwsKinesisStream();
+
+  void setAwsKinesisStream(String value);
+
+  @Description("AWS secret key")
+  @Default.String("aws-secret-key")
+  String getAwsSecretKey();
+
+  void setAwsSecretKey(String value);
+
+  @Description("AWS access key")
+  @Default.String("aws-access-key")
+  String getAwsAccessKey();
+
+  void setAwsAccessKey(String value);
 }

http://git-wip-us.apache.org/repos/asf/beam/blob/7925a668/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/KinesisUploader.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/KinesisUploader.java b/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/KinesisUploader.java
index 7518ff7..7a7cb02 100644
--- a/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/KinesisUploader.java
+++ b/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/KinesisUploader.java
@@ -29,6 +29,7 @@ import com.amazonaws.services.kinesis.model.PutRecordsResult;
 import com.amazonaws.services.kinesis.model.PutRecordsResultEntry;
 import com.google.common.base.Charsets;
 import com.google.common.collect.Lists;
+
 import java.nio.ByteBuffer;
 import java.util.List;
 
@@ -37,47 +38,46 @@ import java.util.List;
  */
 public class KinesisUploader {
 
-    public static final int MAX_NUMBER_OF_RECORDS_IN_BATCH = 499;
-
-    public static void uploadAll(List<String> data, KinesisTestOptions options) {
-        AmazonKinesisClient client = new AmazonKinesisClient(
-                new StaticCredentialsProvider(
-                        new BasicAWSCredentials(
-                                options.getAwsAccessKey(), options.getAwsSecretKey()))
-        ).withRegion(Regions.fromName(options.getAwsKinesisRegion()));
+  public static final int MAX_NUMBER_OF_RECORDS_IN_BATCH = 499;
 
-        List<List<String>> partitions = Lists.partition(data, MAX_NUMBER_OF_RECORDS_IN_BATCH);
+  public static void uploadAll(List<String> data, KinesisTestOptions options) {
+    AmazonKinesisClient client = new AmazonKinesisClient(
+        new StaticCredentialsProvider(
+            new BasicAWSCredentials(
+                options.getAwsAccessKey(), options.getAwsSecretKey()))
+    ).withRegion(Regions.fromName(options.getAwsKinesisRegion()));
 
+    List<List<String>> partitions = Lists.partition(data, MAX_NUMBER_OF_RECORDS_IN_BATCH);
 
-        for (List<String> partition : partitions) {
-            List<PutRecordsRequestEntry> allRecords = newArrayList();
-            for (String row : partition) {
-                allRecords.add(new PutRecordsRequestEntry().
-                        withData(ByteBuffer.wrap(row.getBytes(Charsets.UTF_8))).
-                        withPartitionKey(Integer.toString(row.hashCode()))
+    for (List<String> partition : partitions) {
+      List<PutRecordsRequestEntry> allRecords = newArrayList();
+      for (String row : partition) {
+        allRecords.add(new PutRecordsRequestEntry().
+            withData(ByteBuffer.wrap(row.getBytes(Charsets.UTF_8))).
+            withPartitionKey(Integer.toString(row.hashCode()))
 
-                );
-            }
+        );
+      }
 
-            PutRecordsResult result;
-            do {
-                result = client.putRecords(
-                        new PutRecordsRequest().
-                                withStreamName(options.getAwsKinesisStream()).
-                                withRecords(allRecords));
-                List<PutRecordsRequestEntry> failedRecords = newArrayList();
-                int i = 0;
-                for (PutRecordsResultEntry row : result.getRecords()) {
-                    if (row.getErrorCode() != null) {
-                        failedRecords.add(allRecords.get(i));
-                    }
-                    ++i;
-                }
-                allRecords = failedRecords;
-            }
-
-            while (result.getFailedRecordCount() > 0);
+      PutRecordsResult result;
+      do {
+        result = client.putRecords(
+            new PutRecordsRequest().
+                withStreamName(options.getAwsKinesisStream()).
+                withRecords(allRecords));
+        List<PutRecordsRequestEntry> failedRecords = newArrayList();
+        int i = 0;
+        for (PutRecordsResultEntry row : result.getRecords()) {
+          if (row.getErrorCode() != null) {
+            failedRecords.add(allRecords.get(i));
+          }
+          ++i;
         }
+        allRecords = failedRecords;
+      }
+
+      while (result.getFailedRecordCount() > 0);
     }
+  }
 
 }
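
Worth noting in uploadAll(): PutRecords can succeed only partially, so the
loop keeps just the entries whose result row carries an error code and
resubmits them until getFailedRecordCount() reaches zero. The utility retries
immediately; a production writer would usually add backoff, along these lines
(a sketch reusing client, streamName and allRecords from above; assume the
enclosing method declares InterruptedException):

    List<PutRecordsRequestEntry> pending = allRecords;
    PutRecordsResult result;
    do {
      result = client.putRecords(new PutRecordsRequest()
          .withStreamName(streamName)
          .withRecords(pending));
      List<PutRecordsRequestEntry> failed = newArrayList();
      for (int i = 0; i < result.getRecords().size(); i++) {
        if (result.getRecords().get(i).getErrorCode() != null) {
          failed.add(pending.get(i));  // resubmit only the failed entries
        }
      }
      pending = failed;
      if (result.getFailedRecordCount() > 0) {
        Thread.sleep(100);  // crude fixed backoff; tune or make exponential
      }
    } while (result.getFailedRecordCount() > 0);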

http://git-wip-us.apache.org/repos/asf/beam/blob/7925a668/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/RecordFilterTest.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/RecordFilterTest.java b/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/RecordFilterTest.java
index f979c01..cb32562 100644
--- a/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/RecordFilterTest.java
+++ b/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/RecordFilterTest.java
@@ -20,47 +20,49 @@ package org.apache.beam.sdk.io.kinesis;
 import static org.mockito.BDDMockito.given;
 
 import com.google.common.collect.Lists;
+
 import java.util.Collections;
 import java.util.List;
+
 import org.assertj.core.api.Assertions;
 import org.junit.Test;
 import org.junit.runner.RunWith;
 import org.mockito.Mock;
 import org.mockito.runners.MockitoJUnitRunner;
 
-
 /***
  */
 @RunWith(MockitoJUnitRunner.class)
 public class RecordFilterTest {
-    @Mock
-    private ShardCheckpoint checkpoint;
-    @Mock
-    private KinesisRecord record1, record2, record3, record4, record5;
 
-    @Test
-    public void shouldFilterOutRecordsBeforeOrAtCheckpoint() {
-        given(checkpoint.isBeforeOrAt(record1)).willReturn(false);
-        given(checkpoint.isBeforeOrAt(record2)).willReturn(true);
-        given(checkpoint.isBeforeOrAt(record3)).willReturn(true);
-        given(checkpoint.isBeforeOrAt(record4)).willReturn(false);
-        given(checkpoint.isBeforeOrAt(record5)).willReturn(true);
-        List<KinesisRecord> records = Lists.newArrayList(record1, record2,
-                record3, record4, record5);
-        RecordFilter underTest = new RecordFilter();
+  @Mock
+  private ShardCheckpoint checkpoint;
+  @Mock
+  private KinesisRecord record1, record2, record3, record4, record5;
+
+  @Test
+  public void shouldFilterOutRecordsBeforeOrAtCheckpoint() {
+    given(checkpoint.isBeforeOrAt(record1)).willReturn(false);
+    given(checkpoint.isBeforeOrAt(record2)).willReturn(true);
+    given(checkpoint.isBeforeOrAt(record3)).willReturn(true);
+    given(checkpoint.isBeforeOrAt(record4)).willReturn(false);
+    given(checkpoint.isBeforeOrAt(record5)).willReturn(true);
+    List<KinesisRecord> records = Lists.newArrayList(record1, record2,
+        record3, record4, record5);
+    RecordFilter underTest = new RecordFilter();
 
-        List<KinesisRecord> retainedRecords = underTest.apply(records, checkpoint);
+    List<KinesisRecord> retainedRecords = underTest.apply(records, checkpoint);
 
-        Assertions.assertThat(retainedRecords).containsOnly(record2, record3, record5);
-    }
+    Assertions.assertThat(retainedRecords).containsOnly(record2, record3, record5);
+  }
 
-    @Test
-    public void shouldNotFailOnEmptyList() {
-        List<KinesisRecord> records = Collections.emptyList();
-        RecordFilter underTest = new RecordFilter();
+  @Test
+  public void shouldNotFailOnEmptyList() {
+    List<KinesisRecord> records = Collections.emptyList();
+    RecordFilter underTest = new RecordFilter();
 
-        List<KinesisRecord> retainedRecords = underTest.apply(records, checkpoint);
+    List<KinesisRecord> retainedRecords = underTest.apply(records, checkpoint);
 
-        Assertions.assertThat(retainedRecords).isEmpty();
-    }
+    Assertions.assertThat(retainedRecords).isEmpty();
+  }
 }

http://git-wip-us.apache.org/repos/asf/beam/blob/7925a668/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/RoundRobinTest.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/RoundRobinTest.java b/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/RoundRobinTest.java
index f032eea..e4abce4 100644
--- a/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/RoundRobinTest.java
+++ b/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/RoundRobinTest.java
@@ -22,36 +22,38 @@ import static org.assertj.core.api.Assertions.assertThat;
 
 import java.util.Collections;
 import java.util.List;
+
 import org.junit.Test;
 
 /**
  * Tests {@link RoundRobin}.
  */
 public class RoundRobinTest {
-    @Test(expected = IllegalArgumentException.class)
-    public void doesNotAllowCreationWithEmptyCollection() {
-        new RoundRobin<>(Collections.emptyList());
-    }
 
-    @Test
-    public void goesThroughElementsInCycle() {
-        List<String> input = newArrayList("a", "b", "c");
+  @Test(expected = IllegalArgumentException.class)
+  public void doesNotAllowCreationWithEmptyCollection() {
+    new RoundRobin<>(Collections.emptyList());
+  }
 
-        RoundRobin<String> roundRobin = new RoundRobin<>(newArrayList(input));
+  @Test
+  public void goesThroughElementsInCycle() {
+    List<String> input = newArrayList("a", "b", "c");
 
-        input.addAll(input);  // duplicate the input
-        for (String element : input) {
-            assertThat(roundRobin.getCurrent()).isEqualTo(element);
-            assertThat(roundRobin.getCurrent()).isEqualTo(element);
-            roundRobin.moveForward();
-        }
+    RoundRobin<String> roundRobin = new RoundRobin<>(newArrayList(input));
+
+    input.addAll(input);  // duplicate the input
+    for (String element : input) {
+      assertThat(roundRobin.getCurrent()).isEqualTo(element);
+      assertThat(roundRobin.getCurrent()).isEqualTo(element);
+      roundRobin.moveForward();
     }
+  }
 
-    @Test
-    public void usualIteratorGoesThroughElementsOnce() {
-        List<String> input = newArrayList("a", "b", "c");
+  @Test
+  public void usualIteratorGoesThroughElementsOnce() {
+    List<String> input = newArrayList("a", "b", "c");
 
-        RoundRobin<String> roundRobin = new RoundRobin<>(input);
-        assertThat(roundRobin).hasSize(3).containsOnly(input.toArray(new String[0]));
-    }
+    RoundRobin<String> roundRobin = new RoundRobin<>(input);
+    assertThat(roundRobin).hasSize(3).containsOnly(input.toArray(new String[0]));
+  }
 }
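
These three tests define the whole RoundRobin contract: empty input is
rejected, getCurrent() is stable until moveForward(), and a plain for-each
visits each element exactly once. A minimal implementation consistent with
the tests (a sketch; the production class is not part of this diff):

    // Needs java.util.{ArrayDeque, Deque, Iterator}.
    static final class RoundRobin<T> implements Iterable<T> {
      private final Deque<T> queue = new ArrayDeque<>();

      RoundRobin(Iterable<T> elements) {
        for (T e : elements) {
          queue.add(e);
        }
        if (queue.isEmpty()) {
          throw new IllegalArgumentException("Round-robin needs at least one element");
        }
      }

      T getCurrent() {
        return queue.peek();  // stable until moveForward()
      }

      void moveForward() {
        queue.add(queue.poll());  // rotate head to tail
      }

      @Override
      public Iterator<T> iterator() {
        return queue.iterator();  // one pass, no cycling
      }
    }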

http://git-wip-us.apache.org/repos/asf/beam/blob/7925a668/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/ShardCheckpointTest.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/ShardCheckpointTest.java b/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/ShardCheckpointTest.java
index 39ab36f..d4784c4 100644
--- a/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/ShardCheckpointTest.java
+++ b/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/ShardCheckpointTest.java
@@ -32,7 +32,9 @@ import static org.mockito.Mockito.when;
 
 import com.amazonaws.services.kinesis.clientlibrary.types.ExtendedSequenceNumber;
 import com.amazonaws.services.kinesis.model.ShardIteratorType;
+
 import java.io.IOException;
+
 import org.joda.time.DateTime;
 import org.joda.time.Instant;
 import org.junit.Before;
@@ -46,104 +48,105 @@ import org.mockito.runners.MockitoJUnitRunner;
  */
 @RunWith(MockitoJUnitRunner.class)
 public class ShardCheckpointTest {
-    private static final String AT_SEQUENCE_SHARD_IT = "AT_SEQUENCE_SHARD_IT";
-    private static final String AFTER_SEQUENCE_SHARD_IT = "AFTER_SEQUENCE_SHARD_IT";
-    private static final String STREAM_NAME = "STREAM";
-    private static final String SHARD_ID = "SHARD_ID";
-    @Mock
-    private SimplifiedKinesisClient client;
-
-    @Before
-    public void setUp() throws IOException, TransientKinesisException {
-        when(client.getShardIterator(
-                eq(STREAM_NAME), eq(SHARD_ID), eq(AT_SEQUENCE_NUMBER),
-                anyString(), isNull(Instant.class))).
-                thenReturn(AT_SEQUENCE_SHARD_IT);
-        when(client.getShardIterator(
-                eq(STREAM_NAME), eq(SHARD_ID), eq(AFTER_SEQUENCE_NUMBER),
-                anyString(), isNull(Instant.class))).
-                thenReturn(AFTER_SEQUENCE_SHARD_IT);
-    }
-
-    @Test
-    public void testProvidingShardIterator() throws IOException, TransientKinesisException {
-        assertThat(checkpoint(AT_SEQUENCE_NUMBER, "100", null).getShardIterator(client))
-                .isEqualTo(AT_SEQUENCE_SHARD_IT);
-        assertThat(checkpoint(AFTER_SEQUENCE_NUMBER, "100", null).getShardIterator(client))
-                .isEqualTo(AFTER_SEQUENCE_SHARD_IT);
-        assertThat(checkpoint(AT_SEQUENCE_NUMBER, "100", 10L).getShardIterator(client)).isEqualTo
-                (AT_SEQUENCE_SHARD_IT);
-        assertThat(checkpoint(AFTER_SEQUENCE_NUMBER, "100", 10L).getShardIterator(client))
-                .isEqualTo(AT_SEQUENCE_SHARD_IT);
-    }
-
-    @Test
-    public void testComparisonWithExtendedSequenceNumber() {
-        assertThat(new ShardCheckpoint("", "", new StartingPoint(LATEST)).isBeforeOrAt(
-                recordWith(new ExtendedSequenceNumber("100", 0L))
-        )).isTrue();
-
-        assertThat(new ShardCheckpoint("", "", new StartingPoint(TRIM_HORIZON)).isBeforeOrAt(
-                recordWith(new ExtendedSequenceNumber("100", 0L))
-        )).isTrue();
-
-        assertThat(checkpoint(AFTER_SEQUENCE_NUMBER, "10", 1L).isBeforeOrAt(
-                recordWith(new ExtendedSequenceNumber("100", 0L))
-        )).isTrue();
-
-        assertThat(checkpoint(AT_SEQUENCE_NUMBER, "100", 0L).isBeforeOrAt(
-                recordWith(new ExtendedSequenceNumber("100", 0L))
-        )).isTrue();
-
-        assertThat(checkpoint(AFTER_SEQUENCE_NUMBER, "100", 0L).isBeforeOrAt(
-                recordWith(new ExtendedSequenceNumber("100", 0L))
-        )).isFalse();
-
-        assertThat(checkpoint(AT_SEQUENCE_NUMBER, "100", 1L).isBeforeOrAt(
-                recordWith(new ExtendedSequenceNumber("100", 0L))
-        )).isFalse();
-
-        assertThat(checkpoint(AFTER_SEQUENCE_NUMBER, "100", 0L).isBeforeOrAt(
-                recordWith(new ExtendedSequenceNumber("99", 1L))
-        )).isFalse();
-    }
-
-    @Test
-    public void testComparisonWithTimestamp() {
-        DateTime referenceTimestamp = DateTime.now();
-
-        assertThat(checkpoint(AT_TIMESTAMP, referenceTimestamp.toInstant())
-                .isBeforeOrAt(recordWith(referenceTimestamp.minusMillis(10).toInstant()))
-        ).isFalse();
-
-        assertThat(checkpoint(AT_TIMESTAMP, referenceTimestamp.toInstant())
-                .isBeforeOrAt(recordWith(referenceTimestamp.toInstant()))
-        ).isTrue();
-
-        assertThat(checkpoint(AT_TIMESTAMP, referenceTimestamp.toInstant())
-                .isBeforeOrAt(recordWith(referenceTimestamp.plusMillis(10).toInstant()))
-        ).isTrue();
-    }
-
-    private KinesisRecord recordWith(ExtendedSequenceNumber extendedSequenceNumber) {
-        KinesisRecord record = mock(KinesisRecord.class);
-        given(record.getExtendedSequenceNumber()).willReturn(extendedSequenceNumber);
-        return record;
-    }
-
-    private ShardCheckpoint checkpoint(ShardIteratorType iteratorType, String sequenceNumber,
-                                       Long subSequenceNumber) {
-        return new ShardCheckpoint(STREAM_NAME, SHARD_ID, iteratorType, sequenceNumber,
-                subSequenceNumber);
-    }
-
-    private KinesisRecord recordWith(Instant approximateArrivalTimestamp) {
-        KinesisRecord record = mock(KinesisRecord.class);
-        given(record.getApproximateArrivalTimestamp()).willReturn(approximateArrivalTimestamp);
-        return record;
-    }
-
-    private ShardCheckpoint checkpoint(ShardIteratorType iteratorType, Instant timestamp) {
-        return new ShardCheckpoint(STREAM_NAME, SHARD_ID, iteratorType, timestamp);
-    }
+
+  private static final String AT_SEQUENCE_SHARD_IT = "AT_SEQUENCE_SHARD_IT";
+  private static final String AFTER_SEQUENCE_SHARD_IT = "AFTER_SEQUENCE_SHARD_IT";
+  private static final String STREAM_NAME = "STREAM";
+  private static final String SHARD_ID = "SHARD_ID";
+  @Mock
+  private SimplifiedKinesisClient client;
+
+  @Before
+  public void setUp() throws IOException, TransientKinesisException {
+    when(client.getShardIterator(
+        eq(STREAM_NAME), eq(SHARD_ID), eq(AT_SEQUENCE_NUMBER),
+        anyString(), isNull(Instant.class)))
+        .thenReturn(AT_SEQUENCE_SHARD_IT);
+    when(client.getShardIterator(
+        eq(STREAM_NAME), eq(SHARD_ID), eq(AFTER_SEQUENCE_NUMBER),
+        anyString(), isNull(Instant.class)))
+        .thenReturn(AFTER_SEQUENCE_SHARD_IT);
+  }
+
+  @Test
+  public void testProvidingShardIterator() throws IOException, TransientKinesisException {
+    assertThat(checkpoint(AT_SEQUENCE_NUMBER, "100", null).getShardIterator(client))
+        .isEqualTo(AT_SEQUENCE_SHARD_IT);
+    assertThat(checkpoint(AFTER_SEQUENCE_NUMBER, "100", null).getShardIterator(client))
+        .isEqualTo(AFTER_SEQUENCE_SHARD_IT);
+    assertThat(checkpoint(AT_SEQUENCE_NUMBER, "100", 10L).getShardIterator(client)).isEqualTo
+        (AT_SEQUENCE_SHARD_IT);
+    assertThat(checkpoint(AFTER_SEQUENCE_NUMBER, "100", 10L).getShardIterator(client))
+        .isEqualTo(AT_SEQUENCE_SHARD_IT);
+  }
+
+  @Test
+  public void testComparisonWithExtendedSequenceNumber() {
+    assertThat(new ShardCheckpoint("", "", new StartingPoint(LATEST)).isBeforeOrAt(
+        recordWith(new ExtendedSequenceNumber("100", 0L))
+    )).isTrue();
+
+    assertThat(new ShardCheckpoint("", "", new StartingPoint(TRIM_HORIZON)).isBeforeOrAt(
+        recordWith(new ExtendedSequenceNumber("100", 0L))
+    )).isTrue();
+
+    assertThat(checkpoint(AFTER_SEQUENCE_NUMBER, "10", 1L).isBeforeOrAt(
+        recordWith(new ExtendedSequenceNumber("100", 0L))
+    )).isTrue();
+
+    assertThat(checkpoint(AT_SEQUENCE_NUMBER, "100", 0L).isBeforeOrAt(
+        recordWith(new ExtendedSequenceNumber("100", 0L))
+    )).isTrue();
+
+    assertThat(checkpoint(AFTER_SEQUENCE_NUMBER, "100", 0L).isBeforeOrAt(
+        recordWith(new ExtendedSequenceNumber("100", 0L))
+    )).isFalse();
+
+    assertThat(checkpoint(AT_SEQUENCE_NUMBER, "100", 1L).isBeforeOrAt(
+        recordWith(new ExtendedSequenceNumber("100", 0L))
+    )).isFalse();
+
+    assertThat(checkpoint(AFTER_SEQUENCE_NUMBER, "100", 0L).isBeforeOrAt(
+        recordWith(new ExtendedSequenceNumber("99", 1L))
+    )).isFalse();
+  }
+
+  @Test
+  public void testComparisonWithTimestamp() {
+    DateTime referenceTimestamp = DateTime.now();
+
+    assertThat(checkpoint(AT_TIMESTAMP, referenceTimestamp.toInstant())
+        .isBeforeOrAt(recordWith(referenceTimestamp.minusMillis(10).toInstant()))
+    ).isFalse();
+
+    assertThat(checkpoint(AT_TIMESTAMP, referenceTimestamp.toInstant())
+        .isBeforeOrAt(recordWith(referenceTimestamp.toInstant()))
+    ).isTrue();
+
+    assertThat(checkpoint(AT_TIMESTAMP, referenceTimestamp.toInstant())
+        .isBeforeOrAt(recordWith(referenceTimestamp.plusMillis(10).toInstant()))
+    ).isTrue();
+  }
+
+  private KinesisRecord recordWith(ExtendedSequenceNumber extendedSequenceNumber) {
+    KinesisRecord record = mock(KinesisRecord.class);
+    given(record.getExtendedSequenceNumber()).willReturn(extendedSequenceNumber);
+    return record;
+  }
+
+  private ShardCheckpoint checkpoint(ShardIteratorType iteratorType, String sequenceNumber,
+      Long subSequenceNumber) {
+    return new ShardCheckpoint(STREAM_NAME, SHARD_ID, iteratorType, sequenceNumber,
+        subSequenceNumber);
+  }
+
+  private KinesisRecord recordWith(Instant approximateArrivalTimestamp) {
+    KinesisRecord record = mock(KinesisRecord.class);
+    given(record.getApproximateArrivalTimestamp()).willReturn(approximateArrivalTimestamp);
+    return record;
+  }
+
+  private ShardCheckpoint checkpoint(ShardIteratorType iteratorType, Instant timestamp) {
+    return new ShardCheckpoint(STREAM_NAME, SHARD_ID, iteratorType, timestamp);
+  }
 }

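The subtlest assertion above is the last pair in testProvidingShardIterator: a checkpoint taken
AFTER a sequence number still resolves to an AT_SEQUENCE_NUMBER iterator whenever a sub-sequence
number is present, because the enclosing aggregated Kinesis record must be re-fetched to replay
its remaining KPL sub-records. A focused restatement, as a hypothetical test sketch reusing this
file's mocks and constants:

  @Test
  public void resumesAtEnclosingRecordWhenSubSequencePresent() throws Exception {
    // setUp() stubs the client so that only an AT_SEQUENCE_NUMBER request
    // returns AT_SEQUENCE_SHARD_IT.
    ShardCheckpoint checkpoint =
        new ShardCheckpoint(STREAM_NAME, SHARD_ID, AFTER_SEQUENCE_NUMBER, "100", 10L);

    // "After sub-record 10" still requires the enclosing record at sequence "100".
    assertThat(checkpoint.getShardIterator(client)).isEqualTo(AT_SEQUENCE_SHARD_IT);
  }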

[20/50] [abbrv] beam git commit: Reject stateful ParDo if coder not KvCoder with deterministic key coder

Posted by ta...@apache.org.
Reject stateful ParDo if coder not KvCoder with deterministic key coder


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/f8974672
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/f8974672
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/f8974672

Branch: refs/heads/DSL_SQL
Commit: f89746722419ef3c60f92d7a0fa17e4e6247b265
Parents: 81a96ab
Author: Kenneth Knowles <kl...@google.com>
Authored: Wed Jul 5 17:24:25 2017 -0700
Committer: Tyler Akidau <ta...@apache.org>
Committed: Wed Jul 12 20:01:00 2017 -0700

----------------------------------------------------------------------
 .../org/apache/beam/sdk/transforms/ParDo.java   |  27 +++++
 .../apache/beam/sdk/transforms/ParDoTest.java   | 102 +++++++++++++++++++
 2 files changed, 129 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/f8974672/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/ParDo.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/ParDo.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/ParDo.java
index db1f791..0d03835 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/ParDo.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/ParDo.java
@@ -32,6 +32,7 @@ import org.apache.beam.sdk.PipelineRunner;
 import org.apache.beam.sdk.coders.CannotProvideCoderException;
 import org.apache.beam.sdk.coders.Coder;
 import org.apache.beam.sdk.coders.CoderRegistry;
+import org.apache.beam.sdk.coders.KvCoder;
 import org.apache.beam.sdk.state.StateSpec;
 import org.apache.beam.sdk.transforms.DoFn.WindowedContext;
 import org.apache.beam.sdk.transforms.display.DisplayData;
@@ -455,6 +456,27 @@ public class ParDo {
     }
   }
 
+  private static void validateStateApplicableForInput(
+      DoFn<?, ?> fn,
+      PCollection<?> input) {
+    Coder<?> inputCoder = input.getCoder();
+    checkArgument(
+        inputCoder instanceof KvCoder,
+        "%s requires its input to use %s in order to use state and timers.",
+        ParDo.class.getSimpleName(),
+        KvCoder.class.getSimpleName());
+
+    KvCoder<?, ?> kvCoder = (KvCoder<?, ?>) inputCoder;
+    try {
+      kvCoder.getKeyCoder().verifyDeterministic();
+    } catch (Coder.NonDeterministicException exc) {
+      throw new IllegalArgumentException(
+          String.format(
+              "%s requires a deterministic key coder in order to use state and timers",
+              ParDo.class.getSimpleName()));
+    }
+  }
+
   /**
    * Try to provide coders for as many of the type arguments of given
    * {@link DoFnSignature.StateDeclaration} as possible.
@@ -737,6 +759,11 @@ public class ParDo {
       // Use coder registry to determine coders for all StateSpec defined in the fn signature.
       finishSpecifyingStateSpecs(fn, input.getPipeline().getCoderRegistry(), input.getCoder());
 
+      DoFnSignature signature = DoFnSignatures.getSignature(fn.getClass());
+      if (signature.usesState() || signature.usesTimers()) {
+        validateStateApplicableForInput(fn, input);
+      }
+
       PCollectionTuple outputs = PCollectionTuple.ofPrimitiveOutputsInternal(
           input.getPipeline(),
           TupleTagList.of(mainOutputTag).and(additionalOutputTags.getAll()),

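For contrast with the rejection tests added below, here is what this validation admits: a
stateful DoFn whose input is a PCollection of KVs keyed by a type with a deterministic coder.
A minimal sketch, assuming the TestPipeline field named pipeline and the imports already present
in ParDoTest (StringUtf8Coder keys are deterministic, so the check passes):

  PCollection<KV<String, Long>> input =
      pipeline.apply(Create.of(KV.of("a", 1L), KV.of("b", 2L), KV.of("a", 3L)));

  input.apply(ParDo.of(new DoFn<KV<String, Long>, Long>() {
    @StateId("sum")
    private final StateSpec<ValueState<Long>> sumSpec = StateSpecs.value();

    @ProcessElement
    public void processElement(
        ProcessContext c, @StateId("sum") ValueState<Long> sum) {
      Long current = sum.read();  // null on the first element for each key
      long updated = (current == null ? 0L : current) + c.element().getValue();
      sum.write(updated);        // state is partitioned per key, e.g. "a" vs "b"
      c.output(updated);
    }
  }));
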
http://git-wip-us.apache.org/repos/asf/beam/blob/f8974672/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/ParDoTest.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/ParDoTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/ParDoTest.java
index 5b60ef3..fa4949e 100644
--- a/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/ParDoTest.java
+++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/ParDoTest.java
@@ -1593,6 +1593,108 @@ public class ParDoTest implements Serializable {
   }
 
   @Test
+  public void testStateNotKeyed() {
+    final String stateId = "foo";
+
+    DoFn<String, Integer> fn =
+        new DoFn<String, Integer>() {
+
+          @StateId(stateId)
+          private final StateSpec<ValueState<Integer>> intState =
+              StateSpecs.value();
+
+          @ProcessElement
+          public void processElement(
+              ProcessContext c, @StateId(stateId) ValueState<Integer> state) {}
+        };
+
+    thrown.expect(IllegalArgumentException.class);
+    thrown.expectMessage("state");
+    thrown.expectMessage("KvCoder");
+
+    pipeline.apply(Create.of("hello", "goodbye", "hello again")).apply(ParDo.of(fn));
+  }
+
+  @Test
+  public void testStateNotDeterministic() {
+    final String stateId = "foo";
+
+    // DoubleCoder is not deterministic, so this should crash
+    DoFn<KV<Double, String>, Integer> fn =
+        new DoFn<KV<Double, String>, Integer>() {
+
+          @StateId(stateId)
+          private final StateSpec<ValueState<Integer>> intState =
+              StateSpecs.value();
+
+          @ProcessElement
+          public void processElement(
+              ProcessContext c, @StateId(stateId) ValueState<Integer> state) {}
+        };
+
+    thrown.expect(IllegalArgumentException.class);
+    thrown.expectMessage("state");
+    thrown.expectMessage("deterministic");
+
+    pipeline
+        .apply(Create.of(KV.of(1.0, "hello"), KV.of(5.4, "goodbye"), KV.of(7.2, "hello again")))
+        .apply(ParDo.of(fn));
+  }
+
+  @Test
+  public void testTimerNotKeyed() {
+    final String timerId = "foo";
+
+    DoFn<String, Integer> fn =
+        new DoFn<String, Integer>() {
+
+          @TimerId(timerId)
+          private final TimerSpec timer = TimerSpecs.timer(TimeDomain.EVENT_TIME);
+
+          @ProcessElement
+          public void processElement(
+              ProcessContext c, @TimerId(timerId) Timer timer) {}
+
+          @OnTimer(timerId)
+          public void onTimer() {}
+        };
+
+    thrown.expect(IllegalArgumentException.class);
+    thrown.expectMessage("timer");
+    thrown.expectMessage("KvCoder");
+
+    pipeline.apply(Create.of("hello", "goodbye", "hello again")).apply(ParDo.of(fn));
+  }
+
+  @Test
+  public void testTimerNotDeterministic() {
+    final String timerId = "foo";
+
+    // DoubleCoder is not deterministic, so this should crash
+    DoFn<KV<Double, String>, Integer> fn =
+        new DoFn<KV<Double, String>, Integer>() {
+
+          @TimerId(timerId)
+          private final TimerSpec timer = TimerSpecs.timer(TimeDomain.EVENT_TIME);
+
+          @ProcessElement
+          public void processElement(
+              ProcessContext c, @TimerId(timerId) Timer timer) {}
+
+          @OnTimer(timerId)
+          public void onTimer() {}
+        };
+
+    thrown.expect(IllegalArgumentException.class);
+    thrown.expectMessage("timer");
+    thrown.expectMessage("deterministic");
+
+    pipeline
+        .apply(Create.of(KV.of(1.0, "hello"), KV.of(5.4, "goodbye"), KV.of(7.2, "hello again")))
+        .apply(ParDo.of(fn));
+  }
+
+  @Test
   @Category({ValidatesRunner.class, UsesStatefulParDo.class})
   public void testValueStateCoderInference() {
     final String stateId = "foo";


[47/50] [abbrv] beam git commit: Reformatting Kinesis IO to comply with official code style

Posted by ta...@apache.org.
http://git-wip-us.apache.org/repos/asf/beam/blob/7925a668/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/ShardRecordsIteratorTest.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/ShardRecordsIteratorTest.java b/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/ShardRecordsIteratorTest.java
index 49e806d..4b2190f 100644
--- a/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/ShardRecordsIteratorTest.java
+++ b/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/ShardRecordsIteratorTest.java
@@ -25,8 +25,10 @@ import static org.mockito.Matchers.anyListOf;
 import static org.mockito.Mockito.when;
 
 import com.amazonaws.services.kinesis.model.ExpiredIteratorException;
+
 import java.io.IOException;
 import java.util.Collections;
+
 import org.junit.Before;
 import org.junit.Test;
 import org.junit.runner.RunWith;
@@ -40,112 +42,114 @@ import org.mockito.stubbing.Answer;
  */
 @RunWith(MockitoJUnitRunner.class)
 public class ShardRecordsIteratorTest {
-    private static final String INITIAL_ITERATOR = "INITIAL_ITERATOR";
-    private static final String SECOND_ITERATOR = "SECOND_ITERATOR";
-    private static final String SECOND_REFRESHED_ITERATOR = "SECOND_REFRESHED_ITERATOR";
-    private static final String THIRD_ITERATOR = "THIRD_ITERATOR";
-    private static final String STREAM_NAME = "STREAM_NAME";
-    private static final String SHARD_ID = "SHARD_ID";
-
-    @Mock
-    private SimplifiedKinesisClient kinesisClient;
-    @Mock
-    private ShardCheckpoint firstCheckpoint, aCheckpoint, bCheckpoint, cCheckpoint, dCheckpoint;
-    @Mock
-    private GetKinesisRecordsResult firstResult, secondResult, thirdResult;
-    @Mock
-    private KinesisRecord a, b, c, d;
-    @Mock
-    private RecordFilter recordFilter;
-
-    private ShardRecordsIterator iterator;
-
-    @Before
-    public void setUp() throws IOException, TransientKinesisException {
-        when(firstCheckpoint.getShardIterator(kinesisClient)).thenReturn(INITIAL_ITERATOR);
-        when(firstCheckpoint.getStreamName()).thenReturn(STREAM_NAME);
-        when(firstCheckpoint.getShardId()).thenReturn(SHARD_ID);
-
-        when(firstCheckpoint.moveAfter(a)).thenReturn(aCheckpoint);
-        when(aCheckpoint.moveAfter(b)).thenReturn(bCheckpoint);
-        when(aCheckpoint.getStreamName()).thenReturn(STREAM_NAME);
-        when(aCheckpoint.getShardId()).thenReturn(SHARD_ID);
-        when(bCheckpoint.moveAfter(c)).thenReturn(cCheckpoint);
-        when(bCheckpoint.getStreamName()).thenReturn(STREAM_NAME);
-        when(bCheckpoint.getShardId()).thenReturn(SHARD_ID);
-        when(cCheckpoint.moveAfter(d)).thenReturn(dCheckpoint);
-        when(cCheckpoint.getStreamName()).thenReturn(STREAM_NAME);
-        when(cCheckpoint.getShardId()).thenReturn(SHARD_ID);
-        when(dCheckpoint.getStreamName()).thenReturn(STREAM_NAME);
-        when(dCheckpoint.getShardId()).thenReturn(SHARD_ID);
-
-        when(kinesisClient.getRecords(INITIAL_ITERATOR, STREAM_NAME, SHARD_ID))
-                .thenReturn(firstResult);
-        when(kinesisClient.getRecords(SECOND_ITERATOR, STREAM_NAME, SHARD_ID))
-                .thenReturn(secondResult);
-        when(kinesisClient.getRecords(THIRD_ITERATOR, STREAM_NAME, SHARD_ID))
-                .thenReturn(thirdResult);
-
-        when(firstResult.getNextShardIterator()).thenReturn(SECOND_ITERATOR);
-        when(secondResult.getNextShardIterator()).thenReturn(THIRD_ITERATOR);
-        when(thirdResult.getNextShardIterator()).thenReturn(THIRD_ITERATOR);
-
-        when(firstResult.getRecords()).thenReturn(Collections.<KinesisRecord>emptyList());
-        when(secondResult.getRecords()).thenReturn(Collections.<KinesisRecord>emptyList());
-        when(thirdResult.getRecords()).thenReturn(Collections.<KinesisRecord>emptyList());
-
-        when(recordFilter.apply(anyListOf(KinesisRecord.class), any(ShardCheckpoint
-                .class))).thenAnswer(new IdentityAnswer());
-
-        iterator = new ShardRecordsIterator(firstCheckpoint, kinesisClient, recordFilter);
-    }
-
-    @Test
-    public void returnsAbsentIfNoRecordsPresent() throws IOException, TransientKinesisException {
-        assertThat(iterator.next()).isEqualTo(CustomOptional.absent());
-        assertThat(iterator.next()).isEqualTo(CustomOptional.absent());
-        assertThat(iterator.next()).isEqualTo(CustomOptional.absent());
-    }
-
-    @Test
-    public void goesThroughAvailableRecords() throws IOException, TransientKinesisException {
-        when(firstResult.getRecords()).thenReturn(asList(a, b, c));
-        when(secondResult.getRecords()).thenReturn(singletonList(d));
-
-        assertThat(iterator.getCheckpoint()).isEqualTo(firstCheckpoint);
-        assertThat(iterator.next()).isEqualTo(CustomOptional.of(a));
-        assertThat(iterator.getCheckpoint()).isEqualTo(aCheckpoint);
-        assertThat(iterator.next()).isEqualTo(CustomOptional.of(b));
-        assertThat(iterator.getCheckpoint()).isEqualTo(bCheckpoint);
-        assertThat(iterator.next()).isEqualTo(CustomOptional.of(c));
-        assertThat(iterator.getCheckpoint()).isEqualTo(cCheckpoint);
-        assertThat(iterator.next()).isEqualTo(CustomOptional.of(d));
-        assertThat(iterator.getCheckpoint()).isEqualTo(dCheckpoint);
-        assertThat(iterator.next()).isEqualTo(CustomOptional.absent());
-        assertThat(iterator.getCheckpoint()).isEqualTo(dCheckpoint);
-    }
-
-    @Test
-    public void refreshesExpiredIterator() throws IOException, TransientKinesisException {
-        when(firstResult.getRecords()).thenReturn(singletonList(a));
-        when(secondResult.getRecords()).thenReturn(singletonList(b));
-
-        when(kinesisClient.getRecords(SECOND_ITERATOR, STREAM_NAME, SHARD_ID))
-                .thenThrow(ExpiredIteratorException.class);
-        when(aCheckpoint.getShardIterator(kinesisClient))
-                .thenReturn(SECOND_REFRESHED_ITERATOR);
-        when(kinesisClient.getRecords(SECOND_REFRESHED_ITERATOR, STREAM_NAME, SHARD_ID))
-                .thenReturn(secondResult);
-
-        assertThat(iterator.next()).isEqualTo(CustomOptional.of(a));
-        assertThat(iterator.next()).isEqualTo(CustomOptional.of(b));
-        assertThat(iterator.next()).isEqualTo(CustomOptional.absent());
-    }
 
-    private static class IdentityAnswer implements Answer<Object> {
-        @Override
-        public Object answer(InvocationOnMock invocation) throws Throwable {
-            return invocation.getArguments()[0];
-        }
+  private static final String INITIAL_ITERATOR = "INITIAL_ITERATOR";
+  private static final String SECOND_ITERATOR = "SECOND_ITERATOR";
+  private static final String SECOND_REFRESHED_ITERATOR = "SECOND_REFRESHED_ITERATOR";
+  private static final String THIRD_ITERATOR = "THIRD_ITERATOR";
+  private static final String STREAM_NAME = "STREAM_NAME";
+  private static final String SHARD_ID = "SHARD_ID";
+
+  @Mock
+  private SimplifiedKinesisClient kinesisClient;
+  @Mock
+  private ShardCheckpoint firstCheckpoint, aCheckpoint, bCheckpoint, cCheckpoint, dCheckpoint;
+  @Mock
+  private GetKinesisRecordsResult firstResult, secondResult, thirdResult;
+  @Mock
+  private KinesisRecord a, b, c, d;
+  @Mock
+  private RecordFilter recordFilter;
+
+  private ShardRecordsIterator iterator;
+
+  @Before
+  public void setUp() throws IOException, TransientKinesisException {
+    when(firstCheckpoint.getShardIterator(kinesisClient)).thenReturn(INITIAL_ITERATOR);
+    when(firstCheckpoint.getStreamName()).thenReturn(STREAM_NAME);
+    when(firstCheckpoint.getShardId()).thenReturn(SHARD_ID);
+
+    when(firstCheckpoint.moveAfter(a)).thenReturn(aCheckpoint);
+    when(aCheckpoint.moveAfter(b)).thenReturn(bCheckpoint);
+    when(aCheckpoint.getStreamName()).thenReturn(STREAM_NAME);
+    when(aCheckpoint.getShardId()).thenReturn(SHARD_ID);
+    when(bCheckpoint.moveAfter(c)).thenReturn(cCheckpoint);
+    when(bCheckpoint.getStreamName()).thenReturn(STREAM_NAME);
+    when(bCheckpoint.getShardId()).thenReturn(SHARD_ID);
+    when(cCheckpoint.moveAfter(d)).thenReturn(dCheckpoint);
+    when(cCheckpoint.getStreamName()).thenReturn(STREAM_NAME);
+    when(cCheckpoint.getShardId()).thenReturn(SHARD_ID);
+    when(dCheckpoint.getStreamName()).thenReturn(STREAM_NAME);
+    when(dCheckpoint.getShardId()).thenReturn(SHARD_ID);
+
+    when(kinesisClient.getRecords(INITIAL_ITERATOR, STREAM_NAME, SHARD_ID))
+        .thenReturn(firstResult);
+    when(kinesisClient.getRecords(SECOND_ITERATOR, STREAM_NAME, SHARD_ID))
+        .thenReturn(secondResult);
+    when(kinesisClient.getRecords(THIRD_ITERATOR, STREAM_NAME, SHARD_ID))
+        .thenReturn(thirdResult);
+
+    when(firstResult.getNextShardIterator()).thenReturn(SECOND_ITERATOR);
+    when(secondResult.getNextShardIterator()).thenReturn(THIRD_ITERATOR);
+    when(thirdResult.getNextShardIterator()).thenReturn(THIRD_ITERATOR);
+
+    when(firstResult.getRecords()).thenReturn(Collections.<KinesisRecord>emptyList());
+    when(secondResult.getRecords()).thenReturn(Collections.<KinesisRecord>emptyList());
+    when(thirdResult.getRecords()).thenReturn(Collections.<KinesisRecord>emptyList());
+
+    when(recordFilter.apply(anyListOf(KinesisRecord.class), any(ShardCheckpoint.class)))
+        .thenAnswer(new IdentityAnswer());
+
+    iterator = new ShardRecordsIterator(firstCheckpoint, kinesisClient, recordFilter);
+  }
+
+  @Test
+  public void returnsAbsentIfNoRecordsPresent() throws IOException, TransientKinesisException {
+    assertThat(iterator.next()).isEqualTo(CustomOptional.absent());
+    assertThat(iterator.next()).isEqualTo(CustomOptional.absent());
+    assertThat(iterator.next()).isEqualTo(CustomOptional.absent());
+  }
+
+  @Test
+  public void goesThroughAvailableRecords() throws IOException, TransientKinesisException {
+    when(firstResult.getRecords()).thenReturn(asList(a, b, c));
+    when(secondResult.getRecords()).thenReturn(singletonList(d));
+
+    assertThat(iterator.getCheckpoint()).isEqualTo(firstCheckpoint);
+    assertThat(iterator.next()).isEqualTo(CustomOptional.of(a));
+    assertThat(iterator.getCheckpoint()).isEqualTo(aCheckpoint);
+    assertThat(iterator.next()).isEqualTo(CustomOptional.of(b));
+    assertThat(iterator.getCheckpoint()).isEqualTo(bCheckpoint);
+    assertThat(iterator.next()).isEqualTo(CustomOptional.of(c));
+    assertThat(iterator.getCheckpoint()).isEqualTo(cCheckpoint);
+    assertThat(iterator.next()).isEqualTo(CustomOptional.of(d));
+    assertThat(iterator.getCheckpoint()).isEqualTo(dCheckpoint);
+    assertThat(iterator.next()).isEqualTo(CustomOptional.absent());
+    assertThat(iterator.getCheckpoint()).isEqualTo(dCheckpoint);
+  }
+
+  @Test
+  public void refreshesExpiredIterator() throws IOException, TransientKinesisException {
+    when(firstResult.getRecords()).thenReturn(singletonList(a));
+    when(secondResult.getRecords()).thenReturn(singletonList(b));
+
+    when(kinesisClient.getRecords(SECOND_ITERATOR, STREAM_NAME, SHARD_ID))
+        .thenThrow(ExpiredIteratorException.class);
+    when(aCheckpoint.getShardIterator(kinesisClient))
+        .thenReturn(SECOND_REFRESHED_ITERATOR);
+    when(kinesisClient.getRecords(SECOND_REFRESHED_ITERATOR, STREAM_NAME, SHARD_ID))
+        .thenReturn(secondResult);
+
+    assertThat(iterator.next()).isEqualTo(CustomOptional.of(a));
+    assertThat(iterator.next()).isEqualTo(CustomOptional.of(b));
+    assertThat(iterator.next()).isEqualTo(CustomOptional.absent());
+  }
+
+  private static class IdentityAnswer implements Answer<Object> {
+
+    @Override
+    public Object answer(InvocationOnMock invocation) throws Throwable {
+      return invocation.getArguments()[0];
     }
+  }
 }

http://git-wip-us.apache.org/repos/asf/beam/blob/7925a668/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/SimplifiedKinesisClientTest.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/SimplifiedKinesisClientTest.java b/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/SimplifiedKinesisClientTest.java
index 96434fd..2f8757c 100644
--- a/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/SimplifiedKinesisClientTest.java
+++ b/sdks/java/io/kinesis/src/test/java/org/apache/beam/sdk/io/kinesis/SimplifiedKinesisClientTest.java
@@ -34,7 +34,9 @@ import com.amazonaws.services.kinesis.model.ProvisionedThroughputExceededExcepti
 import com.amazonaws.services.kinesis.model.Shard;
 import com.amazonaws.services.kinesis.model.ShardIteratorType;
 import com.amazonaws.services.kinesis.model.StreamDescription;
+
 import java.util.List;
+
 import org.joda.time.Instant;
 import org.junit.Test;
 import org.junit.runner.RunWith;
@@ -46,179 +48,180 @@ import org.mockito.runners.MockitoJUnitRunner;
  */
 @RunWith(MockitoJUnitRunner.class)
 public class SimplifiedKinesisClientTest {
-    private static final String STREAM = "stream";
-    private static final String SHARD_1 = "shard-01";
-    private static final String SHARD_2 = "shard-02";
-    private static final String SHARD_3 = "shard-03";
-    private static final String SHARD_ITERATOR = "iterator";
-    private static final String SEQUENCE_NUMBER = "abc123";
-
-    @Mock
-    private AmazonKinesis kinesis;
-    @InjectMocks
-    private SimplifiedKinesisClient underTest;
-
-    @Test
-    public void shouldReturnIteratorStartingWithSequenceNumber() throws Exception {
-        given(kinesis.getShardIterator(new GetShardIteratorRequest()
-                .withStreamName(STREAM)
-                .withShardId(SHARD_1)
-                .withShardIteratorType(ShardIteratorType.AT_SEQUENCE_NUMBER)
-                .withStartingSequenceNumber(SEQUENCE_NUMBER)
-        )).willReturn(new GetShardIteratorResult()
-                .withShardIterator(SHARD_ITERATOR));
-
-        String stream = underTest.getShardIterator(STREAM, SHARD_1,
-                ShardIteratorType.AT_SEQUENCE_NUMBER, SEQUENCE_NUMBER, null);
-
-        assertThat(stream).isEqualTo(SHARD_ITERATOR);
-    }
-
-    @Test
-    public void shouldReturnIteratorStartingWithTimestamp() throws Exception {
-        Instant timestamp = Instant.now();
-        given(kinesis.getShardIterator(new GetShardIteratorRequest()
-                .withStreamName(STREAM)
-                .withShardId(SHARD_1)
-                .withShardIteratorType(ShardIteratorType.AT_SEQUENCE_NUMBER)
-                .withTimestamp(timestamp.toDate())
-        )).willReturn(new GetShardIteratorResult()
-                .withShardIterator(SHARD_ITERATOR));
-
-        String stream = underTest.getShardIterator(STREAM, SHARD_1,
-                ShardIteratorType.AT_SEQUENCE_NUMBER, null, timestamp);
-
-        assertThat(stream).isEqualTo(SHARD_ITERATOR);
-    }
-
-    @Test
-    public void shouldHandleExpiredIterationExceptionForGetShardIterator() {
-        shouldHandleGetShardIteratorError(new ExpiredIteratorException(""),
-                ExpiredIteratorException.class);
-    }
-
-    @Test
-    public void shouldHandleLimitExceededExceptionForGetShardIterator() {
-        shouldHandleGetShardIteratorError(new LimitExceededException(""),
-                TransientKinesisException.class);
-    }
-
-    @Test
-    public void shouldHandleProvisionedThroughputExceededExceptionForGetShardIterator() {
-        shouldHandleGetShardIteratorError(new ProvisionedThroughputExceededException(""),
-                TransientKinesisException.class);
-    }
-
-    @Test
-    public void shouldHandleServiceErrorForGetShardIterator() {
-        shouldHandleGetShardIteratorError(newAmazonServiceException(ErrorType.Service),
-                TransientKinesisException.class);
-    }
-
-    @Test
-    public void shouldHandleClientErrorForGetShardIterator() {
-        shouldHandleGetShardIteratorError(newAmazonServiceException(ErrorType.Client),
-                RuntimeException.class);
-    }
-
-    @Test
-    public void shouldHandleUnexpectedExceptionForGetShardIterator() {
-        shouldHandleGetShardIteratorError(new NullPointerException(),
-                RuntimeException.class);
-    }
-
-    private void shouldHandleGetShardIteratorError(
-            Exception thrownException,
-            Class<? extends Exception> expectedExceptionClass) {
-        GetShardIteratorRequest request = new GetShardIteratorRequest()
-                .withStreamName(STREAM)
-                .withShardId(SHARD_1)
-                .withShardIteratorType(ShardIteratorType.LATEST);
-
-        given(kinesis.getShardIterator(request)).willThrow(thrownException);
-
-        try {
-            underTest.getShardIterator(STREAM, SHARD_1, ShardIteratorType.LATEST, null, null);
-            failBecauseExceptionWasNotThrown(expectedExceptionClass);
-        } catch (Exception e) {
-            assertThat(e).isExactlyInstanceOf(expectedExceptionClass);
-        } finally {
-            reset(kinesis);
-        }
-    }
-
-    @Test
-    public void shouldListAllShards() throws Exception {
-        Shard shard1 = new Shard().withShardId(SHARD_1);
-        Shard shard2 = new Shard().withShardId(SHARD_2);
-        Shard shard3 = new Shard().withShardId(SHARD_3);
-        given(kinesis.describeStream(STREAM, null)).willReturn(new DescribeStreamResult()
-                .withStreamDescription(new StreamDescription()
-                        .withShards(shard1, shard2)
-                        .withHasMoreShards(true)));
-        given(kinesis.describeStream(STREAM, SHARD_2)).willReturn(new DescribeStreamResult()
-                .withStreamDescription(new StreamDescription()
-                        .withShards(shard3)
-                        .withHasMoreShards(false)));
-
-        List<Shard> shards = underTest.listShards(STREAM);
-
-        assertThat(shards).containsOnly(shard1, shard2, shard3);
-    }
-
-    @Test
-    public void shouldHandleExpiredIterationExceptionForShardListing() {
-        shouldHandleShardListingError(new ExpiredIteratorException(""),
-                ExpiredIteratorException.class);
-    }
-
-    @Test
-    public void shouldHandleLimitExceededExceptionForShardListing() {
-        shouldHandleShardListingError(new LimitExceededException(""),
-                TransientKinesisException.class);
-    }
-
-    @Test
-    public void shouldHandleProvisionedThroughputExceededExceptionForShardListing() {
-        shouldHandleShardListingError(new ProvisionedThroughputExceededException(""),
-                TransientKinesisException.class);
-    }
 
-    @Test
-    public void shouldHandleServiceErrorForShardListing() {
-        shouldHandleShardListingError(newAmazonServiceException(ErrorType.Service),
-                TransientKinesisException.class);
-    }
-
-    @Test
-    public void shouldHandleClientErrorForShardListing() {
-        shouldHandleShardListingError(newAmazonServiceException(ErrorType.Client),
-                RuntimeException.class);
-    }
-
-    @Test
-    public void shouldHandleUnexpectedExceptionForShardListing() {
-        shouldHandleShardListingError(new NullPointerException(),
-                RuntimeException.class);
-    }
-
-    private void shouldHandleShardListingError(
-            Exception thrownException,
-            Class<? extends Exception> expectedExceptionClass) {
-        given(kinesis.describeStream(STREAM, null)).willThrow(thrownException);
-        try {
-            underTest.listShards(STREAM);
-            failBecauseExceptionWasNotThrown(expectedExceptionClass);
-        } catch (Exception e) {
-            assertThat(e).isExactlyInstanceOf(expectedExceptionClass);
-        } finally {
-            reset(kinesis);
-        }
-    }
-
-    private AmazonServiceException newAmazonServiceException(ErrorType errorType) {
-        AmazonServiceException exception = new AmazonServiceException("");
-        exception.setErrorType(errorType);
-        return exception;
-    }
+  private static final String STREAM = "stream";
+  private static final String SHARD_1 = "shard-01";
+  private static final String SHARD_2 = "shard-02";
+  private static final String SHARD_3 = "shard-03";
+  private static final String SHARD_ITERATOR = "iterator";
+  private static final String SEQUENCE_NUMBER = "abc123";
+
+  @Mock
+  private AmazonKinesis kinesis;
+  @InjectMocks
+  private SimplifiedKinesisClient underTest;
+
+  @Test
+  public void shouldReturnIteratorStartingWithSequenceNumber() throws Exception {
+    given(kinesis.getShardIterator(new GetShardIteratorRequest()
+        .withStreamName(STREAM)
+        .withShardId(SHARD_1)
+        .withShardIteratorType(ShardIteratorType.AT_SEQUENCE_NUMBER)
+        .withStartingSequenceNumber(SEQUENCE_NUMBER)
+    )).willReturn(new GetShardIteratorResult()
+        .withShardIterator(SHARD_ITERATOR));
+
+    String stream = underTest.getShardIterator(STREAM, SHARD_1,
+        ShardIteratorType.AT_SEQUENCE_NUMBER, SEQUENCE_NUMBER, null);
+
+    assertThat(stream).isEqualTo(SHARD_ITERATOR);
+  }
+
+  @Test
+  public void shouldReturnIteratorStartingWithTimestamp() throws Exception {
+    Instant timestamp = Instant.now();
+    given(kinesis.getShardIterator(new GetShardIteratorRequest()
+        .withStreamName(STREAM)
+        .withShardId(SHARD_1)
+        .withShardIteratorType(ShardIteratorType.AT_SEQUENCE_NUMBER)
+        .withTimestamp(timestamp.toDate())
+    )).willReturn(new GetShardIteratorResult()
+        .withShardIterator(SHARD_ITERATOR));
+
+    String stream = underTest.getShardIterator(STREAM, SHARD_1,
+        ShardIteratorType.AT_SEQUENCE_NUMBER, null, timestamp);
+
+    assertThat(stream).isEqualTo(SHARD_ITERATOR);
+  }
+
+  @Test
+  public void shouldHandleExpiredIterationExceptionForGetShardIterator() {
+    shouldHandleGetShardIteratorError(new ExpiredIteratorException(""),
+        ExpiredIteratorException.class);
+  }
+
+  @Test
+  public void shouldHandleLimitExceededExceptionForGetShardIterator() {
+    shouldHandleGetShardIteratorError(new LimitExceededException(""),
+        TransientKinesisException.class);
+  }
+
+  @Test
+  public void shouldHandleProvisionedThroughputExceededExceptionForGetShardIterator() {
+    shouldHandleGetShardIteratorError(new ProvisionedThroughputExceededException(""),
+        TransientKinesisException.class);
+  }
+
+  @Test
+  public void shouldHandleServiceErrorForGetShardIterator() {
+    shouldHandleGetShardIteratorError(newAmazonServiceException(ErrorType.Service),
+        TransientKinesisException.class);
+  }
+
+  @Test
+  public void shouldHandleClientErrorForGetShardIterator() {
+    shouldHandleGetShardIteratorError(newAmazonServiceException(ErrorType.Client),
+        RuntimeException.class);
+  }
+
+  @Test
+  public void shouldHandleUnexpectedExceptionForGetShardIterator() {
+    shouldHandleGetShardIteratorError(new NullPointerException(),
+        RuntimeException.class);
+  }
+
+  private void shouldHandleGetShardIteratorError(
+      Exception thrownException,
+      Class<? extends Exception> expectedExceptionClass) {
+    GetShardIteratorRequest request = new GetShardIteratorRequest()
+        .withStreamName(STREAM)
+        .withShardId(SHARD_1)
+        .withShardIteratorType(ShardIteratorType.LATEST);
+
+    given(kinesis.getShardIterator(request)).willThrow(thrownException);
+
+    try {
+      underTest.getShardIterator(STREAM, SHARD_1, ShardIteratorType.LATEST, null, null);
+      failBecauseExceptionWasNotThrown(expectedExceptionClass);
+    } catch (Exception e) {
+      assertThat(e).isExactlyInstanceOf(expectedExceptionClass);
+    } finally {
+      reset(kinesis);
+    }
+  }
+
+  @Test
+  public void shouldListAllShards() throws Exception {
+    Shard shard1 = new Shard().withShardId(SHARD_1);
+    Shard shard2 = new Shard().withShardId(SHARD_2);
+    Shard shard3 = new Shard().withShardId(SHARD_3);
+    given(kinesis.describeStream(STREAM, null)).willReturn(new DescribeStreamResult()
+        .withStreamDescription(new StreamDescription()
+            .withShards(shard1, shard2)
+            .withHasMoreShards(true)));
+    given(kinesis.describeStream(STREAM, SHARD_2)).willReturn(new DescribeStreamResult()
+        .withStreamDescription(new StreamDescription()
+            .withShards(shard3)
+            .withHasMoreShards(false)));
+
+    List<Shard> shards = underTest.listShards(STREAM);
+
+    assertThat(shards).containsOnly(shard1, shard2, shard3);
+  }
+
+  @Test
+  public void shouldHandleExpiredIterationExceptionForShardListing() {
+    shouldHandleShardListingError(new ExpiredIteratorException(""),
+        ExpiredIteratorException.class);
+  }
+
+  @Test
+  public void shouldHandleLimitExceededExceptionForShardListing() {
+    shouldHandleShardListingError(new LimitExceededException(""),
+        TransientKinesisException.class);
+  }
+
+  @Test
+  public void shouldHandleProvisionedThroughputExceededExceptionForShardListing() {
+    shouldHandleShardListingError(new ProvisionedThroughputExceededException(""),
+        TransientKinesisException.class);
+  }
+
+  @Test
+  public void shouldHandleServiceErrorForShardListing() {
+    shouldHandleShardListingError(newAmazonServiceException(ErrorType.Service),
+        TransientKinesisException.class);
+  }
+
+  @Test
+  public void shouldHandleClientErrorForShardListing() {
+    shouldHandleShardListingError(newAmazonServiceException(ErrorType.Client),
+        RuntimeException.class);
+  }
+
+  @Test
+  public void shouldHandleUnexpectedExceptionForShardListing() {
+    shouldHandleShardListingError(new NullPointerException(),
+        RuntimeException.class);
+  }
+
+  private void shouldHandleShardListingError(
+      Exception thrownException,
+      Class<? extends Exception> expectedExceptionClass) {
+    given(kinesis.describeStream(STREAM, null)).willThrow(thrownException);
+    try {
+      underTest.listShards(STREAM);
+      failBecauseExceptionWasNotThrown(expectedExceptionClass);
+    } catch (Exception e) {
+      assertThat(e).isExactlyInstanceOf(expectedExceptionClass);
+    } finally {
+      reset(kinesis);
+    }
+  }
+
+  private AmazonServiceException newAmazonServiceException(ErrorType errorType) {
+    AmazonServiceException exception = new AmazonServiceException("");
+    exception.setErrorType(errorType);
+    return exception;
+  }
 }

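Taken together, the cases above pin down one wrapping rule inside SimplifiedKinesisClient:
expired iterators are surfaced as-is, throttling and service-side failures become the retryable
TransientKinesisException, and everything else is treated as a programming error. A sketch of
that shape (the idea only, not the actual implementation; the TransientKinesisException
constructor shown here is an assumption):

  private <T> T wrapExceptions(Callable<T> callable) throws TransientKinesisException {
    try {
      return callable.call();
    } catch (ExpiredIteratorException e) {
      throw e;  // rethrown unchanged so the reader can refresh its shard iterator
    } catch (LimitExceededException | ProvisionedThroughputExceededException e) {
      throw new TransientKinesisException("Too many requests to Kinesis", e);  // retryable
    } catch (AmazonServiceException e) {
      if (e.getErrorType() == ErrorType.Service) {
        throw new TransientKinesisException("Kinesis-side failure", e);  // retryable
      }
      throw new RuntimeException("Client-side error talking to Kinesis", e);  // caller bug
    } catch (Exception e) {
      throw new RuntimeException("Unknown failure", e);
    }
  }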

[27/50] [abbrv] beam git commit: Port DirectRunner ParDo overrides to SDK-agnostic APIs

Posted by ta...@apache.org.
Port DirectRunner ParDo overrides to SDK-agnostic APIs


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/16d4a154
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/16d4a154
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/16d4a154

Branch: refs/heads/DSL_SQL
Commit: 16d4a154d8667dd1ebdf4993e816c680f4c982e6
Parents: e5ca058
Author: Kenneth Knowles <kl...@google.com>
Authored: Thu Jun 8 13:44:52 2017 -0700
Committer: Tyler Akidau <ta...@apache.org>
Committed: Wed Jul 12 20:01:01 2017 -0700

----------------------------------------------------------------------
 .../core/construction/ParDoTranslation.java     | 16 ++++++---
 .../construction/RunnerPCollectionView.java     | 16 +++++++++
 .../direct/ParDoMultiOverrideFactory.java       | 35 +++++++++-----------
 3 files changed, 43 insertions(+), 24 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/16d4a154/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/ParDoTranslation.java
----------------------------------------------------------------------
diff --git a/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/ParDoTranslation.java b/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/ParDoTranslation.java
index fe8c5aa..90c9aad 100644
--- a/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/ParDoTranslation.java
+++ b/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/ParDoTranslation.java
@@ -19,6 +19,7 @@
 package org.apache.beam.runners.core.construction;
 
 import static com.google.common.base.Preconditions.checkArgument;
+import static com.google.common.base.Preconditions.checkNotNull;
 import static com.google.common.base.Preconditions.checkState;
 import static org.apache.beam.runners.core.construction.PTransformTranslation.PAR_DO_TRANSFORM_URN;
 
@@ -262,12 +263,19 @@ public class ParDoTranslation {
     ParDoPayload payload = parDoProto.getSpec().getParameter().unpack(ParDoPayload.class);
 
     List<PCollectionView<?>> views = new ArrayList<>();
-    for (Map.Entry<String, SideInput> sideInput : payload.getSideInputsMap().entrySet()) {
+    for (Map.Entry<String, SideInput> sideInputEntry : payload.getSideInputsMap().entrySet()) {
+      String sideInputTag = sideInputEntry.getKey();
+      RunnerApi.SideInput sideInput = sideInputEntry.getValue();
+      PCollection<?> originalPCollection =
+          checkNotNull(
+              (PCollection<?>) application.getInputs().get(new TupleTag<>(sideInputTag)),
+              "no input with tag %s",
+              sideInputTag);
       views.add(
           viewFromProto(
-              application.getPipeline(),
-              sideInput.getValue(),
-              sideInput.getKey(),
+              sideInput,
+              sideInputTag,
+              originalPCollection,
               parDoProto,
               sdkComponents.toComponents()));
     }

http://git-wip-us.apache.org/repos/asf/beam/blob/16d4a154/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/RunnerPCollectionView.java
----------------------------------------------------------------------
diff --git a/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/RunnerPCollectionView.java b/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/RunnerPCollectionView.java
index b275188..85139e8 100644
--- a/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/RunnerPCollectionView.java
+++ b/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/RunnerPCollectionView.java
@@ -19,6 +19,7 @@
 package org.apache.beam.runners.core.construction;
 
 import java.util.Map;
+import java.util.Objects;
 import javax.annotation.Nullable;
 import org.apache.beam.sdk.coders.Coder;
 import org.apache.beam.sdk.common.runner.v1.RunnerApi.SideInput;
@@ -94,4 +95,19 @@ class RunnerPCollectionView<T> extends PValueBase implements PCollectionView<T>
     throw new UnsupportedOperationException(String.format(
         "A %s cannot be expanded", RunnerPCollectionView.class.getSimpleName()));
   }
+
+  @Override
+  public boolean equals(Object other) {
+    if (!(other instanceof PCollectionView)) {
+      return false;
+    }
+    @SuppressWarnings("unchecked")
+    PCollectionView<?> otherView = (PCollectionView<?>) other;
+    return tag.equals(otherView.getTagInternal());
+  }
+
+  @Override
+  public int hashCode() {
+    return Objects.hash(tag);
+  }
 }

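The point of tag-based equality: a runner may substitute its own view implementation (such as
this RunnerPCollectionView) for the one the user's pipeline created, and lookups keyed by either
must still succeed. A hypothetical illustration, where originalView and runnerView stand for any
two PCollectionView instances carrying the same TupleTag:

  Map<PCollectionView<?>, Object> materialized = new HashMap<>();
  materialized.put(originalView, contents);

  // runnerView was rehydrated from the Runner API, but it carries the same
  // TupleTag, so tag-based equals/hashCode makes it an equivalent map key:
  assert materialized.containsKey(runnerView);
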
http://git-wip-us.apache.org/repos/asf/beam/blob/16d4a154/runners/direct-java/src/main/java/org/apache/beam/runners/direct/ParDoMultiOverrideFactory.java
----------------------------------------------------------------------
diff --git a/runners/direct-java/src/main/java/org/apache/beam/runners/direct/ParDoMultiOverrideFactory.java b/runners/direct-java/src/main/java/org/apache/beam/runners/direct/ParDoMultiOverrideFactory.java
index 8881967..891d102 100644
--- a/runners/direct-java/src/main/java/org/apache/beam/runners/direct/ParDoMultiOverrideFactory.java
+++ b/runners/direct-java/src/main/java/org/apache/beam/runners/direct/ParDoMultiOverrideFactory.java
@@ -38,7 +38,6 @@ import org.apache.beam.sdk.transforms.DoFn;
 import org.apache.beam.sdk.transforms.GroupByKey;
 import org.apache.beam.sdk.transforms.PTransform;
 import org.apache.beam.sdk.transforms.ParDo;
-import org.apache.beam.sdk.transforms.ParDo.MultiOutput;
 import org.apache.beam.sdk.transforms.reflect.DoFnSignature;
 import org.apache.beam.sdk.transforms.reflect.DoFnSignatures;
 import org.apache.beam.sdk.transforms.windowing.AfterPane;
@@ -73,9 +72,14 @@ class ParDoMultiOverrideFactory<InputT, OutputT>
                   PCollection<? extends InputT>, PCollectionTuple,
                   PTransform<PCollection<? extends InputT>, PCollectionTuple>>
               application) {
-    return PTransformReplacement.of(
-        PTransformReplacements.getSingletonMainInput(application),
-        getReplacementForApplication(application));
+
+    try {
+      return PTransformReplacement.of(
+          PTransformReplacements.getSingletonMainInput(application),
+          getReplacementForApplication(application));
+    } catch (IOException exc) {
+      throw new RuntimeException(exc);
+    }
   }
 
   @SuppressWarnings("unchecked")
@@ -83,31 +87,22 @@ class ParDoMultiOverrideFactory<InputT, OutputT>
       AppliedPTransform<
               PCollection<? extends InputT>, PCollectionTuple,
               PTransform<PCollection<? extends InputT>, PCollectionTuple>>
-          application) {
+          application)
+      throws IOException {
 
-    DoFn<InputT, OutputT> fn;
-    try {
-      fn = (DoFn<InputT, OutputT>) ParDoTranslation.getDoFn(application);
-    } catch (IOException exc) {
-      throw new RuntimeException(exc);
-    }
+    DoFn<InputT, OutputT> fn = (DoFn<InputT, OutputT>) ParDoTranslation.getDoFn(application);
 
     DoFnSignature signature = DoFnSignatures.getSignature(fn.getClass());
+
     if (signature.processElement().isSplittable()) {
       return (PTransform) SplittableParDo.forAppliedParDo(application);
     } else if (signature.stateDeclarations().size() > 0
         || signature.timerDeclarations().size() > 0) {
-
-      MultiOutput<InputT, OutputT> transform =
-          (MultiOutput<InputT, OutputT>) application.getTransform();
-
-      // Based on the fact that the signature is stateful, DoFnSignatures ensures
-      // that it is also keyed
       return new GbkThenStatefulParDo(
           fn,
-          transform.getMainOutputTag(),
-          transform.getAdditionalOutputTags(),
-          transform.getSideInputs());
+          ParDoTranslation.getMainOutputTag(application),
+          ParDoTranslation.getAdditionalOutputTags(application),
+          ParDoTranslation.getSideInputs(application));
     } else {
       return application.getTransform();
     }

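Condensed, the override now recovers everything through ParDoTranslation instead of downcasting
application.getTransform() to ParDo.MultiOutput, which also works for transforms rehydrated from
the Runner API. Every accessor below appears in this diff; the generic types are sketched:

  DoFn<InputT, OutputT> fn = (DoFn<InputT, OutputT>) ParDoTranslation.getDoFn(application);
  TupleTag<OutputT> mainOutputTag = ParDoTranslation.getMainOutputTag(application);
  TupleTagList additionalOutputTags = ParDoTranslation.getAdditionalOutputTags(application);
  List<PCollectionView<?>> sideInputs = ParDoTranslation.getSideInputs(application);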

[35/50] [abbrv] beam git commit: Adds DynamicDestinations support to FileBasedSink

Posted by ta...@apache.org.
http://git-wip-us.apache.org/repos/asf/beam/blob/4c336e84/sdks/java/core/src/main/java/org/apache/beam/sdk/io/DynamicFileDestinations.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/DynamicFileDestinations.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/DynamicFileDestinations.java
new file mode 100644
index 0000000..e7ef0f6
--- /dev/null
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/DynamicFileDestinations.java
@@ -0,0 +1,115 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.beam.sdk.io;
+
+import javax.annotation.Nullable;
+import org.apache.beam.sdk.coders.Coder;
+import org.apache.beam.sdk.io.DefaultFilenamePolicy.Params;
+import org.apache.beam.sdk.io.DefaultFilenamePolicy.ParamsCoder;
+import org.apache.beam.sdk.io.FileBasedSink.DynamicDestinations;
+import org.apache.beam.sdk.io.FileBasedSink.FilenamePolicy;
+import org.apache.beam.sdk.transforms.SerializableFunction;
+import org.apache.beam.sdk.transforms.display.DisplayData;
+
+/** Some helper classes that derive from {@link FileBasedSink.DynamicDestinations}. */
+public class DynamicFileDestinations {
+  /** Always returns a constant {@link FilenamePolicy}. */
+  private static class ConstantFilenamePolicy<T> extends DynamicDestinations<T, Void> {
+    private final FilenamePolicy filenamePolicy;
+
+    public ConstantFilenamePolicy(FilenamePolicy filenamePolicy) {
+      this.filenamePolicy = filenamePolicy;
+    }
+
+    @Override
+    public Void getDestination(T element) {
+      return (Void) null;
+    }
+
+    @Override
+    public Coder<Void> getDestinationCoder() {
+      return null;
+    }
+
+    @Override
+    public Void getDefaultDestination() {
+      return (Void) null;
+    }
+
+    @Override
+    public FilenamePolicy getFilenamePolicy(Void destination) {
+      return filenamePolicy;
+    }
+
+    @Override
+    public void populateDisplayData(DisplayData.Builder builder) {
+      filenamePolicy.populateDisplayData(builder);
+    }
+  }
+
+  /**
+   * A base class for a {@link DynamicDestinations} object that returns differently-configured
+   * instances of {@link DefaultFilenamePolicy}.
+   */
+  private static class DefaultPolicyDestinations<UserT> extends DynamicDestinations<UserT, Params> {
+    SerializableFunction<UserT, Params> destinationFunction;
+    Params emptyDestination;
+
+    public DefaultPolicyDestinations(
+        SerializableFunction<UserT, Params> destinationFunction, Params emptyDestination) {
+      this.destinationFunction = destinationFunction;
+      this.emptyDestination = emptyDestination;
+    }
+
+    @Override
+    public Params getDestination(UserT element) {
+      return destinationFunction.apply(element);
+    }
+
+    @Override
+    public Params getDefaultDestination() {
+      return emptyDestination;
+    }
+
+    @Nullable
+    @Override
+    public Coder<Params> getDestinationCoder() {
+      return ParamsCoder.of();
+    }
+
+    @Override
+    public FilenamePolicy getFilenamePolicy(DefaultFilenamePolicy.Params params) {
+      return DefaultFilenamePolicy.fromParams(params);
+    }
+  }
+
+  /** Returns a {@link DynamicDestinations} that always returns the same {@link FilenamePolicy}. */
+  public static <T> DynamicDestinations<T, Void> constant(FilenamePolicy filenamePolicy) {
+    return new ConstantFilenamePolicy<>(filenamePolicy);
+  }
+
+  /**
+   * Returns a {@link DynamicDestinations} that returns instances of {@link DefaultFilenamePolicy}
+   * configured with the given {@link Params}.
+   */
+  public static <UserT> DynamicDestinations<UserT, Params> toDefaultPolicies(
+      SerializableFunction<UserT, Params> destinationFunction, Params emptyDestination) {
+    return new DefaultPolicyDestinations<>(destinationFunction, emptyDestination);
+  }
+}

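Beyond these two canned helpers, the extension point is the abstract DynamicDestinations class
itself. A sketch of a user-defined scheme that routes elements by a field (hypothetical Event
type and policyFor(...) helper; only the four overridden methods are the real API):

  class PerEventTypeDestinations
      extends FileBasedSink.DynamicDestinations<Event, String> {

    @Override
    public String getDestination(Event element) {
      return element.getType();  // e.g. "click" vs "view", one output location each
    }

    @Override
    public String getDefaultDestination() {
      return "unknown";  // destination used when writing empty files
    }

    @Override
    public Coder<String> getDestinationCoder() {
      return StringUtf8Coder.of();  // deterministic, as the GroupByKey on destinations requires
    }

    @Override
    public FileBasedSink.FilenamePolicy getFilenamePolicy(String destination) {
      return policyFor(destination);  // hypothetical helper building a per-type policy
    }
  }
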
http://git-wip-us.apache.org/repos/asf/beam/blob/4c336e84/sdks/java/core/src/main/java/org/apache/beam/sdk/io/FileBasedSink.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/FileBasedSink.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/FileBasedSink.java
index 8102316..583af60 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/FileBasedSink.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/FileBasedSink.java
@@ -33,6 +33,7 @@ import java.io.IOException;
 import java.io.InputStream;
 import java.io.OutputStream;
 import java.io.Serializable;
+import java.lang.reflect.TypeVariable;
 import java.nio.channels.Channels;
 import java.nio.channels.WritableByteChannel;
 import java.util.ArrayList;
@@ -49,8 +50,10 @@ import java.util.zip.GZIPOutputStream;
 import javax.annotation.Nullable;
 import org.apache.beam.sdk.annotations.Experimental;
 import org.apache.beam.sdk.annotations.Experimental.Kind;
+import org.apache.beam.sdk.coders.CannotProvideCoderException;
 import org.apache.beam.sdk.coders.Coder;
 import org.apache.beam.sdk.coders.CoderException;
+import org.apache.beam.sdk.coders.CoderRegistry;
 import org.apache.beam.sdk.coders.NullableCoder;
 import org.apache.beam.sdk.coders.StringUtf8Coder;
 import org.apache.beam.sdk.coders.StructuredCoder;
@@ -73,6 +76,7 @@ import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
 import org.apache.beam.sdk.transforms.windowing.PaneInfo;
 import org.apache.beam.sdk.transforms.windowing.PaneInfo.PaneInfoCoder;
 import org.apache.beam.sdk.util.MimeTypes;
+import org.apache.beam.sdk.values.TypeDescriptor;
 import org.apache.commons.compress.compressors.bzip2.BZip2CompressorOutputStream;
 import org.apache.commons.compress.compressors.deflate.DeflateCompressorOutputStream;
 import org.joda.time.Instant;
@@ -82,43 +86,43 @@ import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 /**
- * Abstract class for file-based output. An implementation of FileBasedSink writes file-based
- * output and defines the format of output files (how values are written, headers/footers, MIME
- * type, etc.).
+ * Abstract class for file-based output. An implementation of FileBasedSink writes file-based output
+ * and defines the format of output files (how values are written, headers/footers, MIME type,
+ * etc.).
  *
  * <p>At pipeline construction time, the methods of FileBasedSink are called to validate the sink
  * and to create a {@link WriteOperation} that manages the process of writing to the sink.
  *
  * <p>The process of writing to file-based sink is as follows:
+ *
  * <ol>
- * <li>An optional subclass-defined initialization,
- * <li>a parallel write of bundles to temporary files, and finally,
- * <li>these temporary files are renamed with final output filenames.
+ *   <li>An optional subclass-defined initialization,
+ *   <li>a parallel write of bundles to temporary files, and finally,
+ *   <li>these temporary files are renamed with final output filenames.
  * </ol>
  *
  * <p>In order to ensure fault-tolerance, a bundle may be executed multiple times (e.g., in the
  * event of failure/retry or for redundancy). However, exactly one of these executions will have its
- * result passed to the finalize method. Each call to {@link Writer#openWindowed}
- * or {@link Writer#openUnwindowed} is passed a unique <i>bundle id</i> when it is called
- * by the WriteFiles transform, so even redundant or retried bundles will have a unique way of
- * identifying
- * their output.
+ * result passed to the finalize method. Each call to {@link Writer#openWindowed} or {@link
+ * Writer#openUnwindowed} is passed a unique <i>bundle id</i> when it is called by the WriteFiles
+ * transform, so even redundant or retried bundles will have a unique way of identifying their
+ * output.
  *
  * <p>The bundle id should be used to guarantee that a bundle's output is unique. This uniqueness
  * guarantee is important; if a bundle is to be output to a file, for example, the name of the file
  * will encode the unique bundle id to avoid conflicts with other writers.
  *
- * {@link FileBasedSink} can take a custom {@link FilenamePolicy} object to determine output
- * filenames, and this policy object can be used to write windowed or triggered
- * PCollections into separate files per window pane. This allows file output from unbounded
- * PCollections, and also works for bounded PCollecctions.
+ * <p>{@link FileBasedSink} can take a custom {@link FilenamePolicy} object to determine output
+ * filenames, and this policy object can be used to write windowed or triggered PCollections into
+ * separate files per window pane. This allows file output from unbounded PCollections, and also
+ * works for bounded PCollections.
  *
  * <p>Supported file systems are those registered with {@link FileSystems}.
  *
- * @param <T> the type of values written to the sink.
+ * @param <OutputT> the type of values written to the sink.
+ * @param <DestinationT> the type of dynamic destinations the values are routed to.
  */
 @Experimental(Kind.FILESYSTEM)
-public abstract class FileBasedSink<T> implements Serializable, HasDisplayData {
+public abstract class FileBasedSink<OutputT, DestinationT> implements Serializable, HasDisplayData {
   private static final Logger LOG = LoggerFactory.getLogger(FileBasedSink.class);
 
   /**
@@ -173,7 +177,7 @@ public abstract class FileBasedSink<T> implements Serializable, HasDisplayData {
     }
 
     @Override
-    public String getFilenameSuffix() {
+    public String getSuggestedFilenameSuffix() {
       return filenameSuffix;
     }
 
@@ -205,6 +209,8 @@ public abstract class FileBasedSink<T> implements Serializable, HasDisplayData {
     }
   }
 
+  private final DynamicDestinations<?, DestinationT> dynamicDestinations;
+
   /**
    * The {@link WritableByteChannelFactory} that is used to wrap the raw data output to the
    * underlying channel. The default is to not compress the output using
@@ -213,8 +219,70 @@ public abstract class FileBasedSink<T> implements Serializable, HasDisplayData {
   private final WritableByteChannelFactory writableByteChannelFactory;
 
   /**
-   * A naming policy for output files.
+   * A class that allows value-dependent writes in {@link FileBasedSink}.
+   *
+   * <p>Users can define a custom type to represent destinations, and provide a mapping to turn this
+   * destination type into an instance of {@link FilenamePolicy}.
    */
+  @Experimental(Kind.FILESYSTEM)
+  public abstract static class DynamicDestinations<UserT, DestinationT>
+      implements HasDisplayData, Serializable {
+    /**
+     * Returns an object that represents, at a high level, the destination being written to. May
+     * not return null.
+     */
+    public abstract DestinationT getDestination(UserT element);
+
+    /**
+     * Returns the default destination. For a collection with no elements, this is the destination
+     * used when writing empty files.
+     */
+    public abstract DestinationT getDefaultDestination();
+
+    /**
+     * Returns the coder for {@code DestinationT}. If this is not overridden, then the coder
+     * registry will be used to find a suitable coder. This must be a deterministic coder, as
+     * {@code DestinationT} will be used as a key type in a {@link
+     * org.apache.beam.sdk.transforms.GroupByKey}.
+     */
+    @Nullable
+    public Coder<DestinationT> getDestinationCoder() {
+      return null;
+    }
+
+    /** Converts a destination into a {@link FilenamePolicy}. May not return null. */
+    public abstract FilenamePolicy getFilenamePolicy(DestinationT destination);
+
+    /** Populates the display data. */
+    @Override
+    public void populateDisplayData(DisplayData.Builder builder) {}
+
+    // Gets the destination coder. If the user does not provide one, try to find one in the coder
+    // registry. If no coder can be found, throws CannotProvideCoderException.
+    final Coder<DestinationT> getDestinationCoderWithDefault(CoderRegistry registry)
+        throws CannotProvideCoderException {
+      Coder<DestinationT> destinationCoder = getDestinationCoder();
+      if (destinationCoder != null) {
+        return destinationCoder;
+      }
+      // If dynamicDestinations doesn't provide a coder, try to find it in the coder registry.
+      // We must first use reflection to figure out what the type parameter is.
+      TypeDescriptor<?> superDescriptor =
+          TypeDescriptor.of(getClass()).getSupertype(DynamicDestinations.class);
+      if (!superDescriptor.getRawType().equals(DynamicDestinations.class)) {
+        throw new AssertionError(
+            "Couldn't find the DynamicDestinations superclass of " + this.getClass());
+      }
+      TypeVariable typeVariable = superDescriptor.getTypeParameter("DestinationT");
+      @SuppressWarnings("unchecked")
+      TypeDescriptor<DestinationT> descriptor =
+          (TypeDescriptor<DestinationT>) superDescriptor.resolveType(typeVariable);
+      return registry.getCoder(descriptor);
+    }
+  }
+
+  /** A naming policy for output files. */
+  @Experimental(Kind.FILESYSTEM)
   public abstract static class FilenamePolicy implements Serializable {
     /**
      * Context used for generating a name based on shard number, and num shards.
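
As an illustration of the DynamicDestinations hook above, here is a hedged
sketch (not part of this commit) that routes each element to a per-country
output location. MyEvent and its getCountry() accessor are hypothetical; the
policy is built with DefaultFilenamePolicy, as the TFRecordSink change later in
this commit does:

    static class PerCountryDestinations
        extends FileBasedSink.DynamicDestinations<MyEvent, String> {
      @Override
      public String getDestination(MyEvent element) {
        return element.getCountry(); // the high-level destination for this element
      }

      @Override
      public String getDefaultDestination() {
        return "unknown"; // destination used when writing empty files
      }

      @Override
      public FileBasedSink.FilenamePolicy getFilenamePolicy(String country) {
        ResourceId prefix = FileSystems.matchNewResource(
            "gs://my-bucket/output/" + country + "/events", false /* isDirectory */);
        return DefaultFilenamePolicy.fromStandardParameters(
            StaticValueProvider.of(prefix), null /* shardTemplate */,
            ".txt" /* suffix */, false /* windowedWrites */);
      }
      // getDestinationCoder() is not overridden; the coder registry resolves a
      // deterministic coder for String.
    }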
@@ -287,29 +355,28 @@ public abstract class FileBasedSink<T> implements Serializable, HasDisplayData {
     /**
      * When a sink has requested windowed or triggered output, this method will be invoked to return
      * the file {@link ResourceId resource} to be created given the base output directory and a
-     * (possibly empty) extension from {@link FileBasedSink} configuration
-     * (e.g., {@link CompressionType}).
+     * {@link OutputFileHints} containing information about the file, including a suggested
+     * extension (e.g. coming from {@link CompressionType}).
      *
-     * <p>The {@link WindowedContext} object gives access to the window and pane,
-     * as well as sharding information. The policy must return unique and consistent filenames
-     * for different windows and panes.
+     * <p>The {@link WindowedContext} object gives access to the window and pane, as well as
+     * sharding information. The policy must return unique and consistent filenames for different
+     * windows and panes.
      */
     @Experimental(Kind.FILESYSTEM)
-    public abstract ResourceId windowedFilename(
-        ResourceId outputDirectory, WindowedContext c, String extension);
+    public abstract ResourceId windowedFilename(WindowedContext c, OutputFileHints outputFileHints);
 
     /**
      * When a sink has not requested windowed or triggered output, this method will be invoked to
      * return the file {@link ResourceId resource} to be created given the base output directory and
-     * a (possibly empty) extension applied by additional {@link FileBasedSink} configuration
-     * (e.g., {@link CompressionType}).
+     * a {@link OutputFileHints} containing information about the file, including a suggested (e.g.
+     * coming from {@link CompressionType}).
      *
      * <p>The {@link Context} object only provides sharding information, which is used by the policy
      * to generate unique and consistent filenames.
      */
     @Experimental(Kind.FILESYSTEM)
-    @Nullable public abstract ResourceId unwindowedFilename(
-        ResourceId outputDirectory, Context c, String extension);
+    @Nullable
+    public abstract ResourceId unwindowedFilename(Context c, OutputFileHints outputFileHints);
 
     /**
      * Populates the display data.
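
A hedged sketch of a FilenamePolicy under the new signatures shown above: the
output directory is now baked into the policy rather than passed in, and the
suggested extension comes from OutputFileHints. The base directory and name
format are illustrative, and the Context accessors are assumed from the
existing Context(shardNumber, numShards) class:

    static class SimplePolicy extends FileBasedSink.FilenamePolicy {
      private final ResourceId baseDir; // directory chosen by the policy itself

      SimplePolicy(ResourceId baseDir) {
        this.baseDir = baseDir;
      }

      @Override
      public ResourceId windowedFilename(WindowedContext c, OutputFileHints hints) {
        String suffix = MoreObjects.firstNonNull(hints.getSuggestedFilenameSuffix(), "");
        // Encode window, pane, and shard so names are unique and consistent.
        String name = String.format("win-%s-pane-%d-%05d-of-%05d%s",
            c.getWindow(), c.getPaneInfo().getIndex(),
            c.getShardNumber(), c.getNumShards(), suffix);
        return baseDir.resolve(name, StandardResolveOptions.RESOLVE_FILE);
      }

      @Override
      public ResourceId unwindowedFilename(Context c, OutputFileHints hints) {
        String suffix = MoreObjects.firstNonNull(hints.getSuggestedFilenameSuffix(), "");
        String name = String.format("out-%05d-of-%05d%s",
            c.getShardNumber(), c.getNumShards(), suffix);
        return baseDir.resolve(name, StandardResolveOptions.RESOLVE_FILE);
      }
    }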
@@ -318,19 +385,8 @@ public abstract class FileBasedSink<T> implements Serializable, HasDisplayData {
     }
   }
 
-  /** The policy used to generate names of files to be produced. */
-  private final FilenamePolicy filenamePolicy;
   /** The directory to which files will be written. */
-  private final ValueProvider<ResourceId> baseOutputDirectoryProvider;
-
-  /**
-   * Construct a {@link FileBasedSink} with the given filename policy, producing uncompressed files.
-   */
-  @Experimental(Kind.FILESYSTEM)
-  public FileBasedSink(
-      ValueProvider<ResourceId> baseOutputDirectoryProvider, FilenamePolicy filenamePolicy) {
-    this(baseOutputDirectoryProvider, filenamePolicy, CompressionType.UNCOMPRESSED);
-  }
+  private final ValueProvider<ResourceId> tempDirectoryProvider;
 
   private static class ExtractDirectory implements SerializableFunction<ResourceId, ResourceId> {
     @Override
@@ -340,95 +396,91 @@ public abstract class FileBasedSink<T> implements Serializable, HasDisplayData {
   }
 
   /**
-   * Construct a {@link FileBasedSink} with the given filename policy and output channel type.
+   * Construct a {@link FileBasedSink} with the given temp directory, producing uncompressed files.
    */
   @Experimental(Kind.FILESYSTEM)
   public FileBasedSink(
-      ValueProvider<ResourceId> baseOutputDirectoryProvider,
-      FilenamePolicy filenamePolicy,
+      ValueProvider<ResourceId> tempDirectoryProvider,
+      DynamicDestinations<?, DestinationT> dynamicDestinations) {
+    this(tempDirectoryProvider, dynamicDestinations, CompressionType.UNCOMPRESSED);
+  }
+
+  /** Construct a {@link FileBasedSink} with the given temp directory and output channel type. */
+  @Experimental(Kind.FILESYSTEM)
+  public FileBasedSink(
+      ValueProvider<ResourceId> tempDirectoryProvider,
+      DynamicDestinations<?, DestinationT> dynamicDestinations,
       WritableByteChannelFactory writableByteChannelFactory) {
-    this.baseOutputDirectoryProvider =
-        NestedValueProvider.of(baseOutputDirectoryProvider, new ExtractDirectory());
-    this.filenamePolicy = filenamePolicy;
+    this.tempDirectoryProvider =
+        NestedValueProvider.of(tempDirectoryProvider, new ExtractDirectory());
+    this.dynamicDestinations = checkNotNull(dynamicDestinations);
     this.writableByteChannelFactory = writableByteChannelFactory;
   }
 
-  /**
-   * Returns the base directory inside which files will be written according to the configured
-   * {@link FilenamePolicy}.
-   */
-  @Experimental(Kind.FILESYSTEM)
-  public ValueProvider<ResourceId> getBaseOutputDirectoryProvider() {
-    return baseOutputDirectoryProvider;
+  /** Return the {@link DynamicDestinations} used. */
+  @SuppressWarnings("unchecked")
+  public <UserT> DynamicDestinations<UserT, DestinationT> getDynamicDestinations() {
+    return (DynamicDestinations<UserT, DestinationT>) dynamicDestinations;
   }
 
   /**
-   * Returns the policy by which files will be named inside of the base output directory. Note that
-   * the {@link FilenamePolicy} may itself specify one or more inner directories before each output
-   * file, say when writing windowed outputs in a {@code output/YYYY/MM/DD/file.txt} format.
+   * Returns the directory inside which temporary files will be written according to the configured
+   * {@link FilenamePolicy}.
    */
   @Experimental(Kind.FILESYSTEM)
-  public final FilenamePolicy getFilenamePolicy() {
-    return filenamePolicy;
+  public ValueProvider<ResourceId> getTempDirectoryProvider() {
+    return tempDirectoryProvider;
   }
 
   public void validate(PipelineOptions options) {}
 
-  /**
-   * Return a subclass of {@link WriteOperation} that will manage the write
-   * to the sink.
-   */
-  public abstract WriteOperation<T> createWriteOperation();
+  /** Return a subclass of {@link WriteOperation} that will manage the write to the sink. */
+  public abstract WriteOperation<OutputT, DestinationT> createWriteOperation();
 
   public void populateDisplayData(DisplayData.Builder builder) {
-    getFilenamePolicy().populateDisplayData(builder);
+    getDynamicDestinations().populateDisplayData(builder);
   }
 
   /**
    * Abstract operation that manages the process of writing to {@link FileBasedSink}.
    *
-   * <p>The primary responsibilities of the WriteOperation is the management of output
-   * files. During a write, {@link Writer}s write bundles to temporary file
-   * locations. After the bundles have been written,
+   * <p>The primary responsibility of the WriteOperation is the management of output files. During
+   * a write, {@link Writer}s write bundles to temporary file locations. After the bundles have been
+   * written,
+   *
    * <ol>
-   * <li>{@link WriteOperation#finalize} is given a list of the temporary
-   * files containing the output bundles.
-   * <li>During finalize, these temporary files are copied to final output locations and named
-   * according to a file naming template.
-   * <li>Finally, any temporary files that were created during the write are removed.
+   *   <li>{@link WriteOperation#finalize} is given a list of the temporary files containing the
+   *       output bundles.
+   *   <li>During finalize, these temporary files are copied to final output locations and named
+   *       according to a file naming template.
+   *   <li>Finally, any temporary files that were created during the write are removed.
    * </ol>
    *
-   * <p>Subclass implementations of WriteOperation must implement
-   * {@link WriteOperation#createWriter} to return a concrete
-   * FileBasedSinkWriter.
+   * <p>Subclass implementations of WriteOperation must implement {@link
+   * WriteOperation#createWriter} to return a concrete FileBasedSinkWriter.
    *
-   * <h2>Temporary and Output File Naming:</h2> During the write, bundles are written to temporary
-   * files using the tempDirectory that can be provided via the constructor of
-   * WriteOperation. These temporary files will be named
-   * {@code {tempDirectory}/{bundleId}}, where bundleId is the unique id of the bundle.
-   * For example, if tempDirectory is "gs://my-bucket/my_temp_output", the output for a
-   * bundle with bundle id 15723 will be "gs://my-bucket/my_temp_output/15723".
+   * <h2>Temporary and Output File Naming:</h2>
    *
-   * <p>Final output files are written to baseOutputFilename with the format
-   * {@code {baseOutputFilename}-0000i-of-0000n.{extension}} where n is the total number of bundles
-   * written and extension is the file extension. Both baseOutputFilename and extension are required
-   * constructor arguments.
+   * <p>During the write, bundles are written to temporary files using the tempDirectory that can be
+   * provided via the constructor of WriteOperation. These temporary files will be named {@code
+   * {tempDirectory}/{bundleId}}, where bundleId is the unique id of the bundle. For example, if
+   * tempDirectory is "gs://my-bucket/my_temp_output", the output for a bundle with bundle id 15723
+   * will be "gs://my-bucket/my_temp_output/15723".
    *
-   * <p>Subclass implementations can change the file naming template by supplying a value for
-   * fileNamingTemplate.
+   * <p>Final output files are written to the location specified by the {@link FilenamePolicy}. If
+   * no filename policy is specified, then the {@link DefaultFilenamePolicy} will be used. The
+   * directory that the files are written to is determined by the {@link FilenamePolicy} instance.
    *
    * <p>Note that in the case of permanent failure of a bundle's write, no clean up of temporary
    * files will occur.
    *
    * <p>If there are no elements in the PCollection being written, no output will be generated.
    *
-   * @param <T> the type of values written to the sink.
+   * @param <OutputT> the type of values written to the sink.
    */
-  public abstract static class WriteOperation<T> implements Serializable {
-    /**
-     * The Sink that this WriteOperation will write to.
-     */
-    protected final FileBasedSink<T> sink;
+  public abstract static class WriteOperation<OutputT, DestinationT> implements Serializable {
+    /** The Sink that this WriteOperation will write to. */
+    protected final FileBasedSink<OutputT, DestinationT> sink;
 
     /** Directory for temporary output files. */
     protected final ValueProvider<ResourceId> tempDirectory;
@@ -445,17 +497,19 @@ public abstract class FileBasedSink<T> implements Serializable, HasDisplayData {
     }
 
     /**
-     * Constructs a WriteOperation using the default strategy for generating a temporary
-     * directory from the base output filename.
+     * Constructs a WriteOperation using the default strategy for generating a temporary directory
+     * from the sink's temp directory.
      *
-     * <p>Default is a uniquely named sibling of baseOutputFilename, e.g. if baseOutputFilename is
-     * /path/to/foo, the temporary directory will be /path/to/temp-beam-foo-$date.
+     * <p>Default is a uniquely named subdirectory of the provided tempDirectory, e.g. if
+     * tempDirectory is /path/to/foo/, the temporary directory will be
+     * /path/to/foo/.temp-beam-$timestamp-$uniqueId.
      *
      * @param sink the FileBasedSink that will be used to configure this write operation.
      */
-    public WriteOperation(FileBasedSink<T> sink) {
-      this(sink, NestedValueProvider.of(
-          sink.getBaseOutputDirectoryProvider(), new TemporaryDirectoryBuilder()));
+    public WriteOperation(FileBasedSink<OutputT, DestinationT> sink) {
+      this(
+          sink,
+          NestedValueProvider.of(sink.getTempDirectoryProvider(), new TemporaryDirectoryBuilder()));
     }
 
     private static class TemporaryDirectoryBuilder
@@ -471,10 +525,12 @@ public abstract class FileBasedSink<T> implements Serializable, HasDisplayData {
       private final Long tempId = TEMP_COUNT.getAndIncrement();
 
       @Override
-      public ResourceId apply(ResourceId baseOutputDirectory) {
+      public ResourceId apply(ResourceId tempDirectory) {
         // Temp directory has a timestamp and a unique ID
         String tempDirName = String.format(".temp-beam-%s-%s", timestamp, tempId);
-        return baseOutputDirectory.resolve(tempDirName, StandardResolveOptions.RESOLVE_DIRECTORY);
+        return tempDirectory
+            .getCurrentDirectory()
+            .resolve(tempDirName, StandardResolveOptions.RESOLVE_DIRECTORY);
       }
     }
 
@@ -485,22 +541,22 @@ public abstract class FileBasedSink<T> implements Serializable, HasDisplayData {
      * @param tempDirectory the base directory to be used for temporary output files.
      */
     @Experimental(Kind.FILESYSTEM)
-    public WriteOperation(FileBasedSink<T> sink, ResourceId tempDirectory) {
+    public WriteOperation(FileBasedSink<OutputT, DestinationT> sink, ResourceId tempDirectory) {
       this(sink, StaticValueProvider.of(tempDirectory));
     }
 
     private WriteOperation(
-        FileBasedSink<T> sink, ValueProvider<ResourceId> tempDirectory) {
+        FileBasedSink<OutputT, DestinationT> sink, ValueProvider<ResourceId> tempDirectory) {
       this.sink = sink;
       this.tempDirectory = tempDirectory;
       this.windowedWrites = false;
     }
 
     /**
-     * Clients must implement to return a subclass of {@link Writer}. This
-     * method must not mutate the state of the object.
+     * Clients must implement this method to return a subclass of {@link Writer}. The method must
+     * not mutate the state of the object.
      */
-    public abstract Writer<T> createWriter() throws Exception;
+    public abstract Writer<OutputT, DestinationT> createWriter() throws Exception;
 
     /**
      * Indicates that the operation will be performing windowed writes.
@@ -514,8 +570,8 @@ public abstract class FileBasedSink<T> implements Serializable, HasDisplayData {
      * removing temporary files.
      *
      * <p>Finalization may be overridden by subclass implementations to perform customized
-     * finalization (e.g., initiating some operation on output bundles, merging them, etc.).
-     * {@code writerResults} contains the filenames of written bundles.
+     * finalization (e.g., initiating some operation on output bundles, merging them, etc.). {@code
+     * writerResults} contains the filenames of written bundles.
      *
      * <p>If subclasses override this method, they must guarantee that its implementation is
      * idempotent, as it may be executed multiple times in the case of failure or for redundancy. It
@@ -523,7 +579,7 @@ public abstract class FileBasedSink<T> implements Serializable, HasDisplayData {
      *
      * @param writerResults the results of writes (FileResult).
      */
-    public void finalize(Iterable<FileResult> writerResults) throws Exception {
+    public void finalize(Iterable<FileResult<DestinationT>> writerResults) throws Exception {
       // Collect names of temporary files and rename them.
       Map<ResourceId, ResourceId> outputFilenames = buildOutputFilenames(writerResults);
       copyToOutputFiles(outputFilenames);
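
Because finalize may run more than once, overrides must stay idempotent. A
hedged sketch of a subclass that adds a side-effect-free logging step before
delegating (the types and logger are illustrative):

    static class AuditingWriteOperation
        extends FileBasedSink.WriteOperation<String, Void> {
      private static final Logger LOG =
          LoggerFactory.getLogger(AuditingWriteOperation.class);

      AuditingWriteOperation(FileBasedSink<String, Void> sink) {
        super(sink);
      }

      @Override
      public FileBasedSink.Writer<String, Void> createWriter() throws Exception {
        throw new UnsupportedOperationException("omitted from this sketch");
      }

      @Override
      public void finalize(Iterable<FileBasedSink.FileResult<Void>> writerResults)
          throws Exception {
        for (FileBasedSink.FileResult<Void> result : writerResults) {
          LOG.info("Finalizing bundle output {}", result.getTempFilename());
        }
        // Logging has no side effects on the files, so retries remain safe.
        super.finalize(writerResults);
      }
    }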
@@ -542,17 +598,14 @@ public abstract class FileBasedSink<T> implements Serializable, HasDisplayData {
 
     @Experimental(Kind.FILESYSTEM)
     protected final Map<ResourceId, ResourceId> buildOutputFilenames(
-        Iterable<FileResult> writerResults) {
+        Iterable<FileResult<DestinationT>> writerResults) {
       int numShards = Iterables.size(writerResults);
       Map<ResourceId, ResourceId> outputFilenames = new HashMap<>();
 
-      FilenamePolicy policy = getSink().getFilenamePolicy();
-      ResourceId baseOutputDir = getSink().getBaseOutputDirectoryProvider().get();
-
       // Either all results have a shard number set (if the sink is configured with a fixed
       // number of shards), or they all don't (otherwise).
       Boolean isShardNumberSetEverywhere = null;
-      for (FileResult result : writerResults) {
+      for (FileResult<DestinationT> result : writerResults) {
         boolean isShardNumberSetHere = (result.getShard() != UNKNOWN_SHARDNUM);
         if (isShardNumberSetEverywhere == null) {
           isShardNumberSetEverywhere = isShardNumberSetHere;
@@ -568,7 +621,7 @@ public abstract class FileBasedSink<T> implements Serializable, HasDisplayData {
         isShardNumberSetEverywhere = true;
       }
 
-      List<FileResult> resultsWithShardNumbers = Lists.newArrayList();
+      List<FileResult<DestinationT>> resultsWithShardNumbers = Lists.newArrayList();
       if (isShardNumberSetEverywhere) {
         resultsWithShardNumbers = Lists.newArrayList(writerResults);
       } else {
@@ -577,29 +630,32 @@ public abstract class FileBasedSink<T> implements Serializable, HasDisplayData {
         // case of triggers, the list of FileResult objects in the Finalize iterable is not
         // deterministic, and might change over retries. This breaks the assumption below that
         // sorting the FileResult objects provides idempotency.
-        List<FileResult> sortedByTempFilename =
+        List<FileResult<DestinationT>> sortedByTempFilename =
             Ordering.from(
-                new Comparator<FileResult>() {
-                  @Override
-                  public int compare(FileResult first, FileResult second) {
-                    String firstFilename = first.getTempFilename().toString();
-                    String secondFilename = second.getTempFilename().toString();
-                    return firstFilename.compareTo(secondFilename);
-                  }
-                })
+                    new Comparator<FileResult<DestinationT>>() {
+                      @Override
+                      public int compare(
+                          FileResult<DestinationT> first, FileResult<DestinationT> second) {
+                        String firstFilename = first.getTempFilename().toString();
+                        String secondFilename = second.getTempFilename().toString();
+                        return firstFilename.compareTo(secondFilename);
+                      }
+                    })
                 .sortedCopy(writerResults);
         for (int i = 0; i < sortedByTempFilename.size(); i++) {
           resultsWithShardNumbers.add(sortedByTempFilename.get(i).withShard(i));
         }
       }
 
-      for (FileResult result : resultsWithShardNumbers) {
+      for (FileResult<DestinationT> result : resultsWithShardNumbers) {
         checkArgument(
             result.getShard() != UNKNOWN_SHARDNUM, "Should have set shard number on %s", result);
         outputFilenames.put(
             result.getTempFilename(),
             result.getDestinationFile(
-                policy, baseOutputDir, numShards, getSink().getExtension()));
+                getSink().getDynamicDestinations(),
+                numShards,
+                getSink().getWritableByteChannelFactory()));
       }
 
       int numDistinctShards = new HashSet<>(outputFilenames.values()).size();
@@ -615,18 +671,18 @@ public abstract class FileBasedSink<T> implements Serializable, HasDisplayData {
      *
      * <p>Can be called from subclasses that override {@link WriteOperation#finalize}.
      *
-     * <p>Files will be named according to the file naming template. The order of the output files
-     * will be the same as the sorted order of the input filenames.  In other words, if the input
-     * filenames are ["C", "A", "B"], baseOutputFilename is "file", the extension is ".txt", and
-     * the fileNamingTemplate is "-SSS-of-NNN", the contents of A will be copied to
-     * file-000-of-003.txt, the contents of B will be copied to file-001-of-003.txt, etc.
+     * <p>Files will be named according to the {@link FilenamePolicy}. The order of the output files
+     * will be the same as the sorted order of the input filenames. In other words (when using
+     * {@link DefaultFilenamePolicy}), if the input filenames are ["C", "A", "B"], baseFilename (in
+     * the policy) is "dir/file", the extension is ".txt", and the fileNamingTemplate is
+     * "-SSS-of-NNN", the contents of A will be copied to dir/file-000-of-003.txt, the contents of B
+     * will be copied to dir/file-001-of-003.txt, etc.
      *
      * @param filenames the filenames of temporary files.
      */
     @VisibleForTesting
     @Experimental(Kind.FILESYSTEM)
-    final void copyToOutputFiles(Map<ResourceId, ResourceId> filenames)
-        throws IOException {
+    final void copyToOutputFiles(Map<ResourceId, ResourceId> filenames) throws IOException {
       int numFiles = filenames.size();
       if (numFiles > 0) {
         LOG.debug("Copying {} files.", numFiles);
@@ -698,10 +754,8 @@ public abstract class FileBasedSink<T> implements Serializable, HasDisplayData {
       }
     }
 
-    /**
-     * Returns the FileBasedSink for this write operation.
-     */
-    public FileBasedSink<T> getSink() {
+    /** Returns the FileBasedSink for this write operation. */
+    public FileBasedSink<OutputT, DestinationT> getSink() {
       return sink;
     }
 
@@ -719,33 +773,28 @@ public abstract class FileBasedSink<T> implements Serializable, HasDisplayData {
     }
   }
 
-  /** Returns the extension that will be written to the produced files. */
-  protected final String getExtension() {
-    String extension = MoreObjects.firstNonNull(writableByteChannelFactory.getFilenameSuffix(), "");
-    if (!extension.isEmpty() && !extension.startsWith(".")) {
-      extension = "." + extension;
-    }
-    return extension;
+  /** Returns the {@link WritableByteChannelFactory} used. */
+  protected final WritableByteChannelFactory getWritableByteChannelFactory() {
+    return writableByteChannelFactory;
   }
 
   /**
-   * Abstract writer that writes a bundle to a {@link FileBasedSink}. Subclass
-   * implementations provide a method that can write a single value to a
-   * {@link WritableByteChannel}.
+   * Abstract writer that writes a bundle to a {@link FileBasedSink}. Subclass implementations
+   * provide a method that can write a single value to a {@link WritableByteChannel}.
    *
    * <p>Subclass implementations may also override methods that write headers and footers before and
    * after the values in a bundle, respectively, as well as provide a MIME type for the output
    * channel.
    *
-   * <p>Multiple {@link Writer} instances may be created on the same worker, and therefore
-   * any access to static members or methods should be thread safe.
+   * <p>Multiple {@link Writer} instances may be created on the same worker, and therefore any
+   * access to static members or methods should be thread safe.
    *
-   * @param <T> the type of values to write.
+   * @param <OutputT> the type of values to write.
    */
-  public abstract static class Writer<T> {
+  public abstract static class Writer<OutputT, DestinationT> {
     private static final Logger LOG = LoggerFactory.getLogger(Writer.class);
 
-    private final WriteOperation<T> writeOperation;
+    private final WriteOperation<OutputT, DestinationT> writeOperation;
 
     /** Unique id for this output bundle. */
     private String id;
@@ -753,6 +802,7 @@ public abstract class FileBasedSink<T> implements Serializable, HasDisplayData {
     private BoundedWindow window;
     private PaneInfo paneInfo;
     private int shard = -1;
+    private DestinationT destination;
 
     /** The output file for this bundle. May be null if opening failed. */
     private @Nullable ResourceId outputFile;
@@ -772,10 +822,8 @@ public abstract class FileBasedSink<T> implements Serializable, HasDisplayData {
      */
     private final String mimeType;
 
-    /**
-     * Construct a new {@link Writer} that will produce files of the given MIME type.
-     */
-    public Writer(WriteOperation<T> writeOperation, String mimeType) {
+    /** Construct a new {@link Writer} that will produce files of the given MIME type. */
+    public Writer(WriteOperation<OutputT, DestinationT> writeOperation, String mimeType) {
       checkNotNull(writeOperation);
       this.writeOperation = writeOperation;
       this.mimeType = mimeType;
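
A hedged sketch of a concrete Writer under the new generics, assuming the
existing prepareWrite(WritableByteChannel) hook that receives the channel for
the bundle's temporary file; it writes one line per element (imports such as
java.nio.channels.Channels are omitted):

    static class LineWriter extends FileBasedSink.Writer<String, Void> {
      private OutputStream out;

      LineWriter(FileBasedSink.WriteOperation<String, Void> writeOperation) {
        super(writeOperation, MimeTypes.TEXT);
      }

      @Override
      protected void prepareWrite(WritableByteChannel channel) throws Exception {
        // Called once the bundle's temporary file has been opened.
        out = Channels.newOutputStream(channel);
      }

      @Override
      public void write(String value) throws Exception {
        out.write(value.getBytes(StandardCharsets.UTF_8));
        out.write('\n');
      }
    }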
@@ -818,28 +866,29 @@ public abstract class FileBasedSink<T> implements Serializable, HasDisplayData {
      * id populated for the case of static sharding. In cases where the runner is dynamically
      * picking sharding, shard might be set to -1.
      */
-    public final void openWindowed(String uId, BoundedWindow window, PaneInfo paneInfo, int shard)
+    public final void openWindowed(
+        String uId, BoundedWindow window, PaneInfo paneInfo, int shard, DestinationT destination)
         throws Exception {
       if (!getWriteOperation().windowedWrites) {
         throw new IllegalStateException("openWindowed called on a non-windowed sink.");
       }
-      open(uId, window, paneInfo, shard);
+      open(uId, window, paneInfo, shard, destination);
     }
 
     /**
      * Called for each value in the bundle.
      */
-    public abstract void write(T value) throws Exception;
+    public abstract void write(OutputT value) throws Exception;
 
     /**
-     * Similar to {@link #openWindowed} however for the case where unwindowed writes were
-     * requested.
+     * Similar to {@link #openWindowed}, but for the case where unwindowed writes were requested.
      */
-    public final void openUnwindowed(String uId, int shard) throws Exception {
+    public final void openUnwindowed(String uId, int shard, DestinationT destination)
+        throws Exception {
       if (getWriteOperation().windowedWrites) {
         throw new IllegalStateException("openUnwindowed called on a windowed sink.");
       }
-      open(uId, null, null, shard);
+      open(uId, null, null, shard, destination);
     }
 
     // Helper function to close a channel, on exception cases.
@@ -855,14 +904,18 @@ public abstract class FileBasedSink<T> implements Serializable, HasDisplayData {
       }
     }
 
-    private void open(String uId,
-                      @Nullable BoundedWindow window,
-                      @Nullable PaneInfo paneInfo,
-                      int shard) throws Exception {
+    private void open(
+        String uId,
+        @Nullable BoundedWindow window,
+        @Nullable PaneInfo paneInfo,
+        int shard,
+        DestinationT destination)
+        throws Exception {
       this.id = uId;
       this.window = window;
       this.paneInfo = paneInfo;
       this.shard = shard;
+      this.destination = destination;
       ResourceId tempDirectory = getWriteOperation().tempDirectory.get();
       outputFile = tempDirectory.resolve(id, StandardResolveOptions.RESOLVE_FILE);
       verifyNotNull(
@@ -908,7 +961,7 @@ public abstract class FileBasedSink<T> implements Serializable, HasDisplayData {
     }
 
     /** Closes the channel and returns the bundle result. */
-    public final FileResult close() throws Exception {
+    public final FileResult<DestinationT> close() throws Exception {
       checkState(outputFile != null, "FileResult.close cannot be called with a null outputFile");
 
       LOG.debug("Writing footer to {}.", outputFile);
@@ -938,35 +991,41 @@ public abstract class FileBasedSink<T> implements Serializable, HasDisplayData {
         throw new IOException(String.format("Failed closing channel to %s", outputFile), e);
       }
 
-      FileResult result = new FileResult(outputFile, shard, window, paneInfo);
+      FileResult<DestinationT> result =
+          new FileResult<>(outputFile, shard, window, paneInfo, destination);
       LOG.debug("Result for bundle {}: {}", this.id, outputFile);
       return result;
     }
 
-    /**
-     * Return the WriteOperation that this Writer belongs to.
-     */
-    public WriteOperation<T> getWriteOperation() {
+    /** Return the WriteOperation that this Writer belongs to. */
+    public WriteOperation<OutputT, DestinationT> getWriteOperation() {
       return writeOperation;
     }
   }
 
   /**
-   * Result of a single bundle write. Contains the filename produced by the bundle, and if known
-   * the final output filename.
+   * Result of a single bundle write. Contains the filename produced by the bundle and, if known,
+   * the final output filename.
    */
-  public static final class FileResult {
+  public static final class FileResult<DestinationT> {
     private final ResourceId tempFilename;
     private final int shard;
     private final BoundedWindow window;
     private final PaneInfo paneInfo;
+    private final DestinationT destination;
 
     @Experimental(Kind.FILESYSTEM)
-    public FileResult(ResourceId tempFilename, int shard, BoundedWindow window, PaneInfo paneInfo) {
+    public FileResult(
+        ResourceId tempFilename,
+        int shard,
+        BoundedWindow window,
+        PaneInfo paneInfo,
+        DestinationT destination) {
       this.tempFilename = tempFilename;
       this.shard = shard;
       this.window = window;
       this.paneInfo = paneInfo;
+      this.destination = destination;
     }
 
     @Experimental(Kind.FILESYSTEM)
@@ -978,8 +1037,8 @@ public abstract class FileBasedSink<T> implements Serializable, HasDisplayData {
       return shard;
     }
 
-    public FileResult withShard(int shard) {
-      return new FileResult(tempFilename, shard, window, paneInfo);
+    public FileResult<DestinationT> withShard(int shard) {
+      return new FileResult<>(tempFilename, shard, window, paneInfo, destination);
     }
 
     public BoundedWindow getWindow() {
@@ -990,17 +1049,24 @@ public abstract class FileBasedSink<T> implements Serializable, HasDisplayData {
       return paneInfo;
     }
 
+    public DestinationT getDestination() {
+      return destination;
+    }
+
     @Experimental(Kind.FILESYSTEM)
-    public ResourceId getDestinationFile(FilenamePolicy policy, ResourceId outputDirectory,
-                                         int numShards, String extension) {
+    public ResourceId getDestinationFile(
+        DynamicDestinations<?, DestinationT> dynamicDestinations,
+        int numShards,
+        OutputFileHints outputFileHints) {
       checkArgument(getShard() != UNKNOWN_SHARDNUM);
       checkArgument(numShards > 0);
+      FilenamePolicy policy = dynamicDestinations.getFilenamePolicy(destination);
       if (getWindow() != null) {
-        return policy.windowedFilename(outputDirectory, new WindowedContext(
-            getWindow(), getPaneInfo(), getShard(), numShards), extension);
+        return policy.windowedFilename(
+            new WindowedContext(getWindow(), getPaneInfo(), getShard(), numShards),
+            outputFileHints);
       } else {
-        return policy.unwindowedFilename(outputDirectory, new Context(getShard(), numShards),
-            extension);
+        return policy.unwindowedFilename(new Context(getShard(), numShards), outputFileHints);
       }
     }
 
@@ -1014,22 +1080,24 @@ public abstract class FileBasedSink<T> implements Serializable, HasDisplayData {
     }
   }
 
-  /**
-   * A coder for {@link FileResult} objects.
-   */
-  public static final class FileResultCoder extends StructuredCoder<FileResult> {
+  /** A coder for {@link FileResult} objects. */
+  public static final class FileResultCoder<DestinationT>
+      extends StructuredCoder<FileResult<DestinationT>> {
     private static final Coder<String> FILENAME_CODER = StringUtf8Coder.of();
     private static final Coder<Integer> SHARD_CODER = VarIntCoder.of();
     private static final Coder<PaneInfo> PANE_INFO_CODER = NullableCoder.of(PaneInfoCoder.INSTANCE);
-
     private final Coder<BoundedWindow> windowCoder;
+    private final Coder<DestinationT> destinationCoder;
 
-    protected FileResultCoder(Coder<BoundedWindow> windowCoder) {
+    protected FileResultCoder(
+        Coder<BoundedWindow> windowCoder, Coder<DestinationT> destinationCoder) {
       this.windowCoder = NullableCoder.of(windowCoder);
+      this.destinationCoder = destinationCoder;
     }
 
-    public static FileResultCoder of(Coder<BoundedWindow> windowCoder) {
-      return new FileResultCoder(windowCoder);
+    public static <DestinationT> FileResultCoder<DestinationT> of(
+        Coder<BoundedWindow> windowCoder, Coder<DestinationT> destinationCoder) {
+      return new FileResultCoder<>(windowCoder, destinationCoder);
     }
 
     @Override
@@ -1038,8 +1106,7 @@ public abstract class FileBasedSink<T> implements Serializable, HasDisplayData {
     }
 
     @Override
-    public void encode(FileResult value, OutputStream outStream)
-        throws IOException {
+    public void encode(FileResult<DestinationT> value, OutputStream outStream) throws IOException {
       if (value == null) {
         throw new CoderException("cannot encode a null value");
       }
@@ -1047,17 +1114,22 @@ public abstract class FileBasedSink<T> implements Serializable, HasDisplayData {
       windowCoder.encode(value.getWindow(), outStream);
       PANE_INFO_CODER.encode(value.getPaneInfo(), outStream);
       SHARD_CODER.encode(value.getShard(), outStream);
+      destinationCoder.encode(value.getDestination(), outStream);
     }
 
     @Override
-    public FileResult decode(InputStream inStream)
-        throws IOException {
+    public FileResult<DestinationT> decode(InputStream inStream) throws IOException {
       String tempFilename = FILENAME_CODER.decode(inStream);
       BoundedWindow window = windowCoder.decode(inStream);
       PaneInfo paneInfo = PANE_INFO_CODER.decode(inStream);
       int shard = SHARD_CODER.decode(inStream);
-      return new FileResult(FileSystems.matchNewResource(tempFilename, false /* isDirectory */),
-          shard, window, paneInfo);
+      DestinationT destination = destinationCoder.decode(inStream);
+      return new FileResult<>(
+          FileSystems.matchNewResource(tempFilename, false /* isDirectory */),
+          shard,
+          window,
+          paneInfo,
+          destination);
     }
 
     @Override
@@ -1066,25 +1138,15 @@ public abstract class FileBasedSink<T> implements Serializable, HasDisplayData {
       windowCoder.verifyDeterministic();
       PANE_INFO_CODER.verifyDeterministic();
       SHARD_CODER.verifyDeterministic();
+      destinationCoder.verifyDeterministic();
     }
   }
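
A short, hedged illustration of wiring the new coder: the destination coder
would normally come from the sink's DynamicDestinations (here simply a String
coder), combined with the window coder of the input's windowing strategy
(windowingStrategy is assumed to be in scope):

    @SuppressWarnings("unchecked")
    Coder<BoundedWindow> windowCoder =
        (Coder<BoundedWindow>) windowingStrategy.getWindowFn().windowCoder();
    FileBasedSink.FileResultCoder<String> resultCoder =
        FileBasedSink.FileResultCoder.of(windowCoder, StringUtf8Coder.of());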
 
   /**
-   * Implementations create instances of {@link WritableByteChannel} used by {@link FileBasedSink}
-   * and related classes to allow <em>decorating</em>, or otherwise transforming, the raw data that
-   * would normally be written directly to the {@link WritableByteChannel} passed into
-   * {@link WritableByteChannelFactory#create(WritableByteChannel)}.
-   *
-   * <p>Subclasses should override {@link #toString()} with something meaningful, as it is used when
-   * building {@link DisplayData}.
+   * Provides hints about how to generate output files, such as a suggested filename suffix (e.g.
+   * based on the compression type), and the file MIME type.
    */
-  public interface WritableByteChannelFactory extends Serializable {
-    /**
-     * @param channel the {@link WritableByteChannel} to wrap
-     * @return the {@link WritableByteChannel} to be used during output
-     */
-    WritableByteChannel create(WritableByteChannel channel) throws IOException;
-
+  public interface OutputFileHints extends Serializable {
     /**
      * Returns the MIME type that should be used for the files that will hold the output data. May
      * return {@code null} if this {@code WritableByteChannelFactory} does not meaningfully change
@@ -1101,6 +1163,23 @@ public abstract class FileBasedSink<T> implements Serializable, HasDisplayData {
      * @return an optional filename suffix, eg, ".gz" is returned by {@link CompressionType#GZIP}
      */
     @Nullable
-    String getFilenameSuffix();
+    String getSuggestedFilenameSuffix();
+  }
+
+  /**
+   * Implementations create instances of {@link WritableByteChannel} used by {@link FileBasedSink}
+   * and related classes to allow <em>decorating</em>, or otherwise transforming, the raw data that
+   * would normally be written directly to the {@link WritableByteChannel} passed into {@link
+   * WritableByteChannelFactory#create(WritableByteChannel)}.
+   *
+   * <p>Subclasses should override {@link #toString()} with something meaningful, as it is used when
+   * building {@link DisplayData}.
+   */
+  public interface WritableByteChannelFactory extends OutputFileHints {
+    /**
+     * @param channel the {@link WritableByteChannel} to wrap
+     * @return the {@link WritableByteChannel} to be used during output
+     */
+    WritableByteChannel create(WritableByteChannel channel) throws IOException;
   }
 }
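
Existing implementations such as CompressionType already satisfy the split
interfaces above. A hedged sketch of a no-op factory, showing what a custom
implementation now provides (getMimeType() is assumed to be the OutputFileHints
method described above):

    enum PlainTextFactory implements FileBasedSink.WritableByteChannelFactory {
      INSTANCE;

      @Override
      public WritableByteChannel create(WritableByteChannel channel) {
        return channel; // no decoration of the raw output
      }

      @Override
      public String getMimeType() {
        return MimeTypes.TEXT;
      }

      @Override
      public String getSuggestedFilenameSuffix() {
        return ".txt";
      }

      @Override
      public String toString() {
        return "PLAIN_TEXT"; // shown in DisplayData
      }
    }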

http://git-wip-us.apache.org/repos/asf/beam/blob/4c336e84/sdks/java/core/src/main/java/org/apache/beam/sdk/io/TFRecordIO.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/TFRecordIO.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/TFRecordIO.java
index e288075..6e7b243 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/TFRecordIO.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/TFRecordIO.java
@@ -45,6 +45,7 @@ import org.apache.beam.sdk.options.ValueProvider;
 import org.apache.beam.sdk.options.ValueProvider.StaticValueProvider;
 import org.apache.beam.sdk.transforms.PTransform;
 import org.apache.beam.sdk.transforms.SerializableFunction;
+import org.apache.beam.sdk.transforms.SerializableFunctions;
 import org.apache.beam.sdk.transforms.display.DisplayData;
 import org.apache.beam.sdk.util.MimeTypes;
 import org.apache.beam.sdk.values.PBegin;
@@ -355,12 +356,11 @@ public class TFRecordIO {
     public PDone expand(PCollection<byte[]> input) {
       checkState(getOutputPrefix() != null,
           "need to set the output prefix of a TFRecordIO.Write transform");
-      WriteFiles<byte[]> write = WriteFiles.to(
+      WriteFiles<byte[], Void, byte[]> write =
+          WriteFiles.<byte[], Void, byte[]>to(
               new TFRecordSink(
-                  getOutputPrefix(),
-                  getShardTemplate(),
-                  getFilenameSuffix(),
-                  getCompressionType()));
+                  getOutputPrefix(), getShardTemplate(), getFilenameSuffix(), getCompressionType()),
+              SerializableFunctions.<byte[]>identity());
       if (getNumShards() > 0) {
         write = write.withNumShards(getNumShards());
       }
@@ -546,20 +546,20 @@ public class TFRecordIO {
     }
   }
 
-  /**
-   * A {@link FileBasedSink} for TFRecord files. Produces TFRecord files.
-   */
+  /** A {@link FileBasedSink} for TFRecord files. Produces TFRecord files. */
   @VisibleForTesting
-  static class TFRecordSink extends FileBasedSink<byte[]> {
+  static class TFRecordSink extends FileBasedSink<byte[], Void> {
     @VisibleForTesting
-    TFRecordSink(ValueProvider<ResourceId> outputPrefix,
+    TFRecordSink(
+        ValueProvider<ResourceId> outputPrefix,
         @Nullable String shardTemplate,
         @Nullable String suffix,
         TFRecordIO.CompressionType compressionType) {
       super(
           outputPrefix,
-          DefaultFilenamePolicy.constructUsingStandardParameters(
-              outputPrefix, shardTemplate, suffix, false),
+          DynamicFileDestinations.constant(
+              DefaultFilenamePolicy.fromStandardParameters(
+                  outputPrefix, shardTemplate, suffix, false)),
           writableByteChannelFactory(compressionType));
     }
 
@@ -571,7 +571,7 @@ public class TFRecordIO {
     }
 
     @Override
-    public WriteOperation<byte[]> createWriteOperation() {
+    public WriteOperation<byte[], Void> createWriteOperation() {
       return new TFRecordWriteOperation(this);
     }
 
@@ -590,30 +590,24 @@ public class TFRecordIO {
       return CompressionType.UNCOMPRESSED;
     }
 
-    /**
-     * A {@link WriteOperation
-     * WriteOperation} for TFRecord files.
-     */
-    private static class TFRecordWriteOperation extends WriteOperation<byte[]> {
+    /** A {@link WriteOperation WriteOperation} for TFRecord files. */
+    private static class TFRecordWriteOperation extends WriteOperation<byte[], Void> {
       private TFRecordWriteOperation(TFRecordSink sink) {
         super(sink);
       }
 
       @Override
-      public Writer<byte[]> createWriter() throws Exception {
+      public Writer<byte[], Void> createWriter() throws Exception {
         return new TFRecordWriter(this);
       }
     }
 
-    /**
-     * A {@link Writer Writer}
-     * for TFRecord files.
-     */
-    private static class TFRecordWriter extends Writer<byte[]> {
+    /** A {@link Writer Writer} for TFRecord files. */
+    private static class TFRecordWriter extends Writer<byte[], Void> {
       private WritableByteChannel outChannel;
       private TFRecordCodec codec;
 
-      private TFRecordWriter(WriteOperation<byte[]> writeOperation) {
+      private TFRecordWriter(WriteOperation<byte[], Void> writeOperation) {
         super(writeOperation, MimeTypes.BINARY);
       }
 

http://git-wip-us.apache.org/repos/asf/beam/blob/4c336e84/sdks/java/core/src/main/java/org/apache/beam/sdk/io/TextIO.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/TextIO.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/TextIO.java
index f1eb7c0..5241589 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/TextIO.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/TextIO.java
@@ -22,12 +22,15 @@ import static com.google.common.base.Preconditions.checkNotNull;
 import static com.google.common.base.Preconditions.checkState;
 
 import com.google.auto.value.AutoValue;
+import com.google.common.annotations.VisibleForTesting;
 import javax.annotation.Nullable;
 import org.apache.beam.sdk.annotations.Experimental;
 import org.apache.beam.sdk.annotations.Experimental.Kind;
 import org.apache.beam.sdk.coders.Coder;
 import org.apache.beam.sdk.coders.StringUtf8Coder;
 import org.apache.beam.sdk.coders.VoidCoder;
+import org.apache.beam.sdk.io.DefaultFilenamePolicy.Params;
+import org.apache.beam.sdk.io.FileBasedSink.DynamicDestinations;
 import org.apache.beam.sdk.io.FileBasedSink.FilenamePolicy;
 import org.apache.beam.sdk.io.FileBasedSink.WritableByteChannelFactory;
 import org.apache.beam.sdk.io.Read.Bounded;
@@ -37,6 +40,7 @@ import org.apache.beam.sdk.options.ValueProvider.NestedValueProvider;
 import org.apache.beam.sdk.options.ValueProvider.StaticValueProvider;
 import org.apache.beam.sdk.transforms.PTransform;
 import org.apache.beam.sdk.transforms.SerializableFunction;
+import org.apache.beam.sdk.transforms.SerializableFunctions;
 import org.apache.beam.sdk.transforms.display.DisplayData;
 import org.apache.beam.sdk.values.PBegin;
 import org.apache.beam.sdk.values.PCollection;
@@ -65,19 +69,8 @@ import org.apache.beam.sdk.values.PDone;
  * <p>To write a {@link PCollection} to one or more text files, use {@code TextIO.write()}, using
  * {@link TextIO.Write#to(String)} to specify the output prefix of the files to write.
  *
- * <p>By default, all input is put into the global window before writing. If per-window writes are
- * desired - for example, when using a streaming runner -
- * {@link TextIO.Write#withWindowedWrites()} will cause windowing and triggering to be
- * preserved. When producing windowed writes, the number of output shards must be set explicitly
- * using {@link TextIO.Write#withNumShards(int)}; some runners may set this for you to a
- * runner-chosen value, so you may need not set it yourself. A {@link FilenamePolicy} can also be
- * set in case you need better control over naming files created by unique windows.
- * {@link DefaultFilenamePolicy} policy for producing unique filenames might not be appropriate
- * for your use case.
- *
- * <p>Any existing files with the same names as generated output files will be overwritten.
- *
  * <p>For example:
+ *
  * <pre>{@code
  * // A simple Write to a local file (only runs locally):
  * PCollection<String> lines = ...;
@@ -85,10 +78,49 @@ import org.apache.beam.sdk.values.PDone;
  *
  * // Same as above, only with Gzip compression:
  * PCollection<String> lines = ...;
- * lines.apply(TextIO.write().to("/path/to/file.txt"));
+ * lines.apply(TextIO.write().to("/path/to/file.txt")
  *      .withSuffix(".txt")
  *      .withWritableByteChannelFactory(FileBasedSink.CompressionType.GZIP));
  * }</pre>
+ *
+ * <p>By default, all input is put into the global window before writing. If per-window writes are
+ * desired - for example, when using a streaming runner - {@link TextIO.Write#withWindowedWrites()}
+ * will cause windowing and triggering to be preserved. When producing windowed writes with a
+ * streaming runner that supports triggers, the number of output shards must be set explicitly using
+ * {@link TextIO.Write#withNumShards(int)}; some runners may set this for you to a runner-chosen
+ * value, so you may need not set it yourself. If setting an explicit template using {@link
+ * TextIO.Write#withShardNameTemplate(String)}, make sure that the template contains placeholders
+ * for the window and the pane; W is expanded into the window text, and P into the pane; the default
+ * template will include both the window and the pane in the filename.
+ *
+ * <p>If you want better control over how filenames are generated than the default policy allows, a
+ * custom {@link FilenamePolicy} can also be set using {@link TextIO.Write#to(FilenamePolicy)}.
+ *
+ * <p>TextIO also supports dynamic, value-dependent file destinations. The most general form of this
+ * is done via {@link TextIO.Write#to(DynamicDestinations)}. A {@link DynamicDestinations} class
+ * allows you to convert any input value into a custom destination object, and map that destination
+ * object to a {@link FilenamePolicy}. This allows using different filename policies (or more
+ * commonly, differently-configured instances of the same policy) based on the input record. Often
+ * this is used in conjunction with {@link TextIO#writeCustomType(SerializableFunction)}, which
+ * allows your {@link DynamicDestinations} object to examine the input type and takes a format
+ * function to convert that type to a string for writing.
+ *
+ * <p>A convenience shortcut is provided for the case where the default naming policy is used, but
+ * different configurations of this policy are wanted based on the input record. Default naming
+ * policies can be configured using the {@link DefaultFilenamePolicy.Params} object.
+ *
+ * <pre>{@code
+ * PCollection<UserEvent> lines = ...;
+ * lines.apply(TextIO.<UserEvent>writeCustomType(new FormatEvent())
+ *      .to(new SerializableFunction<UserEvent, Params>() {
+ *         public Params apply(UserEvent value) {
+ *           return new Params().withBaseFilename(baseDirectory + "/" + value.country());
+ *         }
+ *       },
+ *       new Params().withBaseFilename(baseDirectory + "/empty")));
+ * }</pre>
+ *
+ * <p>Any existing files with the same names as generated output files will be overwritten.
  */
 public class TextIO {
   /**
@@ -105,11 +137,29 @@ public class TextIO {
    * line.
    */
   public static Write write() {
-    return new AutoValue_TextIO_Write.Builder()
+    return new TextIO.Write();
+  }
+
+  /**
+   * A {@link PTransform} that writes a {@link PCollection} to a text file (or multiple text files
+   * matching a sharding pattern), with each element of the input collection encoded into its own
+   * line.
+   *
+   * <p>This version allows you to apply {@link TextIO} writes to a PCollection of a custom type
+   * {@code T}, along with a format function that converts the input type {@code T} to the String
+   * that will be written to the file. The advantage of this is that it allows a user-provided
+   * {@link DynamicDestinations} object, set via {@link Write#to(DynamicDestinations)}, to examine
+   * the user's custom type when choosing a destination.
+   */
+  public static <T> TypedWrite<T> writeCustomType(SerializableFunction<T, String> formatFunction) {
+    return new AutoValue_TextIO_TypedWrite.Builder<T>()
         .setFilenamePrefix(null)
+        .setTempDirectory(null)
         .setShardTemplate(null)
         .setFilenameSuffix(null)
         .setFilenamePolicy(null)
+        .setDynamicDestinations(null)
+        .setFormatFunction(formatFunction)
         .setWritableByteChannelFactory(FileBasedSink.CompressionType.UNCOMPRESSED)
         .setWindowedWrites(false)
         .setNumShards(0)
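
Tying the new pieces together, a hedged sketch of writeCustomType with a
user-defined DynamicDestinations. FormatEvent and PerUserDestinations are
assumed helpers (the latter a DynamicDestinations<UserEvent, ?> like the sketch
in FileBasedSink above), and withTempDirectory is assumed to accept a
ResourceId as the javadoc below suggests:

    PCollection<UserEvent> events = ...;
    events.apply(
        TextIO.<UserEvent>writeCustomType(new FormatEvent()) // UserEvent -> String
            .to(new PerUserDestinations())                   // record-dependent naming
            .withTempDirectory(FileSystems.matchNewResource(
                "gs://my-bucket/temp", true /* isDirectory */)));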
@@ -223,18 +273,21 @@ public class TextIO {
     }
   }
 
-
-  /////////////////////////////////////////////////////////////////////////////
+  // ///////////////////////////////////////////////////////////////////////////
 
   /** Implementation of {@link #write}. */
   @AutoValue
-  public abstract static class Write extends PTransform<PCollection<String>, PDone> {
+  public abstract static class TypedWrite<T> extends PTransform<PCollection<T>, PDone> {
     /** The prefix of each file written, combined with suffix and shardTemplate. */
     @Nullable abstract ValueProvider<ResourceId> getFilenamePrefix();
 
     /** The suffix of each file written, combined with prefix and shardTemplate. */
     @Nullable abstract String getFilenameSuffix();
 
+    /** The base directory used for generating temporary files. */
+    @Nullable
+    abstract ValueProvider<ResourceId> getTempDirectory();
+
     /** An optional header to add to each file. */
     @Nullable abstract String getHeader();
 
@@ -250,6 +303,13 @@ public class TextIO {
     /** A policy for naming output files. */
     @Nullable abstract FilenamePolicy getFilenamePolicy();
 
+    /** Allows for value-dependent {@link DynamicDestinations} to be vended. */
+    @Nullable
+    abstract DynamicDestinations<T, ?> getDynamicDestinations();
+
+    /** A function that converts T to a String, for writing to the file. */
+    abstract SerializableFunction<T, String> getFormatFunction();
+
     /** Whether to write windowed output files. */
     abstract boolean getWindowedWrites();
 
@@ -259,66 +319,68 @@ public class TextIO {
      */
     abstract WritableByteChannelFactory getWritableByteChannelFactory();
 
-    abstract Builder toBuilder();
+    abstract Builder<T> toBuilder();
 
     @AutoValue.Builder
-    abstract static class Builder {
-      abstract Builder setFilenamePrefix(ValueProvider<ResourceId> filenamePrefix);
-      abstract Builder setShardTemplate(@Nullable String shardTemplate);
-      abstract Builder setFilenameSuffix(@Nullable String filenameSuffix);
-      abstract Builder setHeader(@Nullable String header);
-      abstract Builder setFooter(@Nullable String footer);
-      abstract Builder setFilenamePolicy(@Nullable FilenamePolicy filenamePolicy);
-      abstract Builder setNumShards(int numShards);
-      abstract Builder setWindowedWrites(boolean windowedWrites);
-      abstract Builder setWritableByteChannelFactory(
+    abstract static class Builder<T> {
+      abstract Builder<T> setFilenamePrefix(ValueProvider<ResourceId> filenamePrefix);
+
+      abstract Builder<T> setTempDirectory(ValueProvider<ResourceId> tempDirectory);
+
+      abstract Builder<T> setShardTemplate(@Nullable String shardTemplate);
+
+      abstract Builder<T> setFilenameSuffix(@Nullable String filenameSuffix);
+
+      abstract Builder<T> setHeader(@Nullable String header);
+
+      abstract Builder<T> setFooter(@Nullable String footer);
+
+      abstract Builder<T> setFilenamePolicy(@Nullable FilenamePolicy filenamePolicy);
+
+      abstract Builder<T> setDynamicDestinations(
+          @Nullable DynamicDestinations<T, ?> dynamicDestinations);
+
+      abstract Builder<T> setFormatFunction(SerializableFunction<T, String> formatFunction);
+
+      abstract Builder<T> setNumShards(int numShards);
+
+      abstract Builder<T> setWindowedWrites(boolean windowedWrites);
+
+      abstract Builder<T> setWritableByteChannelFactory(
           WritableByteChannelFactory writableByteChannelFactory);
 
-      abstract Write build();
+      abstract TypedWrite<T> build();
     }
 
     /**
-     * Writes to text files with the given prefix. The given {@code prefix} can reference any
-     * {@link FileSystem} on the classpath.
-     *
-     * <p>The name of the output files will be determined by the {@link FilenamePolicy} used.
+     * Writes to text files with the given prefix. The given {@code prefix} can reference any {@link
+     * FileSystem} on the classpath. This prefix is used by the {@link DefaultFilenamePolicy} to
+     * generate filenames.
      *
      * <p>By default, a {@link DefaultFilenamePolicy} will be used built using the specified prefix
-     * to define the base output directory and file prefix, a shard identifier (see
-     * {@link #withNumShards(int)}), and a common suffix (if supplied using
-     * {@link #withSuffix(String)}).
+     * to define the base output directory and file prefix, a shard identifier (see {@link
+     * #withNumShards(int)}), and a common suffix (if supplied using {@link #withSuffix(String)}).
+     *
+     * <p>This default policy can be overridden using {@link #to(FilenamePolicy)}, in which case
+     * {@link #withShardNameTemplate(String)} and {@link #withSuffix(String)} should not be set.
+     * Custom filename policies do not automatically see this prefix - you should explicitly pass
+     * the prefix into your {@link FilenamePolicy} object if you need this.
      *
-     * <p>This default policy can be overridden using {@link #withFilenamePolicy(FilenamePolicy)},
-     * in which case {@link #withShardNameTemplate(String)} and {@link #withSuffix(String)} should
-     * not be set.
+     * <p>If {@link #withTempDirectory} has not been called, this filename prefix will be used to
+     * infer a directory for temporary files.
      */
-    public Write to(String filenamePrefix) {
+    public TypedWrite<T> to(String filenamePrefix) {
       return to(FileBasedSink.convertToFileResourceIfPossible(filenamePrefix));
     }
 
-    /**
-     * Writes to text files with prefix from the given resource.
-     *
-     * <p>The name of the output files will be determined by the {@link FilenamePolicy} used.
-     *
-     * <p>By default, a {@link DefaultFilenamePolicy} will be used built using the specified prefix
-     * to define the base output directory and file prefix, a shard identifier (see
-     * {@link #withNumShards(int)}), and a common suffix (if supplied using
-     * {@link #withSuffix(String)}).
-     *
-     * <p>This default policy can be overridden using {@link #withFilenamePolicy(FilenamePolicy)},
-     * in which case {@link #withShardNameTemplate(String)} and {@link #withSuffix(String)} should
-     * not be set.
-     */
+    /** Like {@link #to(String)}. */
     @Experimental(Kind.FILESYSTEM)
-    public Write to(ResourceId filenamePrefix) {
+    public TypedWrite<T> to(ResourceId filenamePrefix) {
       return toResource(StaticValueProvider.of(filenamePrefix));
     }
 
-    /**
-     * Like {@link #to(String)}.
-     */
-    public Write to(ValueProvider<String> outputPrefix) {
+    /** Like {@link #to(String)}. */
+    public TypedWrite<T> to(ValueProvider<String> outputPrefix) {
       return toResource(NestedValueProvider.of(outputPrefix,
           new SerializableFunction<String, ResourceId>() {
             @Override
@@ -329,43 +391,77 @@ public class TextIO {
     }
 
     /**
-     * Like {@link #to(ResourceId)}.
+     * Writes to files named according to the given {@link FileBasedSink.FilenamePolicy}. A
+     * directory for temporary files must be specified using {@link #withTempDirectory}.
      */
+    public TypedWrite<T> to(FilenamePolicy filenamePolicy) {
+      return toBuilder().setFilenamePolicy(filenamePolicy).build();
+    }
+
+    /**
+     * Use a {@link DynamicDestinations} object to vend {@link FilenamePolicy} objects. These
+     * objects can examine the input record when creating a {@link FilenamePolicy}. A directory for
+     * temporary files must be specified using {@link #withTempDirectory}.
+     */
+    public TypedWrite<T> to(DynamicDestinations<T, ?> dynamicDestinations) {
+      return toBuilder().setDynamicDestinations(dynamicDestinations).build();
+    }
+
+    /**
+     * Write to dynamic destinations using the default filename policy. The destinationFunction maps
+     * the input record to a {@link DefaultFilenamePolicy.Params} object that specifies where the
+     * records should be written (base filename, file suffix, and shard template). The
+     * emptyDestination parameter specified where empty files should be written for when the written
+     * {@link PCollection} is empty.
+     */
+    public TypedWrite<T> to(
+        SerializableFunction<T, Params> destinationFunction, Params emptyDestination) {
+      return to(DynamicFileDestinations.toDefaultPolicies(destinationFunction, emptyDestination));
+    }
+
+    /** Like {@link #to(ResourceId)}. */
     @Experimental(Kind.FILESYSTEM)
-    public Write toResource(ValueProvider<ResourceId> filenamePrefix) {
+    public TypedWrite<T> toResource(ValueProvider<ResourceId> filenamePrefix) {
       return toBuilder().setFilenamePrefix(filenamePrefix).build();
     }
 
+    /** Set the base directory used to generate temporary files. */
+    @Experimental(Kind.FILESYSTEM)
+    public TypedWrite<T> withTempDirectory(ValueProvider<ResourceId> tempDirectory) {
+      return toBuilder().setTempDirectory(tempDirectory).build();
+    }
+
+    /** Set the base directory used to generate temporary files. */
+    @Experimental(Kind.FILESYSTEM)
+    public TypedWrite<T> withTempDirectory(ResourceId tempDirectory) {
+      return withTempDirectory(StaticValueProvider.of(tempDirectory));
+    }
+
     /**
      * Uses the given {@link ShardNameTemplate} for naming output files. This option may only be
-     * used when {@link #withFilenamePolicy(FilenamePolicy)} has not been configured.
+     * used with one of the default filename-prefix {@code to()} overloads, i.e. not when using
+     * either {@link #to(FilenamePolicy)} or {@link #to(DynamicDestinations)}.
      *
      * <p>See {@link DefaultFilenamePolicy} for how the prefix, shard name template, and suffix are
      * used.
      */
-    public Write withShardNameTemplate(String shardTemplate) {
+    public TypedWrite<T> withShardNameTemplate(String shardTemplate) {
       return toBuilder().setShardTemplate(shardTemplate).build();
     }
 
     /**
-     * Configures the filename suffix for written files. This option may only be used when
-     * {@link #withFilenamePolicy(FilenamePolicy)} has not been configured.
+     * Configures the filename suffix for written files. This option may only be used with one
+     * of the default filename-prefix {@code to()} overloads, i.e. not when using either {@link
+     * #to(FilenamePolicy)} or {@link #to(DynamicDestinations)}.
      *
      * <p>See {@link DefaultFilenamePolicy} for how the prefix, shard name template, and suffix are
      * used.
      */
-    public Write withSuffix(String filenameSuffix) {
+    public TypedWrite<T> withSuffix(String filenameSuffix) {
       return toBuilder().setFilenameSuffix(filenameSuffix).build();
     }
 
     /**
-     * Configures the {@link FileBasedSink.FilenamePolicy} that will be used to name written files.
-     */
-    public Write withFilenamePolicy(FilenamePolicy filenamePolicy) {
-      return toBuilder().setFilenamePolicy(filenamePolicy).build();
-    }
-
-    /**
      * Configures the number of output shards produced overall (when using unwindowed writes) or
      * per-window (when using windowed writes).
      *
@@ -375,14 +471,13 @@ public class TextIO {
      *
      * @param numShards the number of shards to use, or 0 to let the system decide.
      */
-    public Write withNumShards(int numShards) {
+    public TypedWrite<T> withNumShards(int numShards) {
       checkArgument(numShards >= 0);
       return toBuilder().setNumShards(numShards).build();
     }
 
     /**
-     * Forces a single file as output and empty shard name template. This option is only compatible
-     * with unwindowed writes.
+     * Forces a single file as output and an empty shard name template.
      *
      * <p>For unwindowed writes, constraining the number of shards is likely to reduce the
      * performance of a pipeline. Setting this value is not recommended unless you require a
@@ -390,7 +485,7 @@ public class TextIO {
      *
      * <p>This is equivalent to {@code .withNumShards(1).withShardNameTemplate("")}
      */
-    public Write withoutSharding() {
+    public TypedWrite<T> withoutSharding() {
       return withNumShards(1).withShardNameTemplate("");
     }
 
@@ -399,7 +494,7 @@ public class TextIO {
      *
      * <p>A {@code null} value will clear any previously configured header.
      */
-    public Write withHeader(@Nullable String header) {
+    public TypedWrite<T> withHeader(@Nullable String header) {
       return toBuilder().setHeader(header).build();
     }
 
@@ -408,48 +503,82 @@ public class TextIO {
      *
      * <p>A {@code null} value will clear any previously configured footer.
      */
-    public Write withFooter(@Nullable String footer) {
+    public TypedWrite<T> withFooter(@Nullable String footer) {
       return toBuilder().setFooter(footer).build();
     }
 
     /**
-     * Returns a transform for writing to text files like this one but that has the given
-     * {@link WritableByteChannelFactory} to be used by the {@link FileBasedSink} during output.
-     * The default is value is {@link FileBasedSink.CompressionType#UNCOMPRESSED}.
+     * Returns a transform for writing to text files like this one but that has the given {@link
+     * WritableByteChannelFactory} to be used by the {@link FileBasedSink} during output. The
+     * default value is {@link FileBasedSink.CompressionType#UNCOMPRESSED}.
      *
      * <p>A {@code null} value will reset the value to the default value mentioned above.
      */
-    public Write withWritableByteChannelFactory(
+    public TypedWrite<T> withWritableByteChannelFactory(
         WritableByteChannelFactory writableByteChannelFactory) {
       return toBuilder().setWritableByteChannelFactory(writableByteChannelFactory).build();
     }
 
-    public Write withWindowedWrites() {
+    /**
+     * Preserves windowing of input elements and writes them to files based on the element's window.
+     *
+     * <p>If using {@link #to(FileBasedSink.FilenamePolicy)}, filenames will be generated using
+     * {@link FilenamePolicy#windowedFilename}. See also {@link WriteFiles#withWindowedWrites()}.
+     */
+    public TypedWrite<T> withWindowedWrites() {
       return toBuilder().setWindowedWrites(true).build();
     }
 
+    private DynamicDestinations<T, ?> resolveDynamicDestinations() {
+      DynamicDestinations<T, ?> dynamicDestinations = getDynamicDestinations();
+      if (dynamicDestinations == null) {
+        FilenamePolicy usedFilenamePolicy = getFilenamePolicy();
+        if (usedFilenamePolicy == null) {
+          usedFilenamePolicy =
+              DefaultFilenamePolicy.fromStandardParameters(
+                  getFilenamePrefix(),
+                  getShardTemplate(),
+                  getFilenameSuffix(),
+                  getWindowedWrites());
+        }
+        dynamicDestinations = DynamicFileDestinations.constant(usedFilenamePolicy);
+      }
+      return dynamicDestinations;
+    }
+
     @Override
-    public PDone expand(PCollection<String> input) {
-      checkState(getFilenamePrefix() != null,
-          "Need to set the filename prefix of a TextIO.Write transform.");
+    public PDone expand(PCollection<T> input) {
+      checkState(
+          getFilenamePrefix() != null || getTempDirectory() != null,
+          "Need to set either the filename prefix or the tempDirectory of a TextIO.Write "
+              + "transform.");
       checkState(
-          (getFilenamePolicy() == null)
-              || (getShardTemplate() == null && getFilenameSuffix() == null),
-          "Cannot set a filename policy and also a filename template or suffix.");
-
-      FilenamePolicy usedFilenamePolicy = getFilenamePolicy();
-      if (usedFilenamePolicy == null) {
-        usedFilenamePolicy = DefaultFilenamePolicy.constructUsingStandardParameters(
-            getFilenamePrefix(), getShardTemplate(), getFilenameSuffix(), getWindowedWrites());
+          getFilenamePolicy() == null || getDynamicDestinations() == null,
+          "Cannot specify both a filename policy and dynamic destinations");
+      if (getFilenamePolicy() != null || getDynamicDestinations() != null) {
+        checkState(
+            getShardTemplate() == null && getFilenameSuffix() == null,
+            "shardTemplate and filenameSuffix should only be used with the default "
+                + "filename policy");
       }
-      WriteFiles<String> write =
+      return expandTyped(input, resolveDynamicDestinations());
+    }
+
+    public <DestinationT> PDone expandTyped(
+        PCollection<T> input, DynamicDestinations<T, DestinationT> dynamicDestinations) {
+      ValueProvider<ResourceId> tempDirectory = getTempDirectory();
+      if (tempDirectory == null) {
+        tempDirectory = getFilenamePrefix();
+      }
+      WriteFiles<T, DestinationT, String> write =
           WriteFiles.to(
-              new TextSink(
-                  getFilenamePrefix(),
-                  usedFilenamePolicy,
+              new TextSink<>(
+                  tempDirectory,
+                  dynamicDestinations,
                   getHeader(),
                   getFooter(),
-                  getWritableByteChannelFactory()));
+                  getWritableByteChannelFactory()),
+              getFormatFunction());
       if (getNumShards() > 0) {
         write = write.withNumShards(getNumShards());
       }
@@ -463,27 +592,26 @@ public class TextIO {
     public void populateDisplayData(DisplayData.Builder builder) {
       super.populateDisplayData(builder);
 
-      String prefixString = "";
-      if (getFilenamePrefix() != null) {
-        prefixString = getFilenamePrefix().isAccessible()
-            ? getFilenamePrefix().get().toString() : getFilenamePrefix().toString();
+      resolveDynamicDestinations().populateDisplayData(builder);
+      String tempDirectory = null;
+      if (getTempDirectory() != null) {
+        tempDirectory =
+            getTempDirectory().isAccessible()
+                ? getTempDirectory().get().toString()
+                : getTempDirectory().toString();
       }
       builder
-          .addIfNotNull(DisplayData.item("filePrefix", prefixString)
-            .withLabel("Output File Prefix"))
-          .addIfNotNull(DisplayData.item("fileSuffix", getFilenameSuffix())
-            .withLabel("Output File Suffix"))
-          .addIfNotNull(DisplayData.item("shardNameTemplate", getShardTemplate())
-            .withLabel("Output Shard Name Template"))
-          .addIfNotDefault(DisplayData.item("numShards", getNumShards())
-            .withLabel("Maximum Output Shards"), 0)
-          .addIfNotNull(DisplayData.item("fileHeader", getHeader())
-            .withLabel("File Header"))
-          .addIfNotNull(DisplayData.item("fileFooter", getFooter())
-              .withLabel("File Footer"))
-          .add(DisplayData
-              .item("writableByteChannelFactory", getWritableByteChannelFactory().toString())
-              .withLabel("Compression/Transformation Type"));
+          .addIfNotDefault(
+              DisplayData.item("numShards", getNumShards()).withLabel("Maximum Output Shards"), 0)
+          .addIfNotNull(
+              DisplayData.item("tempDirectory", tempDirectory)
+                  .withLabel("Directory for temporary files"))
+          .addIfNotNull(DisplayData.item("fileHeader", getHeader()).withLabel("File Header"))
+          .addIfNotNull(DisplayData.item("fileFooter", getFooter()).withLabel("File Footer"))
+          .add(
+              DisplayData.item(
+                      "writableByteChannelFactory", getWritableByteChannelFactory().toString())
+                  .withLabel("Compression/Transformation Type"));
     }
 
     @Override
@@ -493,6 +621,128 @@ public class TextIO {
   }
 
   /**
+   * This class is used as the default return value of {@link TextIO#write()}.
+   *
+   * <p>All methods in this class delegate to the appropriate method of {@link TextIO.TypedWrite}.
+   * This class exists for backwards compatibility, and will be removed in Beam 3.0.
+   */
+  public static class Write extends PTransform<PCollection<String>, PDone> {
+    @VisibleForTesting TypedWrite<String> inner;
+
+    Write() {
+      this(TextIO.writeCustomType(SerializableFunctions.<String>identity()));
+    }
+
+    Write(TypedWrite<String> inner) {
+      this.inner = inner;
+    }
+
+    /** See {@link TypedWrite#to(String)}. */
+    public Write to(String filenamePrefix) {
+      return new Write(inner.to(filenamePrefix));
+    }
+
+    /** See {@link TypedWrite#to(ResourceId)}. */
+    @Experimental(Kind.FILESYSTEM)
+    public Write to(ResourceId filenamePrefix) {
+      return new Write(inner.to(filenamePrefix));
+    }
+
+    /** See {@link TypedWrite#to(ValueProvider)}. */
+    public Write to(ValueProvider<String> outputPrefix) {
+      return new Write(inner.to(outputPrefix));
+    }
+
+    /** See {@link TypedWrite#toResource(ValueProvider)}. */
+    @Experimental(Kind.FILESYSTEM)
+    public Write toResource(ValueProvider<ResourceId> filenamePrefix) {
+      return new Write(inner.toResource(filenamePrefix));
+    }
+
+    /** See {@link TypedWrite#to(FilenamePolicy)}. */
+    @Experimental(Kind.FILESYSTEM)
+    public Write to(FilenamePolicy filenamePolicy) {
+      return new Write(inner.to(filenamePolicy));
+    }
+
+    /** See {@link TypedWrite#to(DynamicDestinations)}. */
+    @Experimental(Kind.FILESYSTEM)
+    public Write to(DynamicDestinations<String, ?> dynamicDestinations) {
+      return new Write(inner.to(dynamicDestinations));
+    }
+
+    /** See {@link TypedWrite#to(SerializableFunction, Params)}. */
+    @Experimental(Kind.FILESYSTEM)
+    public Write to(
+        SerializableFunction<String, Params> destinationFunction, Params emptyDestination) {
+      return new Write(inner.to(destinationFunction, emptyDestination));
+    }
+
+    /** See {@link TypedWrite#withTempDirectory(ValueProvider)}. */
+    @Experimental(Kind.FILESYSTEM)
+    public Write withTempDirectory(ValueProvider<ResourceId> tempDirectory) {
+      return new Write(inner.withTempDirectory(tempDirectory));
+    }
+
+    /** See {@link TypedWrite#withTempDirectory(ResourceId)}. */
+    @Experimental(Kind.FILESYSTEM)
+    public Write withTempDirectory(ResourceId tempDirectory) {
+      return new Write(inner.withTempDirectory(tempDirectory));
+    }
+
+    /** See {@link TypedWrite#withShardNameTemplate(String)}. */
+    public Write withShardNameTemplate(String shardTemplate) {
+      return new Write(inner.withShardNameTemplate(shardTemplate));
+    }
+
+    /** See {@link TypedWrite#withSuffix(String)}. */
+    public Write withSuffix(String filenameSuffix) {
+      return new Write(inner.withSuffix(filenameSuffix));
+    }
+
+    /** See {@link TypedWrite#withNumShards(int)}. */
+    public Write withNumShards(int numShards) {
+      return new Write(inner.withNumShards(numShards));
+    }
+
+    /** See {@link TypedWrite#withoutSharding()}. */
+    public Write withoutSharding() {
+      return new Write(inner.withoutSharding());
+    }
+
+    /** See {@link TypedWrite#withHeader(String)}. */
+    public Write withHeader(@Nullable String header) {
+      return new Write(inner.withHeader(header));
+    }
+
+    /** See {@link TypedWrite#withFooter(String)}. */
+    public Write withFooter(@Nullable String footer) {
+      return new Write(inner.withFooter(footer));
+    }
+
+    /** See {@link TypedWrite#withWritableByteChannelFactory(WritableByteChannelFactory)}. */
+    public Write withWritableByteChannelFactory(
+        WritableByteChannelFactory writableByteChannelFactory) {
+      return new Write(inner.withWritableByteChannelFactory(writableByteChannelFactory));
+    }
+
+    /** See {@link TypedWrite#withWindowedWrites}. */
+    public Write withWindowedWrites() {
+      return new Write(inner.withWindowedWrites());
+    }
+
+    @Override
+    public void populateDisplayData(DisplayData.Builder builder) {
+      inner.populateDisplayData(builder);
+    }
+
+    @Override
+    public PDone expand(PCollection<String> input) {
+      return inner.expand(input);
+    }
+  }
+
+  /**
    * Possible text file compression types.
    */
   public enum CompressionType {
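
For illustration, here is a minimal sketch of how the value-dependent
destinations added above might be used. It is not part of this commit: the
pipeline, paths, and routing key are assumptions, and Params#withBaseFilename
is assumed to be the builder method this change adds to DefaultFilenamePolicy.

    import org.apache.beam.sdk.Pipeline;
    import org.apache.beam.sdk.io.DefaultFilenamePolicy.Params;
    import org.apache.beam.sdk.io.FileBasedSink;
    import org.apache.beam.sdk.io.TextIO;
    import org.apache.beam.sdk.transforms.Create;
    import org.apache.beam.sdk.transforms.SerializableFunction;
    import org.apache.beam.sdk.values.PCollection;

    Pipeline p = Pipeline.create();
    PCollection<String> lines = p.apply(Create.of("a,1", "b,2"));
    lines.apply(
        TextIO.write()
            .to(
                new SerializableFunction<String, Params>() {
                  @Override
                  public Params apply(String line) {
                    // Route each record by its first field; shard template and
                    // suffix come from the default filename policy.
                    String key = line.split(",", 2)[0];
                    return new Params()
                        .withBaseFilename(
                            FileBasedSink.convertToFileResourceIfPossible("/out/" + key));
                  }
                },
                // Where empty files go when the input PCollection is empty.
                new Params()
                    .withBaseFilename(
                        FileBasedSink.convertToFileResourceIfPossible("/out/other")))
            // Required when using dynamic destinations, per the javadoc above.
            .withTempDirectory(
                FileBasedSink.convertToFileResourceIfPossible("/out/tmp/")));
    p.run();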


[36/50] [abbrv] beam git commit: Adds DynamicDestinations support to FileBasedSink

Posted by ta...@apache.org.
Adds DynamicDestinations support to FileBasedSink


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/4c336e84
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/4c336e84
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/4c336e84

Branch: refs/heads/DSL_SQL
Commit: 4c336e840e69e83e15d9ffb7e0a0178dd3ab8404
Parents: 1f6117f
Author: Reuven Lax <re...@google.com>
Authored: Fri Jun 9 17:11:32 2017 -0700
Committer: Tyler Akidau <ta...@apache.org>
Committed: Wed Jul 12 20:01:01 2017 -0700

----------------------------------------------------------------------
 .../examples/common/WriteOneFilePerWindow.java  |  52 +-
 .../beam/examples/WindowedWordCountIT.java      |   4 +-
 .../complete/game/utils/WriteToText.java        |  43 +-
 .../construction/WriteFilesTranslation.java     |  67 +-
 .../construction/PTransformMatchersTest.java    |  22 +-
 .../construction/WriteFilesTranslationTest.java |  62 +-
 .../direct/WriteWithShardingFactory.java        |   6 +-
 .../direct/WriteWithShardingFactoryTest.java    |  18 +-
 .../beam/runners/dataflow/DataflowRunner.java   |  15 +-
 .../runners/dataflow/DataflowRunnerTest.java    |  35 +-
 .../runners/spark/SparkRunnerDebuggerTest.java  |  26 +-
 .../src/main/proto/beam_runner_api.proto        |   7 +-
 .../apache/beam/sdk/coders/ShardedKeyCoder.java |  66 ++
 .../java/org/apache/beam/sdk/io/AvroIO.java     | 220 ++++---
 .../java/org/apache/beam/sdk/io/AvroSink.java   |  32 +-
 .../beam/sdk/io/DefaultFilenamePolicy.java      | 274 +++++---
 .../beam/sdk/io/DynamicFileDestinations.java    | 115 ++++
 .../org/apache/beam/sdk/io/FileBasedSink.java   | 513 ++++++++-------
 .../java/org/apache/beam/sdk/io/TFRecordIO.java |  44 +-
 .../java/org/apache/beam/sdk/io/TextIO.java     | 488 ++++++++++----
 .../java/org/apache/beam/sdk/io/TextSink.java   |  22 +-
 .../java/org/apache/beam/sdk/io/WriteFiles.java | 640 +++++++++++--------
 .../sdk/transforms/SerializableFunctions.java   |  50 ++
 .../org/apache/beam/sdk/values/ShardedKey.java  |  65 ++
 .../java/org/apache/beam/sdk/io/AvroIOTest.java |  85 ++-
 .../beam/sdk/io/DefaultFilenamePolicyTest.java  | 135 ++--
 .../sdk/io/DrunkWritableByteChannelFactory.java |   2 +-
 .../apache/beam/sdk/io/FileBasedSinkTest.java   |  93 +--
 .../java/org/apache/beam/sdk/io/SimpleSink.java |  56 +-
 .../java/org/apache/beam/sdk/io/TextIOTest.java | 264 +++++++-
 .../org/apache/beam/sdk/io/WriteFilesTest.java  | 339 ++++++++--
 .../beam/sdk/io/gcp/bigquery/BatchLoads.java    |   2 +
 .../io/gcp/bigquery/DynamicDestinations.java    |  29 +-
 .../io/gcp/bigquery/GenerateShardedTable.java   |   1 +
 .../beam/sdk/io/gcp/bigquery/ShardedKey.java    |  67 --
 .../sdk/io/gcp/bigquery/ShardedKeyCoder.java    |  74 ---
 .../sdk/io/gcp/bigquery/StreamingWriteFn.java   |   1 +
 .../io/gcp/bigquery/StreamingWriteTables.java   |   2 +
 .../sdk/io/gcp/bigquery/TagWithUniqueIds.java   |   1 +
 .../io/gcp/bigquery/WriteBundlesToFiles.java    |   2 +
 .../bigquery/WriteGroupedRecordsToFiles.java    |   1 +
 .../sdk/io/gcp/bigquery/WritePartition.java     |   1 +
 .../beam/sdk/io/gcp/bigquery/WriteTables.java   |   1 +
 .../sdk/io/gcp/bigquery/BigQueryIOTest.java     |   2 +
 .../java/org/apache/beam/sdk/io/xml/XmlIO.java  |   4 +-
 .../org/apache/beam/sdk/io/xml/XmlSink.java     |  21 +-
 .../org/apache/beam/sdk/io/xml/XmlSinkTest.java |   4 +-
 47 files changed, 2710 insertions(+), 1363 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/4c336e84/examples/java/src/main/java/org/apache/beam/examples/common/WriteOneFilePerWindow.java
----------------------------------------------------------------------
diff --git a/examples/java/src/main/java/org/apache/beam/examples/common/WriteOneFilePerWindow.java b/examples/java/src/main/java/org/apache/beam/examples/common/WriteOneFilePerWindow.java
index 5e6df9c..49865ba 100644
--- a/examples/java/src/main/java/org/apache/beam/examples/common/WriteOneFilePerWindow.java
+++ b/examples/java/src/main/java/org/apache/beam/examples/common/WriteOneFilePerWindow.java
@@ -17,11 +17,12 @@
  */
 package org.apache.beam.examples.common;
 
-import static com.google.common.base.Verify.verifyNotNull;
+import static com.google.common.base.MoreObjects.firstNonNull;
 
 import javax.annotation.Nullable;
 import org.apache.beam.sdk.io.FileBasedSink;
 import org.apache.beam.sdk.io.FileBasedSink.FilenamePolicy;
+import org.apache.beam.sdk.io.FileBasedSink.OutputFileHints;
 import org.apache.beam.sdk.io.TextIO;
 import org.apache.beam.sdk.io.fs.ResolveOptions.StandardResolveOptions;
 import org.apache.beam.sdk.io.fs.ResourceId;
@@ -53,22 +54,12 @@ public class WriteOneFilePerWindow extends PTransform<PCollection<String>, PDone
 
   @Override
   public PDone expand(PCollection<String> input) {
-    // filenamePrefix may contain a directory and a filename component. Pull out only the filename
-    // component from that path for the PerWindowFiles.
-    String prefix = "";
     ResourceId resource = FileBasedSink.convertToFileResourceIfPossible(filenamePrefix);
-    if (!resource.isDirectory()) {
-      prefix = verifyNotNull(
-          resource.getFilename(),
-          "A non-directory resource should have a non-null filename: %s",
-          resource);
-    }
-
-
-    TextIO.Write write = TextIO.write()
-        .to(resource.getCurrentDirectory())
-        .withFilenamePolicy(new PerWindowFiles(prefix))
-        .withWindowedWrites();
+    TextIO.Write write =
+        TextIO.write()
+            .to(new PerWindowFiles(resource))
+            .withTempDirectory(resource.getCurrentDirectory())
+            .withWindowedWrites();
     if (numShards != null) {
       write = write.withNumShards(numShards);
     }
@@ -83,31 +74,36 @@ public class WriteOneFilePerWindow extends PTransform<PCollection<String>, PDone
    */
   public static class PerWindowFiles extends FilenamePolicy {
 
-    private final String prefix;
+    private final ResourceId baseFilename;
 
-    public PerWindowFiles(String prefix) {
-      this.prefix = prefix;
+    public PerWindowFiles(ResourceId baseFilename) {
+      this.baseFilename = baseFilename;
     }
 
     public String filenamePrefixForWindow(IntervalWindow window) {
+      String prefix =
+          baseFilename.isDirectory() ? "" : firstNonNull(baseFilename.getFilename(), "");
       return String.format("%s-%s-%s",
           prefix, FORMATTER.print(window.start()), FORMATTER.print(window.end()));
     }
 
     @Override
-    public ResourceId windowedFilename(
-        ResourceId outputDirectory, WindowedContext context, String extension) {
+    public ResourceId windowedFilename(WindowedContext context, OutputFileHints outputFileHints) {
       IntervalWindow window = (IntervalWindow) context.getWindow();
-      String filename = String.format(
-          "%s-%s-of-%s%s",
-          filenamePrefixForWindow(window), context.getShardNumber(), context.getNumShards(),
-          extension);
-      return outputDirectory.resolve(filename, StandardResolveOptions.RESOLVE_FILE);
+      String filename =
+          String.format(
+              "%s-%s-of-%s%s",
+              filenamePrefixForWindow(window),
+              context.getShardNumber(),
+              context.getNumShards(),
+              outputFileHints.getSuggestedFilenameSuffix());
+      return baseFilename
+          .getCurrentDirectory()
+          .resolve(filename, StandardResolveOptions.RESOLVE_FILE);
     }
 
     @Override
-    public ResourceId unwindowedFilename(
-        ResourceId outputDirectory, Context context, String extension) {
+    public ResourceId unwindowedFilename(Context context, OutputFileHints outputFileHints) {
       throw new UnsupportedOperationException("Unsupported.");
     }
   }
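
A short usage sketch for the updated transform. The two-argument constructor
and the fixed-window setup are assumptions inferred from the fields shown
above, not part of this diff:

    import org.apache.beam.examples.common.WriteOneFilePerWindow;
    import org.apache.beam.sdk.Pipeline;
    import org.apache.beam.sdk.transforms.Create;
    import org.apache.beam.sdk.transforms.windowing.FixedWindows;
    import org.apache.beam.sdk.transforms.windowing.Window;
    import org.joda.time.Duration;

    Pipeline p = Pipeline.create();
    p.apply(Create.of("one", "two"))
        // PerWindowFiles expects IntervalWindows, so window the input first.
        .apply(Window.<String>into(FixedWindows.of(Duration.standardMinutes(1))))
        .apply(new WriteOneFilePerWindow("/out/lines", 1 /* numShards */));
    p.run();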

http://git-wip-us.apache.org/repos/asf/beam/blob/4c336e84/examples/java/src/test/java/org/apache/beam/examples/WindowedWordCountIT.java
----------------------------------------------------------------------
diff --git a/examples/java/src/test/java/org/apache/beam/examples/WindowedWordCountIT.java b/examples/java/src/test/java/org/apache/beam/examples/WindowedWordCountIT.java
index eb7e4c4..bec7952 100644
--- a/examples/java/src/test/java/org/apache/beam/examples/WindowedWordCountIT.java
+++ b/examples/java/src/test/java/org/apache/beam/examples/WindowedWordCountIT.java
@@ -32,6 +32,7 @@ import java.util.concurrent.ThreadLocalRandom;
 import org.apache.beam.examples.common.ExampleUtils;
 import org.apache.beam.examples.common.WriteOneFilePerWindow.PerWindowFiles;
 import org.apache.beam.sdk.PipelineResult;
+import org.apache.beam.sdk.io.FileBasedSink;
 import org.apache.beam.sdk.io.FileSystems;
 import org.apache.beam.sdk.io.fs.ResolveOptions.StandardResolveOptions;
 import org.apache.beam.sdk.options.PipelineOptionsFactory;
@@ -149,7 +150,8 @@ public class WindowedWordCountIT {
 
     String outputPrefix = options.getOutput();
 
-    PerWindowFiles filenamePolicy = new PerWindowFiles(outputPrefix);
+    PerWindowFiles filenamePolicy =
+        new PerWindowFiles(FileBasedSink.convertToFileResourceIfPossible(outputPrefix));
 
     List<ShardedFile> expectedOutputFiles = Lists.newArrayListWithCapacity(6);
 

http://git-wip-us.apache.org/repos/asf/beam/blob/4c336e84/examples/java8/src/main/java/org/apache/beam/examples/complete/game/utils/WriteToText.java
----------------------------------------------------------------------
diff --git a/examples/java8/src/main/java/org/apache/beam/examples/complete/game/utils/WriteToText.java b/examples/java8/src/main/java/org/apache/beam/examples/complete/game/utils/WriteToText.java
index e6c8ddb..1d60198 100644
--- a/examples/java8/src/main/java/org/apache/beam/examples/complete/game/utils/WriteToText.java
+++ b/examples/java8/src/main/java/org/apache/beam/examples/complete/game/utils/WriteToText.java
@@ -18,7 +18,6 @@
 package org.apache.beam.examples.complete.game.utils;
 
 import static com.google.common.base.Preconditions.checkArgument;
-import static com.google.common.base.Verify.verifyNotNull;
 
 import java.io.Serializable;
 import java.util.ArrayList;
@@ -28,6 +27,7 @@ import java.util.TimeZone;
 import java.util.stream.Collectors;
 import org.apache.beam.sdk.io.FileBasedSink;
 import org.apache.beam.sdk.io.FileBasedSink.FilenamePolicy;
+import org.apache.beam.sdk.io.FileBasedSink.OutputFileHints;
 import org.apache.beam.sdk.io.TextIO;
 import org.apache.beam.sdk.io.fs.ResolveOptions.StandardResolveOptions;
 import org.apache.beam.sdk.io.fs.ResourceId;
@@ -111,21 +111,12 @@ public class WriteToText<InputT>
       checkArgument(
           input.getWindowingStrategy().getWindowFn().windowCoder() == IntervalWindow.getCoder());
 
-      // filenamePrefix may contain a directory and a filename component. Pull out only the filename
-      // component from that path for the PerWindowFiles.
-      String prefix = "";
       ResourceId resource = FileBasedSink.convertToFileResourceIfPossible(filenamePrefix);
-      if (!resource.isDirectory()) {
-        prefix = verifyNotNull(
-            resource.getFilename(),
-            "A non-directory resource should have a non-null filename: %s",
-            resource);
-      }
 
       return input.apply(
           TextIO.write()
-              .to(resource.getCurrentDirectory())
-              .withFilenamePolicy(new PerWindowFiles(prefix))
+              .to(new PerWindowFiles(resource))
+              .withTempDirectory(resource.getCurrentDirectory())
               .withWindowedWrites()
               .withNumShards(3));
     }
@@ -139,31 +130,33 @@ public class WriteToText<InputT>
    */
   protected static class PerWindowFiles extends FilenamePolicy {
 
-    private final String prefix;
+    private final ResourceId prefix;
 
-    public PerWindowFiles(String prefix) {
+    public PerWindowFiles(ResourceId prefix) {
       this.prefix = prefix;
     }
 
     public String filenamePrefixForWindow(IntervalWindow window) {
-      return String.format("%s-%s-%s",
-          prefix, formatter.print(window.start()), formatter.print(window.end()));
+      String filePrefix = prefix.isDirectory() ? "" : prefix.getFilename();
+      return String.format(
+          "%s-%s-%s", filePrefix, formatter.print(window.start()), formatter.print(window.end()));
     }
 
     @Override
-    public ResourceId windowedFilename(
-        ResourceId outputDirectory, WindowedContext context, String extension) {
+    public ResourceId windowedFilename(WindowedContext context, OutputFileHints outputFileHints) {
       IntervalWindow window = (IntervalWindow) context.getWindow();
-      String filename = String.format(
-          "%s-%s-of-%s%s",
-          filenamePrefixForWindow(window), context.getShardNumber(), context.getNumShards(),
-          extension);
-      return outputDirectory.resolve(filename, StandardResolveOptions.RESOLVE_FILE);
+      String filename =
+          String.format(
+              "%s-%s-of-%s%s",
+              filenamePrefixForWindow(window),
+              context.getShardNumber(),
+              context.getNumShards(),
+              outputFileHints.getSuggestedFilenameSuffix());
+      return prefix.getCurrentDirectory().resolve(filename, StandardResolveOptions.RESOLVE_FILE);
     }
 
     @Override
-    public ResourceId unwindowedFilename(
-        ResourceId outputDirectory, Context context, String extension) {
+    public ResourceId unwindowedFilename(Context context, OutputFileHints outputFileHints) {
       throw new UnsupportedOperationException("Unsupported.");
     }
   }

http://git-wip-us.apache.org/repos/asf/beam/blob/4c336e84/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/WriteFilesTranslation.java
----------------------------------------------------------------------
diff --git a/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/WriteFilesTranslation.java b/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/WriteFilesTranslation.java
index 99b77ef..b1d2da4 100644
--- a/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/WriteFilesTranslation.java
+++ b/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/WriteFilesTranslation.java
@@ -26,6 +26,7 @@ import com.google.protobuf.Any;
 import com.google.protobuf.ByteString;
 import com.google.protobuf.BytesValue;
 import java.io.IOException;
+import java.io.Serializable;
 import java.util.Collections;
 import java.util.Map;
 import org.apache.beam.runners.core.construction.PTransformTranslation.TransformPayloadTranslator;
@@ -37,6 +38,7 @@ import org.apache.beam.sdk.io.FileBasedSink;
 import org.apache.beam.sdk.io.WriteFiles;
 import org.apache.beam.sdk.runners.AppliedPTransform;
 import org.apache.beam.sdk.transforms.PTransform;
+import org.apache.beam.sdk.transforms.SerializableFunction;
 import org.apache.beam.sdk.util.SerializableUtils;
 import org.apache.beam.sdk.values.PCollection;
 import org.apache.beam.sdk.values.PDone;
@@ -51,32 +53,45 @@ public class WriteFilesTranslation {
   public static final String CUSTOM_JAVA_FILE_BASED_SINK_URN =
       "urn:beam:file_based_sink:javasdk:0.1";
 
+  public static final String CUSTOM_JAVA_FILE_BASED_SINK_FORMAT_FUNCTION_URN =
+      "urn:beam:file_based_sink_format_function:javasdk:0.1";
+
   @VisibleForTesting
-  static WriteFilesPayload toProto(WriteFiles<?> transform) {
+  static WriteFilesPayload toProto(WriteFiles<?, ?, ?> transform) {
     return WriteFilesPayload.newBuilder()
         .setSink(toProto(transform.getSink()))
+        .setFormatFunction(toProto(transform.getFormatFunction()))
         .setWindowedWrites(transform.isWindowedWrites())
         .setRunnerDeterminedSharding(
             transform.getNumShards() == null && transform.getSharding() == null)
         .build();
   }
 
-  private static SdkFunctionSpec toProto(FileBasedSink<?> sink) {
+  private static SdkFunctionSpec toProto(FileBasedSink<?, ?> sink) {
+    return toProto(CUSTOM_JAVA_FILE_BASED_SINK_URN, sink);
+  }
+
+  private static SdkFunctionSpec toProto(SerializableFunction<?, ?> serializableFunction) {
+    return toProto(CUSTOM_JAVA_FILE_BASED_SINK_FORMAT_FUNCTION_URN, serializableFunction);
+  }
+
+  private static SdkFunctionSpec toProto(String urn, Serializable serializable) {
     return SdkFunctionSpec.newBuilder()
         .setSpec(
             FunctionSpec.newBuilder()
-                .setUrn(CUSTOM_JAVA_FILE_BASED_SINK_URN)
+                .setUrn(urn)
                 .setParameter(
                     Any.pack(
                         BytesValue.newBuilder()
                             .setValue(
-                                ByteString.copyFrom(SerializableUtils.serializeToByteArray(sink)))
+                                ByteString.copyFrom(
+                                    SerializableUtils.serializeToByteArray(serializable)))
                             .build())))
         .build();
   }
 
   @VisibleForTesting
-  static FileBasedSink<?> sinkFromProto(SdkFunctionSpec sinkProto) throws IOException {
+  static FileBasedSink<?, ?> sinkFromProto(SdkFunctionSpec sinkProto) throws IOException {
     checkArgument(
         sinkProto.getSpec().getUrn().equals(CUSTOM_JAVA_FILE_BASED_SINK_URN),
         "Cannot extract %s instance from %s with URN %s",
@@ -87,16 +102,44 @@ public class WriteFilesTranslation {
     byte[] serializedSink =
         sinkProto.getSpec().getParameter().unpack(BytesValue.class).getValue().toByteArray();
 
-    return (FileBasedSink<?>)
+    return (FileBasedSink<?, ?>)
         SerializableUtils.deserializeFromByteArray(
             serializedSink, FileBasedSink.class.getSimpleName());
   }
 
-  public static <T> FileBasedSink<T> getSink(
-      AppliedPTransform<PCollection<T>, PDone, ? extends PTransform<PCollection<T>, PDone>>
+  @VisibleForTesting
+  static <InputT, OutputT> SerializableFunction<InputT, OutputT> formatFunctionFromProto(
+      SdkFunctionSpec sinkProto) throws IOException {
+    checkArgument(
+        sinkProto.getSpec().getUrn().equals(CUSTOM_JAVA_FILE_BASED_SINK_FORMAT_FUNCTION_URN),
+        "Cannot extract %s instance from %s with URN %s",
+        SerializableFunction.class.getSimpleName(),
+        FunctionSpec.class.getSimpleName(),
+        sinkProto.getSpec().getUrn());
+
+    byte[] serializedFunction =
+        sinkProto.getSpec().getParameter().unpack(BytesValue.class).getValue().toByteArray();
+
+    return (SerializableFunction<InputT, OutputT>)
+        SerializableUtils.deserializeFromByteArray(
+            serializedFunction, FileBasedSink.class.getSimpleName());
+  }
+
+  public static <UserT, DestinationT, OutputT> FileBasedSink<OutputT, DestinationT> getSink(
+      AppliedPTransform<PCollection<UserT>, PDone, ? extends PTransform<PCollection<UserT>, PDone>>
+          transform)
+      throws IOException {
+    return (FileBasedSink<OutputT, DestinationT>)
+        sinkFromProto(getWriteFilesPayload(transform).getSink());
+  }
+
+  public static <InputT, OutputT> SerializableFunction<InputT, OutputT> getFormatFunction(
+      AppliedPTransform<
+              PCollection<InputT>, PDone, ? extends PTransform<PCollection<InputT>, PDone>>
           transform)
       throws IOException {
-    return (FileBasedSink<T>) sinkFromProto(getWriteFilesPayload(transform).getSink());
+    return formatFunctionFromProto(
+        getWriteFilesPayload(transform).<InputT, OutputT>getFormatFunction());
   }
 
   public static <T> boolean isWindowedWrites(
@@ -124,15 +167,15 @@ public class WriteFilesTranslation {
         .unpack(WriteFilesPayload.class);
   }
 
-  static class WriteFilesTranslator implements TransformPayloadTranslator<WriteFiles<?>> {
+  static class WriteFilesTranslator implements TransformPayloadTranslator<WriteFiles<?, ?, ?>> {
     @Override
-    public String getUrn(WriteFiles<?> transform) {
+    public String getUrn(WriteFiles<?, ?, ?> transform) {
       return PTransformTranslation.WRITE_FILES_TRANSFORM_URN;
     }
 
     @Override
     public FunctionSpec translate(
-        AppliedPTransform<?, ?, WriteFiles<?>> transform, SdkComponents components) {
+        AppliedPTransform<?, ?, WriteFiles<?, ?, ?>> transform, SdkComponents components) {
       return FunctionSpec.newBuilder()
           .setUrn(getUrn(transform.getTransform()))
           .setParameter(Any.pack(toProto(transform.getTransform())))
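
The SdkFunctionSpec built above just wraps Java serialization in a protobuf
BytesValue. A hedged sketch of the underlying round trip, using a throwaway
sink in the style of the tests in this change (the temp path is an
assumption):

    import org.apache.beam.sdk.io.DynamicFileDestinations;
    import org.apache.beam.sdk.io.FileBasedSink;
    import org.apache.beam.sdk.io.LocalResources;
    import org.apache.beam.sdk.io.fs.ResourceId;
    import org.apache.beam.sdk.options.ValueProvider.StaticValueProvider;
    import org.apache.beam.sdk.util.SerializableUtils;

    ResourceId tempDir = LocalResources.fromString("/tmp/out", true /* isDirectory */);
    FileBasedSink<String, Void> sink =
        new FileBasedSink<String, Void>(
            StaticValueProvider.of(tempDir), DynamicFileDestinations.constant(null)) {
          @Override
          public WriteOperation<String, Void> createWriteOperation() {
            return null; // never exercised in this sketch
          }
        };
    // This is what toProto/sinkFromProto do around the proto wrapping.
    byte[] bytes = SerializableUtils.serializeToByteArray(sink);
    FileBasedSink<?, ?> restored =
        (FileBasedSink<?, ?>)
            SerializableUtils.deserializeFromByteArray(bytes, FileBasedSink.class.getSimpleName());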

http://git-wip-us.apache.org/repos/asf/beam/blob/4c336e84/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/PTransformMatchersTest.java
----------------------------------------------------------------------
diff --git a/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/PTransformMatchersTest.java b/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/PTransformMatchersTest.java
index 6459849..99d3dd1 100644
--- a/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/PTransformMatchersTest.java
+++ b/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/PTransformMatchersTest.java
@@ -32,6 +32,7 @@ import org.apache.beam.sdk.coders.StringUtf8Coder;
 import org.apache.beam.sdk.coders.VarIntCoder;
 import org.apache.beam.sdk.coders.VoidCoder;
 import org.apache.beam.sdk.io.DefaultFilenamePolicy;
+import org.apache.beam.sdk.io.DynamicFileDestinations;
 import org.apache.beam.sdk.io.FileBasedSink;
 import org.apache.beam.sdk.io.FileBasedSink.FilenamePolicy;
 import org.apache.beam.sdk.io.LocalResources;
@@ -55,6 +56,7 @@ import org.apache.beam.sdk.transforms.Materialization;
 import org.apache.beam.sdk.transforms.Materializations;
 import org.apache.beam.sdk.transforms.PTransform;
 import org.apache.beam.sdk.transforms.ParDo;
+import org.apache.beam.sdk.transforms.SerializableFunctions;
 import org.apache.beam.sdk.transforms.Sum;
 import org.apache.beam.sdk.transforms.View;
 import org.apache.beam.sdk.transforms.View.CreatePCollectionView;
@@ -537,30 +539,32 @@ public class PTransformMatchersTest implements Serializable {
   public void writeWithRunnerDeterminedSharding() {
     ResourceId outputDirectory = LocalResources.fromString("/foo/bar", true /* isDirectory */);
     FilenamePolicy policy =
-        DefaultFilenamePolicy.constructUsingStandardParameters(
+        DefaultFilenamePolicy.fromStandardParameters(
             StaticValueProvider.of(outputDirectory),
             DefaultFilenamePolicy.DEFAULT_UNWINDOWED_SHARD_TEMPLATE,
             "",
             false);
-    WriteFiles<Integer> write =
+    WriteFiles<Integer, Void, Integer> write =
         WriteFiles.to(
-            new FileBasedSink<Integer>(StaticValueProvider.of(outputDirectory), policy) {
+            new FileBasedSink<Integer, Void>(
+                StaticValueProvider.of(outputDirectory), DynamicFileDestinations.constant(null)) {
               @Override
-              public WriteOperation<Integer> createWriteOperation() {
+              public WriteOperation<Integer, Void> createWriteOperation() {
                 return null;
               }
-            });
+            },
+            SerializableFunctions.<Integer>identity());
     assertThat(
         PTransformMatchers.writeWithRunnerDeterminedSharding().matches(appliedWrite(write)),
         is(true));
 
-    WriteFiles<Integer> withStaticSharding = write.withNumShards(3);
+    WriteFiles<Integer, Void, Integer> withStaticSharding = write.withNumShards(3);
     assertThat(
         PTransformMatchers.writeWithRunnerDeterminedSharding()
             .matches(appliedWrite(withStaticSharding)),
         is(false));
 
-    WriteFiles<Integer> withCustomSharding =
+    WriteFiles<Integer, Void, Integer> withCustomSharding =
         write.withSharding(Sum.integersGlobally().asSingletonView());
     assertThat(
         PTransformMatchers.writeWithRunnerDeterminedSharding()
@@ -568,8 +572,8 @@ public class PTransformMatchersTest implements Serializable {
         is(false));
   }
 
-  private AppliedPTransform<?, ?, ?> appliedWrite(WriteFiles<Integer> write) {
-    return AppliedPTransform.<PCollection<Integer>, PDone, WriteFiles<Integer>>of(
+  private AppliedPTransform<?, ?, ?> appliedWrite(WriteFiles<Integer, Void, Integer> write) {
+    return AppliedPTransform.<PCollection<Integer>, PDone, WriteFiles<Integer, Void, Integer>>of(
         "WriteFiles",
         Collections.<TupleTag<?>, PValue>emptyMap(),
         Collections.<TupleTag<?>, PValue>emptyMap(),

http://git-wip-us.apache.org/repos/asf/beam/blob/4c336e84/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/WriteFilesTranslationTest.java
----------------------------------------------------------------------
diff --git a/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/WriteFilesTranslationTest.java b/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/WriteFilesTranslationTest.java
index 739034c..283df16 100644
--- a/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/WriteFilesTranslationTest.java
+++ b/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/WriteFilesTranslationTest.java
@@ -26,8 +26,10 @@ import java.util.Objects;
 import javax.annotation.Nullable;
 import org.apache.beam.sdk.common.runner.v1.RunnerApi;
 import org.apache.beam.sdk.common.runner.v1.RunnerApi.ParDoPayload;
+import org.apache.beam.sdk.io.DynamicFileDestinations;
 import org.apache.beam.sdk.io.FileBasedSink;
 import org.apache.beam.sdk.io.FileBasedSink.FilenamePolicy;
+import org.apache.beam.sdk.io.FileBasedSink.OutputFileHints;
 import org.apache.beam.sdk.io.FileSystems;
 import org.apache.beam.sdk.io.WriteFiles;
 import org.apache.beam.sdk.io.fs.ResourceId;
@@ -36,6 +38,8 @@ import org.apache.beam.sdk.runners.AppliedPTransform;
 import org.apache.beam.sdk.testing.TestPipeline;
 import org.apache.beam.sdk.transforms.Create;
 import org.apache.beam.sdk.transforms.ParDo;
+import org.apache.beam.sdk.transforms.SerializableFunction;
+import org.apache.beam.sdk.transforms.SerializableFunctions;
 import org.apache.beam.sdk.values.PCollection;
 import org.apache.beam.sdk.values.PDone;
 import org.junit.Test;
@@ -56,16 +60,17 @@ public class WriteFilesTranslationTest {
   @RunWith(Parameterized.class)
   public static class TestWriteFilesPayloadTranslation {
     @Parameters(name = "{index}: {0}")
-    public static Iterable<WriteFiles<?>> data() {
-      return ImmutableList.<WriteFiles<?>>of(
-          WriteFiles.to(new DummySink()),
-          WriteFiles.to(new DummySink()).withWindowedWrites(),
-          WriteFiles.to(new DummySink()).withNumShards(17),
-          WriteFiles.to(new DummySink()).withWindowedWrites().withNumShards(42));
+    public static Iterable<WriteFiles<Object, Void, Object>> data() {
+      SerializableFunction<Object, Object> format = SerializableFunctions.constant(null);
+      return ImmutableList.of(
+          WriteFiles.to(new DummySink(), format),
+          WriteFiles.to(new DummySink(), format).withWindowedWrites(),
+          WriteFiles.to(new DummySink(), format).withNumShards(17),
+          WriteFiles.to(new DummySink(), format).withWindowedWrites().withNumShards(42));
     }
 
     @Parameter(0)
-    public WriteFiles<String> writeFiles;
+    public WriteFiles<String, Void, String> writeFiles;
 
     public static TestPipeline p = TestPipeline.create().enableAbandonedNodeEnforcement(false);
 
@@ -80,7 +85,7 @@ public class WriteFilesTranslationTest {
       assertThat(payload.getWindowedWrites(), equalTo(writeFiles.isWindowedWrites()));
 
       assertThat(
-          (FileBasedSink<String>) WriteFilesTranslation.sinkFromProto(payload.getSink()),
+          (FileBasedSink<String, Void>) WriteFilesTranslation.sinkFromProto(payload.getSink()),
           equalTo(writeFiles.getSink()));
     }
 
@@ -89,9 +94,9 @@ public class WriteFilesTranslationTest {
       PCollection<String> input = p.apply(Create.of("hello"));
       PDone output = input.apply(writeFiles);
 
-      AppliedPTransform<PCollection<String>, PDone, WriteFiles<String>> appliedPTransform =
-          AppliedPTransform.<PCollection<String>, PDone, WriteFiles<String>>of(
-              "foo", input.expand(), output.expand(), writeFiles, p);
+      AppliedPTransform<PCollection<String>, PDone, WriteFiles<String, Void, String>>
+          appliedPTransform =
+              AppliedPTransform.of("foo", input.expand(), output.expand(), writeFiles, p);
 
       assertThat(
           WriteFilesTranslation.isRunnerDeterminedSharding(appliedPTransform),
@@ -101,7 +106,9 @@ public class WriteFilesTranslationTest {
           WriteFilesTranslation.isWindowedWrites(appliedPTransform),
           equalTo(writeFiles.isWindowedWrites()));
 
-      assertThat(WriteFilesTranslation.getSink(appliedPTransform), equalTo(writeFiles.getSink()));
+      assertThat(
+          WriteFilesTranslation.<String, Void, String>getSink(appliedPTransform),
+          equalTo(writeFiles.getSink()));
     }
   }
 
@@ -109,16 +116,16 @@ public class WriteFilesTranslationTest {
    * A simple {@link FileBasedSink} for testing serialization/deserialization. Not mocked to avoid
    * any issues serializing mocks.
    */
-  private static class DummySink extends FileBasedSink<String> {
+  private static class DummySink extends FileBasedSink<Object, Void> {
 
     DummySink() {
       super(
           StaticValueProvider.of(FileSystems.matchNewResource("nowhere", false)),
-          new DummyFilenamePolicy());
+          DynamicFileDestinations.constant(new DummyFilenamePolicy()));
     }
 
     @Override
-    public WriteOperation<String> createWriteOperation() {
+    public WriteOperation<Object, Void> createWriteOperation() {
       return new DummyWriteOperation(this);
     }
 
@@ -130,46 +137,39 @@ public class WriteFilesTranslationTest {
 
       DummySink that = (DummySink) other;
 
-      return getFilenamePolicy().equals(((DummySink) other).getFilenamePolicy())
-          && getBaseOutputDirectoryProvider().isAccessible()
-          && that.getBaseOutputDirectoryProvider().isAccessible()
-          && getBaseOutputDirectoryProvider()
-              .get()
-              .equals(that.getBaseOutputDirectoryProvider().get());
+      return getTempDirectoryProvider().isAccessible()
+          && that.getTempDirectoryProvider().isAccessible()
+          && getTempDirectoryProvider().get().equals(that.getTempDirectoryProvider().get());
     }
 
     @Override
     public int hashCode() {
       return Objects.hash(
           DummySink.class,
-          getFilenamePolicy(),
-          getBaseOutputDirectoryProvider().isAccessible()
-              ? getBaseOutputDirectoryProvider().get()
-              : null);
+          getTempDirectoryProvider().isAccessible() ? getTempDirectoryProvider().get() : null);
     }
   }
 
-  private static class DummyWriteOperation extends FileBasedSink.WriteOperation<String> {
-    public DummyWriteOperation(FileBasedSink<String> sink) {
+  private static class DummyWriteOperation extends FileBasedSink.WriteOperation<Object, Void> {
+    public DummyWriteOperation(FileBasedSink<Object, Void> sink) {
       super(sink);
     }
 
     @Override
-    public FileBasedSink.Writer<String> createWriter() throws Exception {
+    public FileBasedSink.Writer<Object, Void> createWriter() throws Exception {
       throw new UnsupportedOperationException("Should never be called.");
     }
   }
 
   private static class DummyFilenamePolicy extends FilenamePolicy {
     @Override
-    public ResourceId windowedFilename(
-        ResourceId outputDirectory, WindowedContext c, String extension) {
+    public ResourceId windowedFilename(WindowedContext c, OutputFileHints outputFileHints) {
       throw new UnsupportedOperationException("Should never be called.");
     }
 
     @Nullable
     @Override
-    public ResourceId unwindowedFilename(ResourceId outputDirectory, Context c, String extension) {
+    public ResourceId unwindowedFilename(Context c, OutputFileHints outputFileHints) {
       throw new UnsupportedOperationException("Should never be called.");
     }
 

http://git-wip-us.apache.org/repos/asf/beam/blob/4c336e84/runners/direct-java/src/main/java/org/apache/beam/runners/direct/WriteWithShardingFactory.java
----------------------------------------------------------------------
diff --git a/runners/direct-java/src/main/java/org/apache/beam/runners/direct/WriteWithShardingFactory.java b/runners/direct-java/src/main/java/org/apache/beam/runners/direct/WriteWithShardingFactory.java
index d8734a1..ba796ae 100644
--- a/runners/direct-java/src/main/java/org/apache/beam/runners/direct/WriteWithShardingFactory.java
+++ b/runners/direct-java/src/main/java/org/apache/beam/runners/direct/WriteWithShardingFactory.java
@@ -60,9 +60,11 @@ class WriteWithShardingFactory<InputT>
   public PTransformReplacement<PCollection<InputT>, PDone> getReplacementTransform(
       AppliedPTransform<PCollection<InputT>, PDone, PTransform<PCollection<InputT>, PDone>>
           transform) {
-
     try {
-      WriteFiles<InputT> replacement = WriteFiles.to(WriteFilesTranslation.getSink(transform));
+      WriteFiles<InputT, ?, ?> replacement =
+          WriteFiles.to(
+              WriteFilesTranslation.getSink(transform),
+              WriteFilesTranslation.getFormatFunction(transform));
       if (WriteFilesTranslation.isWindowedWrites(transform)) {
         replacement = replacement.withWindowedWrites();
       }
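
For reference, the new two-argument WriteFiles.to used above pairs a sink
with a format function from the user type to the sink's output type. A
minimal hedged sketch (the String/identity pairing mirrors the tests in this
change, not this factory):

    import org.apache.beam.sdk.io.FileBasedSink;
    import org.apache.beam.sdk.io.WriteFiles;
    import org.apache.beam.sdk.transforms.SerializableFunctions;

    FileBasedSink<String, Void> sink = ...; // any concrete sink, e.g. as sketched earlier
    WriteFiles<String, Void, String> write =
        WriteFiles.to(sink, SerializableFunctions.<String>identity());
    // Sharding stays runner-determined unless withNumShards()/withSharding() is called.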

http://git-wip-us.apache.org/repos/asf/beam/blob/4c336e84/runners/direct-java/src/test/java/org/apache/beam/runners/direct/WriteWithShardingFactoryTest.java
----------------------------------------------------------------------
diff --git a/runners/direct-java/src/test/java/org/apache/beam/runners/direct/WriteWithShardingFactoryTest.java b/runners/direct-java/src/test/java/org/apache/beam/runners/direct/WriteWithShardingFactoryTest.java
index 41d671f..546a181 100644
--- a/runners/direct-java/src/test/java/org/apache/beam/runners/direct/WriteWithShardingFactoryTest.java
+++ b/runners/direct-java/src/test/java/org/apache/beam/runners/direct/WriteWithShardingFactoryTest.java
@@ -39,9 +39,8 @@ import java.util.UUID;
 import org.apache.beam.runners.direct.WriteWithShardingFactory.CalculateShardsFn;
 import org.apache.beam.sdk.coders.VarLongCoder;
 import org.apache.beam.sdk.coders.VoidCoder;
-import org.apache.beam.sdk.io.DefaultFilenamePolicy;
+import org.apache.beam.sdk.io.DynamicFileDestinations;
 import org.apache.beam.sdk.io.FileBasedSink;
-import org.apache.beam.sdk.io.FileBasedSink.FilenamePolicy;
 import org.apache.beam.sdk.io.FileSystems;
 import org.apache.beam.sdk.io.LocalResources;
 import org.apache.beam.sdk.io.TextIO;
@@ -55,6 +54,7 @@ import org.apache.beam.sdk.transforms.Create;
 import org.apache.beam.sdk.transforms.DoFn;
 import org.apache.beam.sdk.transforms.DoFnTester;
 import org.apache.beam.sdk.transforms.PTransform;
+import org.apache.beam.sdk.transforms.SerializableFunctions;
 import org.apache.beam.sdk.transforms.windowing.GlobalWindow;
 import org.apache.beam.sdk.values.PCollection;
 import org.apache.beam.sdk.values.PCollectionView;
@@ -137,21 +137,17 @@ public class WriteWithShardingFactoryTest implements Serializable {
   @Test
   public void withNoShardingSpecifiedReturnsNewTransform() {
     ResourceId outputDirectory = LocalResources.fromString("/foo", true /* isDirectory */);
-    FilenamePolicy policy =
-        DefaultFilenamePolicy.constructUsingStandardParameters(
-            StaticValueProvider.of(outputDirectory),
-            DefaultFilenamePolicy.DEFAULT_UNWINDOWED_SHARD_TEMPLATE,
-            "",
-            false);
 
     PTransform<PCollection<Object>, PDone> original =
         WriteFiles.to(
-            new FileBasedSink<Object>(StaticValueProvider.of(outputDirectory), policy) {
+            new FileBasedSink<Object, Void>(
+                StaticValueProvider.of(outputDirectory), DynamicFileDestinations.constant(null)) {
               @Override
-              public WriteOperation<Object> createWriteOperation() {
+              public WriteOperation<Object, Void> createWriteOperation() {
                 throw new IllegalArgumentException("Should not be used");
               }
-            });
+            },
+            SerializableFunctions.identity());
     @SuppressWarnings("unchecked")
     PCollection<Object> objs = (PCollection) p.apply(Create.empty(VoidCoder.of()));
 

http://git-wip-us.apache.org/repos/asf/beam/blob/4c336e84/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/DataflowRunner.java
----------------------------------------------------------------------
diff --git a/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/DataflowRunner.java b/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/DataflowRunner.java
index 5d9f0f3..8935759 100644
--- a/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/DataflowRunner.java
+++ b/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/DataflowRunner.java
@@ -1455,8 +1455,9 @@ public class DataflowRunner extends PipelineRunner<DataflowPipelineJob> {
   }
 
   @VisibleForTesting
-  static class StreamingShardedWriteFactory<T>
-      implements PTransformOverrideFactory<PCollection<T>, PDone, WriteFiles<T>> {
+  static class StreamingShardedWriteFactory<UserT, DestinationT, OutputT>
+      implements PTransformOverrideFactory<
+          PCollection<UserT>, PDone, WriteFiles<UserT, DestinationT, OutputT>> {
      // We pick 10 as a default, as it works well with the default number of workers started
     // by Dataflow.
     static final int DEFAULT_NUM_SHARDS = 10;
@@ -1467,8 +1468,9 @@ public class DataflowRunner extends PipelineRunner<DataflowPipelineJob> {
     }
 
     @Override
-    public PTransformReplacement<PCollection<T>, PDone> getReplacementTransform(
-        AppliedPTransform<PCollection<T>, PDone, WriteFiles<T>> transform) {
+    public PTransformReplacement<PCollection<UserT>, PDone> getReplacementTransform(
+        AppliedPTransform<PCollection<UserT>, PDone, WriteFiles<UserT, DestinationT, OutputT>>
+            transform) {
       // By default, if numShards is not set WriteFiles will produce one file per bundle. In
       // streaming, there are large numbers of small bundles, resulting in many tiny files.
      // Instead we pick max workers * 2 to ensure full parallelism, but prevent too many files.
@@ -1485,7 +1487,10 @@ public class DataflowRunner extends PipelineRunner<DataflowPipelineJob> {
       }
 
       try {
-        WriteFiles<T> replacement = WriteFiles.to(WriteFilesTranslation.getSink(transform));
+        WriteFiles<UserT, DestinationT, OutputT> replacement =
+            WriteFiles.<UserT, DestinationT, OutputT>to(
+                WriteFilesTranslation.<UserT, DestinationT, OutputT>getSink(transform),
+                WriteFilesTranslation.<UserT, OutputT>getFormatFunction(transform));
         if (WriteFilesTranslation.isWindowedWrites(transform)) {
           replacement = replacement.withWindowedWrites();
         }

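The shard-count choice described in the comments above can be sketched as follows; maxNumWorkers stands in for the Dataflow option the runner consults, so the names are illustrative rather than the exact runner code:

    // Illustrative sketch of the runner-determined shard count.
    int numShards =
        (maxNumWorkers > 0)
            ? maxNumWorkers * 2      // full parallelism, bounded file count
            : DEFAULT_NUM_SHARDS;    // 10, per the constant above
    replacement = replacement.withNumShards(numShards);
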
http://git-wip-us.apache.org/repos/asf/beam/blob/4c336e84/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/DataflowRunnerTest.java
----------------------------------------------------------------------
diff --git a/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/DataflowRunnerTest.java b/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/DataflowRunnerTest.java
index bc1a042..94985f8 100644
--- a/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/DataflowRunnerTest.java
+++ b/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/DataflowRunnerTest.java
@@ -76,6 +76,7 @@ import org.apache.beam.sdk.coders.VoidCoder;
 import org.apache.beam.sdk.extensions.gcp.auth.NoopCredentialFactory;
 import org.apache.beam.sdk.extensions.gcp.auth.TestCredential;
 import org.apache.beam.sdk.extensions.gcp.storage.NoopPathValidator;
+import org.apache.beam.sdk.io.DynamicFileDestinations;
 import org.apache.beam.sdk.io.FileBasedSink;
 import org.apache.beam.sdk.io.FileSystems;
 import org.apache.beam.sdk.io.TextIO;
@@ -100,6 +101,7 @@ import org.apache.beam.sdk.transforms.Create;
 import org.apache.beam.sdk.transforms.DoFn;
 import org.apache.beam.sdk.transforms.PTransform;
 import org.apache.beam.sdk.transforms.ParDo;
+import org.apache.beam.sdk.transforms.SerializableFunctions;
 import org.apache.beam.sdk.transforms.windowing.Sessions;
 import org.apache.beam.sdk.transforms.windowing.Window;
 import org.apache.beam.sdk.util.GcsUtil;
@@ -1263,30 +1265,39 @@ public class DataflowRunnerTest implements Serializable {
   private void testStreamingWriteOverride(PipelineOptions options, int expectedNumShards) {
     TestPipeline p = TestPipeline.fromOptions(options);
 
-    StreamingShardedWriteFactory<Object> factory =
+    StreamingShardedWriteFactory<Object, Void, Object> factory =
         new StreamingShardedWriteFactory<>(p.getOptions());
-    WriteFiles<Object> original = WriteFiles.to(new TestSink(tmpFolder.toString()));
+    WriteFiles<Object, Void, Object> original =
+        WriteFiles.to(new TestSink(tmpFolder.toString()), SerializableFunctions.identity());
     PCollection<Object> objs = (PCollection) p.apply(Create.empty(VoidCoder.of()));
-    AppliedPTransform<PCollection<Object>, PDone, WriteFiles<Object>> originalApplication =
-        AppliedPTransform.of(
-            "writefiles", objs.expand(), Collections.<TupleTag<?>, PValue>emptyMap(), original, p);
-
-    WriteFiles<Object> replacement = (WriteFiles<Object>)
-        factory.getReplacementTransform(originalApplication).getTransform();
+    AppliedPTransform<PCollection<Object>, PDone, WriteFiles<Object, Void, Object>>
+        originalApplication =
+            AppliedPTransform.of(
+                "writefiles",
+                objs.expand(),
+                Collections.<TupleTag<?>, PValue>emptyMap(),
+                original,
+                p);
+
+    WriteFiles<Object, Void, Object> replacement =
+        (WriteFiles<Object, Void, Object>)
+            factory.getReplacementTransform(originalApplication).getTransform();
     assertThat(replacement, not(equalTo((Object) original)));
     assertThat(replacement.getNumShards().get(), equalTo(expectedNumShards));
   }
 
-  private static class TestSink extends FileBasedSink<Object> {
+  private static class TestSink extends FileBasedSink<Object, Void> {
     @Override
     public void validate(PipelineOptions options) {}
 
     TestSink(String tmpFolder) {
-      super(StaticValueProvider.of(FileSystems.matchNewResource(tmpFolder, true)),
-          null);
+      super(
+          StaticValueProvider.of(FileSystems.matchNewResource(tmpFolder, true)),
+          DynamicFileDestinations.constant(null));
     }
+
     @Override
-    public WriteOperation<Object> createWriteOperation() {
+    public WriteOperation<Object, Void> createWriteOperation() {
       throw new IllegalArgumentException("Should not be used");
     }
   }

http://git-wip-us.apache.org/repos/asf/beam/blob/4c336e84/runners/spark/src/test/java/org/apache/beam/runners/spark/SparkRunnerDebuggerTest.java
----------------------------------------------------------------------
diff --git a/runners/spark/src/test/java/org/apache/beam/runners/spark/SparkRunnerDebuggerTest.java b/runners/spark/src/test/java/org/apache/beam/runners/spark/SparkRunnerDebuggerTest.java
index 64ff98c..246eb81 100644
--- a/runners/spark/src/test/java/org/apache/beam/runners/spark/SparkRunnerDebuggerTest.java
+++ b/runners/spark/src/test/java/org/apache/beam/runners/spark/SparkRunnerDebuggerTest.java
@@ -52,7 +52,6 @@ import org.hamcrest.Matchers;
 import org.joda.time.Duration;
 import org.junit.Test;
 
-
 /**
  * Test {@link SparkRunnerDebugger} with different pipelines.
  */
@@ -85,17 +84,20 @@ public class SparkRunnerDebuggerTest {
         .apply(MapElements.via(new WordCount.FormatAsTextFn()))
         .apply(TextIO.write().to("!!PLACEHOLDER-OUTPUT-DIR!!").withNumShards(3).withSuffix(".txt"));
 
-    final String expectedPipeline = "sparkContext.parallelize(Arrays.asList(...))\n"
-        + "_.mapPartitions(new org.apache.beam.runners.spark.examples.WordCount$ExtractWordsFn())\n"
-        + "_.mapPartitions(new org.apache.beam.sdk.transforms.Count$PerElement$1())\n"
-        + "_.combineByKey(..., new org.apache.beam.sdk.transforms.Count$CountFn(), ...)\n"
-        + "_.groupByKey()\n"
-        + "_.map(new org.apache.beam.sdk.transforms.Sum$SumLongFn())\n"
-        + "_.mapPartitions(new org.apache.beam.runners.spark"
-        + ".SparkRunnerDebuggerTest$PlusOne())\n"
-        + "sparkContext.union(...)\n"
-        + "_.mapPartitions(new org.apache.beam.runners.spark.examples.WordCount$FormatAsTextFn())\n"
-        + "_.<org.apache.beam.sdk.io.AutoValue_TextIO_Write>";
+    final String expectedPipeline =
+        "sparkContext.parallelize(Arrays.asList(...))\n"
+            + "_.mapPartitions("
+            + "new org.apache.beam.runners.spark.examples.WordCount$ExtractWordsFn())\n"
+            + "_.mapPartitions(new org.apache.beam.sdk.transforms.Count$PerElement$1())\n"
+            + "_.combineByKey(..., new org.apache.beam.sdk.transforms.Count$CountFn(), ...)\n"
+            + "_.groupByKey()\n"
+            + "_.map(new org.apache.beam.sdk.transforms.Sum$SumLongFn())\n"
+            + "_.mapPartitions(new org.apache.beam.runners.spark"
+            + ".SparkRunnerDebuggerTest$PlusOne())\n"
+            + "sparkContext.union(...)\n"
+            + "_.mapPartitions("
+            + "new org.apache.beam.runners.spark.examples.WordCount$FormatAsTextFn())\n"
+            + "_.<org.apache.beam.sdk.io.TextIO$Write>";
 
     SparkRunnerDebugger.DebugSparkPipelineResult result =
         (SparkRunnerDebugger.DebugSparkPipelineResult) pipeline.run();

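For context, the expected string above is compared against the textual plan the debugger produces. A sketch of retrieving that plan; getDebugString() is an assumption of this sketch, not shown in the excerpt:

    // Hedged sketch: obtaining the debug plan compared to expectedPipeline.
    SparkRunnerDebugger.DebugSparkPipelineResult result =
        (SparkRunnerDebugger.DebugSparkPipelineResult) pipeline.run();
    String debugString = result.getDebugString();  // assumed accessor
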
http://git-wip-us.apache.org/repos/asf/beam/blob/4c336e84/sdks/common/runner-api/src/main/proto/beam_runner_api.proto
----------------------------------------------------------------------
diff --git a/sdks/common/runner-api/src/main/proto/beam_runner_api.proto b/sdks/common/runner-api/src/main/proto/beam_runner_api.proto
index 24e907a..1f74afb 100644
--- a/sdks/common/runner-api/src/main/proto/beam_runner_api.proto
+++ b/sdks/common/runner-api/src/main/proto/beam_runner_api.proto
@@ -367,9 +367,12 @@ message WriteFilesPayload {
   // (Required) The SdkFunctionSpec of the FileBasedSink.
   SdkFunctionSpec sink = 1;
 
-  bool windowed_writes = 2;
+  // (Required) The format function.
+  SdkFunctionSpec format_function = 2;
 
-  bool runner_determined_sharding = 3;
+  bool windowed_writes = 3;
+
+  bool runner_determined_sharding = 4;
 }
 
 // A coder, the binary format for serialization and deserialization of data in

http://git-wip-us.apache.org/repos/asf/beam/blob/4c336e84/sdks/java/core/src/main/java/org/apache/beam/sdk/coders/ShardedKeyCoder.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/coders/ShardedKeyCoder.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/coders/ShardedKeyCoder.java
new file mode 100644
index 0000000..a86b198
--- /dev/null
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/coders/ShardedKeyCoder.java
@@ -0,0 +1,66 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.beam.sdk.coders;
+
+import com.google.common.annotations.VisibleForTesting;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.util.Arrays;
+import java.util.List;
+import org.apache.beam.sdk.values.ShardedKey;
+
+/** A {@link Coder} for {@link ShardedKey}, using a wrapped key {@link Coder}. */
+@VisibleForTesting
+public class ShardedKeyCoder<KeyT> extends StructuredCoder<ShardedKey<KeyT>> {
+  public static <KeyT> ShardedKeyCoder<KeyT> of(Coder<KeyT> keyCoder) {
+    return new ShardedKeyCoder<>(keyCoder);
+  }
+
+  private final Coder<KeyT> keyCoder;
+  private final VarIntCoder shardNumberCoder;
+
+  protected ShardedKeyCoder(Coder<KeyT> keyCoder) {
+    this.keyCoder = keyCoder;
+    this.shardNumberCoder = VarIntCoder.of();
+  }
+
+  @Override
+  public List<? extends Coder<?>> getCoderArguments() {
+    return Arrays.asList(keyCoder);
+  }
+
+  @Override
+  public void encode(ShardedKey<KeyT> key, OutputStream outStream)
+      throws IOException {
+    keyCoder.encode(key.getKey(), outStream);
+    shardNumberCoder.encode(key.getShardNumber(), outStream);
+  }
+
+  @Override
+  public ShardedKey<KeyT> decode(InputStream inStream)
+      throws IOException {
+    return ShardedKey.of(keyCoder.decode(inStream), shardNumberCoder.decode(inStream));
+  }
+
+  @Override
+  public void verifyDeterministic() throws NonDeterministicException {
+    keyCoder.verifyDeterministic();
+  }
+}

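A quick round trip through the new coder, assuming the SDK's CoderUtils helpers alongside the ShardedKey accessors used in encode/decode above:

    // Hedged usage sketch for ShardedKeyCoder.
    ShardedKeyCoder<String> coder = ShardedKeyCoder.of(StringUtf8Coder.of());
    ShardedKey<String> key = ShardedKey.of("user-42", 3);
    // Encodes the key with the wrapped coder, then the shard number.
    byte[] bytes = CoderUtils.encodeToByteArray(coder, key);
    ShardedKey<String> decoded = CoderUtils.decodeFromByteArray(coder, bytes);
    // decoded.getKey() -> "user-42"; decoded.getShardNumber() -> 3
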
http://git-wip-us.apache.org/repos/asf/beam/blob/4c336e84/sdks/java/core/src/main/java/org/apache/beam/sdk/io/AvroIO.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/AvroIO.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/AvroIO.java
index 4143db2..89cadbd 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/AvroIO.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/AvroIO.java
@@ -18,7 +18,6 @@
 package org.apache.beam.sdk.io;
 
 import static com.google.common.base.Preconditions.checkArgument;
-import static com.google.common.base.Preconditions.checkState;
 
 import com.google.auto.value.AutoValue;
 import com.google.common.collect.ImmutableMap;
@@ -35,6 +34,7 @@ import org.apache.beam.sdk.annotations.Experimental.Kind;
 import org.apache.beam.sdk.coders.AvroCoder;
 import org.apache.beam.sdk.coders.Coder;
 import org.apache.beam.sdk.coders.VoidCoder;
+import org.apache.beam.sdk.io.FileBasedSink.DynamicDestinations;
 import org.apache.beam.sdk.io.FileBasedSink.FilenamePolicy;
 import org.apache.beam.sdk.io.Read.Bounded;
 import org.apache.beam.sdk.io.fs.ResourceId;
@@ -43,6 +43,7 @@ import org.apache.beam.sdk.options.ValueProvider.NestedValueProvider;
 import org.apache.beam.sdk.options.ValueProvider.StaticValueProvider;
 import org.apache.beam.sdk.transforms.PTransform;
 import org.apache.beam.sdk.transforms.SerializableFunction;
+import org.apache.beam.sdk.transforms.SerializableFunctions;
 import org.apache.beam.sdk.transforms.display.DisplayData;
 import org.apache.beam.sdk.transforms.display.HasDisplayData;
 import org.apache.beam.sdk.values.PBegin;
@@ -52,18 +53,19 @@ import org.apache.beam.sdk.values.PDone;
 /**
  * {@link PTransform}s for reading and writing Avro files.
  *
- * <p>To read a {@link PCollection} from one or more Avro files, use {@code AvroIO.read()},
- * using {@link AvroIO.Read#from} to specify the filename or filepattern to read from.
- * See {@link FileSystems} for information on supported file systems and filepatterns.
+ * <p>To read a {@link PCollection} from one or more Avro files, use {@code AvroIO.read()}, using
+ * {@link AvroIO.Read#from} to specify the filename or filepattern to read from. See {@link
+ * FileSystems} for information on supported file systems and filepatterns.
  *
- * <p>To read specific records, such as Avro-generated classes, use {@link #read(Class)}.
- * To read {@link GenericRecord GenericRecords}, use {@link #readGenericRecords(Schema)} which takes
- * a {@link Schema} object, or {@link #readGenericRecords(String)} which takes an Avro schema in a
+ * <p>To read specific records, such as Avro-generated classes, use {@link #read(Class)}. To read
+ * {@link GenericRecord GenericRecords}, use {@link #readGenericRecords(Schema)} which takes a
+ * {@link Schema} object, or {@link #readGenericRecords(String)} which takes an Avro schema in a
  * JSON-encoded string form. An exception will be thrown if a record doesn't match the specified
  * schema.
  *
  * <p>For example:
- * <pre> {@code
+ *
+ * <pre>{@code
  * Pipeline p = ...;
  *
  * // A simple Read of a local file (only runs locally):
@@ -75,34 +77,33 @@ import org.apache.beam.sdk.values.PDone;
  * PCollection<GenericRecord> records =
  *     p.apply(AvroIO.readGenericRecords(schema)
  *                .from("gs://my_bucket/path/to/records-*.avro"));
- * } </pre>
+ * }</pre>
  *
  * <p>To write a {@link PCollection} to one or more Avro files, use {@link AvroIO.Write}, using
- * {@code AvroIO.write().to(String)} to specify the output filename prefix. The default
- * {@link DefaultFilenamePolicy} will use this prefix, in conjunction with a
- * {@link ShardNameTemplate} (set via {@link Write#withShardNameTemplate(String)}) and optional
- * filename suffix (set via {@link Write#withSuffix(String)}, to generate output filenames in a
- * sharded way. You can override this default write filename policy using
- * {@link Write#withFilenamePolicy(FileBasedSink.FilenamePolicy)} to specify a custom file naming
- * policy.
+ * {@code AvroIO.write().to(String)} to specify the output filename prefix. The default {@link
+ * DefaultFilenamePolicy} will use this prefix, in conjunction with a {@link ShardNameTemplate} (set
+ * via {@link Write#withShardNameTemplate(String)}) and optional filename suffix (set via {@link
+ * Write#withSuffix(String)}), to generate output filenames in a sharded way. You can override this
+ * default write filename policy using {@link Write#to(FileBasedSink.FilenamePolicy)} to specify a
+ * custom file naming policy.
  *
  * <p>By default, all input is put into the global window before writing. If per-window writes are
- * desired - for example, when using a streaming runner -
- * {@link AvroIO.Write#withWindowedWrites()} will cause windowing and triggering to be
- * preserved. When producing windowed writes, the number of output shards must be set explicitly
- * using {@link AvroIO.Write#withNumShards(int)}; some runners may set this for you to a
- * runner-chosen value, so you may need not set it yourself. A
- * {@link FileBasedSink.FilenamePolicy} must be set, and unique windows and triggers must produce
- * unique filenames.
+ * desired - for example, when using a streaming runner - {@link AvroIO.Write#withWindowedWrites()}
+ * will cause windowing and triggering to be preserved. When producing windowed writes with a
+ * streaming runner that supports triggers, the number of output shards must be set explicitly using
+ * {@link AvroIO.Write#withNumShards(int)}; some runners may set this for you to a runner-chosen
+ * value, so you may not need to set it yourself. A {@link FileBasedSink.FilenamePolicy} must be set,
+ * and unique windows and triggers must produce unique filenames.
  *
- * <p>To write specific records, such as Avro-generated classes, use {@link #write(Class)}.
- * To write {@link GenericRecord GenericRecords}, use either {@link #writeGenericRecords(Schema)}
- * which takes a {@link Schema} object, or {@link #writeGenericRecords(String)} which takes a schema
- * in a JSON-encoded string form. An exception will be thrown if a record doesn't match the
- * specified schema.
+ * <p>To write specific records, such as Avro-generated classes, use {@link #write(Class)}. To write
+ * {@link GenericRecord GenericRecords}, use either {@link #writeGenericRecords(Schema)} which takes
+ * a {@link Schema} object, or {@link #writeGenericRecords(String)} which takes a schema in a
+ * JSON-encoded string form. An exception will be thrown if a record doesn't match the specified
+ * schema.
  *
  * <p>For example:
- * <pre> {@code
+ *
+ * <pre>{@code
  * // A simple Write to a local file (only runs locally):
  * PCollection<AvroAutoGenClass> records = ...;
  * records.apply(AvroIO.write(AvroAutoGenClass.class).to("/path/to/file.avro"));
@@ -113,11 +114,11 @@ import org.apache.beam.sdk.values.PDone;
  * records.apply("WriteToAvro", AvroIO.writeGenericRecords(schema)
  *     .to("gs://my_bucket/path/to/numbers")
  *     .withSuffix(".avro"));
- * } </pre>
+ * }</pre>
  *
- * <p>By default, {@link AvroIO.Write} produces output files that are compressed using the
- * {@link org.apache.avro.file.Codec CodecFactory.deflateCodec(6)}. This default can
- * be changed or overridden using {@link AvroIO.Write#withCodec}.
+ * <p>By default, {@link AvroIO.Write} produces output files that are compressed using the {@link
+ * org.apache.avro.file.Codec CodecFactory.deflateCodec(6)}. This default can be changed or
+ * overridden using {@link AvroIO.Write#withCodec}.
  */
 public class AvroIO {
   /**
@@ -258,11 +259,16 @@ public class AvroIO {
     @Nullable abstract ValueProvider<ResourceId> getFilenamePrefix();
     @Nullable abstract String getShardTemplate();
     @Nullable abstract String getFilenameSuffix();
+
+    @Nullable
+    abstract ValueProvider<ResourceId> getTempDirectory();
+
     abstract int getNumShards();
     @Nullable abstract Class<T> getRecordClass();
     @Nullable abstract Schema getSchema();
     abstract boolean getWindowedWrites();
     @Nullable abstract FilenamePolicy getFilenamePolicy();
+
     /**
      * The codec used to encode the blocks in the Avro file. String value drawn from those in
      * https://avro.apache.org/docs/1.7.7/api/java/org/apache/avro/file/CodecFactory.html
@@ -277,6 +283,9 @@ public class AvroIO {
     abstract static class Builder<T> {
       abstract Builder<T> setFilenamePrefix(ValueProvider<ResourceId> filenamePrefix);
       abstract Builder<T> setFilenameSuffix(String filenameSuffix);
+
+      abstract Builder<T> setTempDirectory(ValueProvider<ResourceId> tempDirectory);
+
       abstract Builder<T> setNumShards(int numShards);
       abstract Builder<T> setShardTemplate(String shardTemplate);
       abstract Builder<T> setRecordClass(Class<T> recordClass);
@@ -296,9 +305,9 @@ public class AvroIO {
      * <p>The name of the output files will be determined by the {@link FilenamePolicy} used.
      *
      * <p>By default, a {@link DefaultFilenamePolicy} will build output filenames using the
-     * specified prefix, a shard name template (see {@link #withShardNameTemplate(String)}, and
-     * a common suffix (if supplied using {@link #withSuffix(String)}). This default can be
-     * overridden using {@link #withFilenamePolicy(FilenamePolicy)}.
+     * specified prefix, a shard name template (see {@link #withShardNameTemplate(String)}), and a
+     * common suffix (if supplied using {@link #withSuffix(String)}). This default can be overridden
+     * using {@link #to(FilenamePolicy)}.
      */
     public Write<T> to(String outputPrefix) {
       return to(FileBasedSink.convertToFileResourceIfPossible(outputPrefix));
@@ -306,14 +315,21 @@ public class AvroIO {
 
     /**
      * Writes to file(s) with the given output prefix. See {@link FileSystems} for information on
-     * supported file systems.
-     *
-     * <p>The name of the output files will be determined by the {@link FilenamePolicy} used.
+     * supported file systems. This prefix is used by the {@link DefaultFilenamePolicy} to generate
+     * filenames.
      *
      * <p>By default, a {@link DefaultFilenamePolicy} will build output filenames using the
-     * specified prefix, a shard name template (see {@link #withShardNameTemplate(String)}, and
-     * a common suffix (if supplied using {@link #withSuffix(String)}). This default can be
-     * overridden using {@link #withFilenamePolicy(FilenamePolicy)}.
+     * specified prefix, a shard name template (see {@link #withShardNameTemplate(String)}), and a
+     * common suffix (if supplied using {@link #withSuffix(String)}).
+     *
+     * <p>This default policy can be overridden using {@link #to(FilenamePolicy)}, in which case
+     * {@link #withShardNameTemplate(String)} and {@link #withSuffix(String)} should not be set.
+     * Custom filename policies do not automatically see this prefix; you should explicitly pass
+     * the prefix into your {@link FilenamePolicy} object if you need it.
+     *
+     * <p>If {@link #withTempDirectory} has not been called, this filename prefix will be used to
+     * infer a directory for temporary files.
      */
     @Experimental(Kind.FILESYSTEM)
     public Write<T> to(ResourceId outputPrefix) {
@@ -342,15 +358,22 @@ public class AvroIO {
     }
 
     /**
-     * Configures the {@link FileBasedSink.FilenamePolicy} that will be used to name written files.
+     * Writes to files named according to the given {@link FileBasedSink.FilenamePolicy}. A
+     * directory for temporary files must be specified using {@link #withTempDirectory}.
      */
-    public Write<T> withFilenamePolicy(FilenamePolicy filenamePolicy) {
+    public Write<T> to(FilenamePolicy filenamePolicy) {
       return toBuilder().setFilenamePolicy(filenamePolicy).build();
     }
 
+    /** Set the base directory used to generate temporary files. */
+    @Experimental(Kind.FILESYSTEM)
+    public Write<T> withTempDirectory(ValueProvider<ResourceId> tempDirectory) {
+      return toBuilder().setTempDirectory(tempDirectory).build();
+    }
+
     /**
      * Uses the given {@link ShardNameTemplate} for naming output files. This option may only be
-     * used when {@link #withFilenamePolicy(FilenamePolicy)} has not been configured.
+     * used with one of the default filename-prefix {@code to()} overloads.
      *
      * <p>See {@link DefaultFilenamePolicy} for how the prefix, shard name template, and suffix are
      * used.
@@ -360,8 +383,8 @@ public class AvroIO {
     }
 
     /**
-     * Configures the filename suffix for written files. This option may only be used when
-     * {@link #withFilenamePolicy(FilenamePolicy)} has not been configured.
+     * Configures the filename suffix for written files. This option may only be used with one
+     * of the default filename-prefix {@code to()} overloads.
      *
      * <p>See {@link DefaultFilenamePolicy} for how the prefix, shard name template, and suffix are
      * used.
@@ -402,9 +425,8 @@ public class AvroIO {
     /**
      * Preserves windowing of input elements and writes them to files based on the element's window.
      *
-     * <p>Requires use of {@link #withFilenamePolicy(FileBasedSink.FilenamePolicy)}. Filenames will
-     * be generated using {@link FilenamePolicy#windowedFilename}. See also
-     * {@link WriteFiles#withWindowedWrites()}.
+     * <p>If using {@link #to(FileBasedSink.FilenamePolicy)}, filenames will be generated using
+     * {@link FilenamePolicy#windowedFilename}. See also {@link WriteFiles#withWindowedWrites()}.
      */
     public Write<T> withWindowedWrites() {
       return toBuilder().setWindowedWrites(true).build();
@@ -435,32 +457,46 @@ public class AvroIO {
       return toBuilder().setMetadata(ImmutableMap.copyOf(metadata)).build();
     }
 
-    @Override
-    public PDone expand(PCollection<T> input) {
-      checkState(getFilenamePrefix() != null,
-          "Need to set the filename prefix of an AvroIO.Write transform.");
-      checkState(
-          (getFilenamePolicy() == null)
-              || (getShardTemplate() == null && getFilenameSuffix() == null),
-          "Cannot set a filename policy and also a filename template or suffix.");
-      checkState(getSchema() != null,
-          "Need to set the schema of an AvroIO.Write transform.");
-      checkState(!getWindowedWrites() || (getFilenamePolicy() != null),
-          "When using windowed writes, a filename policy must be set via withFilenamePolicy().");
-
+    DynamicDestinations<T, Void> resolveDynamicDestinations() {
       FilenamePolicy usedFilenamePolicy = getFilenamePolicy();
       if (usedFilenamePolicy == null) {
-        usedFilenamePolicy = DefaultFilenamePolicy.constructUsingStandardParameters(
-            getFilenamePrefix(), getShardTemplate(), getFilenameSuffix(), getWindowedWrites());
+        usedFilenamePolicy =
+            DefaultFilenamePolicy.fromStandardParameters(
+                getFilenamePrefix(), getShardTemplate(), getFilenameSuffix(), getWindowedWrites());
+      }
+      return DynamicFileDestinations.constant(usedFilenamePolicy);
+    }
+
+    @Override
+    public PDone expand(PCollection<T> input) {
+      checkArgument(
+          getFilenamePrefix() != null || getTempDirectory() != null,
+          "Need to set either the filename prefix or the tempDirectory of a AvroIO.Write "
+              + "transform.");
+      if (getFilenamePolicy() != null) {
+        checkArgument(
+            getShardTemplate() == null && getFilenameSuffix() == null,
+            "shardTemplate and filenameSuffix should only be used with the default "
+                + "filename policy");
       }
+      return expandTyped(input, resolveDynamicDestinations());
+    }
 
-      WriteFiles<T> write = WriteFiles.to(
-            new AvroSink<>(
-                getFilenamePrefix(),
-                usedFilenamePolicy,
-                AvroCoder.of(getRecordClass(), getSchema()),
-                getCodec(),
-                getMetadata()));
+    public <DestinationT> PDone expandTyped(
+        PCollection<T> input, DynamicDestinations<T, DestinationT> dynamicDestinations) {
+      ValueProvider<ResourceId> tempDirectory = getTempDirectory();
+      if (tempDirectory == null) {
+        tempDirectory = getFilenamePrefix();
+      }
+      WriteFiles<T, DestinationT, T> write =
+          WriteFiles.to(
+              new AvroSink<>(
+                  tempDirectory,
+                  dynamicDestinations,
+                  AvroCoder.of(getRecordClass(), getSchema()),
+                  getCodec(),
+                  getMetadata()),
+              SerializableFunctions.<T>identity());
       if (getNumShards() > 0) {
         write = write.withNumShards(getNumShards());
       }
@@ -473,31 +509,25 @@ public class AvroIO {
     @Override
     public void populateDisplayData(DisplayData.Builder builder) {
       super.populateDisplayData(builder);
-      checkState(
-          getFilenamePrefix() != null,
-          "Unable to populate DisplayData for invalid AvroIO.Write (unset output prefix).");
-      String outputPrefixString = null;
-      if (getFilenamePrefix().isAccessible()) {
-        ResourceId dir = getFilenamePrefix().get();
-        outputPrefixString = dir.toString();
-      } else {
-        outputPrefixString = getFilenamePrefix().toString();
+      resolveDynamicDestinations().populateDisplayData(builder);
+
+      String tempDirectory = null;
+      if (getTempDirectory() != null) {
+        tempDirectory =
+            getTempDirectory().isAccessible()
+                ? getTempDirectory().get().toString()
+                : getTempDirectory().toString();
       }
       builder
-          .add(DisplayData.item("schema", getRecordClass())
-            .withLabel("Record Schema"))
-          .addIfNotNull(DisplayData.item("filePrefix", outputPrefixString)
-            .withLabel("Output File Prefix"))
-          .addIfNotNull(DisplayData.item("shardNameTemplate", getShardTemplate())
-              .withLabel("Output Shard Name Template"))
-          .addIfNotNull(DisplayData.item("fileSuffix", getFilenameSuffix())
-              .withLabel("Output File Suffix"))
-          .addIfNotDefault(DisplayData.item("numShards", getNumShards())
-              .withLabel("Maximum Output Shards"),
-              0)
-          .addIfNotDefault(DisplayData.item("codec", getCodec().toString())
-              .withLabel("Avro Compression Codec"),
-              DEFAULT_CODEC.toString());
+          .add(DisplayData.item("schema", getRecordClass()).withLabel("Record Schema"))
+          .addIfNotDefault(
+              DisplayData.item("numShards", getNumShards()).withLabel("Maximum Output Shards"), 0)
+          .addIfNotDefault(
+              DisplayData.item("codec", getCodec().toString()).withLabel("Avro Compression Codec"),
+              DEFAULT_CODEC.toString())
+          .addIfNotNull(
+              DisplayData.item("tempDirectory", tempDirectory)
+                  .withLabel("Directory for temporary files"));
       builder.include("Metadata", new Metadata());
     }
 

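Putting the revised Write surface together: a hedged sketch of a windowed write where myPolicy is a hypothetical custom FilenamePolicy, and the temporary directory is now supplied explicitly rather than inferred from a prefix:

    // Hedged sketch of the revised AvroIO.Write API described above.
    records.apply(
        AvroIO.write(AvroAutoGenClass.class)
            .to(myPolicy)  // replaces the removed withFilenamePolicy(...)
            .withTempDirectory(
                StaticValueProvider.of(
                    FileSystems.matchNewResource("gs://my_bucket/temp", true)))
            .withWindowedWrites()
            .withNumShards(3));
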
http://git-wip-us.apache.org/repos/asf/beam/blob/4c336e84/sdks/java/core/src/main/java/org/apache/beam/sdk/io/AvroSink.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/AvroSink.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/AvroSink.java
index 6c36266..c78870b 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/AvroSink.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/AvroSink.java
@@ -32,39 +32,40 @@ import org.apache.beam.sdk.options.ValueProvider;
 import org.apache.beam.sdk.util.MimeTypes;
 
 /** A {@link FileBasedSink} for Avro files. */
-class AvroSink<T> extends FileBasedSink<T> {
+class AvroSink<T, DestinationT> extends FileBasedSink<T, DestinationT> {
   private final AvroCoder<T> coder;
   private final SerializableAvroCodecFactory codec;
   private final ImmutableMap<String, Object> metadata;
 
   AvroSink(
       ValueProvider<ResourceId> outputPrefix,
-      FilenamePolicy filenamePolicy,
+      DynamicDestinations<T, DestinationT> dynamicDestinations,
       AvroCoder<T> coder,
       SerializableAvroCodecFactory codec,
       ImmutableMap<String, Object> metadata) {
     // Avro handles compression internally using the codec.
-    super(outputPrefix, filenamePolicy, CompressionType.UNCOMPRESSED);
+    super(outputPrefix, dynamicDestinations, CompressionType.UNCOMPRESSED);
     this.coder = coder;
     this.codec = codec;
     this.metadata = metadata;
   }
 
   @Override
-  public WriteOperation<T> createWriteOperation() {
+  public WriteOperation<T, DestinationT> createWriteOperation() {
     return new AvroWriteOperation<>(this, coder, codec, metadata);
   }
 
   /** A {@link WriteOperation WriteOperation} for Avro files. */
-  private static class AvroWriteOperation<T> extends WriteOperation<T> {
+  private static class AvroWriteOperation<T, DestinationT> extends WriteOperation<T, DestinationT> {
     private final AvroCoder<T> coder;
     private final SerializableAvroCodecFactory codec;
     private final ImmutableMap<String, Object> metadata;
 
-    private AvroWriteOperation(AvroSink<T> sink,
-                               AvroCoder<T> coder,
-                               SerializableAvroCodecFactory codec,
-                               ImmutableMap<String, Object> metadata) {
+    private AvroWriteOperation(
+        AvroSink<T, DestinationT> sink,
+        AvroCoder<T> coder,
+        SerializableAvroCodecFactory codec,
+        ImmutableMap<String, Object> metadata) {
       super(sink);
       this.coder = coder;
       this.codec = codec;
@@ -72,22 +73,23 @@ class AvroSink<T> extends FileBasedSink<T> {
     }
 
     @Override
-    public Writer<T> createWriter() throws Exception {
+    public Writer<T, DestinationT> createWriter() throws Exception {
       return new AvroWriter<>(this, coder, codec, metadata);
     }
   }
 
   /** A {@link Writer Writer} for Avro files. */
-  private static class AvroWriter<T> extends Writer<T> {
+  private static class AvroWriter<T, DestinationT> extends Writer<T, DestinationT> {
     private final AvroCoder<T> coder;
     private DataFileWriter<T> dataFileWriter;
     private SerializableAvroCodecFactory codec;
     private final ImmutableMap<String, Object> metadata;
 
-    public AvroWriter(WriteOperation<T> writeOperation,
-                      AvroCoder<T> coder,
-                      SerializableAvroCodecFactory codec,
-                      ImmutableMap<String, Object> metadata) {
+    public AvroWriter(
+        WriteOperation<T, DestinationT> writeOperation,
+        AvroCoder<T> coder,
+        SerializableAvroCodecFactory codec,
+        ImmutableMap<String, Object> metadata) {
       super(writeOperation, MimeTypes.BINARY);
       this.coder = coder;
       this.codec = codec;

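The same two-type-parameter generalization runs through the whole sink hierarchy (FileBasedSink, WriteOperation, Writer). A minimal sketch of a single-destination sink under the new signature, modeled on the TestSink shown earlier; myPolicy is a hypothetical FilenamePolicy:

    // Hedged sketch: a sink with one fixed destination binds DestinationT to
    // Void and wraps its single policy in a constant DynamicDestinations.
    class ConstantDestinationSink extends FileBasedSink<String, Void> {
      ConstantDestinationSink(
          ValueProvider<ResourceId> tempDirectory, FilenamePolicy myPolicy) {
        super(tempDirectory, DynamicFileDestinations.constant(myPolicy));
      }

      @Override
      public WriteOperation<String, Void> createWriteOperation() {
        throw new UnsupportedOperationException("sketch only");
      }
    }
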
http://git-wip-us.apache.org/repos/asf/beam/blob/4c336e84/sdks/java/core/src/main/java/org/apache/beam/sdk/io/DefaultFilenamePolicy.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/DefaultFilenamePolicy.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/DefaultFilenamePolicy.java
index f9e4ac4..7a60e49 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/DefaultFilenamePolicy.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/DefaultFilenamePolicy.java
@@ -20,25 +20,31 @@ package org.apache.beam.sdk.io;
 import static com.google.common.base.MoreObjects.firstNonNull;
 
 import com.google.common.annotations.VisibleForTesting;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.io.Serializable;
 import java.text.DecimalFormat;
 import java.util.Arrays;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 import javax.annotation.Nullable;
+import org.apache.beam.sdk.coders.AtomicCoder;
+import org.apache.beam.sdk.coders.Coder;
+import org.apache.beam.sdk.coders.CoderException;
+import org.apache.beam.sdk.coders.StringUtf8Coder;
 import org.apache.beam.sdk.io.FileBasedSink.FilenamePolicy;
+import org.apache.beam.sdk.io.FileBasedSink.OutputFileHints;
 import org.apache.beam.sdk.io.fs.ResolveOptions.StandardResolveOptions;
 import org.apache.beam.sdk.io.fs.ResourceId;
 import org.apache.beam.sdk.options.ValueProvider;
-import org.apache.beam.sdk.options.ValueProvider.NestedValueProvider;
-import org.apache.beam.sdk.transforms.SerializableFunction;
+import org.apache.beam.sdk.options.ValueProvider.StaticValueProvider;
 import org.apache.beam.sdk.transforms.display.DisplayData;
 import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
 import org.apache.beam.sdk.transforms.windowing.GlobalWindow;
 import org.apache.beam.sdk.transforms.windowing.IntervalWindow;
 import org.apache.beam.sdk.transforms.windowing.PaneInfo;
 import org.apache.beam.sdk.transforms.windowing.PaneInfo.Timing;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
 
 /**
  * A default {@link FilenamePolicy} for windowed and unwindowed files. This policy is constructed
@@ -51,10 +57,7 @@ import org.slf4j.LoggerFactory;
  * {@code WriteOneFilePerWindow} example pipeline.
  */
 public final class DefaultFilenamePolicy extends FilenamePolicy {
-
-  private static final Logger LOG = LoggerFactory.getLogger(DefaultFilenamePolicy.class);
-
-  /** The default sharding name template used in {@link #constructUsingStandardParameters}. */
+  /** The default sharding name template. */
   public static final String DEFAULT_UNWINDOWED_SHARD_TEMPLATE = ShardNameTemplate.INDEX_OF_MAX;
 
   /** The default windowed sharding name template used when writing windowed files.
@@ -67,75 +70,184 @@ public final class DefaultFilenamePolicy extends FilenamePolicy {
       "W-P" + DEFAULT_UNWINDOWED_SHARD_TEMPLATE;
 
   /*
-   * pattern for both windowed and non-windowed file names
+   * pattern for both windowed and non-windowed file names.
    */
   private static final Pattern SHARD_FORMAT_RE = Pattern.compile("(S+|N+|W|P)");
 
   /**
+   * Encapsulates constructor parameters to {@link DefaultFilenamePolicy}.
+   *
+   * <p>This is used as the {@code DestinationT} argument to allow {@link DefaultFilenamePolicy}
+   * objects to be dynamically generated.
+   */
+  public static class Params implements Serializable {
+    private final ValueProvider<ResourceId> baseFilename;
+    private final String shardTemplate;
+    private final boolean explicitTemplate;
+    private final String suffix;
+
+    /**
+     * Construct a default Params object. The shard template will be set to the default {@link
+     * #DEFAULT_UNWINDOWED_SHARD_TEMPLATE} value.
+     */
+    public Params() {
+      this.baseFilename = null;
+      this.shardTemplate = DEFAULT_UNWINDOWED_SHARD_TEMPLATE;
+      this.suffix = "";
+      this.explicitTemplate = false;
+    }
+
+    private Params(
+        ValueProvider<ResourceId> baseFilename,
+        String shardTemplate,
+        String suffix,
+        boolean explicitTemplate) {
+      this.baseFilename = baseFilename;
+      this.shardTemplate = shardTemplate;
+      this.suffix = suffix;
+      this.explicitTemplate = explicitTemplate;
+    }
+
+    /**
+     * Specify that writes are windowed. This affects the default shard template, changing it to
+     * {@link #DEFAULT_WINDOWED_SHARD_TEMPLATE}.
+     */
+    public Params withWindowedWrites() {
+      String template = this.shardTemplate;
+      if (!explicitTemplate) {
+        template = DEFAULT_WINDOWED_SHARD_TEMPLATE;
+      }
+      return new Params(baseFilename, template, suffix, explicitTemplate);
+    }
+
+    /** Sets the base filename. */
+    public Params withBaseFilename(ResourceId baseFilename) {
+      return withBaseFilename(StaticValueProvider.of(baseFilename));
+    }
+
+    /** Like {@link #withBaseFilename(ResourceId)}, but takes in a {@link ValueProvider}. */
+    public Params withBaseFilename(ValueProvider<ResourceId> baseFilename) {
+      return new Params(baseFilename, shardTemplate, suffix, explicitTemplate);
+    }
+
+    /** Sets the shard template. */
+    public Params withShardTemplate(String shardTemplate) {
+      return new Params(baseFilename, shardTemplate, suffix, true);
+    }
+
+    /** Sets the suffix. */
+    public Params withSuffix(String suffix) {
+      return new Params(baseFilename, shardTemplate, suffix, explicitTemplate);
+    }
+  }
+
+  /** A Coder for {@link Params}. */
+  public static class ParamsCoder extends AtomicCoder<Params> {
+    private static final ParamsCoder INSTANCE = new ParamsCoder();
+    private Coder<String> stringCoder = StringUtf8Coder.of();
+
+    public static ParamsCoder of() {
+      return INSTANCE;
+    }
+
+    @Override
+    public void encode(Params value, OutputStream outStream) throws IOException {
+      if (value == null) {
+        throw new CoderException("cannot encode a null value");
+      }
+      stringCoder.encode(value.baseFilename.get().toString(), outStream);
+      stringCoder.encode(value.shardTemplate, outStream);
+      stringCoder.encode(value.suffix, outStream);
+    }
+
+    @Override
+    public Params decode(InputStream inStream) throws IOException {
+      ResourceId prefix =
+          FileBasedSink.convertToFileResourceIfPossible(stringCoder.decode(inStream));
+      String shardTemplate = stringCoder.decode(inStream);
+      String suffix = stringCoder.decode(inStream);
+      return new Params()
+          .withBaseFilename(prefix)
+          .withShardTemplate(shardTemplate)
+          .withSuffix(suffix);
+    }
+  }
+
+  private final Params params;
+  /**
    * Constructs a new {@link DefaultFilenamePolicy}.
    *
    * @see DefaultFilenamePolicy for more information on the arguments to this function.
    */
   @VisibleForTesting
-  DefaultFilenamePolicy(ValueProvider<String> prefix, String shardTemplate, String suffix) {
-    this.prefix = prefix;
-    this.shardTemplate = shardTemplate;
-    this.suffix = suffix;
+  DefaultFilenamePolicy(Params params) {
+    this.params = params;
   }
 
   /**
-   * A helper function to construct a {@link DefaultFilenamePolicy} using the standard filename
-   * parameters, namely a provided {@link ResourceId} for the output prefix, and possibly-null
-   * shard name template and suffix.
+   * Construct a {@link DefaultFilenamePolicy}.
    *
-   * <p>Any filename component of the provided resource will be used as the filename prefix.
+   * <p>This is a shortcut for:
    *
-   * <p>If provided, the shard name template will be used; otherwise
-   * {@link #DEFAULT_UNWINDOWED_SHARD_TEMPLATE} will be used for non-windowed file names and
-   * {@link #DEFAULT_WINDOWED_SHARD_TEMPLATE} will be used for windowed file names.
+   * <pre>{@code
+   *   DefaultFilenamePolicy.fromParams(new Params()
+   *     .withBaseFilename(baseFilename)
+   *     .withShardTemplate(shardTemplate)
+   *     .withSuffix(filenameSuffix)
+   *     .withWindowedWrites())
+   * }</pre>
    *
-   * <p>If provided, the suffix will be used; otherwise the files will have an empty suffix.
+   * <p>Here each {@code with} method is invoked only if its value is non-null or true.
    */
-  public static DefaultFilenamePolicy constructUsingStandardParameters(
-      ValueProvider<ResourceId> outputPrefix,
+  public static DefaultFilenamePolicy fromStandardParameters(
+      ValueProvider<ResourceId> baseFilename,
       @Nullable String shardTemplate,
       @Nullable String filenameSuffix,
       boolean windowedWrites) {
-    // Pick the appropriate default policy based on whether windowed writes are being performed.
-    String defaultTemplate =
-        windowedWrites ? DEFAULT_WINDOWED_SHARD_TEMPLATE : DEFAULT_UNWINDOWED_SHARD_TEMPLATE;
-    return new DefaultFilenamePolicy(
-        NestedValueProvider.of(outputPrefix, new ExtractFilename()),
-        firstNonNull(shardTemplate, defaultTemplate),
-        firstNonNull(filenameSuffix, ""));
+    Params params = new Params().withBaseFilename(baseFilename);
+    if (shardTemplate != null) {
+      params = params.withShardTemplate(shardTemplate);
+    }
+    if (filenameSuffix != null) {
+      params = params.withSuffix(filenameSuffix);
+    }
+    if (windowedWrites) {
+      params = params.withWindowedWrites();
+    }
+    return fromParams(params);
   }
 
-  private final ValueProvider<String> prefix;
-  private final String shardTemplate;
-  private final String suffix;
+  /** Construct a {@link DefaultFilenamePolicy} from a {@link Params} object. */
+  public static DefaultFilenamePolicy fromParams(Params params) {
+    return new DefaultFilenamePolicy(params);
+  }
 
   /**
    * Constructs a fully qualified name from components.
    *
-   * <p>The name is built from a prefix, shard template (with shard numbers
-   * applied), and a suffix.  All components are required, but may be empty
-   * strings.
+   * <p>The name is built from a base filename, shard template (with shard numbers applied), and a
+   * suffix. All components are required, but may be empty strings.
    *
-   * <p>Within a shard template, repeating sequences of the letters "S" or "N"
-   * are replaced with the shard number, or number of shards respectively.
-   * "P" is replaced with by stringification of current pane.
-   * "W" is replaced by stringification of current window.
+   * <p>Within a shard template, repeating sequences of the letters "S" or "N" are replaced with the
+   * shard number, or number of shards, respectively. "P" is replaced by the stringification of the
+   * current pane. "W" is replaced by the stringification of the current window.
    *
-   * <p>The numbers are formatted with leading zeros to match the length of the
-   * repeated sequence of letters.
+   * <p>The numbers are formatted with leading zeros to match the length of the repeated sequence of
+   * letters.
    *
-   * <p>For example, if prefix = "output", shardTemplate = "-SSS-of-NNN", and
-   * suffix = ".txt", with shardNum = 1 and numShards = 100, the following is
-   * produced:  "output-001-of-100.txt".
+   * <p>For example, if baseFilename = "path/to/output", shardTemplate = "-SSS-of-NNN", and suffix =
+   * ".txt", with shardNum = 1 and numShards = 100, the following is produced:
+   * "path/to/output-001-of-100.txt".
    */
-  static String constructName(
-      String prefix, String shardTemplate, String suffix, int shardNum, int numShards,
-      String paneStr, String windowStr) {
+  static ResourceId constructName(
+      ResourceId baseFilename,
+      String shardTemplate,
+      String suffix,
+      int shardNum,
+      int numShards,
+      String paneStr,
+      String windowStr) {
+    String prefix = extractFilename(baseFilename);
     // Matcher API works with StringBuffer, rather than StringBuilder.
     StringBuffer sb = new StringBuffer();
     sb.append(prefix);
@@ -165,27 +277,37 @@ public final class DefaultFilenamePolicy extends FilenamePolicy {
     m.appendTail(sb);
 
     sb.append(suffix);
-    return sb.toString();
+    return baseFilename
+        .getCurrentDirectory()
+        .resolve(sb.toString(), StandardResolveOptions.RESOLVE_FILE);
   }
 
   @Override
   @Nullable
-  public ResourceId unwindowedFilename(ResourceId outputDirectory, Context context,
-      String extension) {
-    String filename = constructName(prefix.get(), shardTemplate, suffix, context.getShardNumber(),
-        context.getNumShards(), null, null) + extension;
-    return outputDirectory.resolve(filename, StandardResolveOptions.RESOLVE_FILE);
+  public ResourceId unwindowedFilename(Context context, OutputFileHints outputFileHints) {
+    return constructName(
+        params.baseFilename.get(),
+        params.shardTemplate,
+        params.suffix + outputFileHints.getSuggestedFilenameSuffix(),
+        context.getShardNumber(),
+        context.getNumShards(),
+        null,
+        null);
   }
 
   @Override
-  public ResourceId windowedFilename(ResourceId outputDirectory,
-      WindowedContext context, String extension) {
+  public ResourceId windowedFilename(WindowedContext context, OutputFileHints outputFileHints) {
     final PaneInfo paneInfo = context.getPaneInfo();
     String paneStr = paneInfoToString(paneInfo);
     String windowStr = windowToString(context.getWindow());
-    String filename = constructName(prefix.get(), shardTemplate, suffix, context.getShardNumber(),
-        context.getNumShards(), paneStr, windowStr) + extension;
-    return outputDirectory.resolve(filename, StandardResolveOptions.RESOLVE_FILE);
+    return constructName(
+        params.baseFilename.get(),
+        params.shardTemplate,
+        params.suffix + outputFileHints.getSuggestedFilenameSuffix(),
+        context.getShardNumber(),
+        context.getNumShards(),
+        paneStr,
+        windowStr);
   }
 
   /*
@@ -216,24 +338,32 @@ public final class DefaultFilenamePolicy extends FilenamePolicy {
   @Override
   public void populateDisplayData(DisplayData.Builder builder) {
     String filenamePattern;
-    if (prefix.isAccessible()) {
-      filenamePattern = String.format("%s%s%s", prefix.get(), shardTemplate, suffix);
+    if (params.baseFilename.isAccessible()) {
+      filenamePattern =
+          String.format("%s%s%s", params.baseFilename.get(), params.shardTemplate, params.suffix);
     } else {
-      filenamePattern = String.format("%s%s%s", prefix, shardTemplate, suffix);
+      filenamePattern =
+          String.format("%s%s%s", params.baseFilename, params.shardTemplate, params.suffix);
     }
+
+    String outputPrefixString =
+        params.baseFilename.isAccessible()
+            ? params.baseFilename.get().toString()
+            : params.baseFilename.toString();
+    builder.add(DisplayData.item("filenamePattern", filenamePattern).withLabel("Filename Pattern"));
+    builder.add(DisplayData.item("filePrefix", outputPrefixString).withLabel("Output File Prefix"));
+    builder.add(DisplayData.item("fileSuffix", params.suffix).withLabel("Output File Suffix"));
     builder.add(
-        DisplayData.item("filenamePattern", filenamePattern)
-            .withLabel("Filename Pattern"));
+        DisplayData.item("shardNameTemplate", params.shardTemplate)
+            .withLabel("Output Shard Name Template"));
   }
 
-  private static class ExtractFilename implements SerializableFunction<ResourceId, String> {
-    @Override
-    public String apply(ResourceId input) {
-      if (input.isDirectory()) {
-        return "";
-      } else {
-        return firstNonNull(input.getFilename(), "");
-      }
+  private static String extractFilename(ResourceId input) {
+    if (input.isDirectory()) {
+      return "";
+    } else {
+      return firstNonNull(input.getFilename(), "");
     }
   }
 }

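Under the new API, the old constructUsingStandardParameters call becomes a Params chain. A sketch for an unwindowed policy matching the worked example in the javadoc above ("path/to/output-001-of-100.txt"):

    // Hedged sketch of Params-based construction.
    DefaultFilenamePolicy policy =
        DefaultFilenamePolicy.fromParams(
            new Params()
                .withBaseFilename(
                    FileSystems.matchNewResource("/path/to/output", false))
                .withShardTemplate("-SSS-of-NNN")
                .withSuffix(".txt"));
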

[13/50] [abbrv] beam git commit: Fix javadoc generation for AmqpIO, CassandraIO and HCatalogIO

Posted by ta...@apache.org.
Fix javadoc generation for AmqpIO, CassandraIO and HCatalogIO


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/699d59a9
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/699d59a9
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/699d59a9

Branch: refs/heads/DSL_SQL
Commit: 699d59a96e8e11d8f617e76657e22d4afe2bfa12
Parents: 8512153
Author: Ismaël Mejía <ie...@gmail.com>
Authored: Thu Jul 6 17:37:33 2017 +0200
Committer: Tyler Akidau <ta...@apache.org>
Committed: Wed Jul 12 20:01:00 2017 -0700

----------------------------------------------------------------------
 pom.xml                   | 18 ++++++++++++++++++
 sdks/java/javadoc/pom.xml | 15 +++++++++++++++
 2 files changed, 33 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/699d59a9/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index fd01781..987760f 100644
--- a/pom.xml
+++ b/pom.xml
@@ -429,6 +429,18 @@
 
       <dependency>
         <groupId>org.apache.beam</groupId>
+        <artifactId>beam-sdks-java-io-amqp</artifactId>
+        <version>${project.version}</version>
+      </dependency>
+
+      <dependency>
+        <groupId>org.apache.beam</groupId>
+        <artifactId>beam-sdks-java-io-cassandra</artifactId>
+        <version>${project.version}</version>
+      </dependency>
+
+      <dependency>
+        <groupId>org.apache.beam</groupId>
         <artifactId>beam-sdks-java-io-elasticsearch</artifactId>
         <version>${project.version}</version>
       </dependency>
@@ -466,6 +478,12 @@
 
       <dependency>
         <groupId>org.apache.beam</groupId>
+        <artifactId>beam-sdks-java-io-hcatalog</artifactId>
+        <version>${project.version}</version>
+      </dependency>
+
+      <dependency>
+        <groupId>org.apache.beam</groupId>
         <artifactId>beam-sdks-java-io-jdbc</artifactId>
         <version>${project.version}</version>
       </dependency>

http://git-wip-us.apache.org/repos/asf/beam/blob/699d59a9/sdks/java/javadoc/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/javadoc/pom.xml b/sdks/java/javadoc/pom.xml
index ddb92cf..51109fb 100644
--- a/sdks/java/javadoc/pom.xml
+++ b/sdks/java/javadoc/pom.xml
@@ -99,6 +99,16 @@
 
     <dependency>
       <groupId>org.apache.beam</groupId>
+      <artifactId>beam-sdks-java-io-amqp</artifactId>
+    </dependency>
+
+    <dependency>
+      <groupId>org.apache.beam</groupId>
+      <artifactId>beam-sdks-java-io-cassandra</artifactId>
+    </dependency>
+
+    <dependency>
+      <groupId>org.apache.beam</groupId>
       <artifactId>beam-sdks-java-io-elasticsearch</artifactId>
     </dependency>
 
@@ -124,6 +134,11 @@
 
     <dependency>
       <groupId>org.apache.beam</groupId>
+      <artifactId>beam-sdks-java-io-hcatalog</artifactId>
+    </dependency>
+
+    <dependency>
+      <groupId>org.apache.beam</groupId>
       <artifactId>beam-sdks-java-io-jdbc</artifactId>
     </dependency>
 


[09/50] [abbrv] beam git commit: Fix PValue input in _PubSubReadEvaluator

Posted by ta...@apache.org.
Fix PValue input in _PubSubReadEvaluator


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/da3206c6
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/da3206c6
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/da3206c6

Branch: refs/heads/DSL_SQL
Commit: da3206c61d3e0c59ef8ac2cac85e2097f5db116a
Parents: d4fa33e
Author: Charles Chen <cc...@google.com>
Authored: Wed Jul 5 16:18:51 2017 -0700
Committer: Tyler Akidau <ta...@apache.org>
Committed: Wed Jul 12 20:00:59 2017 -0700

----------------------------------------------------------------------
 sdks/python/apache_beam/runners/direct/transform_evaluator.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/da3206c6/sdks/python/apache_beam/runners/direct/transform_evaluator.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/runners/direct/transform_evaluator.py b/sdks/python/apache_beam/runners/direct/transform_evaluator.py
index 641291d..cb2ace2 100644
--- a/sdks/python/apache_beam/runners/direct/transform_evaluator.py
+++ b/sdks/python/apache_beam/runners/direct/transform_evaluator.py
@@ -436,8 +436,9 @@ class _PubSubReadEvaluator(_TransformEvaluator):
       bundles = [bundle]
     else:
       bundles = []
-    input_pvalue = self._applied_ptransform.inputs
-    if not input_pvalue:
+    if self._applied_ptransform.inputs:
+      input_pvalue = self._applied_ptransform.inputs[0]
+    else:
       input_pvalue = pvalue.PBegin(self._applied_ptransform.transform.pipeline)
     unprocessed_bundle = self._evaluation_context.create_bundle(
         input_pvalue)


[17/50] [abbrv] beam git commit: Move DirectRunner knob for suppressing runner-determined sharding out of core SDK

Posted by ta...@apache.org.
Move DirectRunner knob for suppressing runner-determined sharding out of core SDK


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/81a96ab0
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/81a96ab0
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/81a96ab0

Branch: refs/heads/DSL_SQL
Commit: 81a96ab029584e08495d461fc573b49de97d18c5
Parents: 92eec58
Author: Kenneth Knowles <kl...@google.com>
Authored: Fri Jul 7 08:49:08 2017 -0700
Committer: Tyler Akidau <ta...@apache.org>
Committed: Wed Jul 12 20:01:00 2017 -0700

----------------------------------------------------------------------
 runners/direct-java/pom.xml                     |  2 +-
 .../beam/runners/direct/DirectRegistrar.java    |  2 +-
 .../beam/runners/direct/DirectRunner.java       |  5 +--
 .../beam/runners/direct/DirectTestOptions.java  | 42 ++++++++++++++++++++
 .../runners/direct/DirectRegistrarTest.java     |  2 +-
 .../beam/sdk/testing/TestPipelineOptions.java   | 10 -----
 6 files changed, 47 insertions(+), 16 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/81a96ab0/runners/direct-java/pom.xml
----------------------------------------------------------------------
diff --git a/runners/direct-java/pom.xml b/runners/direct-java/pom.xml
index 0e1f73a..e14e813 100644
--- a/runners/direct-java/pom.xml
+++ b/runners/direct-java/pom.xml
@@ -156,7 +156,7 @@
                 <beamTestPipelineOptions>
                   [
                     "--runner=DirectRunner",
-                    "--unitTest"
+                    "--runnerDeterminedSharding=false"
                   ]
                 </beamTestPipelineOptions>
               </systemPropertyVariables>

http://git-wip-us.apache.org/repos/asf/beam/blob/81a96ab0/runners/direct-java/src/main/java/org/apache/beam/runners/direct/DirectRegistrar.java
----------------------------------------------------------------------
diff --git a/runners/direct-java/src/main/java/org/apache/beam/runners/direct/DirectRegistrar.java b/runners/direct-java/src/main/java/org/apache/beam/runners/direct/DirectRegistrar.java
index 0e6fbab..53fb2f2 100644
--- a/runners/direct-java/src/main/java/org/apache/beam/runners/direct/DirectRegistrar.java
+++ b/runners/direct-java/src/main/java/org/apache/beam/runners/direct/DirectRegistrar.java
@@ -50,7 +50,7 @@ public class DirectRegistrar {
     @Override
     public Iterable<Class<? extends PipelineOptions>> getPipelineOptions() {
       return ImmutableList.<Class<? extends PipelineOptions>>of(
-          DirectOptions.class);
+          DirectOptions.class, DirectTestOptions.class);
     }
   }
 }

http://git-wip-us.apache.org/repos/asf/beam/blob/81a96ab0/runners/direct-java/src/main/java/org/apache/beam/runners/direct/DirectRunner.java
----------------------------------------------------------------------
diff --git a/runners/direct-java/src/main/java/org/apache/beam/runners/direct/DirectRunner.java b/runners/direct-java/src/main/java/org/apache/beam/runners/direct/DirectRunner.java
index a16e24d..7a221c4 100644
--- a/runners/direct-java/src/main/java/org/apache/beam/runners/direct/DirectRunner.java
+++ b/runners/direct-java/src/main/java/org/apache/beam/runners/direct/DirectRunner.java
@@ -43,7 +43,6 @@ import org.apache.beam.sdk.metrics.MetricResults;
 import org.apache.beam.sdk.metrics.MetricsEnvironment;
 import org.apache.beam.sdk.options.PipelineOptions;
 import org.apache.beam.sdk.runners.PTransformOverride;
-import org.apache.beam.sdk.testing.TestPipelineOptions;
 import org.apache.beam.sdk.transforms.PTransform;
 import org.apache.beam.sdk.transforms.ParDo;
 import org.apache.beam.sdk.transforms.ParDo.MultiOutput;
@@ -222,9 +221,9 @@ public class DirectRunner extends PipelineRunner<DirectPipelineResult> {
   @SuppressWarnings("rawtypes")
   @VisibleForTesting
   List<PTransformOverride> defaultTransformOverrides() {
-    TestPipelineOptions testOptions = options.as(TestPipelineOptions.class);
+    DirectTestOptions testOptions = options.as(DirectTestOptions.class);
     ImmutableList.Builder<PTransformOverride> builder = ImmutableList.builder();
-    if (!testOptions.isUnitTest()) {
+    if (testOptions.isRunnerDeterminedSharding()) {
       builder.add(
           PTransformOverride.of(
               PTransformMatchers.writeWithRunnerDeterminedSharding(),

http://git-wip-us.apache.org/repos/asf/beam/blob/81a96ab0/runners/direct-java/src/main/java/org/apache/beam/runners/direct/DirectTestOptions.java
----------------------------------------------------------------------
diff --git a/runners/direct-java/src/main/java/org/apache/beam/runners/direct/DirectTestOptions.java b/runners/direct-java/src/main/java/org/apache/beam/runners/direct/DirectTestOptions.java
new file mode 100644
index 0000000..a426443
--- /dev/null
+++ b/runners/direct-java/src/main/java/org/apache/beam/runners/direct/DirectTestOptions.java
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.runners.direct;
+
+import org.apache.beam.sdk.annotations.Internal;
+import org.apache.beam.sdk.options.ApplicationNameOptions;
+import org.apache.beam.sdk.options.Default;
+import org.apache.beam.sdk.options.Description;
+import org.apache.beam.sdk.options.Hidden;
+import org.apache.beam.sdk.options.PipelineOptions;
+
+/**
+ * Internal-only options for tweaking the behavior of the {@link DirectRunner} in ways that
+ * pipeline users should never need to.
+ *
+ * <p>Currently, the only use is to disable user-friendly overrides that prevent fully testing
+ * certain composite transforms.
+ */
+@Internal
+@Hidden
+public interface DirectTestOptions extends PipelineOptions, ApplicationNameOptions {
+  @Default.Boolean(true)
+  @Description(
+      "Indicates whether the DirectRunner may apply runner-determined sharding to file writes.")
+  boolean isRunnerDeterminedSharding();
+  void setRunnerDeterminedSharding(boolean goAheadAndDetermineSharding);
+}

http://git-wip-us.apache.org/repos/asf/beam/blob/81a96ab0/runners/direct-java/src/test/java/org/apache/beam/runners/direct/DirectRegistrarTest.java
----------------------------------------------------------------------
diff --git a/runners/direct-java/src/test/java/org/apache/beam/runners/direct/DirectRegistrarTest.java b/runners/direct-java/src/test/java/org/apache/beam/runners/direct/DirectRegistrarTest.java
index 603e43e..4b909bc 100644
--- a/runners/direct-java/src/test/java/org/apache/beam/runners/direct/DirectRegistrarTest.java
+++ b/runners/direct-java/src/test/java/org/apache/beam/runners/direct/DirectRegistrarTest.java
@@ -37,7 +37,7 @@ public class DirectRegistrarTest {
   @Test
   public void testCorrectOptionsAreReturned() {
     assertEquals(
-        ImmutableList.of(DirectOptions.class),
+        ImmutableList.of(DirectOptions.class, DirectTestOptions.class),
         new Options().getPipelineOptions());
   }
 

http://git-wip-us.apache.org/repos/asf/beam/blob/81a96ab0/sdks/java/core/src/main/java/org/apache/beam/sdk/testing/TestPipelineOptions.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/testing/TestPipelineOptions.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/testing/TestPipelineOptions.java
index 904f3a2..206bc1f 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/testing/TestPipelineOptions.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/testing/TestPipelineOptions.java
@@ -20,10 +20,8 @@ package org.apache.beam.sdk.testing;
 import com.fasterxml.jackson.annotation.JsonIgnore;
 import javax.annotation.Nullable;
 import org.apache.beam.sdk.PipelineResult;
-import org.apache.beam.sdk.annotations.Internal;
 import org.apache.beam.sdk.options.Default;
 import org.apache.beam.sdk.options.DefaultValueFactory;
-import org.apache.beam.sdk.options.Hidden;
 import org.apache.beam.sdk.options.PipelineOptions;
 import org.hamcrest.BaseMatcher;
 import org.hamcrest.Description;
@@ -52,14 +50,6 @@ public interface TestPipelineOptions extends PipelineOptions {
   Long getTestTimeoutSeconds();
   void setTestTimeoutSeconds(Long value);
 
-  @Default.Boolean(false)
-  @Internal
-  @Hidden
-  @org.apache.beam.sdk.options.Description(
-      "Indicates whether this is an automatically-run unit test.")
-  boolean isUnitTest();
-  void setUnitTest(boolean unitTest);
-
   /**
    * Factory for {@link PipelineResult} matchers which always pass.
    */
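
End to end, the new knob is exercised with the same two arguments the surefire configuration above passes. A minimal sketch of a test harness building such options (illustrative, not part of the commit):

    import org.apache.beam.runners.direct.DirectTestOptions;
    import org.apache.beam.sdk.Pipeline;
    import org.apache.beam.sdk.options.PipelineOptionsFactory;

    public class DirectShardingSketch {
      public static void main(String[] args) {
        // DirectTestOptions is registered by DirectRegistrar, so the flag
        // parses like any other pipeline option.
        DirectTestOptions options =
            PipelineOptionsFactory.fromArgs(
                    "--runner=DirectRunner", "--runnerDeterminedSharding=false")
                .as(DirectTestOptions.class);
        // With the flag off, defaultTransformOverrides() skips the
        // writeWithRunnerDeterminedSharding override shown above.
        Pipeline pipeline = Pipeline.create(options);
        pipeline.run().waitUntilFinish();
      }
    }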


[26/50] [abbrv] beam git commit: Enable SplittableParDo on rehydrated ParDo transform

Posted by ta...@apache.org.
Enable SplittableParDo on rehydrated ParDo transform


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/e5ca058b
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/e5ca058b
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/e5ca058b

Branch: refs/heads/DSL_SQL
Commit: e5ca058bd7ad5f2150fef3e57649bcfb487a711f
Parents: bdece9d
Author: Kenneth Knowles <kl...@google.com>
Authored: Thu Jun 8 14:27:02 2017 -0700
Committer: Tyler Akidau <ta...@apache.org>
Committed: Wed Jul 12 20:01:01 2017 -0700

----------------------------------------------------------------------
 .../core/construction/SplittableParDo.java      | 25 ++++++++++++++
 .../direct/ParDoMultiOverrideFactory.java       | 36 ++++++++++++++------
 .../flink/FlinkStreamingPipelineTranslator.java |  2 +-
 3 files changed, 52 insertions(+), 11 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/e5ca058b/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/SplittableParDo.java
----------------------------------------------------------------------
diff --git a/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/SplittableParDo.java b/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/SplittableParDo.java
index f31b495..e71187b 100644
--- a/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/SplittableParDo.java
+++ b/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/SplittableParDo.java
@@ -19,6 +19,7 @@ package org.apache.beam.runners.core.construction;
 
 import static com.google.common.base.Preconditions.checkArgument;
 
+import java.io.IOException;
 import java.util.List;
 import java.util.Map;
 import java.util.UUID;
@@ -26,6 +27,7 @@ import org.apache.beam.runners.core.construction.PTransformTranslation.RawPTrans
 import org.apache.beam.sdk.annotations.Experimental;
 import org.apache.beam.sdk.coders.Coder;
 import org.apache.beam.sdk.coders.KvCoder;
+import org.apache.beam.sdk.runners.AppliedPTransform;
 import org.apache.beam.sdk.transforms.DoFn;
 import org.apache.beam.sdk.transforms.PTransform;
 import org.apache.beam.sdk.transforms.ParDo;
@@ -103,6 +105,9 @@ public class SplittableParDo<InputT, OutputT, RestrictionT>
   public static <InputT, OutputT> SplittableParDo<InputT, OutputT, ?> forJavaParDo(
       ParDo.MultiOutput<InputT, OutputT> parDo) {
     checkArgument(parDo != null, "parDo must not be null");
+    checkArgument(
+        DoFnSignatures.getSignature(parDo.getFn().getClass()).processElement().isSplittable(),
+        "fn must be a splittable DoFn");
     return new SplittableParDo(
         parDo.getFn(),
         parDo.getMainOutputTag(),
@@ -110,6 +115,26 @@ public class SplittableParDo<InputT, OutputT, RestrictionT>
         parDo.getAdditionalOutputTags());
   }
 
+  /**
+   * Creates the transform for a {@link ParDo}-compatible {@link AppliedPTransform}.
+   *
+   * <p>The input may generally be a deserialized transform so it may not actually be a {@link
+   * ParDo}. Instead {@link ParDoTranslation} will be used to extract fields.
+   */
+  public static SplittableParDo<?, ?, ?> forAppliedParDo(AppliedPTransform<?, ?, ?> parDo) {
+    checkArgument(parDo != null, "parDo must not be null");
+
+    try {
+      return new SplittableParDo<>(
+          ParDoTranslation.getDoFn(parDo),
+          (TupleTag) ParDoTranslation.getMainOutputTag(parDo),
+          ParDoTranslation.getSideInputs(parDo),
+          ParDoTranslation.getAdditionalOutputTags(parDo));
+    } catch (IOException exc) {
+      throw new RuntimeException(exc);
+    }
+  }
+
   @Override
   public PCollectionTuple expand(PCollection<InputT> input) {
     Coder<RestrictionT> restrictionCoder =

http://git-wip-us.apache.org/repos/asf/beam/blob/e5ca058b/runners/direct-java/src/main/java/org/apache/beam/runners/direct/ParDoMultiOverrideFactory.java
----------------------------------------------------------------------
diff --git a/runners/direct-java/src/main/java/org/apache/beam/runners/direct/ParDoMultiOverrideFactory.java b/runners/direct-java/src/main/java/org/apache/beam/runners/direct/ParDoMultiOverrideFactory.java
index 2904bc1..8881967 100644
--- a/runners/direct-java/src/main/java/org/apache/beam/runners/direct/ParDoMultiOverrideFactory.java
+++ b/runners/direct-java/src/main/java/org/apache/beam/runners/direct/ParDoMultiOverrideFactory.java
@@ -19,6 +19,7 @@ package org.apache.beam.runners.direct;
 
 import static com.google.common.base.Preconditions.checkState;
 
+import java.io.IOException;
 import java.util.List;
 import java.util.Map;
 import org.apache.beam.runners.core.KeyedWorkItem;
@@ -26,6 +27,7 @@ import org.apache.beam.runners.core.KeyedWorkItemCoder;
 import org.apache.beam.runners.core.KeyedWorkItems;
 import org.apache.beam.runners.core.construction.PTransformReplacements;
 import org.apache.beam.runners.core.construction.PTransformTranslation;
+import org.apache.beam.runners.core.construction.ParDoTranslation;
 import org.apache.beam.runners.core.construction.ReplacementOutputs;
 import org.apache.beam.runners.core.construction.SplittableParDo;
 import org.apache.beam.sdk.coders.Coder;
@@ -62,29 +64,43 @@ import org.apache.beam.sdk.values.WindowingStrategy;
  */
 class ParDoMultiOverrideFactory<InputT, OutputT>
     implements PTransformOverrideFactory<
-        PCollection<? extends InputT>, PCollectionTuple, MultiOutput<InputT, OutputT>> {
+        PCollection<? extends InputT>, PCollectionTuple,
+        PTransform<PCollection<? extends InputT>, PCollectionTuple>> {
   @Override
   public PTransformReplacement<PCollection<? extends InputT>, PCollectionTuple>
       getReplacementTransform(
           AppliedPTransform<
-                  PCollection<? extends InputT>, PCollectionTuple, MultiOutput<InputT, OutputT>>
-              transform) {
+                  PCollection<? extends InputT>, PCollectionTuple,
+                  PTransform<PCollection<? extends InputT>, PCollectionTuple>>
+              application) {
     return PTransformReplacement.of(
-        PTransformReplacements.getSingletonMainInput(transform),
-        getReplacementTransform(transform.getTransform()));
+        PTransformReplacements.getSingletonMainInput(application),
+        getReplacementForApplication(application));
   }
 
   @SuppressWarnings("unchecked")
-  private PTransform<PCollection<? extends InputT>, PCollectionTuple> getReplacementTransform(
-      MultiOutput<InputT, OutputT> transform) {
+  private PTransform<PCollection<? extends InputT>, PCollectionTuple> getReplacementForApplication(
+      AppliedPTransform<
+              PCollection<? extends InputT>, PCollectionTuple,
+              PTransform<PCollection<? extends InputT>, PCollectionTuple>>
+          application) {
+
+    DoFn<InputT, OutputT> fn;
+    try {
+      fn = (DoFn<InputT, OutputT>) ParDoTranslation.getDoFn(application);
+    } catch (IOException exc) {
+      throw new RuntimeException(exc);
+    }
 
-    DoFn<InputT, OutputT> fn = transform.getFn();
     DoFnSignature signature = DoFnSignatures.getSignature(fn.getClass());
     if (signature.processElement().isSplittable()) {
-      return (PTransform) SplittableParDo.forJavaParDo(transform);
+      return (PTransform) SplittableParDo.forAppliedParDo(application);
     } else if (signature.stateDeclarations().size() > 0
         || signature.timerDeclarations().size() > 0) {
 
+      MultiOutput<InputT, OutputT> transform =
+          (MultiOutput<InputT, OutputT>) application.getTransform();
+
       // Based on the fact that the signature is stateful, DoFnSignatures ensures
       // that it is also keyed
       return new GbkThenStatefulParDo(
@@ -93,7 +109,7 @@ class ParDoMultiOverrideFactory<InputT, OutputT>
           transform.getAdditionalOutputTags(),
           transform.getSideInputs());
     } else {
-      return transform;
+      return application.getTransform();
     }
   }
 

http://git-wip-us.apache.org/repos/asf/beam/blob/e5ca058b/runners/flink/src/main/java/org/apache/beam/runners/flink/FlinkStreamingPipelineTranslator.java
----------------------------------------------------------------------
diff --git a/runners/flink/src/main/java/org/apache/beam/runners/flink/FlinkStreamingPipelineTranslator.java b/runners/flink/src/main/java/org/apache/beam/runners/flink/FlinkStreamingPipelineTranslator.java
index ebc9345..f733e2e 100644
--- a/runners/flink/src/main/java/org/apache/beam/runners/flink/FlinkStreamingPipelineTranslator.java
+++ b/runners/flink/src/main/java/org/apache/beam/runners/flink/FlinkStreamingPipelineTranslator.java
@@ -188,7 +188,7 @@ class FlinkStreamingPipelineTranslator extends FlinkPipelineTranslator {
                 transform) {
       return PTransformReplacement.of(
           PTransformReplacements.getSingletonMainInput(transform),
-          SplittableParDo.forJavaParDo(transform.getTransform()));
+          (SplittableParDo<InputT, OutputT, ?>) SplittableParDo.forAppliedParDo(transform));
     }
 
     @Override
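
The pattern the Flink translator adopts here generalizes: any runner override handed a rehydrated transform should go through the AppliedPTransform rather than casting to ParDo.MultiOutput. A condensed sketch of that call shape (illustrative helper, not from the commit):

    import org.apache.beam.runners.core.construction.SplittableParDo;
    import org.apache.beam.sdk.runners.AppliedPTransform;

    final class SplittableOverrideSketch {
      /** Expands an applied, possibly rehydrated, splittable ParDo. */
      static SplittableParDo<?, ?, ?> expand(AppliedPTransform<?, ?, ?> parDo) {
        // forAppliedParDo pulls the DoFn, output tags, and side inputs out via
        // ParDoTranslation, so it also works when the transform came from a proto.
        return SplittableParDo.forAppliedParDo(parDo);
      }
    }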


[45/50] [abbrv] beam git commit: Reflect #assignsToOneWindow in WindowingStrategy

Posted by ta...@apache.org.
Reflect #assignsToOneWindow in WindowingStrategy


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/8ae2a790
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/8ae2a790
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/8ae2a790

Branch: refs/heads/DSL_SQL
Commit: 8ae2a790978267ea48e9c3601ba6551d1141a11e
Parents: 83f31e9
Author: Thomas Groh <tg...@google.com>
Authored: Tue Jun 27 15:03:11 2017 -0700
Committer: Tyler Akidau <ta...@apache.org>
Committed: Wed Jul 12 20:01:02 2017 -0700

----------------------------------------------------------------------
 .../runners/core/construction/WindowingStrategyTranslation.java | 1 +
 .../core/construction/WindowingStrategyTranslationTest.java     | 3 +++
 sdks/common/runner-api/src/main/proto/beam_runner_api.proto     | 5 +++++
 3 files changed, 9 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/8ae2a790/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/WindowingStrategyTranslation.java
----------------------------------------------------------------------
diff --git a/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/WindowingStrategyTranslation.java b/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/WindowingStrategyTranslation.java
index 88ebc01..1456a3f 100644
--- a/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/WindowingStrategyTranslation.java
+++ b/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/WindowingStrategyTranslation.java
@@ -307,6 +307,7 @@ public class WindowingStrategyTranslation implements Serializable {
             .setAllowedLateness(windowingStrategy.getAllowedLateness().getMillis())
             .setTrigger(TriggerTranslation.toProto(windowingStrategy.getTrigger()))
             .setWindowFn(windowFnSpec)
+            .setAssignsToOneWindow(windowingStrategy.getWindowFn().assignsToOneWindow())
             .setWindowCoderId(
                 components.registerCoder(windowingStrategy.getWindowFn().windowCoder()));
 

http://git-wip-us.apache.org/repos/asf/beam/blob/8ae2a790/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/WindowingStrategyTranslationTest.java
----------------------------------------------------------------------
diff --git a/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/WindowingStrategyTranslationTest.java b/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/WindowingStrategyTranslationTest.java
index e406545..7a57fd7 100644
--- a/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/WindowingStrategyTranslationTest.java
+++ b/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/WindowingStrategyTranslationTest.java
@@ -116,5 +116,8 @@ public class WindowingStrategyTranslationTest {
 
     protoComponents.getCodersOrThrow(
         components.registerCoder(windowingStrategy.getWindowFn().windowCoder()));
+    assertThat(
+        proto.getAssignsToOneWindow(),
+        equalTo(windowingStrategy.getWindowFn().assignsToOneWindow()));
   }
 }

http://git-wip-us.apache.org/repos/asf/beam/blob/8ae2a790/sdks/common/runner-api/src/main/proto/beam_runner_api.proto
----------------------------------------------------------------------
diff --git a/sdks/common/runner-api/src/main/proto/beam_runner_api.proto b/sdks/common/runner-api/src/main/proto/beam_runner_api.proto
index 1f74afb..711da2a 100644
--- a/sdks/common/runner-api/src/main/proto/beam_runner_api.proto
+++ b/sdks/common/runner-api/src/main/proto/beam_runner_api.proto
@@ -439,6 +439,11 @@ message WindowingStrategy {
 
   // (Required) Indicate whether empty on-time panes should be omitted.
   OnTimeBehavior OnTimeBehavior = 9;
+
+  // (Required) Whether or not the window fn assigns inputs to exactly one window
+  //
+  // This knowledge is required for some optimizations
+  bool assigns_to_one_window = 10;
 }
 
 // Whether or not a PCollection's WindowFn is non-merging, merging, or
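
For intuition about what the new field encodes (a sketch, not part of the commit): a partitioning WindowFn such as FixedWindows places every element in exactly one window, while overlapping SlidingWindows replicates elements across windows, and assignsToOneWindow lets a runner tell the two apart without knowing the concrete WindowFn class.

    import org.apache.beam.sdk.transforms.windowing.FixedWindows;
    import org.apache.beam.sdk.transforms.windowing.SlidingWindows;
    import org.joda.time.Duration;

    public class AssignsToOneWindowSketch {
      public static void main(String[] args) {
        FixedWindows fixed = FixedWindows.of(Duration.standardMinutes(1));
        SlidingWindows sliding =
            SlidingWindows.of(Duration.standardMinutes(10)).every(Duration.standardMinutes(1));
        System.out.println(fixed.assignsToOneWindow());   // true: one window per element
        System.out.println(sliding.assignsToOneWindow()); // false: windows overlap
      }
    }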


[41/50] [abbrv] beam git commit: Remove dead (and wrong) viewFromProto overload

Posted by ta...@apache.org.
Remove dead (and wrong) viewFromProto overload


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/2e42ae41
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/2e42ae41
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/2e42ae41

Branch: refs/heads/DSL_SQL
Commit: 2e42ae41babcf42db5df8320f0823d6040f559cb
Parents: f1defd1
Author: Kenneth Knowles <kl...@google.com>
Authored: Tue Jul 11 10:09:12 2017 -0700
Committer: Tyler Akidau <ta...@apache.org>
Committed: Wed Jul 12 20:01:02 2017 -0700

----------------------------------------------------------------------
 .../core/construction/ParDoTranslation.java     | 21 --------------------
 .../core/construction/ParDoTranslationTest.java |  2 +-
 2 files changed, 1 insertion(+), 22 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/2e42ae41/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/ParDoTranslation.java
----------------------------------------------------------------------
diff --git a/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/ParDoTranslation.java b/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/ParDoTranslation.java
index 90c9aad..03f29ff 100644
--- a/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/ParDoTranslation.java
+++ b/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/ParDoTranslation.java
@@ -41,7 +41,6 @@ import java.util.List;
 import java.util.Map;
 import java.util.Set;
 import org.apache.beam.runners.core.construction.PTransformTranslation.TransformPayloadTranslator;
-import org.apache.beam.sdk.Pipeline;
 import org.apache.beam.sdk.coders.Coder;
 import org.apache.beam.sdk.coders.IterableCoder;
 import org.apache.beam.sdk.common.runner.v1.RunnerApi;
@@ -509,26 +508,6 @@ public class ParDoTranslation {
     return builder.build();
   }
 
-  public static PCollectionView<?> viewFromProto(
-      Pipeline pipeline,
-      SideInput sideInput,
-      String localName,
-      RunnerApi.PTransform parDoTransform,
-      Components components)
-      throws IOException {
-
-    String pCollectionId = parDoTransform.getInputsOrThrow(localName);
-
-    // This may be a PCollection defined in another language, but we should be
-    // able to rehydrate it enough to stick it in a side input. The coder may not
-    // be grokkable in Java.
-    PCollection<?> pCollection =
-        PCollectionTranslation.fromProto(
-            pipeline, components.getPcollectionsOrThrow(pCollectionId), components);
-
-    return viewFromProto(sideInput, localName, pCollection, parDoTransform, components);
-  }
-
   /**
    * Create a {@link PCollectionView} from a side input spec and an already-deserialized {@link
    * PCollection} that should be wired up.

http://git-wip-us.apache.org/repos/asf/beam/blob/2e42ae41/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/ParDoTranslationTest.java
----------------------------------------------------------------------
diff --git a/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/ParDoTranslationTest.java b/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/ParDoTranslationTest.java
index 6fdf9d6..a87a16d 100644
--- a/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/ParDoTranslationTest.java
+++ b/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/ParDoTranslationTest.java
@@ -162,9 +162,9 @@ public class ParDoTranslationTest {
         SideInput sideInput = parDoPayload.getSideInputsOrThrow(view.getTagInternal().getId());
         PCollectionView<?> restoredView =
             ParDoTranslation.viewFromProto(
-                rehydratedPipeline,
                 sideInput,
                 view.getTagInternal().getId(),
+                view.getPCollection(),
                 protoTransform,
                 protoComponents);
         assertThat(restoredView.getTagInternal(), equalTo(view.getTagInternal()));


[11/50] [abbrv] beam git commit: Update SDK dependencies

Posted by ta...@apache.org.
Update SDK dependencies


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/a7cad601
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/a7cad601
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/a7cad601

Branch: refs/heads/DSL_SQL
Commit: a7cad6016ea1471afeeb64885a8d8bb60a8fcd59
Parents: 4bf1615
Author: Ahmet Altay <al...@google.com>
Authored: Wed Jul 5 14:34:07 2017 -0700
Committer: Tyler Akidau <ta...@apache.org>
Committed: Wed Jul 12 20:00:59 2017 -0700

----------------------------------------------------------------------
 sdks/python/setup.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/a7cad601/sdks/python/setup.py
----------------------------------------------------------------------
diff --git a/sdks/python/setup.py b/sdks/python/setup.py
index 6646a58..8a0c9ae 100644
--- a/sdks/python/setup.py
+++ b/sdks/python/setup.py
@@ -120,9 +120,9 @@ GCP_REQUIREMENTS = [
   'google-apitools>=0.5.10,<=0.5.11',
   'proto-google-cloud-datastore-v1>=0.90.0,<=0.90.4',
   'googledatastore==7.0.1',
-  'google-cloud-pubsub==0.25.0',
+  'google-cloud-pubsub==0.26.0',
   # GCP packages required by tests
-  'google-cloud-bigquery>=0.23.0,<0.25.0',
+  'google-cloud-bigquery>=0.23.0,<0.26.0',
 ]
 
 


[50/50] [abbrv] beam git commit: [BEAM-2610] This closes #3553

Posted by ta...@apache.org.
[BEAM-2610] This closes #3553


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/ec494f67
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/ec494f67
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/ec494f67

Branch: refs/heads/DSL_SQL
Commit: ec494f675aa73fbdc7929f9592f33951941962b0
Parents: d89d1ee 4f7f169
Author: Tyler Akidau <ta...@apache.org>
Authored: Wed Jul 12 20:02:22 2017 -0700
Committer: Tyler Akidau <ta...@apache.org>
Committed: Wed Jul 12 20:02:22 2017 -0700

----------------------------------------------------------------------
 .gitignore                                      |   2 +-
 .../jenkins/common_job_properties.groovy        |   9 +-
 .../job_beam_PerformanceTests_Python.groovy     |  58 ++
 ..._beam_PostCommit_Java_JDKVersionsTest.groovy |   2 +
 ..._PostCommit_Java_MavenInstall_Windows.groovy |   3 +-
 .../job_beam_PreCommit_Website_Merge.groovy     |  59 ++
 examples/java/pom.xml                           |  20 +-
 .../examples/common/WriteOneFilePerWindow.java  |  52 +-
 .../beam/examples/WindowedWordCountIT.java      |   4 +-
 examples/java8/pom.xml                          |  20 +-
 .../complete/game/utils/WriteToText.java        |  43 +-
 .../examples/complete/game/LeaderBoardTest.java |   2 +
 examples/pom.xml                                |   2 +-
 pom.xml                                         | 123 +++-
 runners/apex/pom.xml                            |  20 +-
 .../apache/beam/runners/apex/ApexRunner.java    |  61 +-
 .../translation/ApexPipelineTranslator.java     |  16 +-
 .../apex/translation/TranslationContext.java    |   4 +-
 .../operators/ApexParDoOperator.java            |  21 +-
 .../runners/apex/examples/WordCountTest.java    |   8 +-
 .../utils/ApexStateInternalsTest.java           | 411 +++--------
 runners/core-construction-java/pom.xml          |   2 +-
 .../CreatePCollectionViewTranslation.java       |   4 +-
 .../construction/ElementAndRestriction.java     |  42 --
 .../ElementAndRestrictionCoder.java             |  88 ---
 .../construction/PCollectionTranslation.java    |  16 +
 .../core/construction/PTransformMatchers.java   | 109 ++-
 .../construction/PTransformTranslation.java     |   7 +-
 .../core/construction/ParDoTranslation.java     |  82 ++-
 .../construction/RunnerPCollectionView.java     |  31 +-
 .../core/construction/SplittableParDo.java      | 124 +++-
 .../construction/TestStreamTranslation.java     |  49 +-
 .../core/construction/TransformInputs.java      |  50 ++
 .../WindowingStrategyTranslation.java           |  27 +-
 .../construction/WriteFilesTranslation.java     |  67 +-
 .../ElementAndRestrictionCoderTest.java         | 126 ----
 .../PCollectionTranslationTest.java             |  22 +
 .../construction/PTransformMatchersTest.java    |  54 +-
 .../core/construction/ParDoTranslationTest.java |  28 +-
 .../core/construction/SplittableParDoTest.java  |  18 +-
 .../core/construction/TransformInputsTest.java  | 166 +++++
 .../WindowingStrategyTranslationTest.java       |   3 +
 .../construction/WriteFilesTranslationTest.java |  62 +-
 runners/core-java/pom.xml                       |   2 +-
 .../core/LateDataDroppingDoFnRunner.java        |  33 +-
 ...eBoundedSplittableProcessElementInvoker.java |  40 +-
 .../beam/runners/core/ProcessFnRunner.java      |  16 +-
 .../beam/runners/core/ReduceFnRunner.java       | 135 ++--
 .../beam/runners/core/SimpleDoFnRunner.java     |  20 +
 .../core/SplittableParDoViaKeyedWorkItems.java  |  58 +-
 .../core/SplittableProcessElementInvoker.java   |  25 +-
 .../beam/runners/core/SystemReduceFn.java       |   6 +
 .../core/triggers/AfterAllStateMachine.java     |  25 +-
 .../AfterDelayFromFirstElementStateMachine.java |   6 +-
 .../core/triggers/AfterFirstStateMachine.java   |  20 +-
 .../core/triggers/AfterPaneStateMachine.java    |   6 +-
 .../triggers/AfterWatermarkStateMachine.java    |   7 +-
 .../triggers/ExecutableTriggerStateMachine.java |  23 +-
 .../core/triggers/NeverStateMachine.java        |   5 +-
 .../core/triggers/TriggerStateMachine.java      |  27 -
 .../core/InMemoryStateInternalsTest.java        | 569 ++-------------
 ...ndedSplittableProcessElementInvokerTest.java |  47 +-
 .../beam/runners/core/ReduceFnRunnerTest.java   | 374 +++++++++-
 .../beam/runners/core/ReduceFnTester.java       |  48 +-
 .../core/SplittableParDoProcessFnTest.java      | 117 ++-
 .../beam/runners/core/StateInternalsTest.java   | 613 ++++++++++++++++
 .../beam/runners/core/WindowMatchers.java       |  15 +
 .../triggers/AfterFirstStateMachineTest.java    |   5 +-
 .../AfterWatermarkStateMachineTest.java         |   7 +-
 .../core/triggers/StubTriggerStateMachine.java  |   7 +-
 runners/direct-java/pom.xml                     |   7 +-
 .../beam/runners/direct/CommittedResult.java    |  12 +-
 .../apache/beam/runners/direct/DirectGraph.java |  38 +-
 .../beam/runners/direct/DirectGraphVisitor.java |  48 +-
 .../beam/runners/direct/DirectGroupByKey.java   |  13 +-
 .../direct/DirectGroupByKeyOverrideFactory.java |  14 +-
 .../beam/runners/direct/DirectRegistrar.java    |   2 +-
 .../beam/runners/direct/DirectRunner.java       |  64 +-
 .../beam/runners/direct/DirectTestOptions.java  |  42 ++
 .../beam/runners/direct/EvaluationContext.java  |  26 +-
 .../direct/ExecutorServiceParallelExecutor.java |  27 +-
 .../runners/direct/ParDoEvaluatorFactory.java   |   9 +-
 .../direct/ParDoMultiOverrideFactory.java       | 121 +++-
 ...littableProcessElementsEvaluatorFactory.java |  37 +-
 .../direct/StatefulParDoEvaluatorFactory.java   |  12 +-
 .../direct/TestStreamEvaluatorFactory.java      |  20 +-
 .../runners/direct/ViewEvaluatorFactory.java    |   8 +-
 .../runners/direct/ViewOverrideFactory.java     |  69 +-
 .../beam/runners/direct/WatermarkManager.java   |  18 +-
 .../direct/WriteWithShardingFactory.java        |  34 +-
 .../runners/direct/CommittedResultTest.java     |  17 +-
 .../runners/direct/DirectGraphVisitorTest.java  |  10 +-
 .../beam/runners/direct/DirectGraphs.java       |   7 +
 .../runners/direct/DirectRegistrarTest.java     |   2 +-
 .../runners/direct/EvaluationContextTest.java   |   7 +-
 .../ImmutabilityEnforcementFactoryTest.java     |   4 +-
 .../beam/runners/direct/ParDoEvaluatorTest.java |   7 +-
 .../StatefulParDoEvaluatorFactoryTest.java      |  65 +-
 .../runners/direct/TransformExecutorTest.java   |  12 +-
 .../direct/ViewEvaluatorFactoryTest.java        |   8 +-
 .../runners/direct/ViewOverrideFactoryTest.java |  37 +-
 .../direct/WatermarkCallbackExecutorTest.java   |   1 +
 .../runners/direct/WatermarkManagerTest.java    |  16 +-
 .../direct/WriteWithShardingFactoryTest.java    |  44 +-
 runners/flink/pom.xml                           |  11 +-
 .../runners/flink/CreateStreamingFlinkView.java | 154 ++++
 .../flink/FlinkBatchTranslationContext.java     |   3 +-
 .../FlinkPipelineExecutionEnvironment.java      |   2 +
 .../flink/FlinkStreamingPipelineTranslator.java |  86 +--
 .../FlinkStreamingTransformTranslators.java     |  36 +-
 .../flink/FlinkStreamingTranslationContext.java |   3 +-
 .../flink/FlinkStreamingViewOverrides.java      | 372 ----------
 .../runners/flink/FlinkTransformOverrides.java  |  53 ++
 .../streaming/SplittableDoFnOperator.java       |  16 +-
 .../streaming/state/FlinkStateInternals.java    | 425 +++++------
 .../FlinkBroadcastStateInternalsTest.java       | 242 ++-----
 .../FlinkKeyGroupStateInternalsTest.java        | 359 +++++-----
 .../streaming/FlinkSplitStateInternalsTest.java | 132 ++--
 .../streaming/FlinkStateInternalsTest.java      | 343 +--------
 runners/google-cloud-dataflow-java/pom.xml      |  10 +-
 .../dataflow/BatchStatefulParDoOverrides.java   |   4 +
 .../runners/dataflow/BatchViewOverrides.java    | 182 ++---
 .../runners/dataflow/CreateDataflowView.java    |   8 +-
 .../dataflow/DataflowPipelineTranslator.java    |  62 +-
 .../beam/runners/dataflow/DataflowRunner.java   | 133 +++-
 .../dataflow/SplittableParDoOverrides.java      |  76 ++
 .../dataflow/StreamingViewOverrides.java        |  10 +-
 .../runners/dataflow/TransformTranslator.java   |   4 +-
 .../runners/dataflow/util/PropertyNames.java    |   1 +
 .../beam/runners/dataflow/util/TimeUtil.java    |  24 +-
 .../DataflowPipelineTranslatorTest.java         |  95 ++-
 .../runners/dataflow/DataflowRunnerTest.java    | 198 +++++-
 .../runners/dataflow/util/TimeUtilTest.java     |   6 +
 runners/pom.xml                                 |   2 +-
 runners/spark/pom.xml                           |  70 +-
 .../spark/SparkNativePipelineVisitor.java       |   3 +-
 .../apache/beam/runners/spark/SparkRunner.java  |   9 +-
 .../beam/runners/spark/TestSparkRunner.java     |   2 +-
 .../SparkGroupAlsoByWindowViaWindowSet.java     |   6 +-
 .../spark/stateful/SparkTimerInternals.java     |  18 +-
 .../spark/translation/EvaluationContext.java    |   4 +-
 .../spark/translation/TransformTranslator.java  |  50 +-
 .../spark/util/GlobalWatermarkHolder.java       | 127 +++-
 .../spark/GlobalWatermarkHolderTest.java        |  18 +-
 .../runners/spark/SparkRunnerDebuggerTest.java  |  26 +-
 .../spark/stateful/SparkStateInternalsTest.java |  66 ++
 .../spark/translation/StorageLevelTest.java     |   4 +-
 sdks/common/fn-api/pom.xml                      |   2 +-
 .../fn-api/src/main/proto/beam_fn_api.proto     | 237 ++----
 sdks/common/pom.xml                             |   2 +-
 sdks/common/runner-api/pom.xml                  |   2 +-
 .../src/main/proto/beam_runner_api.proto        |  26 +-
 sdks/java/build-tools/pom.xml                   |   2 +-
 .../src/main/resources/beam/findbugs-filter.xml |   9 +
 sdks/java/core/pom.xml                          |   2 +-
 .../apache/beam/sdk/coders/ShardedKeyCoder.java |  66 ++
 .../java/org/apache/beam/sdk/io/AvroIO.java     | 220 +++---
 .../java/org/apache/beam/sdk/io/AvroSink.java   |  32 +-
 .../apache/beam/sdk/io/CompressedSource.java    |  40 +-
 .../beam/sdk/io/DefaultFilenamePolicy.java      | 274 +++++--
 .../beam/sdk/io/DynamicFileDestinations.java    | 115 +++
 .../org/apache/beam/sdk/io/FileBasedSink.java   | 513 +++++++------
 .../apache/beam/sdk/io/OffsetBasedSource.java   |  22 +-
 .../java/org/apache/beam/sdk/io/TFRecordIO.java |  44 +-
 .../java/org/apache/beam/sdk/io/TextIO.java     | 712 +++++++++++++++----
 .../java/org/apache/beam/sdk/io/TextSink.java   |  22 +-
 .../java/org/apache/beam/sdk/io/WriteFiles.java | 647 +++++++++++------
 .../beam/sdk/io/range/ByteKeyRangeTracker.java  |  22 +-
 .../apache/beam/sdk/io/range/OffsetRange.java   | 101 +++
 .../beam/sdk/io/range/OffsetRangeTracker.java   |   3 +
 .../sdk/options/PipelineOptionsFactory.java     |  18 +-
 .../sdk/options/PipelineOptionsValidator.java   |  34 +-
 .../sdk/options/ProxyInvocationHandler.java     |  19 +-
 .../beam/sdk/runners/TransformHierarchy.java    | 165 ++++-
 .../apache/beam/sdk/testing/StaticWindows.java  |   5 +
 .../org/apache/beam/sdk/testing/TestStream.java |  12 +
 .../org/apache/beam/sdk/transforms/Combine.java |  30 +-
 .../org/apache/beam/sdk/transforms/DoFn.java    |  52 +-
 .../apache/beam/sdk/transforms/DoFnTester.java  |  21 +-
 .../org/apache/beam/sdk/transforms/ParDo.java   |  41 +-
 .../sdk/transforms/SerializableFunctions.java   |  50 ++
 .../org/apache/beam/sdk/transforms/View.java    |  38 +-
 .../reflect/ByteBuddyDoFnInvokerFactory.java    |  27 +
 .../reflect/ByteBuddyOnTimerInvokerFactory.java |  73 +-
 .../sdk/transforms/reflect/DoFnInvoker.java     |  17 +-
 .../sdk/transforms/reflect/DoFnSignature.java   |  33 +-
 .../sdk/transforms/reflect/DoFnSignatures.java  |  44 +-
 .../reflect/OnTimerMethodSpecifier.java         |  37 +
 .../transforms/splittabledofn/OffsetRange.java  |  77 --
 .../splittabledofn/OffsetRangeTracker.java      |  11 +
 .../splittabledofn/RestrictionTracker.java      |  11 +-
 .../sdk/transforms/windowing/GlobalWindows.java |   5 +
 .../windowing/PartitioningWindowFn.java         |   5 +
 .../transforms/windowing/SlidingWindows.java    |   5 +
 .../beam/sdk/transforms/windowing/Window.java   |  32 +
 .../beam/sdk/transforms/windowing/WindowFn.java |  11 +
 .../apache/beam/sdk/util/IdentityWindowFn.java  |   5 +
 .../org/apache/beam/sdk/values/PCollection.java |  12 +
 .../beam/sdk/values/PCollectionViews.java       |  38 +
 .../org/apache/beam/sdk/values/PValueBase.java  |  12 -
 .../org/apache/beam/sdk/values/ShardedKey.java  |  65 ++
 .../beam/sdk/values/WindowingStrategy.java      |  46 +-
 .../java/org/apache/beam/sdk/io/AvroIOTest.java |  85 ++-
 .../beam/sdk/io/DefaultFilenamePolicyTest.java  | 135 ++--
 .../sdk/io/DrunkWritableByteChannelFactory.java |   2 +-
 .../apache/beam/sdk/io/FileBasedSinkTest.java   |  93 ++-
 .../java/org/apache/beam/sdk/io/SimpleSink.java |  56 +-
 .../java/org/apache/beam/sdk/io/TextIOTest.java | 326 ++++++++-
 .../org/apache/beam/sdk/io/WriteFilesTest.java  | 366 ++++++++--
 .../options/PipelineOptionsValidatorTest.java   |  44 ++
 .../sdk/options/ProxyInvocationHandlerTest.java |  19 +
 .../sdk/runners/TransformHierarchyTest.java     | 197 +++++
 .../sdk/testing/PCollectionViewTesting.java     |   8 +
 .../apache/beam/sdk/transforms/CombineTest.java | 365 ++++++----
 .../beam/sdk/transforms/DoFnTesterTest.java     |  32 +
 .../beam/sdk/transforms/GroupByKeyTest.java     |  39 +
 .../apache/beam/sdk/transforms/ParDoTest.java   | 165 +++++
 .../beam/sdk/transforms/SplittableDoFnTest.java | 155 +++-
 .../transforms/reflect/DoFnInvokersTest.java    |  93 ++-
 .../DoFnSignaturesProcessElementTest.java       |   2 +-
 .../DoFnSignaturesSplittableDoFnTest.java       |  83 ++-
 .../transforms/reflect/DoFnSignaturesTest.java  |  14 +
 .../splittabledofn/OffsetRangeTrackerTest.java  |   1 +
 .../windowing/SlidingWindowsTest.java           |  30 +-
 .../google-cloud-platform-core/pom.xml          |   2 +-
 .../java/org/apache/beam/sdk/util/GcsUtil.java  |   2 +-
 .../sdk/util/RetryHttpRequestInitializer.java   | 147 ++--
 .../extensions/gcp/GcpCoreApiSurfaceTest.java   |  48 +-
 .../util/RetryHttpRequestInitializerTest.java   |  31 +-
 sdks/java/extensions/jackson/pom.xml            |   2 +-
 sdks/java/extensions/join-library/pom.xml       |   2 +-
 sdks/java/extensions/pom.xml                    |   2 +-
 sdks/java/extensions/protobuf/pom.xml           |   2 +-
 sdks/java/extensions/sorter/pom.xml             |   8 +-
 sdks/java/harness/pom.xml                       |  18 +-
 .../harness/control/ProcessBundleHandler.java   | 295 ++------
 .../fn/harness/control/RegisterHandler.java     |   2 +-
 .../beam/runners/core/BeamFnDataReadRunner.java |  70 +-
 .../runners/core/BeamFnDataWriteRunner.java     |  67 +-
 .../beam/runners/core/BoundedSourceRunner.java  |  74 +-
 .../beam/runners/core/FnApiDoFnRunner.java      | 547 ++++++++++++++
 .../runners/core/PTransformRunnerFactory.java   |  81 +++
 .../control/ProcessBundleHandlerTest.java       | 521 ++------------
 .../fn/harness/control/RegisterHandlerTest.java |   8 +-
 .../runners/core/BeamFnDataReadRunnerTest.java  | 112 ++-
 .../runners/core/BeamFnDataWriteRunnerTest.java | 120 +++-
 .../runners/core/BoundedSourceRunnerTest.java   | 124 +++-
 .../beam/runners/core/FnApiDoFnRunnerTest.java  | 210 ++++++
 sdks/java/io/amqp/pom.xml                       | 100 +++
 .../org/apache/beam/sdk/io/amqp/AmqpIO.java     | 399 +++++++++++
 .../beam/sdk/io/amqp/AmqpMessageCoder.java      |  79 ++
 .../amqp/AmqpMessageCoderProviderRegistrar.java |  44 ++
 .../apache/beam/sdk/io/amqp/package-info.java   |  22 +
 .../org/apache/beam/sdk/io/amqp/AmqpIOTest.java | 148 ++++
 .../beam/sdk/io/amqp/AmqpMessageCoderTest.java  |  89 +++
 sdks/java/io/cassandra/pom.xml                  |   2 +-
 .../beam/sdk/io/cassandra/CassandraIO.java      |   2 +-
 sdks/java/io/common/pom.xml                     |   2 +-
 .../sdk/io/common/IOTestPipelineOptions.java    |   6 +-
 sdks/java/io/elasticsearch/pom.xml              |  10 +-
 .../sdk/io/elasticsearch/ElasticsearchIO.java   |  17 +-
 .../elasticsearch/ElasticSearchIOTestUtils.java |  81 ++-
 .../sdk/io/elasticsearch/ElasticsearchIOIT.java |  14 +-
 .../io/elasticsearch/ElasticsearchIOTest.java   |  36 +-
 .../elasticsearch/ElasticsearchTestDataSet.java |  37 +-
 sdks/java/io/google-cloud-platform/pom.xml      |  14 +-
 .../beam/sdk/io/gcp/bigquery/BatchLoads.java    |   2 +
 .../io/gcp/bigquery/DynamicDestinations.java    |  29 +-
 .../io/gcp/bigquery/GenerateShardedTable.java   |   1 +
 .../beam/sdk/io/gcp/bigquery/ShardedKey.java    |  67 --
 .../sdk/io/gcp/bigquery/ShardedKeyCoder.java    |  74 --
 .../sdk/io/gcp/bigquery/StreamingWriteFn.java   |   1 +
 .../io/gcp/bigquery/StreamingWriteTables.java   |   2 +
 .../sdk/io/gcp/bigquery/TagWithUniqueIds.java   |   1 +
 .../io/gcp/bigquery/WriteBundlesToFiles.java    |   2 +
 .../bigquery/WriteGroupedRecordsToFiles.java    |   1 +
 .../sdk/io/gcp/bigquery/WritePartition.java     |   1 +
 .../beam/sdk/io/gcp/bigquery/WriteTables.java   |   1 +
 .../beam/sdk/io/gcp/bigtable/BigtableIO.java    |   8 +-
 .../io/gcp/bigtable/BigtableServiceImpl.java    |   9 +-
 .../sdk/io/gcp/datastore/AdaptiveThrottler.java | 103 +++
 .../beam/sdk/io/gcp/datastore/DatastoreV1.java  | 149 +++-
 .../sdk/io/gcp/datastore/MovingAverage.java     |  50 ++
 .../sdk/io/gcp/spanner/AbstractSpannerFn.java   |  58 ++
 .../sdk/io/gcp/spanner/CreateTransactionFn.java |  51 ++
 .../beam/sdk/io/gcp/spanner/MutationGroup.java  |  67 ++
 .../io/gcp/spanner/MutationSizeEstimator.java   |   9 +
 .../sdk/io/gcp/spanner/NaiveSpannerReadFn.java  |  65 ++
 .../beam/sdk/io/gcp/spanner/SpannerConfig.java  | 137 ++++
 .../beam/sdk/io/gcp/spanner/SpannerIO.java      | 616 +++++++++++-----
 .../sdk/io/gcp/spanner/SpannerWriteGroupFn.java | 125 ++++
 .../beam/sdk/io/gcp/spanner/Transaction.java    |  33 +
 .../beam/sdk/io/gcp/GcpApiSurfaceTest.java      |  10 +
 .../sdk/io/gcp/bigquery/BigQueryIOTest.java     |   2 +
 .../sdk/io/gcp/bigtable/BigtableReadIT.java     |   5 +-
 .../io/gcp/bigtable/BigtableTestOptions.java    |   5 -
 .../sdk/io/gcp/bigtable/BigtableWriteIT.java    |   4 +-
 .../io/gcp/datastore/AdaptiveThrottlerTest.java | 111 +++
 .../sdk/io/gcp/datastore/DatastoreV1Test.java   |  92 ++-
 .../beam/sdk/io/gcp/datastore/V1TestUtil.java   |   2 +-
 .../sdk/io/gcp/spanner/FakeServiceFactory.java  |  82 +++
 .../gcp/spanner/MutationSizeEstimatorTest.java  |  12 +
 .../beam/sdk/io/gcp/spanner/RandomUtils.java    |  41 ++
 .../sdk/io/gcp/spanner/SpannerIOReadTest.java   | 281 ++++++++
 .../beam/sdk/io/gcp/spanner/SpannerIOTest.java  | 244 -------
 .../sdk/io/gcp/spanner/SpannerIOWriteTest.java  | 258 +++++++
 .../beam/sdk/io/gcp/spanner/SpannerReadIT.java  | 166 +++++
 .../beam/sdk/io/gcp/spanner/SpannerWriteIT.java |  26 +-
 sdks/java/io/hadoop-common/pom.xml              |   2 +-
 sdks/java/io/hadoop-file-system/pom.xml         |  33 +-
 sdks/java/io/hadoop/input-format/pom.xml        |   2 +-
 .../hadoop/inputformat/HadoopInputFormatIO.java |   2 +-
 sdks/java/io/hadoop/jdk1.8-tests/pom.xml        |   4 +-
 .../inputformat/HIFIOWithElasticTest.java       |  11 +-
 sdks/java/io/hadoop/pom.xml                     |   2 +-
 sdks/java/io/hbase/pom.xml                      |  26 +-
 .../io/hbase/HBaseCoderProviderRegistrar.java   |  40 ++
 .../org/apache/beam/sdk/io/hbase/HBaseIO.java   |  48 +-
 .../beam/sdk/io/hbase/HBaseMutationCoder.java   |  42 ++
 .../hbase/HBaseCoderProviderRegistrarTest.java  |  45 ++
 .../apache/beam/sdk/io/hbase/HBaseIOTest.java   |  49 +-
 sdks/java/io/hcatalog/pom.xml                   | 175 +++++
 .../apache/beam/sdk/io/hcatalog/HCatalogIO.java | 492 +++++++++++++
 .../beam/sdk/io/hcatalog/package-info.java      |  22 +
 .../io/hcatalog/EmbeddedMetastoreService.java   |  87 +++
 .../beam/sdk/io/hcatalog/HCatalogIOTest.java    | 277 ++++++++
 .../sdk/io/hcatalog/HCatalogIOTestUtils.java    | 108 +++
 .../hcatalog/src/test/resources/hive-site.xml   | 301 ++++++++
 sdks/java/io/jdbc/pom.xml                       |   4 +-
 .../org/apache/beam/sdk/io/jdbc/JdbcIO.java     |   2 +-
 sdks/java/io/jms/pom.xml                        |   2 +-
 .../java/org/apache/beam/sdk/io/jms/JmsIO.java  |   2 +-
 sdks/java/io/kafka/pom.xml                      |   2 +-
 .../org/apache/beam/sdk/io/kafka/KafkaIO.java   | 132 ++--
 .../apache/beam/sdk/io/kafka/KafkaIOTest.java   |  30 +
 sdks/java/io/kinesis/pom.xml                    |   2 +-
 .../sdk/io/kinesis/CheckpointGenerator.java     |   6 +-
 .../beam/sdk/io/kinesis/CustomOptional.java     | 111 +--
 .../io/kinesis/DynamicCheckpointGenerator.java  |  52 +-
 .../sdk/io/kinesis/GetKinesisRecordsResult.java |  49 +-
 .../sdk/io/kinesis/KinesisClientProvider.java   |   4 +-
 .../apache/beam/sdk/io/kinesis/KinesisIO.java   | 281 ++++----
 .../beam/sdk/io/kinesis/KinesisReader.java      | 206 +++---
 .../sdk/io/kinesis/KinesisReaderCheckpoint.java |  97 +--
 .../beam/sdk/io/kinesis/KinesisRecord.java      | 177 ++---
 .../beam/sdk/io/kinesis/KinesisRecordCoder.java |  68 +-
 .../beam/sdk/io/kinesis/KinesisSource.java      | 147 ++--
 .../beam/sdk/io/kinesis/RecordFilter.java       |  18 +-
 .../apache/beam/sdk/io/kinesis/RoundRobin.java  |  37 +-
 .../beam/sdk/io/kinesis/ShardCheckpoint.java    | 241 +++----
 .../sdk/io/kinesis/ShardRecordsIterator.java    | 106 +--
 .../sdk/io/kinesis/SimplifiedKinesisClient.java | 215 +++---
 .../beam/sdk/io/kinesis/StartingPoint.java      |  84 +--
 .../io/kinesis/StaticCheckpointGenerator.java   |  27 +-
 .../io/kinesis/TransientKinesisException.java   |   7 +-
 .../beam/sdk/io/kinesis/AmazonKinesisMock.java  | 539 +++++++-------
 .../beam/sdk/io/kinesis/CustomOptionalTest.java |  27 +-
 .../kinesis/DynamicCheckpointGeneratorTest.java |  33 +-
 .../sdk/io/kinesis/KinesisMockReadTest.java     |  97 +--
 .../io/kinesis/KinesisReaderCheckpointTest.java |  52 +-
 .../beam/sdk/io/kinesis/KinesisReaderIT.java    | 127 ++--
 .../beam/sdk/io/kinesis/KinesisReaderTest.java  | 166 ++---
 .../sdk/io/kinesis/KinesisRecordCoderTest.java  |  34 +-
 .../beam/sdk/io/kinesis/KinesisTestOptions.java |  43 +-
 .../beam/sdk/io/kinesis/KinesisUploader.java    |  70 +-
 .../beam/sdk/io/kinesis/RecordFilterTest.java   |  52 +-
 .../beam/sdk/io/kinesis/RoundRobinTest.java     |  42 +-
 .../sdk/io/kinesis/ShardCheckpointTest.java     | 203 +++---
 .../io/kinesis/ShardRecordsIteratorTest.java    | 216 +++---
 .../io/kinesis/SimplifiedKinesisClientTest.java | 351 ++++-----
 sdks/java/io/mongodb/pom.xml                    |   2 +-
 .../beam/sdk/io/mongodb/MongoDbGridFSIO.java    |   2 +-
 .../apache/beam/sdk/io/mongodb/MongoDbIO.java   | 317 +++++++--
 .../beam/sdk/io/mongodb/MongoDbIOTest.java      |  37 +
 sdks/java/io/mqtt/pom.xml                       |   2 +-
 .../org/apache/beam/sdk/io/mqtt/MqttIO.java     |   2 +-
 sdks/java/io/pom.xml                            |  35 +-
 sdks/java/io/xml/pom.xml                        |   2 +-
 .../java/org/apache/beam/sdk/io/xml/XmlIO.java  |   4 +-
 .../org/apache/beam/sdk/io/xml/XmlSink.java     |  21 +-
 .../org/apache/beam/sdk/io/xml/XmlSinkTest.java |   4 +-
 sdks/java/java8tests/pom.xml                    |   2 +-
 sdks/java/javadoc/pom.xml                       |  19 +-
 .../maven-archetypes/examples-java8/pom.xml     |   2 +-
 .../main/resources/archetype-resources/pom.xml  |   1 -
 sdks/java/maven-archetypes/examples/pom.xml     |   2 +-
 .../main/resources/archetype-resources/pom.xml  |   1 -
 sdks/java/maven-archetypes/pom.xml              |   2 +-
 sdks/java/maven-archetypes/starter/pom.xml      |   2 +-
 .../resources/projects/basic/reference/pom.xml  |   2 +-
 sdks/java/pom.xml                               |   2 +-
 sdks/pom.xml                                    |   2 +-
 sdks/python/apache_beam/coders/coder_impl.py    |   4 +
 sdks/python/apache_beam/coders/coders.py        |   7 +-
 .../apache_beam/coders/coders_test_common.py    |   8 +
 .../examples/snippets/snippets_test.py          |  16 +
 .../apache_beam/examples/streaming_wordcount.py |  25 +-
 .../apache_beam/examples/windowed_wordcount.py  |  93 +++
 sdks/python/apache_beam/io/filesystem.py        |  22 +-
 sdks/python/apache_beam/io/gcp/gcsio.py         |  10 +-
 sdks/python/apache_beam/io/gcp/pubsub.py        | 180 +++--
 sdks/python/apache_beam/io/gcp/pubsub_test.py   | 101 ++-
 .../io/gcp/tests/bigquery_matcher.py            |   6 +-
 .../io/gcp/tests/bigquery_matcher_test.py       |   2 +-
 sdks/python/apache_beam/io/range_trackers.py    | 130 ----
 .../apache_beam/io/range_trackers_test.py       | 186 -----
 .../apache_beam/options/pipeline_options.py     |  35 +-
 .../options/pipeline_options_test.py            |  39 +-
 .../apache_beam/options/value_provider_test.py  |  93 +--
 sdks/python/apache_beam/pipeline.py             | 230 +++++-
 sdks/python/apache_beam/pipeline_test.py        |  53 ++
 sdks/python/apache_beam/portability/__init__.py |  18 +
 .../apache_beam/portability/api/__init__.py     |  21 +
 sdks/python/apache_beam/pvalue.py               |   2 +-
 sdks/python/apache_beam/runners/api/__init__.py |  21 -
 .../runners/dataflow/dataflow_runner.py         |  91 ++-
 .../runners/dataflow/dataflow_runner_test.py    |  24 +-
 .../runners/dataflow/internal/apiclient.py      |  35 +-
 .../runners/dataflow/internal/apiclient_test.py |  29 +-
 .../runners/dataflow/internal/dependency.py     |  69 +-
 .../runners/dataflow/native_io/iobase_test.py   |  39 +-
 .../dataflow/native_io/streaming_create.py      |  72 ++
 .../runners/dataflow/ptransform_overrides.py    |  52 ++
 .../runners/direct/bundle_factory.py            |   2 +-
 .../apache_beam/runners/direct/direct_runner.py | 108 +++
 .../runners/direct/evaluation_context.py        |  73 +-
 .../apache_beam/runners/direct/executor.py      | 135 ++--
 .../runners/direct/transform_evaluator.py       | 447 +++++++++++-
 .../runners/direct/transform_result.py          |  41 --
 sdks/python/apache_beam/runners/direct/util.py  |  67 ++
 .../runners/direct/watermark_manager.py         | 100 ++-
 .../apache_beam/runners/pipeline_context.py     |  19 +-
 .../runners/portability/fn_api_runner.py        | 306 ++++----
 .../runners/portability/fn_api_runner_test.py   |  31 +-
 .../runners/worker/bundle_processor.py          | 426 +++++++++++
 .../apache_beam/runners/worker/data_plane.py    |  28 +-
 .../runners/worker/data_plane_test.py           |   2 +-
 .../apache_beam/runners/worker/log_handler.py   |   2 +-
 .../runners/worker/log_handler_test.py          |   2 +-
 .../runners/worker/operation_specs.py           |   9 +-
 .../apache_beam/runners/worker/operations.py    |   1 +
 .../apache_beam/runners/worker/sdk_worker.py    | 370 +---------
 .../runners/worker/sdk_worker_main.py           |   2 +-
 .../runners/worker/sdk_worker_test.py           |  95 +--
 sdks/python/apache_beam/testing/test_stream.py  |   5 +
 .../apache_beam/testing/test_stream_test.py     |  68 ++
 sdks/python/apache_beam/transforms/combiners.py |   8 +
 .../apache_beam/transforms/combiners_test.py    |   7 +-
 sdks/python/apache_beam/transforms/core.py      | 102 +--
 .../python/apache_beam/transforms/ptransform.py |  43 +-
 sdks/python/apache_beam/transforms/trigger.py   |  28 +-
 sdks/python/apache_beam/transforms/window.py    |   4 +-
 .../apache_beam/typehints/trivial_inference.py  |   3 +-
 .../typehints/trivial_inference_test.py         |   7 +
 sdks/python/apache_beam/utils/plugin.py         |  42 ++
 sdks/python/apache_beam/utils/timestamp.py      |   5 +
 sdks/python/apache_beam/utils/urns.py           |   2 +-
 sdks/python/apache_beam/version.py              |   2 +-
 sdks/python/gen_protos.py                       |   2 +-
 sdks/python/pom.xml                             |   2 +-
 sdks/python/run_pylint.sh                       |   2 +-
 sdks/python/setup.py                            |   5 +-
 462 files changed, 21718 insertions(+), 10754 deletions(-)
----------------------------------------------------------------------



[10/50] [abbrv] beam git commit: Disallow Combiner Lifting for multi-window WindowFns

Posted by ta...@apache.org.
Disallow Combiner Lifting for multi-window WindowFns


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/d4fa33e3
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/d4fa33e3
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/d4fa33e3

Branch: refs/heads/DSL_SQL
Commit: d4fa33e346185395577aa3ce537bfd4a1eb8b4f7
Parents: a7cad60
Author: Thomas Groh <tg...@google.com>
Authored: Wed Jul 5 14:16:50 2017 -0700
Committer: Tyler Akidau <ta...@apache.org>
Committed: Wed Jul 12 20:00:59 2017 -0700

----------------------------------------------------------------------
 .../apache/beam/runners/dataflow/DataflowPipelineTranslator.java    | 1 +
 1 file changed, 1 insertion(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/d4fa33e3/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/DataflowPipelineTranslator.java
----------------------------------------------------------------------
diff --git a/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/DataflowPipelineTranslator.java b/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/DataflowPipelineTranslator.java
index 28fd1bb..f1783de 100644
--- a/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/DataflowPipelineTranslator.java
+++ b/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/DataflowPipelineTranslator.java
@@ -793,6 +793,7 @@ public class DataflowPipelineTranslator {
                 context.getPipelineOptions().as(StreamingOptions.class).isStreaming();
             boolean disallowCombinerLifting =
                 !windowingStrategy.getWindowFn().isNonMerging()
+                    || !windowingStrategy.getWindowFn().assignsToOneWindow()
                     || (isStreaming && !transform.fewKeys())
                     // TODO: Allow combiner lifting on the non-default trigger, as appropriate.
                     || !(windowingStrategy.getTrigger() instanceof DefaultTrigger);
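
Context for the guard added above: combiner lifting pre-aggregates with the
CombineFn before the GroupByKey shuffle, which is only sound when each element
lands in exactly one window. A WindowFn that assigns an element to several
windows (sliding windows being the canonical case) would force the lifted
combiner to track one accumulator per assigned window, so the new
assignsToOneWindow() check now disables lifting for it. A minimal sketch of the
multi-window property, written against the Python SDK's windowing primitives
since the commit itself is Java; the names below are the Python equivalents and
are not part of this change:

    from apache_beam.transforms import window
    from apache_beam.utils.timestamp import Timestamp

    # Sliding windows of 60s every 20s: a single timestamp falls into
    # size/period = 3 overlapping windows, so a lifted combiner would have
    # to maintain three accumulators per key upstream of the shuffle.
    fn = window.SlidingWindows(size=60, period=20)
    assigned = fn.assign(window.WindowFn.AssignContext(Timestamp(45)))
    print(len(assigned))  # 3

FixedWindows, by contrast, assigns each element to exactly one window, so
lifting remains available there, subject to the streaming/fewKeys and
default-trigger checks already present in the condition.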


[42/50] [abbrv] beam git commit: Split bundle processor into separate class.

Posted by ta...@apache.org.
Split bundle processor into separate class.


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/4abd7141
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/4abd7141
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/4abd7141

Branch: refs/heads/DSL_SQL
Commit: 4abd7141673f4aead669efd4d2a87fc163764a2d
Parents: 6a61f15
Author: Robert Bradshaw <ro...@gmail.com>
Authored: Wed Jun 28 18:20:12 2017 -0700
Committer: Tyler Akidau <ta...@apache.org>
Committed: Wed Jul 12 20:01:02 2017 -0700

----------------------------------------------------------------------
 .../runners/portability/fn_api_runner.py        |  20 +-
 .../runners/worker/bundle_processor.py          | 426 +++++++++++++++++++
 .../apache_beam/runners/worker/sdk_worker.py    | 398 +----------------
 3 files changed, 444 insertions(+), 400 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/4abd7141/sdks/python/apache_beam/runners/portability/fn_api_runner.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/runners/portability/fn_api_runner.py b/sdks/python/apache_beam/runners/portability/fn_api_runner.py
index f522864..f88fe53 100644
--- a/sdks/python/apache_beam/runners/portability/fn_api_runner.py
+++ b/sdks/python/apache_beam/runners/portability/fn_api_runner.py
@@ -38,6 +38,7 @@ from apache_beam.portability.api import beam_fn_api_pb2
 from apache_beam.portability.api import beam_runner_api_pb2
 from apache_beam.runners import pipeline_context
 from apache_beam.runners.portability import maptask_executor_runner
+from apache_beam.runners.worker import bundle_processor
 from apache_beam.runners.worker import data_plane
 from apache_beam.runners.worker import operation_specs
 from apache_beam.runners.worker import sdk_worker
@@ -186,7 +187,7 @@ class FnApiRunner(maptask_executor_runner.MapTaskExecutorRunner):
         target_name = only_element(get_inputs(operation).keys())
         runner_sinks[(transform_id, target_name)] = operation
         transform_spec = beam_runner_api_pb2.FunctionSpec(
-            urn=sdk_worker.DATA_OUTPUT_URN,
+            urn=bundle_processor.DATA_OUTPUT_URN,
             parameter=proto_utils.pack_Any(data_operation_spec))
 
       elif isinstance(operation, operation_specs.WorkerRead):
@@ -200,7 +201,7 @@ class FnApiRunner(maptask_executor_runner.MapTaskExecutorRunner):
               operation.source.source.read(None),
               operation.source.source.default_output_coder())
           transform_spec = beam_runner_api_pb2.FunctionSpec(
-              urn=sdk_worker.DATA_INPUT_URN,
+              urn=bundle_processor.DATA_INPUT_URN,
               parameter=proto_utils.pack_Any(data_operation_spec))
 
         else:
@@ -209,7 +210,7 @@ class FnApiRunner(maptask_executor_runner.MapTaskExecutorRunner):
           # The Dataflow runner harness strips the base64 encoding. do the same
           # here until we get the same thing back that we sent in.
           transform_spec = beam_runner_api_pb2.FunctionSpec(
-              urn=sdk_worker.PYTHON_SOURCE_URN,
+              urn=bundle_processor.PYTHON_SOURCE_URN,
               parameter=proto_utils.pack_Any(
                   wrappers_pb2.BytesValue(
                       value=base64.b64decode(
@@ -223,21 +224,22 @@ class FnApiRunner(maptask_executor_runner.MapTaskExecutorRunner):
           element_coder = si.source.default_output_coder()
           # TODO(robertwb): Actually flesh out the ViewFn API.
           side_input_extras.append((si.tag, element_coder))
-          side_input_data[sdk_worker.side_input_tag(transform_id, si.tag)] = (
-              self._reencode_elements(
-                  si.source.read(si.source.get_range_tracker(None, None)),
-                  element_coder))
+          side_input_data[
+              bundle_processor.side_input_tag(transform_id, si.tag)] = (
+                  self._reencode_elements(
+                      si.source.read(si.source.get_range_tracker(None, None)),
+                      element_coder))
         augmented_serialized_fn = pickler.dumps(
             (operation.serialized_fn, side_input_extras))
         transform_spec = beam_runner_api_pb2.FunctionSpec(
-            urn=sdk_worker.PYTHON_DOFN_URN,
+            urn=bundle_processor.PYTHON_DOFN_URN,
             parameter=proto_utils.pack_Any(
                 wrappers_pb2.BytesValue(value=augmented_serialized_fn)))
 
       elif isinstance(operation, operation_specs.WorkerFlatten):
         # Flatten is nice and simple.
         transform_spec = beam_runner_api_pb2.FunctionSpec(
-            urn=sdk_worker.IDENTITY_DOFN_URN)
+            urn=bundle_processor.IDENTITY_DOFN_URN)
 
       else:
         raise NotImplementedError(operation)

http://git-wip-us.apache.org/repos/asf/beam/blob/4abd7141/sdks/python/apache_beam/runners/worker/bundle_processor.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/runners/worker/bundle_processor.py b/sdks/python/apache_beam/runners/worker/bundle_processor.py
new file mode 100644
index 0000000..2669bfc
--- /dev/null
+++ b/sdks/python/apache_beam/runners/worker/bundle_processor.py
@@ -0,0 +1,426 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""SDK harness for executing Python Fns via the Fn API."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import base64
+import collections
+import json
+import logging
+
+from google.protobuf import wrappers_pb2
+
+from apache_beam.coders import coder_impl
+from apache_beam.coders import WindowedValueCoder
+from apache_beam.internal import pickler
+from apache_beam.io import iobase
+from apache_beam.portability.api import beam_fn_api_pb2
+from apache_beam.runners.dataflow.native_io import iobase as native_iobase
+from apache_beam.runners import pipeline_context
+from apache_beam.runners.worker import operation_specs
+from apache_beam.runners.worker import operations
+from apache_beam.utils import counters
+from apache_beam.utils import proto_utils
+
+# This module is experimental. No backwards-compatibility guarantees.
+
+
+try:
+  from apache_beam.runners.worker import statesampler
+except ImportError:
+  from apache_beam.runners.worker import statesampler_fake as statesampler
+
+
+DATA_INPUT_URN = 'urn:org.apache.beam:source:runner:0.1'
+DATA_OUTPUT_URN = 'urn:org.apache.beam:sink:runner:0.1'
+IDENTITY_DOFN_URN = 'urn:org.apache.beam:dofn:identity:0.1'
+PYTHON_ITERABLE_VIEWFN_URN = 'urn:org.apache.beam:viewfn:iterable:python:0.1'
+PYTHON_CODER_URN = 'urn:org.apache.beam:coder:python:0.1'
+# TODO(vikasrk): Fix this once runner sends appropriate python urns.
+PYTHON_DOFN_URN = 'urn:org.apache.beam:dofn:java:0.1'
+PYTHON_SOURCE_URN = 'urn:org.apache.beam:source:java:0.1'
+
+
+def side_input_tag(transform_id, tag):
+  return str("%d[%s][%s]" % (len(transform_id), transform_id, tag))
+
+
+class RunnerIOOperation(operations.Operation):
+  """Common baseclass for runner harness IO operations."""
+
+  def __init__(self, operation_name, step_name, consumers, counter_factory,
+               state_sampler, windowed_coder, target, data_channel):
+    super(RunnerIOOperation, self).__init__(
+        operation_name, None, counter_factory, state_sampler)
+    self.windowed_coder = windowed_coder
+    self.step_name = step_name
+    # target represents the consumer for the bytes in the data plane for a
+    # DataInputOperation or a producer of these bytes for a DataOutputOperation.
+    self.target = target
+    self.data_channel = data_channel
+    for _, consumer_ops in consumers.items():
+      for consumer in consumer_ops:
+        self.add_receiver(consumer, 0)
+
+
+class DataOutputOperation(RunnerIOOperation):
+  """A sink-like operation that gathers outputs to be sent back to the runner.
+  """
+
+  def set_output_stream(self, output_stream):
+    self.output_stream = output_stream
+
+  def process(self, windowed_value):
+    self.windowed_coder.get_impl().encode_to_stream(
+        windowed_value, self.output_stream, True)
+
+  def finish(self):
+    self.output_stream.close()
+    super(DataOutputOperation, self).finish()
+
+
+class DataInputOperation(RunnerIOOperation):
+  """A source-like operation that gathers input from the runner.
+  """
+
+  def __init__(self, operation_name, step_name, consumers, counter_factory,
+               state_sampler, windowed_coder, input_target, data_channel):
+    super(DataInputOperation, self).__init__(
+        operation_name, step_name, consumers, counter_factory, state_sampler,
+        windowed_coder, target=input_target, data_channel=data_channel)
+    # We must do this manually as we don't have a spec or spec.output_coders.
+    self.receivers = [
+        operations.ConsumerSet(self.counter_factory, self.step_name, 0,
+                               consumers.itervalues().next(),
+                               self.windowed_coder)]
+
+  def process(self, windowed_value):
+    self.output(windowed_value)
+
+  def process_encoded(self, encoded_windowed_values):
+    input_stream = coder_impl.create_InputStream(encoded_windowed_values)
+    while input_stream.size() > 0:
+      decoded_value = self.windowed_coder.get_impl().decode_from_stream(
+          input_stream, True)
+      self.output(decoded_value)
+
+
+# TODO(robertwb): Revise side input API to not be in terms of native sources.
+# This will enable lookups, but there's an open question as to how to handle
+# custom sources without forcing intermediate materialization.  This seems very
+# related to the desire to inject key and window preserving [Splittable]DoFns
+# into the view computation.
+class SideInputSource(native_iobase.NativeSource,
+                      native_iobase.NativeSourceReader):
+  """A 'source' for reading side inputs via state API calls.
+  """
+
+  def __init__(self, state_handler, state_key, coder):
+    self._state_handler = state_handler
+    self._state_key = state_key
+    self._coder = coder
+
+  def reader(self):
+    return self
+
+  @property
+  def returns_windowed_values(self):
+    return True
+
+  def __enter__(self):
+    return self
+
+  def __exit__(self, *exn_info):
+    pass
+
+  def __iter__(self):
+    # TODO(robertwb): Support pagination.
+    input_stream = coder_impl.create_InputStream(
+        self._state_handler.Get(self._state_key).data)
+    while input_stream.size() > 0:
+      yield self._coder.get_impl().decode_from_stream(input_stream, True)
+
+
+def memoize(func):
+  cache = {}
+  missing = object()
+
+  def wrapper(*args):
+    result = cache.get(args, missing)
+    if result is missing:
+      result = cache[args] = func(*args)
+    return result
+  return wrapper
+
+
+def only_element(iterable):
+  element, = iterable
+  return element
+
+
+class BundleProcessor(object):
+  """A class for processing bundles of elements.
+  """
+  def __init__(
+      self, process_bundle_descriptor, state_handler, data_channel_factory):
+    self.process_bundle_descriptor = process_bundle_descriptor
+    self.state_handler = state_handler
+    self.data_channel_factory = data_channel_factory
+
+  def create_execution_tree(self, descriptor):
+    # TODO(robertwb): Figure out the correct prefix to use for output counters
+    # from StateSampler.
+    counter_factory = counters.CounterFactory()
+    state_sampler = statesampler.StateSampler(
+        'fnapi-step%s-' % descriptor.id, counter_factory)
+
+    transform_factory = BeamTransformFactory(
+        descriptor, self.data_channel_factory, counter_factory, state_sampler,
+        self.state_handler)
+
+    pcoll_consumers = collections.defaultdict(list)
+    for transform_id, transform_proto in descriptor.transforms.items():
+      for pcoll_id in transform_proto.inputs.values():
+        pcoll_consumers[pcoll_id].append(transform_id)
+
+    @memoize
+    def get_operation(transform_id):
+      transform_consumers = {
+          tag: [get_operation(op) for op in pcoll_consumers[pcoll_id]]
+          for tag, pcoll_id
+          in descriptor.transforms[transform_id].outputs.items()
+      }
+      return transform_factory.create_operation(
+          transform_id, transform_consumers)
+
+    # Operations must be started (hence returned) in order.
+    @memoize
+    def topological_height(transform_id):
+      return 1 + max(
+          [0] +
+          [topological_height(consumer)
+           for pcoll in descriptor.transforms[transform_id].outputs.values()
+           for consumer in pcoll_consumers[pcoll]])
+
+    return [get_operation(transform_id)
+            for transform_id in sorted(
+                descriptor.transforms, key=topological_height, reverse=True)]
+
+  def process_bundle(self, instruction_id):
+    ops = self.create_execution_tree(self.process_bundle_descriptor)
+
+    expected_inputs = []
+    for op in ops:
+      if isinstance(op, DataOutputOperation):
+        # TODO(robertwb): Is there a better way to pass the instruction id to
+        # the operation?
+        op.set_output_stream(op.data_channel.output_stream(
+            instruction_id, op.target))
+      elif isinstance(op, DataInputOperation):
+        # We must wait until we receive "end of stream" for each of these ops.
+        expected_inputs.append(op)
+
+    # Start all operations.
+    for op in reversed(ops):
+      logging.info('start %s', op)
+      op.start()
+
+    # Inject inputs from data plane.
+    for input_op in expected_inputs:
+      for data in input_op.data_channel.input_elements(
+          instruction_id, [input_op.target]):
+        # ignores input name
+        input_op.process_encoded(data.data)
+
+    # Finish all operations.
+    for op in ops:
+      logging.info('finish %s', op)
+      op.finish()
+
+
+class BeamTransformFactory(object):
+  """Factory for turning transform_protos into executable operations."""
+  def __init__(self, descriptor, data_channel_factory, counter_factory,
+               state_sampler, state_handler):
+    self.descriptor = descriptor
+    self.data_channel_factory = data_channel_factory
+    self.counter_factory = counter_factory
+    self.state_sampler = state_sampler
+    self.state_handler = state_handler
+    self.context = pipeline_context.PipelineContext(descriptor)
+
+  _known_urns = {}
+
+  @classmethod
+  def register_urn(cls, urn, parameter_type):
+    def wrapper(func):
+      cls._known_urns[urn] = func, parameter_type
+      return func
+    return wrapper
+
+  def create_operation(self, transform_id, consumers):
+    transform_proto = self.descriptor.transforms[transform_id]
+    creator, parameter_type = self._known_urns[transform_proto.spec.urn]
+    parameter = proto_utils.unpack_Any(
+        transform_proto.spec.parameter, parameter_type)
+    return creator(self, transform_id, transform_proto, parameter, consumers)
+
+  def get_coder(self, coder_id):
+    coder_proto = self.descriptor.coders[coder_id]
+    if coder_proto.spec.spec.urn:
+      return self.context.coders.get_by_id(coder_id)
+    else:
+      # No URN, assume cloud object encoding json bytes.
+      return operation_specs.get_coder_from_spec(
+          json.loads(
+              proto_utils.unpack_Any(coder_proto.spec.spec.parameter,
+                                     wrappers_pb2.BytesValue).value))
+
+  def get_output_coders(self, transform_proto):
+    return {
+        tag: self.get_coder(self.descriptor.pcollections[pcoll_id].coder_id)
+        for tag, pcoll_id in transform_proto.outputs.items()
+    }
+
+  def get_only_output_coder(self, transform_proto):
+    return only_element(self.get_output_coders(transform_proto).values())
+
+  def get_input_coders(self, transform_proto):
+    return {
+        tag: self.get_coder(self.descriptor.pcollections[pcoll_id].coder_id)
+        for tag, pcoll_id in transform_proto.inputs.items()
+    }
+
+  def get_only_input_coder(self, transform_proto):
+    return only_element(self.get_input_coders(transform_proto).values())
+
+  # TODO(robertwb): Update all operations to take these in the constructor.
+  @staticmethod
+  def augment_oldstyle_op(op, step_name, consumers, tag_list=None):
+    op.step_name = step_name
+    for tag, op_consumers in consumers.items():
+      for consumer in op_consumers:
+        op.add_receiver(consumer, tag_list.index(tag) if tag_list else 0)
+    return op
+
+
+@BeamTransformFactory.register_urn(
+    DATA_INPUT_URN, beam_fn_api_pb2.RemoteGrpcPort)
+def create(factory, transform_id, transform_proto, grpc_port, consumers):
+  target = beam_fn_api_pb2.Target(
+      primitive_transform_reference=transform_id,
+      name=only_element(transform_proto.outputs.keys()))
+  return DataInputOperation(
+      transform_proto.unique_name,
+      transform_proto.unique_name,
+      consumers,
+      factory.counter_factory,
+      factory.state_sampler,
+      factory.get_only_output_coder(transform_proto),
+      input_target=target,
+      data_channel=factory.data_channel_factory.create_data_channel(grpc_port))
+
+
+@BeamTransformFactory.register_urn(
+    DATA_OUTPUT_URN, beam_fn_api_pb2.RemoteGrpcPort)
+def create(factory, transform_id, transform_proto, grpc_port, consumers):
+  target = beam_fn_api_pb2.Target(
+      primitive_transform_reference=transform_id,
+      name=only_element(transform_proto.inputs.keys()))
+  return DataOutputOperation(
+      transform_proto.unique_name,
+      transform_proto.unique_name,
+      consumers,
+      factory.counter_factory,
+      factory.state_sampler,
+      # TODO(robertwb): Perhaps this could be distinct from the input coder?
+      factory.get_only_input_coder(transform_proto),
+      target=target,
+      data_channel=factory.data_channel_factory.create_data_channel(grpc_port))
+
+
+@BeamTransformFactory.register_urn(PYTHON_SOURCE_URN, wrappers_pb2.BytesValue)
+def create(factory, transform_id, transform_proto, parameter, consumers):
+  # The Dataflow runner harness strips the base64 encoding.
+  source = pickler.loads(base64.b64encode(parameter.value))
+  spec = operation_specs.WorkerRead(
+      iobase.SourceBundle(1.0, source, None, None),
+      [WindowedValueCoder(source.default_output_coder())])
+  return factory.augment_oldstyle_op(
+      operations.ReadOperation(
+          transform_proto.unique_name,
+          spec,
+          factory.counter_factory,
+          factory.state_sampler),
+      transform_proto.unique_name,
+      consumers)
+
+
+@BeamTransformFactory.register_urn(PYTHON_DOFN_URN, wrappers_pb2.BytesValue)
+def create(factory, transform_id, transform_proto, parameter, consumers):
+  dofn_data = pickler.loads(parameter.value)
+  if len(dofn_data) == 2:
+    # Has side input data.
+    serialized_fn, side_input_data = dofn_data
+  else:
+    # No side input data.
+    serialized_fn, side_input_data = parameter.value, []
+
+  def create_side_input(tag, coder):
+    # TODO(robertwb): Extract windows (and keys) out of element data.
+    # TODO(robertwb): Extract state key from ParDoPayload.
+    return operation_specs.WorkerSideInputSource(
+        tag=tag,
+        source=SideInputSource(
+            factory.state_handler,
+            beam_fn_api_pb2.StateKey.MultimapSideInput(
+                key=side_input_tag(transform_id, tag)),
+            coder=coder))
+  output_tags = list(transform_proto.outputs.keys())
+  output_coders = factory.get_output_coders(transform_proto)
+  spec = operation_specs.WorkerDoFn(
+      serialized_fn=serialized_fn,
+      output_tags=output_tags,
+      input=None,
+      side_inputs=[
+          create_side_input(tag, coder) for tag, coder in side_input_data],
+      output_coders=[output_coders[tag] for tag in output_tags])
+  return factory.augment_oldstyle_op(
+      operations.DoOperation(
+          transform_proto.unique_name,
+          spec,
+          factory.counter_factory,
+          factory.state_sampler),
+      transform_proto.unique_name,
+      consumers,
+      output_tags)
+
+
+@BeamTransformFactory.register_urn(IDENTITY_DOFN_URN, None)
+def create(factory, transform_id, transform_proto, unused_parameter, consumers):
+  return factory.augment_oldstyle_op(
+      operations.FlattenOperation(
+          transform_proto.unique_name,
+          None,
+          factory.counter_factory,
+          factory.state_sampler),
+      transform_proto.unique_name,
+      consumers)
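
The extension point of the new module is BeamTransformFactory.register_urn:
create_operation() looks up the creator registered for a transform's spec.urn
and unpacks spec.parameter into the registered proto type before calling it. A
sketch of plugging a custom creator into this API; the URN and function below
are invented for illustration and simply mirror the IDENTITY_DOFN_URN creator
above:

    from apache_beam.runners.worker import bundle_processor
    from apache_beam.runners.worker import operations

    # Hypothetical URN, not part of this commit.
    MY_NOOP_URN = 'urn:example:dofn:noop:0.1'

    # parameter_type=None matches how IDENTITY_DOFN_URN is registered above.
    @bundle_processor.BeamTransformFactory.register_urn(MY_NOOP_URN, None)
    def create_noop(factory, transform_id, transform_proto,
                    unused_parameter, consumers):
      # Wire the operation into its consumers the same way the built-in
      # creators do, via augment_oldstyle_op().
      return factory.augment_oldstyle_op(
          operations.FlattenOperation(
              transform_proto.unique_name,
              None,
              factory.counter_factory,
              factory.state_sampler),
          transform_proto.unique_name,
          consumers)

For reference, the side_input_tag() scheme length-prefixes the transform id to
keep state keys unambiguous: side_input_tag('transform_7', 'tag0') yields
'11[transform_7][tag0]'.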

http://git-wip-us.apache.org/repos/asf/beam/blob/4abd7141/sdks/python/apache_beam/runners/worker/sdk_worker.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/runners/worker/sdk_worker.py b/sdks/python/apache_beam/runners/worker/sdk_worker.py
index ae86830..6a23680 100644
--- a/sdks/python/apache_beam/runners/worker/sdk_worker.py
+++ b/sdks/python/apache_beam/runners/worker/sdk_worker.py
@@ -21,170 +21,21 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import base64
-import collections
-import json
 import logging
 import Queue as queue
 import threading
 import traceback
 
-from google.protobuf import wrappers_pb2
-
-from apache_beam.coders import coder_impl
-from apache_beam.coders import WindowedValueCoder
-from apache_beam.internal import pickler
-from apache_beam.io import iobase
 from apache_beam.portability.api import beam_fn_api_pb2
-from apache_beam.runners.dataflow.native_io import iobase as native_iobase
-from apache_beam.runners import pipeline_context
-from apache_beam.runners.worker import operation_specs
-from apache_beam.runners.worker import operations
-from apache_beam.utils import counters
-from apache_beam.utils import proto_utils
-
-# This module is experimental. No backwards-compatibility guarantees.
-
-
-try:
-  from apache_beam.runners.worker import statesampler
-except ImportError:
-  from apache_beam.runners.worker import statesampler_fake as statesampler
-from apache_beam.runners.worker.data_plane import GrpcClientDataChannelFactory
-
-
-DATA_INPUT_URN = 'urn:org.apache.beam:source:runner:0.1'
-DATA_OUTPUT_URN = 'urn:org.apache.beam:sink:runner:0.1'
-IDENTITY_DOFN_URN = 'urn:org.apache.beam:dofn:identity:0.1'
-PYTHON_ITERABLE_VIEWFN_URN = 'urn:org.apache.beam:viewfn:iterable:python:0.1'
-PYTHON_CODER_URN = 'urn:org.apache.beam:coder:python:0.1'
-# TODO(vikasrk): Fix this once runner sends appropriate python urns.
-PYTHON_DOFN_URN = 'urn:org.apache.beam:dofn:java:0.1'
-PYTHON_SOURCE_URN = 'urn:org.apache.beam:source:java:0.1'
-
-
-def side_input_tag(transform_id, tag):
-  return str("%d[%s][%s]" % (len(transform_id), transform_id, tag))
-
-
-class RunnerIOOperation(operations.Operation):
-  """Common baseclass for runner harness IO operations."""
-
-  def __init__(self, operation_name, step_name, consumers, counter_factory,
-               state_sampler, windowed_coder, target, data_channel):
-    super(RunnerIOOperation, self).__init__(
-        operation_name, None, counter_factory, state_sampler)
-    self.windowed_coder = windowed_coder
-    self.step_name = step_name
-    # target represents the consumer for the bytes in the data plane for a
-    # DataInputOperation or a producer of these bytes for a DataOutputOperation.
-    self.target = target
-    self.data_channel = data_channel
-    for _, consumer_ops in consumers.items():
-      for consumer in consumer_ops:
-        self.add_receiver(consumer, 0)
-
-
-class DataOutputOperation(RunnerIOOperation):
-  """A sink-like operation that gathers outputs to be sent back to the runner.
-  """
-
-  def set_output_stream(self, output_stream):
-    self.output_stream = output_stream
-
-  def process(self, windowed_value):
-    self.windowed_coder.get_impl().encode_to_stream(
-        windowed_value, self.output_stream, True)
-
-  def finish(self):
-    self.output_stream.close()
-    super(DataOutputOperation, self).finish()
-
-
-class DataInputOperation(RunnerIOOperation):
-  """A source-like operation that gathers input from the runner.
-  """
-
-  def __init__(self, operation_name, step_name, consumers, counter_factory,
-               state_sampler, windowed_coder, input_target, data_channel):
-    super(DataInputOperation, self).__init__(
-        operation_name, step_name, consumers, counter_factory, state_sampler,
-        windowed_coder, target=input_target, data_channel=data_channel)
-    # We must do this manually as we don't have a spec or spec.output_coders.
-    self.receivers = [
-        operations.ConsumerSet(self.counter_factory, self.step_name, 0,
-                               consumers.itervalues().next(),
-                               self.windowed_coder)]
-
-  def process(self, windowed_value):
-    self.output(windowed_value)
-
-  def process_encoded(self, encoded_windowed_values):
-    input_stream = coder_impl.create_InputStream(encoded_windowed_values)
-    while input_stream.size() > 0:
-      decoded_value = self.windowed_coder.get_impl().decode_from_stream(
-          input_stream, True)
-      self.output(decoded_value)
-
-
-# TODO(robertwb): Revise side input API to not be in terms of native sources.
-# This will enable lookups, but there's an open question as to how to handle
-# custom sources without forcing intermediate materialization.  This seems very
-# related to the desire to inject key and window preserving [Splittable]DoFns
-# into the view computation.
-class SideInputSource(native_iobase.NativeSource,
-                      native_iobase.NativeSourceReader):
-  """A 'source' for reading side inputs via state API calls.
-  """
-
-  def __init__(self, state_handler, state_key, coder):
-    self._state_handler = state_handler
-    self._state_key = state_key
-    self._coder = coder
-
-  def reader(self):
-    return self
-
-  @property
-  def returns_windowed_values(self):
-    return True
-
-  def __enter__(self):
-    return self
-
-  def __exit__(self, *exn_info):
-    pass
-
-  def __iter__(self):
-    # TODO(robertwb): Support pagination.
-    input_stream = coder_impl.create_InputStream(
-        self._state_handler.Get(self._state_key).data)
-    while input_stream.size() > 0:
-      yield self._coder.get_impl().decode_from_stream(input_stream, True)
-
-
-def memoize(func):
-  cache = {}
-  missing = object()
-
-  def wrapper(*args):
-    result = cache.get(args, missing)
-    if result is missing:
-      result = cache[args] = func(*args)
-    return result
-  return wrapper
-
-
-def only_element(iterable):
-  element, = iterable
-  return element
+from apache_beam.runners.worker import bundle_processor
+from apache_beam.runners.worker import data_plane
 
 
 class SdkHarness(object):
 
   def __init__(self, control_channel):
     self._control_channel = control_channel
-    self._data_channel_factory = GrpcClientDataChannelFactory()
+    self._data_channel_factory = data_plane.GrpcClientDataChannelFactory()
 
   def run(self):
     contol_stub = beam_fn_api_pb2.BeamFnControlStub(self._control_channel)
@@ -251,245 +102,10 @@ class SdkWorker(object):
       self.fns[process_bundle_descriptor.id] = process_bundle_descriptor
     return beam_fn_api_pb2.RegisterResponse()
 
-  def create_execution_tree(self, descriptor):
-    # TODO(robertwb): Figure out the correct prefix to use for output counters
-    # from StateSampler.
-    counter_factory = counters.CounterFactory()
-    state_sampler = statesampler.StateSampler(
-        'fnapi-step%s-' % descriptor.id, counter_factory)
-
-    transform_factory = BeamTransformFactory(
-        descriptor, self.data_channel_factory, counter_factory, state_sampler,
-        self.state_handler)
-
-    pcoll_consumers = collections.defaultdict(list)
-    for transform_id, transform_proto in descriptor.transforms.items():
-      for pcoll_id in transform_proto.inputs.values():
-        pcoll_consumers[pcoll_id].append(transform_id)
-
-    @memoize
-    def get_operation(transform_id):
-      transform_consumers = {
-          tag: [get_operation(op) for op in pcoll_consumers[pcoll_id]]
-          for tag, pcoll_id
-          in descriptor.transforms[transform_id].outputs.items()
-      }
-      return transform_factory.create_operation(
-          transform_id, transform_consumers)
-
-    # Operations must be started (hence returned) in order.
-    @memoize
-    def topological_height(transform_id):
-      return 1 + max(
-          [0] +
-          [topological_height(consumer)
-           for pcoll in descriptor.transforms[transform_id].outputs.values()
-           for consumer in pcoll_consumers[pcoll]])
-
-    return [get_operation(transform_id)
-            for transform_id in sorted(
-                descriptor.transforms, key=topological_height, reverse=True)]
-
   def process_bundle(self, request, instruction_id):
-    ops = self.create_execution_tree(
-        self.fns[request.process_bundle_descriptor_reference])
-
-    expected_inputs = []
-    for op in ops:
-      if isinstance(op, DataOutputOperation):
-        # TODO(robertwb): Is there a better way to pass the instruction id to
-        # the operation?
-        op.set_output_stream(op.data_channel.output_stream(
-            instruction_id, op.target))
-      elif isinstance(op, DataInputOperation):
-        # We must wait until we receive "end of stream" for each of these ops.
-        expected_inputs.append(op)
-
-    # Start all operations.
-    for op in reversed(ops):
-      logging.info('start %s', op)
-      op.start()
-
-    # Inject inputs from data plane.
-    for input_op in expected_inputs:
-      for data in input_op.data_channel.input_elements(
-          instruction_id, [input_op.target]):
-        # ignores input name
-        input_op.process_encoded(data.data)
-
-    # Finish all operations.
-    for op in ops:
-      logging.info('finish %s', op)
-      op.finish()
+    bundle_processor.BundleProcessor(
+        self.fns[request.process_bundle_descriptor_reference],
+        self.state_handler,
+        self.data_channel_factory).process_bundle(instruction_id)
 
     return beam_fn_api_pb2.ProcessBundleResponse()
-
-
-class BeamTransformFactory(object):
-  """Factory for turning transform_protos into executable operations."""
-  def __init__(self, descriptor, data_channel_factory, counter_factory,
-               state_sampler, state_handler):
-    self.descriptor = descriptor
-    self.data_channel_factory = data_channel_factory
-    self.counter_factory = counter_factory
-    self.state_sampler = state_sampler
-    self.state_handler = state_handler
-    self.context = pipeline_context.PipelineContext(descriptor)
-
-  _known_urns = {}
-
-  @classmethod
-  def register_urn(cls, urn, parameter_type):
-    def wrapper(func):
-      cls._known_urns[urn] = func, parameter_type
-      return func
-    return wrapper
-
-  def create_operation(self, transform_id, consumers):
-    transform_proto = self.descriptor.transforms[transform_id]
-    creator, parameter_type = self._known_urns[transform_proto.spec.urn]
-    parameter = proto_utils.unpack_Any(
-        transform_proto.spec.parameter, parameter_type)
-    return creator(self, transform_id, transform_proto, parameter, consumers)
-
-  def get_coder(self, coder_id):
-    coder_proto = self.descriptor.coders[coder_id]
-    if coder_proto.spec.spec.urn:
-      return self.context.coders.get_by_id(coder_id)
-    else:
-      # No URN, assume cloud object encoding json bytes.
-      return operation_specs.get_coder_from_spec(
-          json.loads(
-              proto_utils.unpack_Any(coder_proto.spec.spec.parameter,
-                                     wrappers_pb2.BytesValue).value))
-
-  def get_output_coders(self, transform_proto):
-    return {
-        tag: self.get_coder(self.descriptor.pcollections[pcoll_id].coder_id)
-        for tag, pcoll_id in transform_proto.outputs.items()
-    }
-
-  def get_only_output_coder(self, transform_proto):
-    return only_element(self.get_output_coders(transform_proto).values())
-
-  def get_input_coders(self, transform_proto):
-    return {
-        tag: self.get_coder(self.descriptor.pcollections[pcoll_id].coder_id)
-        for tag, pcoll_id in transform_proto.inputs.items()
-    }
-
-  def get_only_input_coder(self, transform_proto):
-    return only_element(self.get_input_coders(transform_proto).values())
-
-  # TODO(robertwb): Update all operations to take these in the constructor.
-  @staticmethod
-  def augment_oldstyle_op(op, step_name, consumers, tag_list=None):
-    op.step_name = step_name
-    for tag, op_consumers in consumers.items():
-      for consumer in op_consumers:
-        op.add_receiver(consumer, tag_list.index(tag) if tag_list else 0)
-    return op
-
-
-@BeamTransformFactory.register_urn(
-    DATA_INPUT_URN, beam_fn_api_pb2.RemoteGrpcPort)
-def create(factory, transform_id, transform_proto, grpc_port, consumers):
-  target = beam_fn_api_pb2.Target(
-      primitive_transform_reference=transform_id,
-      name=only_element(transform_proto.outputs.keys()))
-  return DataInputOperation(
-      transform_proto.unique_name,
-      transform_proto.unique_name,
-      consumers,
-      factory.counter_factory,
-      factory.state_sampler,
-      factory.get_only_output_coder(transform_proto),
-      input_target=target,
-      data_channel=factory.data_channel_factory.create_data_channel(grpc_port))
-
-
-@BeamTransformFactory.register_urn(
-    DATA_OUTPUT_URN, beam_fn_api_pb2.RemoteGrpcPort)
-def create(factory, transform_id, transform_proto, grpc_port, consumers):
-  target = beam_fn_api_pb2.Target(
-      primitive_transform_reference=transform_id,
-      name=only_element(transform_proto.inputs.keys()))
-  return DataOutputOperation(
-      transform_proto.unique_name,
-      transform_proto.unique_name,
-      consumers,
-      factory.counter_factory,
-      factory.state_sampler,
-      # TODO(robertwb): Perhaps this could be distinct from the input coder?
-      factory.get_only_input_coder(transform_proto),
-      target=target,
-      data_channel=factory.data_channel_factory.create_data_channel(grpc_port))
-
-
-@BeamTransformFactory.register_urn(PYTHON_SOURCE_URN, wrappers_pb2.BytesValue)
-def create(factory, transform_id, transform_proto, parameter, consumers):
-  # The Dataflow runner harness strips the base64 encoding.
-  source = pickler.loads(base64.b64encode(parameter.value))
-  spec = operation_specs.WorkerRead(
-      iobase.SourceBundle(1.0, source, None, None),
-      [WindowedValueCoder(source.default_output_coder())])
-  return factory.augment_oldstyle_op(
-      operations.ReadOperation(
-          transform_proto.unique_name,
-          spec,
-          factory.counter_factory,
-          factory.state_sampler),
-      transform_proto.unique_name,
-      consumers)
-
-
-@BeamTransformFactory.register_urn(PYTHON_DOFN_URN, wrappers_pb2.BytesValue)
-def create(factory, transform_id, transform_proto, parameter, consumers):
-  dofn_data = pickler.loads(parameter.value)
-  if len(dofn_data) == 2:
-    # Has side input data.
-    serialized_fn, side_input_data = dofn_data
-  else:
-    # No side input data.
-    serialized_fn, side_input_data = parameter.value, []
-
-  def create_side_input(tag, coder):
-    # TODO(robertwb): Extract windows (and keys) out of element data.
-    # TODO(robertwb): Extract state key from ParDoPayload.
-    return operation_specs.WorkerSideInputSource(
-        tag=tag,
-        source=SideInputSource(
-            factory.state_handler,
-            beam_fn_api_pb2.StateKey.MultimapSideInput(
-                key=side_input_tag(transform_id, tag)),
-            coder=coder))
-  output_tags = list(transform_proto.outputs.keys())
-  output_coders = factory.get_output_coders(transform_proto)
-  spec = operation_specs.WorkerDoFn(
-      serialized_fn=serialized_fn,
-      output_tags=output_tags,
-      input=None,
-      side_inputs=[
-          create_side_input(tag, coder) for tag, coder in side_input_data],
-      output_coders=[output_coders[tag] for tag in output_tags])
-  return factory.augment_oldstyle_op(
-      operations.DoOperation(
-          transform_proto.unique_name,
-          spec,
-          factory.counter_factory,
-          factory.state_sampler),
-      transform_proto.unique_name,
-      consumers,
-      output_tags)
-
-
-@BeamTransformFactory.register_urn(IDENTITY_DOFN_URN, None)
-def create(factory, transform_id, transform_proto, unused_parameter, consumers):
-  return factory.augment_oldstyle_op(
-      operations.FlattenOperation(
-          transform_proto.unique_name,
-          None,
-          factory.counter_factory,
-          factory.state_sampler),
-      transform_proto.unique_name,
-      consumers)
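
One consequence of the split: bundle execution no longer depends on the gRPC
control loop in SdkHarness/SdkWorker. Anything holding a
ProcessBundleDescriptor, a state handler, and a data-channel factory can drive
a bundle directly, which is useful for tests. A minimal sketch, assuming the
caller supplies those three objects as sdk_worker.py does above:

    from apache_beam.runners.worker import bundle_processor

    def run_one_bundle(descriptor, state_handler, data_channel_factory,
                       instruction_id):
      # descriptor: a ProcessBundleDescriptor proto, as stored in
      # SdkWorker.fns; state_handler and data_channel_factory as held by
      # the SdkWorker in the diff above.
      processor = bundle_processor.BundleProcessor(
          descriptor, state_handler, data_channel_factory)
      processor.process_bundle(instruction_id)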


[49/50] [abbrv] beam git commit: Reformatting Kinesis IO to comply with official code style

Posted by ta...@apache.org.
Reformatting Kinesis IO to comply with official code style


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/7925a668
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/7925a668
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/7925a668

Branch: refs/heads/DSL_SQL
Commit: 7925a668b12e272c7b2631ff6b20376e92ad90be
Parents: 4abd714
Author: Pawel Kaczmarczyk <p....@ocado.com>
Authored: Mon Jun 19 11:10:25 2017 +0200
Committer: Tyler Akidau <ta...@apache.org>
Committed: Wed Jul 12 20:01:02 2017 -0700

----------------------------------------------------------------------
 .../sdk/io/kinesis/CheckpointGenerator.java     |   6 +-
 .../beam/sdk/io/kinesis/CustomOptional.java     | 111 ++--
 .../io/kinesis/DynamicCheckpointGenerator.java  |  52 +-
 .../sdk/io/kinesis/GetKinesisRecordsResult.java |  49 +-
 .../sdk/io/kinesis/KinesisClientProvider.java   |   4 +-
 .../apache/beam/sdk/io/kinesis/KinesisIO.java   | 279 +++++-----
 .../beam/sdk/io/kinesis/KinesisReader.java      | 206 +++----
 .../sdk/io/kinesis/KinesisReaderCheckpoint.java |  97 ++--
 .../beam/sdk/io/kinesis/KinesisRecord.java      | 177 +++---
 .../beam/sdk/io/kinesis/KinesisRecordCoder.java |  68 +--
 .../beam/sdk/io/kinesis/KinesisSource.java      | 147 ++---
 .../beam/sdk/io/kinesis/RecordFilter.java       |  18 +-
 .../apache/beam/sdk/io/kinesis/RoundRobin.java  |  37 +-
 .../beam/sdk/io/kinesis/ShardCheckpoint.java    | 241 ++++-----
 .../sdk/io/kinesis/ShardRecordsIterator.java    | 106 ++--
 .../sdk/io/kinesis/SimplifiedKinesisClient.java | 215 ++++----
 .../beam/sdk/io/kinesis/StartingPoint.java      |  84 +--
 .../io/kinesis/StaticCheckpointGenerator.java   |  27 +-
 .../io/kinesis/TransientKinesisException.java   |   7 +-
 .../beam/sdk/io/kinesis/AmazonKinesisMock.java  | 539 ++++++++++---------
 .../beam/sdk/io/kinesis/CustomOptionalTest.java |  27 +-
 .../kinesis/DynamicCheckpointGeneratorTest.java |  33 +-
 .../sdk/io/kinesis/KinesisMockReadTest.java     |  97 ++--
 .../io/kinesis/KinesisReaderCheckpointTest.java |  52 +-
 .../beam/sdk/io/kinesis/KinesisReaderIT.java    | 127 ++---
 .../beam/sdk/io/kinesis/KinesisReaderTest.java  | 166 +++---
 .../sdk/io/kinesis/KinesisRecordCoderTest.java  |  34 +-
 .../beam/sdk/io/kinesis/KinesisTestOptions.java |  43 +-
 .../beam/sdk/io/kinesis/KinesisUploader.java    |  70 +--
 .../beam/sdk/io/kinesis/RecordFilterTest.java   |  52 +-
 .../beam/sdk/io/kinesis/RoundRobinTest.java     |  42 +-
 .../sdk/io/kinesis/ShardCheckpointTest.java     | 203 +++----
 .../io/kinesis/ShardRecordsIteratorTest.java    | 216 ++++----
 .../io/kinesis/SimplifiedKinesisClientTest.java | 351 ++++++------
 34 files changed, 2031 insertions(+), 1952 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/7925a668/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/CheckpointGenerator.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/CheckpointGenerator.java b/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/CheckpointGenerator.java
index 919d85a..2629c57 100644
--- a/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/CheckpointGenerator.java
+++ b/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/CheckpointGenerator.java
@@ -17,7 +17,6 @@
  */
 package org.apache.beam.sdk.io.kinesis;
 
-
 import java.io.Serializable;
 
 /**
@@ -25,6 +24,7 @@ import java.io.Serializable;
  * How exactly the checkpoint is generated is up to implementing class.
  */
 interface CheckpointGenerator extends Serializable {
-    KinesisReaderCheckpoint generate(SimplifiedKinesisClient client)
-            throws TransientKinesisException;
+
+  KinesisReaderCheckpoint generate(SimplifiedKinesisClient client)
+      throws TransientKinesisException;
 }

http://git-wip-us.apache.org/repos/asf/beam/blob/7925a668/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/CustomOptional.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/CustomOptional.java b/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/CustomOptional.java
index 4bed0e3..5a28214 100644
--- a/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/CustomOptional.java
+++ b/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/CustomOptional.java
@@ -24,76 +24,79 @@ import java.util.Objects;
  * Similar to Guava {@code Optional}, but throws {@link NoSuchElementException} for missing element.
  */
 abstract class CustomOptional<T> {
-    @SuppressWarnings("unchecked")
-    public static <T> CustomOptional<T> absent() {
-        return (Absent<T>) Absent.INSTANCE;
-    }
 
-    public static <T> CustomOptional<T> of(T v) {
-        return new Present<>(v);
-    }
+  @SuppressWarnings("unchecked")
+  public static <T> CustomOptional<T> absent() {
+    return (Absent<T>) Absent.INSTANCE;
+  }
 
-    public abstract boolean isPresent();
+  public static <T> CustomOptional<T> of(T v) {
+    return new Present<>(v);
+  }
 
-    public abstract T get();
+  public abstract boolean isPresent();
 
-    private static class Present<T> extends CustomOptional<T> {
-        private final T value;
+  public abstract T get();
 
-        private Present(T value) {
-            this.value = value;
-        }
+  private static class Present<T> extends CustomOptional<T> {
 
-        @Override
-        public boolean isPresent() {
-            return true;
-        }
+    private final T value;
 
-        @Override
-        public T get() {
-            return value;
-        }
+    private Present(T value) {
+      this.value = value;
+    }
 
-        @Override
-        public boolean equals(Object o) {
-            if (!(o instanceof Present)) {
-                return false;
-            }
+    @Override
+    public boolean isPresent() {
+      return true;
+    }
 
-            Present<?> present = (Present<?>) o;
-            return Objects.equals(value, present.value);
-        }
+    @Override
+    public T get() {
+      return value;
+    }
+
+    @Override
+    public boolean equals(Object o) {
+      if (!(o instanceof Present)) {
+        return false;
+      }
 
-        @Override
-        public int hashCode() {
-            return Objects.hash(value);
-        }
+      Present<?> present = (Present<?>) o;
+      return Objects.equals(value, present.value);
     }
 
-    private static class Absent<T> extends CustomOptional<T> {
-        private static final Absent<Object> INSTANCE = new Absent<>();
+    @Override
+    public int hashCode() {
+      return Objects.hash(value);
+    }
+  }
 
-        private Absent() {
-        }
+  private static class Absent<T> extends CustomOptional<T> {
 
-        @Override
-        public boolean isPresent() {
-            return false;
-        }
+    private static final Absent<Object> INSTANCE = new Absent<>();
 
-        @Override
-        public T get() {
-            throw new NoSuchElementException();
-        }
+    private Absent() {
+    }
+
+    @Override
+    public boolean isPresent() {
+      return false;
+    }
 
-        @Override
-        public boolean equals(Object o) {
-            return o instanceof Absent;
-        }
+    @Override
+    public T get() {
+      throw new NoSuchElementException();
+    }
+
+    @Override
+    public boolean equals(Object o) {
+      return o instanceof Absent;
+    }
 
-        @Override
-        public int hashCode() {
-            return 0;
-        }
+    @Override
+    public int hashCode() {
+      return 0;
     }
+  }
 }
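
The contract being reformatted here reads the same in any language: of() wraps
a value, absent() returns a shared sentinel, and get() on the sentinel raises
rather than returning null. A rough Python rendering of that contract, for
illustration only (the real class above is Java, and LookupError stands in for
NoSuchElementException):

    class CustomOptional(object):

      @staticmethod
      def absent():
        return _ABSENT

      @staticmethod
      def of(value):
        return _Present(value)

    class _Present(CustomOptional):

      def __init__(self, value):
        self._value = value

      def is_present(self):
        return True

      def get(self):
        return self._value

    class _Absent(CustomOptional):

      def is_present(self):
        return False

      def get(self):
        raise LookupError('value is absent')

    # Single shared instance, mirroring Absent.INSTANCE in the Java code.
    _ABSENT = _Absent()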

http://git-wip-us.apache.org/repos/asf/beam/blob/7925a668/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/DynamicCheckpointGenerator.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/DynamicCheckpointGenerator.java b/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/DynamicCheckpointGenerator.java
index 2ec293c..9933019 100644
--- a/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/DynamicCheckpointGenerator.java
+++ b/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/DynamicCheckpointGenerator.java
@@ -28,29 +28,31 @@ import com.google.common.base.Function;
  * List of shards is obtained dynamically on call to {@link #generate(SimplifiedKinesisClient)}.
  */
 class DynamicCheckpointGenerator implements CheckpointGenerator {
-    private final String streamName;
-    private final StartingPoint startingPoint;
-
-    public DynamicCheckpointGenerator(String streamName, StartingPoint startingPoint) {
-        this.streamName = checkNotNull(streamName, "streamName");
-        this.startingPoint = checkNotNull(startingPoint, "startingPoint");
-    }
-
-    @Override
-    public KinesisReaderCheckpoint generate(SimplifiedKinesisClient kinesis)
-            throws TransientKinesisException {
-        return new KinesisReaderCheckpoint(
-                transform(kinesis.listShards(streamName), new Function<Shard, ShardCheckpoint>() {
-                    @Override
-                    public ShardCheckpoint apply(Shard shard) {
-                        return new ShardCheckpoint(streamName, shard.getShardId(), startingPoint);
-                    }
-                })
-        );
-    }
-
-    @Override
-    public String toString() {
-        return String.format("Checkpoint generator for %s: %s", streamName, startingPoint);
-    }
+
+  private final String streamName;
+  private final StartingPoint startingPoint;
+
+  public DynamicCheckpointGenerator(String streamName, StartingPoint startingPoint) {
+    this.streamName = checkNotNull(streamName, "streamName");
+    this.startingPoint = checkNotNull(startingPoint, "startingPoint");
+  }
+
+  @Override
+  public KinesisReaderCheckpoint generate(SimplifiedKinesisClient kinesis)
+      throws TransientKinesisException {
+    return new KinesisReaderCheckpoint(
+        transform(kinesis.listShards(streamName), new Function<Shard, ShardCheckpoint>() {
+
+          @Override
+          public ShardCheckpoint apply(Shard shard) {
+            return new ShardCheckpoint(streamName, shard.getShardId(), startingPoint);
+          }
+        })
+    );
+  }
+
+  @Override
+  public String toString() {
+    return String.format("Checkpoint generator for %s: %s", streamName, startingPoint);
+  }
 }

http://git-wip-us.apache.org/repos/asf/beam/blob/7925a668/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/GetKinesisRecordsResult.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/GetKinesisRecordsResult.java b/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/GetKinesisRecordsResult.java
index 5a34d7d..f605f55 100644
--- a/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/GetKinesisRecordsResult.java
+++ b/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/GetKinesisRecordsResult.java
@@ -21,6 +21,7 @@ import static com.google.common.collect.Lists.transform;
 
 import com.amazonaws.services.kinesis.clientlibrary.types.UserRecord;
 import com.google.common.base.Function;
+
 import java.util.List;
 import javax.annotation.Nullable;
 
@@ -28,27 +29,29 @@ import javax.annotation.Nullable;
  * Represents the output of 'get' operation on Kinesis stream.
  */
 class GetKinesisRecordsResult {
-    private final List<KinesisRecord> records;
-    private final String nextShardIterator;
-
-    public GetKinesisRecordsResult(List<UserRecord> records, String nextShardIterator,
-                                   final String streamName, final String shardId) {
-        this.records = transform(records, new Function<UserRecord, KinesisRecord>() {
-            @Nullable
-            @Override
-            public KinesisRecord apply(@Nullable UserRecord input) {
-                assert input != null;  // to make FindBugs happy
-                return new KinesisRecord(input, streamName, shardId);
-            }
-        });
-        this.nextShardIterator = nextShardIterator;
-    }
-
-    public List<KinesisRecord> getRecords() {
-        return records;
-    }
-
-    public String getNextShardIterator() {
-        return nextShardIterator;
-    }
+
+  private final List<KinesisRecord> records;
+  private final String nextShardIterator;
+
+  public GetKinesisRecordsResult(List<UserRecord> records, String nextShardIterator,
+      final String streamName, final String shardId) {
+    this.records = transform(records, new Function<UserRecord, KinesisRecord>() {
+
+      @Nullable
+      @Override
+      public KinesisRecord apply(@Nullable UserRecord input) {
+        assert input != null;  // to make FindBugs happy
+        return new KinesisRecord(input, streamName, shardId);
+      }
+    });
+    this.nextShardIterator = nextShardIterator;
+  }
+
+  public List<KinesisRecord> getRecords() {
+    return records;
+  }
+
+  public String getNextShardIterator() {
+    return nextShardIterator;
+  }
 }

http://git-wip-us.apache.org/repos/asf/beam/blob/7925a668/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/KinesisClientProvider.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/KinesisClientProvider.java b/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/KinesisClientProvider.java
index c7fd7f6..b5b721e 100644
--- a/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/KinesisClientProvider.java
+++ b/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/KinesisClientProvider.java
@@ -18,6 +18,7 @@
 package org.apache.beam.sdk.io.kinesis;
 
 import com.amazonaws.services.kinesis.AmazonKinesis;
+
 import java.io.Serializable;
 
 /**
@@ -27,5 +28,6 @@ import java.io.Serializable;
  * {@link Serializable} to ensure it can be sent to worker machines.
  */
 interface KinesisClientProvider extends Serializable {
-    AmazonKinesis get();
+
+  AmazonKinesis get();
 }

http://git-wip-us.apache.org/repos/asf/beam/blob/7925a668/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/KinesisIO.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/KinesisIO.java b/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/KinesisIO.java
index b85eb63..bc8ada1 100644
--- a/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/KinesisIO.java
+++ b/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/KinesisIO.java
@@ -17,7 +17,6 @@
  */
 package org.apache.beam.sdk.io.kinesis;
 
-
 import static com.google.common.base.Preconditions.checkArgument;
 import static com.google.common.base.Preconditions.checkNotNull;
 
@@ -29,7 +28,9 @@ import com.amazonaws.services.kinesis.AmazonKinesis;
 import com.amazonaws.services.kinesis.AmazonKinesisClient;
 import com.amazonaws.services.kinesis.clientlibrary.lib.worker.InitialPositionInStream;
 import com.google.auto.value.AutoValue;
+
 import javax.annotation.Nullable;
+
 import org.apache.beam.sdk.annotations.Experimental;
 import org.apache.beam.sdk.io.BoundedReadFromUnboundedSource;
 import org.apache.beam.sdk.transforms.PTransform;
@@ -102,142 +103,148 @@ import org.joda.time.Instant;
  */
 @Experimental(Experimental.Kind.SOURCE_SINK)
 public final class KinesisIO {
-    /** Returns a new {@link Read} transform for reading from Kinesis. */
-    public static Read read() {
-        return new AutoValue_KinesisIO_Read.Builder().setMaxNumRecords(-1).build();
+
+  /** Returns a new {@link Read} transform for reading from Kinesis. */
+  public static Read read() {
+    return new AutoValue_KinesisIO_Read.Builder().setMaxNumRecords(-1).build();
+  }
+
+  /** Implementation of {@link #read}. */
+  @AutoValue
+  public abstract static class Read extends PTransform<PBegin, PCollection<KinesisRecord>> {
+
+    @Nullable
+    abstract String getStreamName();
+
+    @Nullable
+    abstract StartingPoint getInitialPosition();
+
+    @Nullable
+    abstract KinesisClientProvider getClientProvider();
+
+    abstract int getMaxNumRecords();
+
+    @Nullable
+    abstract Duration getMaxReadTime();
+
+    abstract Builder toBuilder();
+
+    @AutoValue.Builder
+    abstract static class Builder {
+
+      abstract Builder setStreamName(String streamName);
+
+      abstract Builder setInitialPosition(StartingPoint startingPoint);
+
+      abstract Builder setClientProvider(KinesisClientProvider clientProvider);
+
+      abstract Builder setMaxNumRecords(int maxNumRecords);
+
+      abstract Builder setMaxReadTime(Duration maxReadTime);
+
+      abstract Read build();
     }
 
-    /** Implementation of {@link #read}. */
-    @AutoValue
-    public abstract static class Read extends PTransform<PBegin, PCollection<KinesisRecord>> {
-        @Nullable
-        abstract String getStreamName();
-
-        @Nullable
-        abstract StartingPoint getInitialPosition();
-
-        @Nullable
-        abstract KinesisClientProvider getClientProvider();
-
-        abstract int getMaxNumRecords();
-
-        @Nullable
-        abstract Duration getMaxReadTime();
-
-        abstract Builder toBuilder();
-
-        @AutoValue.Builder
-        abstract static class Builder {
-            abstract Builder setStreamName(String streamName);
-            abstract Builder setInitialPosition(StartingPoint startingPoint);
-            abstract Builder setClientProvider(KinesisClientProvider clientProvider);
-            abstract Builder setMaxNumRecords(int maxNumRecords);
-            abstract Builder setMaxReadTime(Duration maxReadTime);
-
-            abstract Read build();
-        }
-
-        /**
-         * Specify reading from streamName at some initial position.
-         */
-        public Read from(String streamName, InitialPositionInStream initialPosition) {
-            return toBuilder()
-                .setStreamName(streamName)
-                .setInitialPosition(
-                    new StartingPoint(checkNotNull(initialPosition, "initialPosition")))
-                .build();
-        }
-
-        /**
-         * Specify reading from streamName beginning at given {@link Instant}.
-         * This {@link Instant} must be in the past, i.e. before {@link Instant#now()}.
-         */
-        public Read from(String streamName, Instant initialTimestamp) {
-            return toBuilder()
-                .setStreamName(streamName)
-                .setInitialPosition(
-                    new StartingPoint(checkNotNull(initialTimestamp, "initialTimestamp")))
-                .build();
-        }
-
-        /**
-         * Allows to specify custom {@link KinesisClientProvider}.
-         * {@link KinesisClientProvider} provides {@link AmazonKinesis} instances which are later
-         * used for communication with Kinesis.
-         * You should use this method if {@link Read#withClientProvider(String, String, Regions)}
-         * does not suit your needs.
-         */
-        public Read withClientProvider(KinesisClientProvider kinesisClientProvider) {
-            return toBuilder().setClientProvider(kinesisClientProvider).build();
-        }
-
-        /**
-         * Specify credential details and region to be used to read from Kinesis.
-         * If you need more sophisticated credential protocol, then you should look at
-         * {@link Read#withClientProvider(KinesisClientProvider)}.
-         */
-        public Read withClientProvider(String awsAccessKey, String awsSecretKey, Regions region) {
-            return withClientProvider(new BasicKinesisProvider(awsAccessKey, awsSecretKey, region));
-        }
-
-        /** Specifies to read at most a given number of records. */
-        public Read withMaxNumRecords(int maxNumRecords) {
-            checkArgument(
-                maxNumRecords > 0, "maxNumRecords must be positive, but was: %s", maxNumRecords);
-            return toBuilder().setMaxNumRecords(maxNumRecords).build();
-        }
-
-        /** Specifies to read at most a given number of records. */
-        public Read withMaxReadTime(Duration maxReadTime) {
-            checkNotNull(maxReadTime, "maxReadTime");
-            return toBuilder().setMaxReadTime(maxReadTime).build();
-        }
-
-        @Override
-        public PCollection<KinesisRecord> expand(PBegin input) {
-            org.apache.beam.sdk.io.Read.Unbounded<KinesisRecord> read =
-                org.apache.beam.sdk.io.Read.from(
-                    new KinesisSource(getClientProvider(), getStreamName(), getInitialPosition()));
-            if (getMaxNumRecords() > 0) {
-                BoundedReadFromUnboundedSource<KinesisRecord> bounded =
-                    read.withMaxNumRecords(getMaxNumRecords());
-                return getMaxReadTime() == null
-                    ? input.apply(bounded)
-                    : input.apply(bounded.withMaxReadTime(getMaxReadTime()));
-            } else {
-                return getMaxReadTime() == null
-                    ? input.apply(read)
-                    : input.apply(read.withMaxReadTime(getMaxReadTime()));
-            }
-        }
-
-        private static final class BasicKinesisProvider implements KinesisClientProvider {
-
-            private final String accessKey;
-            private final String secretKey;
-            private final Regions region;
-
-            private BasicKinesisProvider(String accessKey, String secretKey, Regions region) {
-                this.accessKey = checkNotNull(accessKey, "accessKey");
-                this.secretKey = checkNotNull(secretKey, "secretKey");
-                this.region = checkNotNull(region, "region");
-            }
-
-
-            private AWSCredentialsProvider getCredentialsProvider() {
-                return new StaticCredentialsProvider(new BasicAWSCredentials(
-                        accessKey,
-                        secretKey
-                ));
-
-            }
-
-            @Override
-            public AmazonKinesis get() {
-                AmazonKinesisClient client = new AmazonKinesisClient(getCredentialsProvider());
-                client.withRegion(region);
-                return client;
-            }
-        }
+    /**
+     * Specify reading from streamName at some initial position.
+     */
+    public Read from(String streamName, InitialPositionInStream initialPosition) {
+      return toBuilder()
+          .setStreamName(streamName)
+          .setInitialPosition(
+              new StartingPoint(checkNotNull(initialPosition, "initialPosition")))
+          .build();
+    }
+
+    /**
+     * Specify reading from streamName, beginning at the given {@link Instant}.
+     * This {@link Instant} must be in the past, i.e. before {@link Instant#now()}.
+     */
+    public Read from(String streamName, Instant initialTimestamp) {
+      return toBuilder()
+          .setStreamName(streamName)
+          .setInitialPosition(
+              new StartingPoint(checkNotNull(initialTimestamp, "initialTimestamp")))
+          .build();
+    }
+
+    /**
+     * Allows specifying a custom {@link KinesisClientProvider}.
+     * {@link KinesisClientProvider} provides {@link AmazonKinesis} instances which are later
+     * used for communication with Kinesis.
+     * You should use this method if {@link Read#withClientProvider(String, String, Regions)}
+     * does not suit your needs.
+     */
+    public Read withClientProvider(KinesisClientProvider kinesisClientProvider) {
+      return toBuilder().setClientProvider(kinesisClientProvider).build();
+    }
+
+    /**
+     * Specify credential details and region to be used to read from Kinesis.
+     * If you need a more sophisticated credential scheme, you should look at
+     * {@link Read#withClientProvider(KinesisClientProvider)}.
+     */
+    public Read withClientProvider(String awsAccessKey, String awsSecretKey, Regions region) {
+      return withClientProvider(new BasicKinesisProvider(awsAccessKey, awsSecretKey, region));
+    }
+
+    /** Specifies to read at most a given number of records. */
+    public Read withMaxNumRecords(int maxNumRecords) {
+      checkArgument(
+          maxNumRecords > 0, "maxNumRecords must be positive, but was: %s", maxNumRecords);
+      return toBuilder().setMaxNumRecords(maxNumRecords).build();
+    }
+
+    /** Specifies to read records for at most the given duration. */
+    public Read withMaxReadTime(Duration maxReadTime) {
+      checkNotNull(maxReadTime, "maxReadTime");
+      return toBuilder().setMaxReadTime(maxReadTime).build();
+    }
+
+    @Override
+    public PCollection<KinesisRecord> expand(PBegin input) {
+      org.apache.beam.sdk.io.Read.Unbounded<KinesisRecord> read =
+          org.apache.beam.sdk.io.Read.from(
+              new KinesisSource(getClientProvider(), getStreamName(), getInitialPosition()));
+      if (getMaxNumRecords() > 0) {
+        BoundedReadFromUnboundedSource<KinesisRecord> bounded =
+            read.withMaxNumRecords(getMaxNumRecords());
+        return getMaxReadTime() == null
+            ? input.apply(bounded)
+            : input.apply(bounded.withMaxReadTime(getMaxReadTime()));
+      } else {
+        return getMaxReadTime() == null
+            ? input.apply(read)
+            : input.apply(read.withMaxReadTime(getMaxReadTime()));
+      }
+    }
+
+    private static final class BasicKinesisProvider implements KinesisClientProvider {
+
+      private final String accessKey;
+      private final String secretKey;
+      private final Regions region;
+
+      private BasicKinesisProvider(String accessKey, String secretKey, Regions region) {
+        this.accessKey = checkNotNull(accessKey, "accessKey");
+        this.secretKey = checkNotNull(secretKey, "secretKey");
+        this.region = checkNotNull(region, "region");
+      }
+
+      private AWSCredentialsProvider getCredentialsProvider() {
+        return new StaticCredentialsProvider(new BasicAWSCredentials(
+            accessKey,
+            secretKey
+        ));
+
+      }
+
+      @Override
+      public AmazonKinesis get() {
+        AmazonKinesisClient client = new AmazonKinesisClient(getCredentialsProvider());
+        client.withRegion(region);
+        return client;
+      }
     }
+  }
 }
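
Putting the builder methods together, a pipeline typically applies the transform as in the
sketch below ("p", the stream name, credentials and region are placeholders):

    import com.amazonaws.regions.Regions;
    import com.amazonaws.services.kinesis.clientlibrary.lib.worker.InitialPositionInStream;
    import org.apache.beam.sdk.values.PCollection;
    import org.joda.time.Duration;

    PCollection<KinesisRecord> records = p.apply(
        KinesisIO.read()
            .from("my-stream", InitialPositionInStream.LATEST)  // or from(name, pastInstant)
            .withClientProvider("ACCESS_KEY", "SECRET_KEY", Regions.US_EAST_1)
            .withMaxNumRecords(1000)                            // optional: bounds the source
            .withMaxReadTime(Duration.standardMinutes(1)));     // optional: time-based cap

Note that expand() wraps the source in BoundedReadFromUnboundedSource only when
withMaxNumRecords is set; withMaxReadTime alone still caps the read via
Read.Unbounded#withMaxReadTime.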

http://git-wip-us.apache.org/repos/asf/beam/blob/7925a668/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/KinesisReader.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/KinesisReader.java b/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/KinesisReader.java
index 2138094..e5c32d2 100644
--- a/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/KinesisReader.java
+++ b/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/KinesisReader.java
@@ -17,129 +17,129 @@
  */
 package org.apache.beam.sdk.io.kinesis;
 
-
 import static com.google.common.base.Preconditions.checkNotNull;
 import static com.google.common.collect.Lists.newArrayList;
 
 import java.io.IOException;
 import java.util.List;
 import java.util.NoSuchElementException;
+
 import org.apache.beam.sdk.io.UnboundedSource;
 import org.joda.time.Instant;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-
 /**
 * Reads data from multiple Kinesis shards in a single thread.
 * It uses a simple round-robin algorithm when fetching data from shards.
  */
 class KinesisReader extends UnboundedSource.UnboundedReader<KinesisRecord> {
-    private static final Logger LOG = LoggerFactory.getLogger(KinesisReader.class);
-
-    private final SimplifiedKinesisClient kinesis;
-    private final UnboundedSource<KinesisRecord, ?> source;
-    private final CheckpointGenerator initialCheckpointGenerator;
-    private RoundRobin<ShardRecordsIterator> shardIterators;
-    private CustomOptional<KinesisRecord> currentRecord = CustomOptional.absent();
-
-    public KinesisReader(SimplifiedKinesisClient kinesis,
-                         CheckpointGenerator initialCheckpointGenerator,
-                         UnboundedSource<KinesisRecord, ?> source) {
-        this.kinesis = checkNotNull(kinesis, "kinesis");
-        this.initialCheckpointGenerator =
-                checkNotNull(initialCheckpointGenerator, "initialCheckpointGenerator");
-        this.source = source;
-    }
-
-    /**
-     * Generates initial checkpoint and instantiates iterators for shards.
-     */
-    @Override
-    public boolean start() throws IOException {
-        LOG.info("Starting reader using {}", initialCheckpointGenerator);
-
-        try {
-            KinesisReaderCheckpoint initialCheckpoint =
-                    initialCheckpointGenerator.generate(kinesis);
-            List<ShardRecordsIterator> iterators = newArrayList();
-            for (ShardCheckpoint checkpoint : initialCheckpoint) {
-                iterators.add(checkpoint.getShardRecordsIterator(kinesis));
-            }
-            shardIterators = new RoundRobin<>(iterators);
-        } catch (TransientKinesisException e) {
-            throw new IOException(e);
-        }
 
-        return advance();
+  private static final Logger LOG = LoggerFactory.getLogger(KinesisReader.class);
+
+  private final SimplifiedKinesisClient kinesis;
+  private final UnboundedSource<KinesisRecord, ?> source;
+  private final CheckpointGenerator initialCheckpointGenerator;
+  private RoundRobin<ShardRecordsIterator> shardIterators;
+  private CustomOptional<KinesisRecord> currentRecord = CustomOptional.absent();
+
+  public KinesisReader(SimplifiedKinesisClient kinesis,
+      CheckpointGenerator initialCheckpointGenerator,
+      UnboundedSource<KinesisRecord, ?> source) {
+    this.kinesis = checkNotNull(kinesis, "kinesis");
+    this.initialCheckpointGenerator =
+        checkNotNull(initialCheckpointGenerator, "initialCheckpointGenerator");
+    this.source = source;
+  }
+
+  /**
+   * Generates initial checkpoint and instantiates iterators for shards.
+   */
+  @Override
+  public boolean start() throws IOException {
+    LOG.info("Starting reader using {}", initialCheckpointGenerator);
+
+    try {
+      KinesisReaderCheckpoint initialCheckpoint =
+          initialCheckpointGenerator.generate(kinesis);
+      List<ShardRecordsIterator> iterators = newArrayList();
+      for (ShardCheckpoint checkpoint : initialCheckpoint) {
+        iterators.add(checkpoint.getShardRecordsIterator(kinesis));
+      }
+      shardIterators = new RoundRobin<>(iterators);
+    } catch (TransientKinesisException e) {
+      throw new IOException(e);
     }
 
-    /**
-     * Moves to the next record in one of the shards.
-     * If current shard iterator can be move forward (i.e. there's a record present) then we do it.
-     * If not, we iterate over shards in a round-robin manner.
-     */
-    @Override
-    public boolean advance() throws IOException {
-        try {
-            for (int i = 0; i < shardIterators.size(); ++i) {
-                currentRecord = shardIterators.getCurrent().next();
-                if (currentRecord.isPresent()) {
-                    return true;
-                } else {
-                    shardIterators.moveForward();
-                }
-            }
-        } catch (TransientKinesisException e) {
-            LOG.warn("Transient exception occurred", e);
+    return advance();
+  }
+
+  /**
+   * Moves to the next record in one of the shards.
+   * If the current shard iterator can be moved forward (i.e. a record is present), we do so.
+   * If not, we iterate over shards in a round-robin manner.
+   */
+  @Override
+  public boolean advance() throws IOException {
+    try {
+      for (int i = 0; i < shardIterators.size(); ++i) {
+        currentRecord = shardIterators.getCurrent().next();
+        if (currentRecord.isPresent()) {
+          return true;
+        } else {
+          shardIterators.moveForward();
         }
-        return false;
-    }
-
-    @Override
-    public byte[] getCurrentRecordId() throws NoSuchElementException {
-        return currentRecord.get().getUniqueId();
-    }
-
-    @Override
-    public KinesisRecord getCurrent() throws NoSuchElementException {
-        return currentRecord.get();
-    }
-
-    /**
-     * When {@link KinesisReader} was advanced to the current record.
-     * We cannot use approximate arrival timestamp given for each record by Kinesis as it
-     * is not guaranteed to be accurate - this could lead to mark some records as "late"
-     * even if they were not.
-     */
-    @Override
-    public Instant getCurrentTimestamp() throws NoSuchElementException {
-        return currentRecord.get().getReadTime();
-    }
-
-    @Override
-    public void close() throws IOException {
-    }
-
-    /**
-     * Current time.
-     * We cannot give better approximation of the watermark with current semantics of
-     * {@link KinesisReader#getCurrentTimestamp()}, because we don't know when the next
-     * {@link KinesisReader#advance()} will be called.
-     */
-    @Override
-    public Instant getWatermark() {
-        return Instant.now();
-    }
-
-    @Override
-    public UnboundedSource.CheckpointMark getCheckpointMark() {
-        return KinesisReaderCheckpoint.asCurrentStateOf(shardIterators);
-    }
-
-    @Override
-    public UnboundedSource<KinesisRecord, ?> getCurrentSource() {
-        return source;
+      }
+    } catch (TransientKinesisException e) {
+      LOG.warn("Transient exception occurred", e);
     }
+    return false;
+  }
+
+  @Override
+  public byte[] getCurrentRecordId() throws NoSuchElementException {
+    return currentRecord.get().getUniqueId();
+  }
+
+  @Override
+  public KinesisRecord getCurrent() throws NoSuchElementException {
+    return currentRecord.get();
+  }
+
+  /**
+   * Returns the time when the {@link KinesisReader} was advanced to the current record.
+   * We cannot use the approximate arrival timestamp that Kinesis assigns to each record, as it
+   * is not guaranteed to be accurate - this could lead to marking some records as "late"
+   * even if they were not.
+   */
+  @Override
+  public Instant getCurrentTimestamp() throws NoSuchElementException {
+    return currentRecord.get().getReadTime();
+  }
+
+  @Override
+  public void close() throws IOException {
+  }
+
+  /**
+   * Returns the current time.
+   * We cannot give a better approximation of the watermark with the current semantics of
+   * {@link KinesisReader#getCurrentTimestamp()}, because we don't know when the next
+   * {@link KinesisReader#advance()} will be called.
+   */
+  @Override
+  public Instant getWatermark() {
+    return Instant.now();
+  }
+
+  @Override
+  public UnboundedSource.CheckpointMark getCheckpointMark() {
+    return KinesisReaderCheckpoint.asCurrentStateOf(shardIterators);
+  }
+
+  @Override
+  public UnboundedSource<KinesisRecord, ?> getCurrentSource() {
+    return source;
+  }
 
 }
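
The reader is driven by the runner through the UnboundedReader contract. A simplified
lifecycle sketch ("source" and "options" are placeholders; real runners also persist the
checkpoint mark between bundles):

    import org.apache.beam.sdk.io.UnboundedSource;
    import org.joda.time.Instant;

    UnboundedSource.UnboundedReader<KinesisRecord> reader = source.createReader(options, null);
    for (boolean more = reader.start(); more; more = reader.advance()) {
      KinesisRecord record = reader.getCurrent();        // shards are polled round-robin
      Instant timestamp = reader.getCurrentTimestamp();  // read time, not arrival time
    }
    // advance() returning false means "no data right now", not end of stream;
    // the runner snapshots progress and resumes later from this mark:
    UnboundedSource.CheckpointMark mark = reader.getCheckpointMark();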

http://git-wip-us.apache.org/repos/asf/beam/blob/7925a668/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/KinesisReaderCheckpoint.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/KinesisReaderCheckpoint.java b/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/KinesisReaderCheckpoint.java
index f0fa45d..d995e75 100644
--- a/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/KinesisReaderCheckpoint.java
+++ b/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/KinesisReaderCheckpoint.java
@@ -23,11 +23,13 @@ import static com.google.common.collect.Lists.partition;
 
 import com.google.common.base.Function;
 import com.google.common.collect.ImmutableList;
+
 import java.io.IOException;
 import java.io.Serializable;
 import java.util.Iterator;
 import java.util.List;
 import javax.annotation.Nullable;
+
 import org.apache.beam.sdk.io.UnboundedSource;
 
 /**
@@ -37,60 +39,61 @@ import org.apache.beam.sdk.io.UnboundedSource;
  * This class is immutable.
  */
 class KinesisReaderCheckpoint implements Iterable<ShardCheckpoint>, UnboundedSource
-        .CheckpointMark, Serializable {
-    private final List<ShardCheckpoint> shardCheckpoints;
+    .CheckpointMark, Serializable {
 
-    public KinesisReaderCheckpoint(Iterable<ShardCheckpoint> shardCheckpoints) {
-        this.shardCheckpoints = ImmutableList.copyOf(shardCheckpoints);
-    }
+  private final List<ShardCheckpoint> shardCheckpoints;
 
-    public static KinesisReaderCheckpoint asCurrentStateOf(Iterable<ShardRecordsIterator>
-                                                                   iterators) {
-        return new KinesisReaderCheckpoint(transform(iterators,
-                new Function<ShardRecordsIterator, ShardCheckpoint>() {
-
-                    @Nullable
-                    @Override
-                    public ShardCheckpoint apply(@Nullable
-                                                 ShardRecordsIterator shardRecordsIterator) {
-                        assert shardRecordsIterator != null;
-                        return shardRecordsIterator.getCheckpoint();
-                    }
-                }));
-    }
+  public KinesisReaderCheckpoint(Iterable<ShardCheckpoint> shardCheckpoints) {
+    this.shardCheckpoints = ImmutableList.copyOf(shardCheckpoints);
+  }
 
-    /**
-     * Splits given multi-shard checkpoint into partitions of approximately equal size.
-     *
-     * @param desiredNumSplits - upper limit for number of partitions to generate.
-     * @return list of checkpoints covering consecutive partitions of current checkpoint.
-     */
-    public List<KinesisReaderCheckpoint> splitInto(int desiredNumSplits) {
-        int partitionSize = divideAndRoundUp(shardCheckpoints.size(), desiredNumSplits);
-
-        List<KinesisReaderCheckpoint> checkpoints = newArrayList();
-        for (List<ShardCheckpoint> shardPartition : partition(shardCheckpoints, partitionSize)) {
-            checkpoints.add(new KinesisReaderCheckpoint(shardPartition));
-        }
-        return checkpoints;
-    }
+  public static KinesisReaderCheckpoint asCurrentStateOf(Iterable<ShardRecordsIterator>
+      iterators) {
+    return new KinesisReaderCheckpoint(transform(iterators,
+        new Function<ShardRecordsIterator, ShardCheckpoint>() {
 
-    private int divideAndRoundUp(int nominator, int denominator) {
-        return (nominator + denominator - 1) / denominator;
-    }
+          @Nullable
+          @Override
+          public ShardCheckpoint apply(@Nullable
+              ShardRecordsIterator shardRecordsIterator) {
+            assert shardRecordsIterator != null;
+            return shardRecordsIterator.getCheckpoint();
+          }
+        }));
+  }
 
-    @Override
-    public void finalizeCheckpoint() throws IOException {
+  /**
+   * Splits the given multi-shard checkpoint into partitions of approximately equal size.
+   *
+   * @param desiredNumSplits upper limit for the number of partitions to generate.
+   * @return list of checkpoints covering consecutive partitions of the current checkpoint.
+   */
+  public List<KinesisReaderCheckpoint> splitInto(int desiredNumSplits) {
+    int partitionSize = divideAndRoundUp(shardCheckpoints.size(), desiredNumSplits);
 
+    List<KinesisReaderCheckpoint> checkpoints = newArrayList();
+    for (List<ShardCheckpoint> shardPartition : partition(shardCheckpoints, partitionSize)) {
+      checkpoints.add(new KinesisReaderCheckpoint(shardPartition));
     }
+    return checkpoints;
+  }
 
-    @Override
-    public String toString() {
-        return shardCheckpoints.toString();
-    }
+  private int divideAndRoundUp(int numerator, int denominator) {
+    return (numerator + denominator - 1) / denominator;
+  }
 
-    @Override
-    public Iterator<ShardCheckpoint> iterator() {
-        return shardCheckpoints.iterator();
-    }
+  @Override
+  public void finalizeCheckpoint() throws IOException {
+
+  }
+
+  @Override
+  public String toString() {
+    return shardCheckpoints.toString();
+  }
+
+  @Override
+  public Iterator<ShardCheckpoint> iterator() {
+    return shardCheckpoints.iterator();
+  }
 }
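
The split math is plain ceiling division. A worked sketch mirroring splitInto (the
numbers are illustrative):

    // With 10 shard checkpoints and desiredNumSplits = 4:
    int partitionSize = (10 + 4 - 1) / 4;  // divideAndRoundUp -> 3
    // Lists.partition over the 10 checkpoints then yields partitions of sizes
    // 3, 3, 3 and 1, i.e. 4 splits -- never more than desiredNumSplits.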

http://git-wip-us.apache.org/repos/asf/beam/blob/7925a668/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/KinesisRecord.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/KinesisRecord.java b/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/KinesisRecord.java
index 02b5370..057b7bb 100644
--- a/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/KinesisRecord.java
+++ b/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/KinesisRecord.java
@@ -22,7 +22,9 @@ import static org.apache.commons.lang.builder.HashCodeBuilder.reflectionHashCode
 import com.amazonaws.services.kinesis.clientlibrary.types.ExtendedSequenceNumber;
 import com.amazonaws.services.kinesis.clientlibrary.types.UserRecord;
 import com.google.common.base.Charsets;
+
 import java.nio.ByteBuffer;
+
 import org.apache.commons.lang.builder.EqualsBuilder;
 import org.joda.time.Instant;
 
@@ -30,91 +32,92 @@ import org.joda.time.Instant;
  * {@link UserRecord} enhanced with utility methods.
  */
 public class KinesisRecord {
-    private Instant readTime;
-    private String streamName;
-    private String shardId;
-    private long subSequenceNumber;
-    private String sequenceNumber;
-    private Instant approximateArrivalTimestamp;
-    private ByteBuffer data;
-    private String partitionKey;
-
-    public KinesisRecord(UserRecord record, String streamName, String shardId) {
-        this(record.getData(), record.getSequenceNumber(), record.getSubSequenceNumber(),
-                record.getPartitionKey(),
-                new Instant(record.getApproximateArrivalTimestamp()),
-                Instant.now(),
-                streamName, shardId);
-    }
-
-    public KinesisRecord(ByteBuffer data, String sequenceNumber, long subSequenceNumber,
-                         String partitionKey, Instant approximateArrivalTimestamp,
-                         Instant readTime,
-                         String streamName, String shardId) {
-        this.data = data;
-        this.sequenceNumber = sequenceNumber;
-        this.subSequenceNumber = subSequenceNumber;
-        this.partitionKey = partitionKey;
-        this.approximateArrivalTimestamp = approximateArrivalTimestamp;
-        this.readTime = readTime;
-        this.streamName = streamName;
-        this.shardId = shardId;
-    }
-
-    public ExtendedSequenceNumber getExtendedSequenceNumber() {
-        return new ExtendedSequenceNumber(getSequenceNumber(), getSubSequenceNumber());
-    }
-
-    /***
-     * @return unique id of the record based on its position in the stream
-     */
-    public byte[] getUniqueId() {
-        return getExtendedSequenceNumber().toString().getBytes(Charsets.UTF_8);
-    }
-
-    public Instant getReadTime() {
-        return readTime;
-    }
-
-    public String getStreamName() {
-        return streamName;
-    }
-
-    public String getShardId() {
-        return shardId;
-    }
-
-    public byte[] getDataAsBytes() {
-        return getData().array();
-    }
-
-    @Override
-    public boolean equals(Object obj) {
-        return EqualsBuilder.reflectionEquals(this, obj);
-    }
-
-    @Override
-    public int hashCode() {
-        return reflectionHashCode(this);
-    }
-
-    public long getSubSequenceNumber() {
-        return subSequenceNumber;
-    }
-
-    public String getSequenceNumber() {
-        return sequenceNumber;
-    }
-
-    public Instant getApproximateArrivalTimestamp() {
-        return approximateArrivalTimestamp;
-    }
-
-    public ByteBuffer getData() {
-        return data;
-    }
-
-    public String getPartitionKey() {
-        return partitionKey;
-    }
+
+  private Instant readTime;
+  private String streamName;
+  private String shardId;
+  private long subSequenceNumber;
+  private String sequenceNumber;
+  private Instant approximateArrivalTimestamp;
+  private ByteBuffer data;
+  private String partitionKey;
+
+  public KinesisRecord(UserRecord record, String streamName, String shardId) {
+    this(record.getData(), record.getSequenceNumber(), record.getSubSequenceNumber(),
+        record.getPartitionKey(),
+        new Instant(record.getApproximateArrivalTimestamp()),
+        Instant.now(),
+        streamName, shardId);
+  }
+
+  public KinesisRecord(ByteBuffer data, String sequenceNumber, long subSequenceNumber,
+      String partitionKey, Instant approximateArrivalTimestamp,
+      Instant readTime,
+      String streamName, String shardId) {
+    this.data = data;
+    this.sequenceNumber = sequenceNumber;
+    this.subSequenceNumber = subSequenceNumber;
+    this.partitionKey = partitionKey;
+    this.approximateArrivalTimestamp = approximateArrivalTimestamp;
+    this.readTime = readTime;
+    this.streamName = streamName;
+    this.shardId = shardId;
+  }
+
+  public ExtendedSequenceNumber getExtendedSequenceNumber() {
+    return new ExtendedSequenceNumber(getSequenceNumber(), getSubSequenceNumber());
+  }
+
+  /**
+   * @return unique id of the record based on its position in the stream
+   */
+  public byte[] getUniqueId() {
+    return getExtendedSequenceNumber().toString().getBytes(Charsets.UTF_8);
+  }
+
+  public Instant getReadTime() {
+    return readTime;
+  }
+
+  public String getStreamName() {
+    return streamName;
+  }
+
+  public String getShardId() {
+    return shardId;
+  }
+
+  public byte[] getDataAsBytes() {
+    return getData().array();
+  }
+
+  @Override
+  public boolean equals(Object obj) {
+    return EqualsBuilder.reflectionEquals(this, obj);
+  }
+
+  @Override
+  public int hashCode() {
+    return reflectionHashCode(this);
+  }
+
+  public long getSubSequenceNumber() {
+    return subSequenceNumber;
+  }
+
+  public String getSequenceNumber() {
+    return sequenceNumber;
+  }
+
+  public Instant getApproximateArrivalTimestamp() {
+    return approximateArrivalTimestamp;
+  }
+
+  public ByteBuffer getData() {
+    return data;
+  }
+
+  public String getPartitionKey() {
+    return partitionKey;
+  }
 }
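
One detail worth noting when consuming the payload: getDataAsBytes() delegates to
ByteBuffer.array(), which requires an array-backed buffer and returns the whole backing
array. A minimal access sketch ("record" is a placeholder):

    import com.google.common.base.Charsets;

    byte[] payload = record.getDataAsBytes();           // assumes an array-backed ByteBuffer
    String text = new String(payload, Charsets.UTF_8);  // if the payload is UTF-8 text
    byte[] dedupId = record.getUniqueId();              // extended sequence number as UTF-8 bytes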

http://git-wip-us.apache.org/repos/asf/beam/blob/7925a668/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/KinesisRecordCoder.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/KinesisRecordCoder.java b/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/KinesisRecordCoder.java
index f233e27..dcf564d 100644
--- a/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/KinesisRecordCoder.java
+++ b/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/KinesisRecordCoder.java
@@ -21,6 +21,7 @@ import java.io.IOException;
 import java.io.InputStream;
 import java.io.OutputStream;
 import java.nio.ByteBuffer;
+
 import org.apache.beam.sdk.coders.AtomicCoder;
 import org.apache.beam.sdk.coders.ByteArrayCoder;
 import org.apache.beam.sdk.coders.Coder;
@@ -33,40 +34,41 @@ import org.joda.time.Instant;
  * A {@link Coder} for {@link KinesisRecord}.
  */
 class KinesisRecordCoder extends AtomicCoder<KinesisRecord> {
-    private static final StringUtf8Coder STRING_CODER = StringUtf8Coder.of();
-    private static final ByteArrayCoder BYTE_ARRAY_CODER = ByteArrayCoder.of();
-    private static final InstantCoder INSTANT_CODER = InstantCoder.of();
-    private static final VarLongCoder VAR_LONG_CODER = VarLongCoder.of();
 
-    public static KinesisRecordCoder of() {
-        return new KinesisRecordCoder();
-    }
+  private static final StringUtf8Coder STRING_CODER = StringUtf8Coder.of();
+  private static final ByteArrayCoder BYTE_ARRAY_CODER = ByteArrayCoder.of();
+  private static final InstantCoder INSTANT_CODER = InstantCoder.of();
+  private static final VarLongCoder VAR_LONG_CODER = VarLongCoder.of();
+
+  public static KinesisRecordCoder of() {
+    return new KinesisRecordCoder();
+  }
 
-    @Override
-    public void encode(KinesisRecord value, OutputStream outStream) throws
-            IOException {
-        BYTE_ARRAY_CODER.encode(value.getData().array(), outStream);
-        STRING_CODER.encode(value.getSequenceNumber(), outStream);
-        STRING_CODER.encode(value.getPartitionKey(), outStream);
-        INSTANT_CODER.encode(value.getApproximateArrivalTimestamp(), outStream);
-        VAR_LONG_CODER.encode(value.getSubSequenceNumber(), outStream);
-        INSTANT_CODER.encode(value.getReadTime(), outStream);
-        STRING_CODER.encode(value.getStreamName(), outStream);
-        STRING_CODER.encode(value.getShardId(), outStream);
-    }
+  @Override
+  public void encode(KinesisRecord value, OutputStream outStream) throws
+      IOException {
+    BYTE_ARRAY_CODER.encode(value.getData().array(), outStream);
+    STRING_CODER.encode(value.getSequenceNumber(), outStream);
+    STRING_CODER.encode(value.getPartitionKey(), outStream);
+    INSTANT_CODER.encode(value.getApproximateArrivalTimestamp(), outStream);
+    VAR_LONG_CODER.encode(value.getSubSequenceNumber(), outStream);
+    INSTANT_CODER.encode(value.getReadTime(), outStream);
+    STRING_CODER.encode(value.getStreamName(), outStream);
+    STRING_CODER.encode(value.getShardId(), outStream);
+  }
 
-    @Override
-    public KinesisRecord decode(InputStream inStream) throws IOException {
-        ByteBuffer data = ByteBuffer.wrap(BYTE_ARRAY_CODER.decode(inStream));
-        String sequenceNumber = STRING_CODER.decode(inStream);
-        String partitionKey = STRING_CODER.decode(inStream);
-        Instant approximateArrivalTimestamp = INSTANT_CODER.decode(inStream);
-        long subSequenceNumber = VAR_LONG_CODER.decode(inStream);
-        Instant readTimestamp = INSTANT_CODER.decode(inStream);
-        String streamName = STRING_CODER.decode(inStream);
-        String shardId = STRING_CODER.decode(inStream);
-        return new KinesisRecord(data, sequenceNumber, subSequenceNumber, partitionKey,
-                approximateArrivalTimestamp, readTimestamp, streamName, shardId
-        );
-    }
+  @Override
+  public KinesisRecord decode(InputStream inStream) throws IOException {
+    ByteBuffer data = ByteBuffer.wrap(BYTE_ARRAY_CODER.decode(inStream));
+    String sequenceNumber = STRING_CODER.decode(inStream);
+    String partitionKey = STRING_CODER.decode(inStream);
+    Instant approximateArrivalTimestamp = INSTANT_CODER.decode(inStream);
+    long subSequenceNumber = VAR_LONG_CODER.decode(inStream);
+    Instant readTimestamp = INSTANT_CODER.decode(inStream);
+    String streamName = STRING_CODER.decode(inStream);
+    String shardId = STRING_CODER.decode(inStream);
+    return new KinesisRecord(data, sequenceNumber, subSequenceNumber, partitionKey,
+        approximateArrivalTimestamp, readTimestamp, streamName, shardId
+    );
+  }
 }
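
encode() and decode() must visit the eight fields in the same order; a round-trip sketch
makes the contract visible ("record" is a placeholder; both calls throw IOException):

    import java.io.ByteArrayInputStream;
    import java.io.ByteArrayOutputStream;

    KinesisRecordCoder coder = KinesisRecordCoder.of();
    ByteArrayOutputStream out = new ByteArrayOutputStream();
    coder.encode(record, out);
    KinesisRecord copy = coder.decode(new ByteArrayInputStream(out.toByteArray()));
    // copy.equals(record) holds, since KinesisRecord uses reflection-based equality.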

http://git-wip-us.apache.org/repos/asf/beam/blob/7925a668/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/KinesisSource.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/KinesisSource.java b/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/KinesisSource.java
index 7e67d07..362792b 100644
--- a/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/KinesisSource.java
+++ b/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/KinesisSource.java
@@ -21,6 +21,7 @@ import static com.google.common.base.Preconditions.checkNotNull;
 import static com.google.common.collect.Lists.newArrayList;
 
 import java.util.List;
+
 import org.apache.beam.sdk.coders.Coder;
 import org.apache.beam.sdk.coders.SerializableCoder;
 import org.apache.beam.sdk.io.UnboundedSource;
@@ -28,85 +29,85 @@ import org.apache.beam.sdk.options.PipelineOptions;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-
 /**
 * Represents a source for a single stream in Kinesis.
  */
 class KinesisSource extends UnboundedSource<KinesisRecord, KinesisReaderCheckpoint> {
-    private static final Logger LOG = LoggerFactory.getLogger(KinesisSource.class);
-
-    private final KinesisClientProvider kinesis;
-    private CheckpointGenerator initialCheckpointGenerator;
 
-    public KinesisSource(KinesisClientProvider kinesis, String streamName,
-                         StartingPoint startingPoint) {
-        this(kinesis, new DynamicCheckpointGenerator(streamName, startingPoint));
+  private static final Logger LOG = LoggerFactory.getLogger(KinesisSource.class);
+
+  private final KinesisClientProvider kinesis;
+  private CheckpointGenerator initialCheckpointGenerator;
+
+  public KinesisSource(KinesisClientProvider kinesis, String streamName,
+      StartingPoint startingPoint) {
+    this(kinesis, new DynamicCheckpointGenerator(streamName, startingPoint));
+  }
+
+  private KinesisSource(KinesisClientProvider kinesisClientProvider,
+      CheckpointGenerator initialCheckpoint) {
+    this.kinesis = kinesisClientProvider;
+    this.initialCheckpointGenerator = initialCheckpoint;
+    validate();
+  }
+
+  /**
+   * Generates splits for reading from the stream.
+   * Basically, it tries to evenly split the set of shards in the stream into
+   * {@code desiredNumSplits} partitions. Each partition is then a split.
+   */
+  @Override
+  public List<KinesisSource> split(int desiredNumSplits,
+      PipelineOptions options) throws Exception {
+    KinesisReaderCheckpoint checkpoint =
+        initialCheckpointGenerator.generate(SimplifiedKinesisClient.from(kinesis));
+
+    List<KinesisSource> sources = newArrayList();
+
+    for (KinesisReaderCheckpoint partition : checkpoint.splitInto(desiredNumSplits)) {
+      sources.add(new KinesisSource(
+          kinesis,
+          new StaticCheckpointGenerator(partition)));
     }
-
-    private KinesisSource(KinesisClientProvider kinesisClientProvider,
-                          CheckpointGenerator initialCheckpoint) {
-        this.kinesis = kinesisClientProvider;
-        this.initialCheckpointGenerator = initialCheckpoint;
-        validate();
+    return sources;
+  }
+
+  /**
+   * Creates a reader based on the given {@link KinesisReaderCheckpoint}.
+   * If no {@link KinesisReaderCheckpoint} is given, we use
+   * {@code initialCheckpointGenerator} to generate a new checkpoint.
+   */
+  @Override
+  public UnboundedReader<KinesisRecord> createReader(PipelineOptions options,
+      KinesisReaderCheckpoint checkpointMark) {
+
+    CheckpointGenerator checkpointGenerator = initialCheckpointGenerator;
+
+    if (checkpointMark != null) {
+      checkpointGenerator = new StaticCheckpointGenerator(checkpointMark);
     }
 
-    /**
-     * Generate splits for reading from the stream.
-     * Basically, it'll try to evenly split set of shards in the stream into
-     * {@code desiredNumSplits} partitions. Each partition is then a split.
-     */
-    @Override
-    public List<KinesisSource> split(int desiredNumSplits,
-                                                     PipelineOptions options) throws Exception {
-        KinesisReaderCheckpoint checkpoint =
-                initialCheckpointGenerator.generate(SimplifiedKinesisClient.from(kinesis));
-
-        List<KinesisSource> sources = newArrayList();
-
-        for (KinesisReaderCheckpoint partition : checkpoint.splitInto(desiredNumSplits)) {
-            sources.add(new KinesisSource(
-                    kinesis,
-                    new StaticCheckpointGenerator(partition)));
-        }
-        return sources;
-    }
-
-    /**
-     * Creates reader based on given {@link KinesisReaderCheckpoint}.
-     * If {@link KinesisReaderCheckpoint} is not given, then we use
-     * {@code initialCheckpointGenerator} to generate new checkpoint.
-     */
-    @Override
-    public UnboundedReader<KinesisRecord> createReader(PipelineOptions options,
-                                                KinesisReaderCheckpoint checkpointMark) {
-
-        CheckpointGenerator checkpointGenerator = initialCheckpointGenerator;
-
-        if (checkpointMark != null) {
-            checkpointGenerator = new StaticCheckpointGenerator(checkpointMark);
-        }
-
-        LOG.info("Creating new reader using {}", checkpointGenerator);
-
-        return new KinesisReader(
-                SimplifiedKinesisClient.from(kinesis),
-                checkpointGenerator,
-                this);
-    }
-
-    @Override
-    public Coder<KinesisReaderCheckpoint> getCheckpointMarkCoder() {
-        return SerializableCoder.of(KinesisReaderCheckpoint.class);
-    }
-
-    @Override
-    public void validate() {
-        checkNotNull(kinesis);
-        checkNotNull(initialCheckpointGenerator);
-    }
-
-    @Override
-    public Coder<KinesisRecord> getDefaultOutputCoder() {
-        return KinesisRecordCoder.of();
-    }
+    LOG.info("Creating new reader using {}", checkpointGenerator);
+
+    return new KinesisReader(
+        SimplifiedKinesisClient.from(kinesis),
+        checkpointGenerator,
+        this);
+  }
+
+  @Override
+  public Coder<KinesisReaderCheckpoint> getCheckpointMarkCoder() {
+    return SerializableCoder.of(KinesisReaderCheckpoint.class);
+  }
+
+  @Override
+  public void validate() {
+    checkNotNull(kinesis);
+    checkNotNull(initialCheckpointGenerator);
+  }
+
+  @Override
+  public Coder<KinesisRecord> getDefaultOutputCoder() {
+    return KinesisRecordCoder.of();
+  }
 }
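
How a runner is expected to drive the source, as a sketch ("options" is a placeholder
PipelineOptions and "savedCheckpoint" a previously stored KinesisReaderCheckpoint):

    // Initial distribution: each split owns a disjoint subset of shards.
    List<KinesisSource> splits = source.split(4, options);

    // Fresh start: a null checkpoint mark lets the split's own
    // StaticCheckpointGenerator produce the starting checkpoint.
    UnboundedSource.UnboundedReader<KinesisRecord> reader =
        splits.get(0).createReader(options, null);

    // Resume: a saved checkpoint short-circuits generation.
    reader = splits.get(0).createReader(options, savedCheckpoint);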

http://git-wip-us.apache.org/repos/asf/beam/blob/7925a668/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/RecordFilter.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/RecordFilter.java b/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/RecordFilter.java
index 40e65fc..eca725c 100644
--- a/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/RecordFilter.java
+++ b/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/RecordFilter.java
@@ -21,7 +21,6 @@ import static com.google.common.collect.Lists.newArrayList;
 
 import java.util.List;
 
-
 /**
 * Filters out records that were already processed and checkpointed.
  *
@@ -29,13 +28,14 @@ import java.util.List;
  * accuracy, not with "subSequenceNumber" accuracy.
  */
 class RecordFilter {
-    public List<KinesisRecord> apply(List<KinesisRecord> records, ShardCheckpoint checkpoint) {
-        List<KinesisRecord> filteredRecords = newArrayList();
-        for (KinesisRecord record : records) {
-            if (checkpoint.isBeforeOrAt(record)) {
-                filteredRecords.add(record);
-            }
-        }
-        return filteredRecords;
+
+  public List<KinesisRecord> apply(List<KinesisRecord> records, ShardCheckpoint checkpoint) {
+    List<KinesisRecord> filteredRecords = newArrayList();
+    for (KinesisRecord record : records) {
+      if (checkpoint.isBeforeOrAt(record)) {
+        filteredRecords.add(record);
+      }
     }
+    return filteredRecords;
+  }
 }
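
Since shard iterators are restarted with sequence-number rather than subSequenceNumber
accuracy, a re-fetched batch may begin with sub-records that were already checkpointed;
the filter drops exactly those. Sketch ("fetched" and "checkpoint" are placeholders):

    List<KinesisRecord> fresh = new RecordFilter().apply(fetched, checkpoint);
    // Keeps only records r with checkpoint.isBeforeOrAt(r) == true,
    // i.e. records the checkpoint has not yet moved past.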

http://git-wip-us.apache.org/repos/asf/beam/blob/7925a668/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/RoundRobin.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/RoundRobin.java b/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/RoundRobin.java
index e4ff541..806d982 100644
--- a/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/RoundRobin.java
+++ b/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/RoundRobin.java
@@ -27,27 +27,28 @@ import java.util.Iterator;
 * Very simple implementation of the round-robin algorithm.
  */
 class RoundRobin<T> implements Iterable<T> {
-    private final Deque<T> deque;
 
-    public RoundRobin(Iterable<T> collection) {
-        this.deque = newArrayDeque(collection);
-        checkArgument(!deque.isEmpty(), "Tried to initialize RoundRobin with empty collection");
-    }
+  private final Deque<T> deque;
 
-    public T getCurrent() {
-        return deque.getFirst();
-    }
+  public RoundRobin(Iterable<T> collection) {
+    this.deque = newArrayDeque(collection);
+    checkArgument(!deque.isEmpty(), "Tried to initialize RoundRobin with empty collection");
+  }
 
-    public void moveForward() {
-        deque.addLast(deque.removeFirst());
-    }
+  public T getCurrent() {
+    return deque.getFirst();
+  }
 
-    public int size() {
-        return deque.size();
-    }
+  public void moveForward() {
+    deque.addLast(deque.removeFirst());
+  }
 
-    @Override
-    public Iterator<T> iterator() {
-        return deque.iterator();
-    }
+  public int size() {
+    return deque.size();
+  }
+
+  @Override
+  public Iterator<T> iterator() {
+    return deque.iterator();
+  }
 }
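
The rotation semantics in a nutshell (a standalone sketch):

    import com.google.common.collect.ImmutableList;

    RoundRobin<String> shards = new RoundRobin<>(ImmutableList.of("a", "b", "c"));
    shards.getCurrent();   // "a"
    shards.moveForward();  // deque rotates to b, c, a
    shards.getCurrent();   // "b"
    // An empty collection is rejected up front with an IllegalArgumentException.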

http://git-wip-us.apache.org/repos/asf/beam/blob/7925a668/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/ShardCheckpoint.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/ShardCheckpoint.java b/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/ShardCheckpoint.java
index 6aa3504..95f97b8 100644
--- a/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/ShardCheckpoint.java
+++ b/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/ShardCheckpoint.java
@@ -17,7 +17,6 @@
  */
 package org.apache.beam.sdk.io.kinesis;
 
-
 import static com.amazonaws.services.kinesis.model.ShardIteratorType.AFTER_SEQUENCE_NUMBER;
 import static com.amazonaws.services.kinesis.model.ShardIteratorType.AT_SEQUENCE_NUMBER;
 import static com.amazonaws.services.kinesis.model.ShardIteratorType.AT_TIMESTAMP;
@@ -27,9 +26,10 @@ import static com.google.common.base.Preconditions.checkNotNull;
 import com.amazonaws.services.kinesis.clientlibrary.types.ExtendedSequenceNumber;
 import com.amazonaws.services.kinesis.model.Record;
 import com.amazonaws.services.kinesis.model.ShardIteratorType;
+
 import java.io.Serializable;
-import org.joda.time.Instant;
 
+import org.joda.time.Instant;
 
 /**
 * Checkpoint mark for a single shard in the stream.
@@ -45,131 +45,132 @@ import org.joda.time.Instant;
  * This class is immutable.
  */
 class ShardCheckpoint implements Serializable {
-    private final String streamName;
-    private final String shardId;
-    private final String sequenceNumber;
-    private final ShardIteratorType shardIteratorType;
-    private final Long subSequenceNumber;
-    private final Instant timestamp;
-
-    public ShardCheckpoint(String streamName, String shardId, StartingPoint
-            startingPoint) {
-        this(streamName, shardId,
-                ShardIteratorType.fromValue(startingPoint.getPositionName()),
-                startingPoint.getTimestamp());
-    }
-
-    public ShardCheckpoint(String streamName, String shardId, ShardIteratorType
-            shardIteratorType, Instant timestamp) {
-        this(streamName, shardId, shardIteratorType, null, null, timestamp);
-    }
-
-    public ShardCheckpoint(String streamName, String shardId, ShardIteratorType
-            shardIteratorType, String sequenceNumber, Long subSequenceNumber) {
-        this(streamName, shardId, shardIteratorType, sequenceNumber, subSequenceNumber, null);
-    }
-
-    private ShardCheckpoint(String streamName, String shardId, ShardIteratorType shardIteratorType,
-                            String sequenceNumber, Long subSequenceNumber, Instant timestamp) {
-        this.shardIteratorType = checkNotNull(shardIteratorType, "shardIteratorType");
-        this.streamName = checkNotNull(streamName, "streamName");
-        this.shardId = checkNotNull(shardId, "shardId");
-        if (shardIteratorType == AT_SEQUENCE_NUMBER || shardIteratorType == AFTER_SEQUENCE_NUMBER) {
-            checkNotNull(sequenceNumber,
-                    "You must provide sequence number for AT_SEQUENCE_NUMBER"
-                            + " or AFTER_SEQUENCE_NUMBER");
-        } else {
-            checkArgument(sequenceNumber == null,
-                    "Sequence number must be null for LATEST, TRIM_HORIZON or AT_TIMESTAMP");
-        }
-        if (shardIteratorType == AT_TIMESTAMP) {
-            checkNotNull(timestamp,
-                    "You must provide timestamp for AT_SEQUENCE_NUMBER"
-                            + " or AFTER_SEQUENCE_NUMBER");
-        } else {
-            checkArgument(timestamp == null,
-                    "Timestamp must be null for an iterator type other than AT_TIMESTAMP");
-        }
-
-        this.subSequenceNumber = subSequenceNumber;
-        this.sequenceNumber = sequenceNumber;
-        this.timestamp = timestamp;
-    }
-
-    /**
-     * Used to compare {@link ShardCheckpoint} object to {@link KinesisRecord}. Depending
-     * on the the underlying shardIteratorType, it will either compare the timestamp or the
-     * {@link ExtendedSequenceNumber}.
-     *
-     * @param other
-     * @return if current checkpoint mark points before or at given {@link ExtendedSequenceNumber}
-     */
-    public boolean isBeforeOrAt(KinesisRecord other) {
-        if (shardIteratorType == AT_TIMESTAMP) {
-            return timestamp.compareTo(other.getApproximateArrivalTimestamp()) <= 0;
-        }
-        int result = extendedSequenceNumber().compareTo(other.getExtendedSequenceNumber());
-        if (result == 0) {
-            return shardIteratorType == AT_SEQUENCE_NUMBER;
-        }
-        return result < 0;
-    }
-
-    private ExtendedSequenceNumber extendedSequenceNumber() {
-        String fullSequenceNumber = sequenceNumber;
-        if (fullSequenceNumber == null) {
-            fullSequenceNumber = shardIteratorType.toString();
-        }
-        return new ExtendedSequenceNumber(fullSequenceNumber, subSequenceNumber);
-    }
 
-    @Override
-    public String toString() {
-        return String.format("Checkpoint %s for stream %s, shard %s: %s", shardIteratorType,
-                streamName, shardId,
-                sequenceNumber);
+  private final String streamName;
+  private final String shardId;
+  private final String sequenceNumber;
+  private final ShardIteratorType shardIteratorType;
+  private final Long subSequenceNumber;
+  private final Instant timestamp;
+
+  public ShardCheckpoint(String streamName, String shardId, StartingPoint
+      startingPoint) {
+    this(streamName, shardId,
+        ShardIteratorType.fromValue(startingPoint.getPositionName()),
+        startingPoint.getTimestamp());
+  }
+
+  public ShardCheckpoint(String streamName, String shardId, ShardIteratorType
+      shardIteratorType, Instant timestamp) {
+    this(streamName, shardId, shardIteratorType, null, null, timestamp);
+  }
+
+  public ShardCheckpoint(String streamName, String shardId, ShardIteratorType
+      shardIteratorType, String sequenceNumber, Long subSequenceNumber) {
+    this(streamName, shardId, shardIteratorType, sequenceNumber, subSequenceNumber, null);
+  }
+
+  private ShardCheckpoint(String streamName, String shardId, ShardIteratorType shardIteratorType,
+      String sequenceNumber, Long subSequenceNumber, Instant timestamp) {
+    this.shardIteratorType = checkNotNull(shardIteratorType, "shardIteratorType");
+    this.streamName = checkNotNull(streamName, "streamName");
+    this.shardId = checkNotNull(shardId, "shardId");
+    if (shardIteratorType == AT_SEQUENCE_NUMBER || shardIteratorType == AFTER_SEQUENCE_NUMBER) {
+      checkNotNull(sequenceNumber,
+          "You must provide sequence number for AT_SEQUENCE_NUMBER"
+              + " or AFTER_SEQUENCE_NUMBER");
+    } else {
+      checkArgument(sequenceNumber == null,
+          "Sequence number must be null for LATEST, TRIM_HORIZON or AT_TIMESTAMP");
     }
-
-    public ShardRecordsIterator getShardRecordsIterator(SimplifiedKinesisClient kinesis)
-            throws TransientKinesisException {
-        return new ShardRecordsIterator(this, kinesis);
+    if (shardIteratorType == AT_TIMESTAMP) {
+      checkNotNull(timestamp,
+          "You must provide timestamp for AT_SEQUENCE_NUMBER"
+              + " or AFTER_SEQUENCE_NUMBER");
+    } else {
+      checkArgument(timestamp == null,
+          "Timestamp must be null for an iterator type other than AT_TIMESTAMP");
     }
 
-    public String getShardIterator(SimplifiedKinesisClient kinesisClient)
-            throws TransientKinesisException {
-        if (checkpointIsInTheMiddleOfAUserRecord()) {
-            return kinesisClient.getShardIterator(streamName,
-                    shardId, AT_SEQUENCE_NUMBER,
-                    sequenceNumber, null);
-        }
-        return kinesisClient.getShardIterator(streamName,
-                shardId, shardIteratorType,
-                sequenceNumber, timestamp);
+    this.subSequenceNumber = subSequenceNumber;
+    this.sequenceNumber = sequenceNumber;
+    this.timestamp = timestamp;
+  }
+
+  /**
+   * Used to compare a {@link ShardCheckpoint} object to a {@link KinesisRecord}. Depending
+   * on the underlying shardIteratorType, it will either compare the timestamp or the
+   * {@link ExtendedSequenceNumber}.
+   *
+   * @param other the record to compare this checkpoint against
+   * @return true if this checkpoint mark points before or at the given
+   *     {@link ExtendedSequenceNumber}
+   */
+  public boolean isBeforeOrAt(KinesisRecord other) {
+    if (shardIteratorType == AT_TIMESTAMP) {
+      return timestamp.compareTo(other.getApproximateArrivalTimestamp()) <= 0;
     }
-
-    private boolean checkpointIsInTheMiddleOfAUserRecord() {
-        return shardIteratorType == AFTER_SEQUENCE_NUMBER && subSequenceNumber != null;
+    int result = extendedSequenceNumber().compareTo(other.getExtendedSequenceNumber());
+    if (result == 0) {
+      return shardIteratorType == AT_SEQUENCE_NUMBER;
     }
+    return result < 0;
+  }
 
-    /**
-     * Used to advance checkpoint mark to position after given {@link Record}.
-     *
-     * @param record
-     * @return new checkpoint object pointing directly after given {@link Record}
-     */
-    public ShardCheckpoint moveAfter(KinesisRecord record) {
-        return new ShardCheckpoint(
-                streamName, shardId,
-                AFTER_SEQUENCE_NUMBER,
-                record.getSequenceNumber(),
-                record.getSubSequenceNumber());
+  private ExtendedSequenceNumber extendedSequenceNumber() {
+    String fullSequenceNumber = sequenceNumber;
+    if (fullSequenceNumber == null) {
+      fullSequenceNumber = shardIteratorType.toString();
     }
-
-    public String getStreamName() {
-        return streamName;
-    }
-
-    public String getShardId() {
-        return shardId;
+    return new ExtendedSequenceNumber(fullSequenceNumber, subSequenceNumber);
+  }
+
+  @Override
+  public String toString() {
+    return String.format("Checkpoint %s for stream %s, shard %s: %s", shardIteratorType,
+        streamName, shardId,
+        sequenceNumber);
+  }
+
+  public ShardRecordsIterator getShardRecordsIterator(SimplifiedKinesisClient kinesis)
+      throws TransientKinesisException {
+    return new ShardRecordsIterator(this, kinesis);
+  }
+
+  public String getShardIterator(SimplifiedKinesisClient kinesisClient)
+      throws TransientKinesisException {
+    if (checkpointIsInTheMiddleOfAUserRecord()) {
+      return kinesisClient.getShardIterator(streamName,
+          shardId, AT_SEQUENCE_NUMBER,
+          sequenceNumber, null);
     }
+    return kinesisClient.getShardIterator(streamName,
+        shardId, shardIteratorType,
+        sequenceNumber, timestamp);
+  }
+
+  private boolean checkpointIsInTheMiddleOfAUserRecord() {
+    return shardIteratorType == AFTER_SEQUENCE_NUMBER && subSequenceNumber != null;
+  }
+
+  /**
+   * Used to advance the checkpoint mark to the position just after the given {@link Record}.
+   *
+   * @param record the record to advance past
+   * @return a new checkpoint object pointing directly after the given {@link Record}
+   */
+  public ShardCheckpoint moveAfter(KinesisRecord record) {
+    return new ShardCheckpoint(
+        streamName, shardId,
+        AFTER_SEQUENCE_NUMBER,
+        record.getSequenceNumber(),
+        record.getSubSequenceNumber());
+  }
+
+  public String getStreamName() {
+    return streamName;
+  }
+
+  public String getShardId() {
+    return shardId;
+  }
 }

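A note on the comparison semantics above: on an equal ExtendedSequenceNumber, an AT_SEQUENCE_NUMBER checkpoint still claims the record, while an AFTER_SEQUENCE_NUMBER checkpoint does not. A minimal sketch, assuming a KinesisRecord 'record' and a checkpoint 'mark' positioned at that record's sequence number (the variable names are illustrative, not from the patch):

    // mark.shardIteratorType == AT_SEQUENCE_NUMBER, at record's sequence number
    boolean stillToRead = mark.isBeforeOrAt(record);       // true: record will be (re)read
    ShardCheckpoint advanced = mark.moveAfter(record);     // now AFTER_SEQUENCE_NUMBER
    boolean alreadyRead = !advanced.isBeforeOrAt(record);  // true: record is behind the mark
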
http://git-wip-us.apache.org/repos/asf/beam/blob/7925a668/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/ShardRecordsIterator.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/ShardRecordsIterator.java b/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/ShardRecordsIterator.java
index 872f604..a69c6c1 100644
--- a/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/ShardRecordsIterator.java
+++ b/sdks/java/io/kinesis/src/main/java/org/apache/beam/sdk/io/kinesis/ShardRecordsIterator.java
@@ -21,7 +21,9 @@ import static com.google.common.base.Preconditions.checkNotNull;
 import static com.google.common.collect.Queues.newArrayDeque;
 
 import com.amazonaws.services.kinesis.model.ExpiredIteratorException;
+
 import java.util.Deque;
+
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -31,68 +33,68 @@ import org.slf4j.LoggerFactory;
  * Then the caller of {@link ShardRecordsIterator#next()} can read from the queue one by one.
  */
 class ShardRecordsIterator {
-    private static final Logger LOG = LoggerFactory.getLogger(ShardRecordsIterator.class);
 
-    private final SimplifiedKinesisClient kinesis;
-    private final RecordFilter filter;
-    private ShardCheckpoint checkpoint;
-    private String shardIterator;
-    private Deque<KinesisRecord> data = newArrayDeque();
+  private static final Logger LOG = LoggerFactory.getLogger(ShardRecordsIterator.class);
 
-    public ShardRecordsIterator(final ShardCheckpoint initialCheckpoint,
-                                SimplifiedKinesisClient simplifiedKinesisClient) throws
-            TransientKinesisException {
-        this(initialCheckpoint, simplifiedKinesisClient, new RecordFilter());
-    }
+  private final SimplifiedKinesisClient kinesis;
+  private final RecordFilter filter;
+  private ShardCheckpoint checkpoint;
+  private String shardIterator;
+  private Deque<KinesisRecord> data = newArrayDeque();
 
-    public ShardRecordsIterator(final ShardCheckpoint initialCheckpoint,
-                                SimplifiedKinesisClient simplifiedKinesisClient,
-                                RecordFilter filter) throws
-            TransientKinesisException {
+  public ShardRecordsIterator(final ShardCheckpoint initialCheckpoint,
+      SimplifiedKinesisClient simplifiedKinesisClient) throws
+      TransientKinesisException {
+    this(initialCheckpoint, simplifiedKinesisClient, new RecordFilter());
+  }
 
-        this.checkpoint = checkNotNull(initialCheckpoint, "initialCheckpoint");
-        this.filter = checkNotNull(filter, "filter");
-        this.kinesis = checkNotNull(simplifiedKinesisClient, "simplifiedKinesisClient");
-        shardIterator = checkpoint.getShardIterator(kinesis);
-    }
+  public ShardRecordsIterator(final ShardCheckpoint initialCheckpoint,
+      SimplifiedKinesisClient simplifiedKinesisClient,
+      RecordFilter filter) throws
+      TransientKinesisException {
 
-    /**
-     * Returns record if there's any present.
-     * Returns absent() if there are no new records at this time in the shard.
-     */
-    public CustomOptional<KinesisRecord> next() throws TransientKinesisException {
-        readMoreIfNecessary();
+    this.checkpoint = checkNotNull(initialCheckpoint, "initialCheckpoint");
+    this.filter = checkNotNull(filter, "filter");
+    this.kinesis = checkNotNull(simplifiedKinesisClient, "simplifiedKinesisClient");
+    shardIterator = checkpoint.getShardIterator(kinesis);
+  }
 
-        if (data.isEmpty()) {
-            return CustomOptional.absent();
-        } else {
-            KinesisRecord record = data.removeFirst();
-            checkpoint = checkpoint.moveAfter(record);
-            return CustomOptional.of(record);
-        }
-    }
+  /**
+   * Returns a record if any is present.
+   * Returns absent() if there are no new records at this time in the shard.
+   */
+  public CustomOptional<KinesisRecord> next() throws TransientKinesisException {
+    readMoreIfNecessary();
 
-    private void readMoreIfNecessary() throws TransientKinesisException {
-        if (data.isEmpty()) {
-            GetKinesisRecordsResult response;
-            try {
-                response = kinesis.getRecords(shardIterator, checkpoint.getStreamName(),
-                        checkpoint.getShardId());
-            } catch (ExpiredIteratorException e) {
-                LOG.info("Refreshing expired iterator", e);
-                shardIterator = checkpoint.getShardIterator(kinesis);
-                response = kinesis.getRecords(shardIterator, checkpoint.getStreamName(),
-                        checkpoint.getShardId());
-            }
-            LOG.debug("Fetched {} new records", response.getRecords().size());
-            shardIterator = response.getNextShardIterator();
-            data.addAll(filter.apply(response.getRecords(), checkpoint));
-        }
+    if (data.isEmpty()) {
+      return CustomOptional.absent();
+    } else {
+      KinesisRecord record = data.removeFirst();
+      checkpoint = checkpoint.moveAfter(record);
+      return CustomOptional.of(record);
     }
+  }
 
-    public ShardCheckpoint getCheckpoint() {
-        return checkpoint;
+  private void readMoreIfNecessary() throws TransientKinesisException {
+    if (data.isEmpty()) {
+      GetKinesisRecordsResult response;
+      try {
+        response = kinesis.getRecords(shardIterator, checkpoint.getStreamName(),
+            checkpoint.getShardId());
+      } catch (ExpiredIteratorException e) {
+        LOG.info("Refreshing expired iterator", e);
+        shardIterator = checkpoint.getShardIterator(kinesis);
+        response = kinesis.getRecords(shardIterator, checkpoint.getStreamName(),
+            checkpoint.getShardId());
+      }
+      LOG.debug("Fetched {} new records", response.getRecords().size());
+      shardIterator = response.getNextShardIterator();
+      data.addAll(filter.apply(response.getRecords(), checkpoint));
     }
+  }
 
+  public ShardCheckpoint getCheckpoint() {
+    return checkpoint;
+  }
 
 }

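Taken together with ShardCheckpoint above, the iterator gives callers a simple polling loop. A hedged sketch (the client and initial checkpoint are assumed to exist already, and CustomOptional is assumed to expose isPresent()/get() in the style of Guava's Optional):

    ShardRecordsIterator shardIterator = checkpoint.getShardRecordsIterator(kinesisClient);
    while (running) {
      CustomOptional<KinesisRecord> maybeRecord = shardIterator.next();
      if (maybeRecord.isPresent()) {
        emit(maybeRecord.get());                     // hypothetical downstream handler
        checkpoint = shardIterator.getCheckpoint();  // persist this to resume after restart
      }
    }
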

[30/50] [abbrv] beam git commit: Add more utilities to ParDoTranslation

Posted by ta...@apache.org.
Add more utilities to ParDoTranslation


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/860e0a08
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/860e0a08
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/860e0a08

Branch: refs/heads/DSL_SQL
Commit: 860e0a08ecd84533220f6ef8e18d1409964d69cd
Parents: 1f17b8a
Author: Kenneth Knowles <kl...@google.com>
Authored: Thu Jun 8 13:46:18 2017 -0700
Committer: Tyler Akidau <ta...@apache.org>
Committed: Wed Jul 12 20:01:01 2017 -0700

----------------------------------------------------------------------
 .../core/construction/ParDoTranslation.java     | 48 ++++++++++++++++++++
 1 file changed, 48 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/860e0a08/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/ParDoTranslation.java
----------------------------------------------------------------------
diff --git a/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/ParDoTranslation.java b/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/ParDoTranslation.java
index 34e0d86..5f2bcae 100644
--- a/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/ParDoTranslation.java
+++ b/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/ParDoTranslation.java
@@ -34,9 +34,11 @@ import com.google.protobuf.BytesValue;
 import com.google.protobuf.InvalidProtocolBufferException;
 import java.io.IOException;
 import java.io.Serializable;
+import java.util.ArrayList;
 import java.util.Collections;
 import java.util.List;
 import java.util.Map;
+import java.util.Set;
 import org.apache.beam.runners.core.construction.PTransformTranslation.TransformPayloadTranslator;
 import org.apache.beam.sdk.coders.Coder;
 import org.apache.beam.sdk.coders.IterableCoder;
@@ -74,6 +76,7 @@ import org.apache.beam.sdk.util.WindowedValue;
 import org.apache.beam.sdk.util.WindowedValue.FullWindowedValueCoder;
 import org.apache.beam.sdk.values.PCollectionView;
 import org.apache.beam.sdk.values.TupleTag;
+import org.apache.beam.sdk.values.TupleTagList;
 import org.apache.beam.sdk.values.WindowingStrategy;
 
 /**
@@ -215,11 +218,56 @@ public class ParDoTranslation {
     return doFnAndMainOutputTagFromProto(payload.getDoFn()).getDoFn();
   }
 
+  public static DoFn<?, ?> getDoFn(AppliedPTransform<?, ?, ?> application) throws IOException {
+    return getDoFn(getParDoPayload(application));
+  }
+
   public static TupleTag<?> getMainOutputTag(ParDoPayload payload)
       throws InvalidProtocolBufferException {
     return doFnAndMainOutputTagFromProto(payload.getDoFn()).getMainOutputTag();
   }
 
+  public static TupleTag<?> getMainOutputTag(AppliedPTransform<?, ?, ?> application)
+      throws IOException {
+    return getMainOutputTag(getParDoPayload(application));
+  }
+
+  public static TupleTagList getAdditionalOutputTags(AppliedPTransform<?, ?, ?> application)
+      throws IOException {
+
+    RunnerApi.PTransform protoTransform =
+        PTransformTranslation.toProto(application, SdkComponents.create());
+
+    ParDoPayload payload = protoTransform.getSpec().getParameter().unpack(ParDoPayload.class);
+    TupleTag<?> mainOutputTag = getMainOutputTag(payload);
+    Set<String> outputTags =
+        Sets.difference(
+            protoTransform.getOutputsMap().keySet(), Collections.singleton(mainOutputTag.getId()));
+
+    ArrayList<TupleTag<?>> additionalOutputTags = new ArrayList<>();
+    for (String outputTag : outputTags) {
+      additionalOutputTags.add(new TupleTag<>(outputTag));
+    }
+    return TupleTagList.of(additionalOutputTags);
+  }
+
+  public static List<PCollectionView<?>> getSideInputs(AppliedPTransform<?, ?, ?> application)
+      throws IOException {
+
+    SdkComponents sdkComponents = SdkComponents.create();
+    RunnerApi.PTransform parDoProto =
+        PTransformTranslation.toProto(application, sdkComponents);
+    ParDoPayload payload = parDoProto.getSpec().getParameter().unpack(ParDoPayload.class);
+
+    List<PCollectionView<?>> views = new ArrayList<>();
+    for (Map.Entry<String, SideInput> sideInput : payload.getSideInputsMap().entrySet()) {
+      views.add(
+          fromProto(
+              sideInput.getValue(), sideInput.getKey(), parDoProto, sdkComponents.toComponents()));
+    }
+    return views;
+  }
+
   public static RunnerApi.PCollection getMainInput(
       RunnerApi.PTransform ptransform, Components components) throws IOException {
     checkArgument(

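The new accessors let a runner inspect a ParDo application without unpacking the proto payload by hand. A minimal sketch, assuming 'application' is an AppliedPTransform wrapping a ParDo (IOException handling elided):

    DoFn<?, ?> fn = ParDoTranslation.getDoFn(application);
    TupleTag<?> mainOutput = ParDoTranslation.getMainOutputTag(application);
    TupleTagList additionalOutputs = ParDoTranslation.getAdditionalOutputTags(application);
    List<PCollectionView<?>> sideInputs = ParDoTranslation.getSideInputs(application);
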

[25/50] [abbrv] beam git commit: Include PCollection in rehydrated PCollectionView

Posted by ta...@apache.org.
Include PCollection in rehydrated PCollectionView


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/bdece9d2
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/bdece9d2
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/bdece9d2

Branch: refs/heads/DSL_SQL
Commit: bdece9d2a57824865a35b4367619569e5800ed1b
Parents: 860e0a0
Author: Kenneth Knowles <kl...@google.com>
Authored: Thu Jul 6 09:24:55 2017 -0700
Committer: Tyler Akidau <ta...@apache.org>
Committed: Wed Jul 12 20:01:01 2017 -0700

----------------------------------------------------------------------
 .../core/construction/ParDoTranslation.java     | 51 +++++++++++++++++---
 .../construction/RunnerPCollectionView.java     |  7 +--
 .../core/construction/ParDoTranslationTest.java | 28 +++++++----
 3 files changed, 67 insertions(+), 19 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/bdece9d2/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/ParDoTranslation.java
----------------------------------------------------------------------
diff --git a/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/ParDoTranslation.java b/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/ParDoTranslation.java
index 5f2bcae..fe8c5aa 100644
--- a/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/ParDoTranslation.java
+++ b/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/ParDoTranslation.java
@@ -40,6 +40,7 @@ import java.util.List;
 import java.util.Map;
 import java.util.Set;
 import org.apache.beam.runners.core.construction.PTransformTranslation.TransformPayloadTranslator;
+import org.apache.beam.sdk.Pipeline;
 import org.apache.beam.sdk.coders.Coder;
 import org.apache.beam.sdk.coders.IterableCoder;
 import org.apache.beam.sdk.common.runner.v1.RunnerApi;
@@ -74,6 +75,7 @@ import org.apache.beam.sdk.transforms.windowing.WindowMappingFn;
 import org.apache.beam.sdk.util.SerializableUtils;
 import org.apache.beam.sdk.util.WindowedValue;
 import org.apache.beam.sdk.util.WindowedValue.FullWindowedValueCoder;
+import org.apache.beam.sdk.values.PCollection;
 import org.apache.beam.sdk.values.PCollectionView;
 import org.apache.beam.sdk.values.TupleTag;
 import org.apache.beam.sdk.values.TupleTagList;
@@ -262,8 +264,12 @@ public class ParDoTranslation {
     List<PCollectionView<?>> views = new ArrayList<>();
     for (Map.Entry<String, SideInput> sideInput : payload.getSideInputsMap().entrySet()) {
       views.add(
-          fromProto(
-              sideInput.getValue(), sideInput.getKey(), parDoProto, sdkComponents.toComponents()));
+          viewFromProto(
+              application.getPipeline(),
+              sideInput.getValue(),
+              sideInput.getKey(),
+              parDoProto,
+              sdkComponents.toComponents()));
     }
     return views;
   }
@@ -495,15 +501,47 @@ public class ParDoTranslation {
     return builder.build();
   }
 
-  public static PCollectionView<?> fromProto(
-      SideInput sideInput, String id, RunnerApi.PTransform parDoTransform, Components components)
+  public static PCollectionView<?> viewFromProto(
+      Pipeline pipeline,
+      SideInput sideInput,
+      String localName,
+      RunnerApi.PTransform parDoTransform,
+      Components components)
       throws IOException {
-    TupleTag<?> tag = new TupleTag<>(id);
+
+    String pCollectionId = parDoTransform.getInputsOrThrow(localName);
+
+    // This may be a PCollection defined in another language, but we should be
+    // able to rehydrate it enough to stick it in a side input. The coder may not
+    // be grokkable in Java.
+    PCollection<?> pCollection =
+        PCollectionTranslation.fromProto(
+            pipeline, components.getPcollectionsOrThrow(pCollectionId), components);
+
+    return viewFromProto(sideInput, localName, pCollection, parDoTransform, components);
+  }
+
+  /**
+   * Create a {@link PCollectionView} from a side input spec and an already-deserialized {@link
+   * PCollection} that should be wired up.
+   */
+  public static PCollectionView<?> viewFromProto(
+      SideInput sideInput,
+      String localName,
+      PCollection<?> pCollection,
+      RunnerApi.PTransform parDoTransform,
+      Components components)
+      throws IOException {
+    checkArgument(
+        localName != null,
+        "%s.viewFromProto: localName must not be null",
+        ParDoTranslation.class.getSimpleName());
+    TupleTag<?> tag = new TupleTag<>(localName);
     WindowMappingFn<?> windowMappingFn = windowMappingFnFromProto(sideInput.getWindowMappingFn());
     ViewFn<?, ?> viewFn = viewFnFromProto(sideInput.getViewFn());
 
     RunnerApi.PCollection inputCollection =
-        components.getPcollectionsOrThrow(parDoTransform.getInputsOrThrow(id));
+        components.getPcollectionsOrThrow(parDoTransform.getInputsOrThrow(localName));
     WindowingStrategy<?, ?> windowingStrategy =
         WindowingStrategyTranslation.fromProto(
             components.getWindowingStrategiesOrThrow(inputCollection.getWindowingStrategyId()),
@@ -523,6 +561,7 @@ public class ParDoTranslation {
 
     PCollectionView<?> view =
         new RunnerPCollectionView<>(
+            pCollection,
             (TupleTag<Iterable<WindowedValue<?>>>) tag,
             (ViewFn<Iterable<WindowedValue<?>>, ?>) viewFn,
             windowMappingFn,

http://git-wip-us.apache.org/repos/asf/beam/blob/bdece9d2/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/RunnerPCollectionView.java
----------------------------------------------------------------------
diff --git a/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/RunnerPCollectionView.java b/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/RunnerPCollectionView.java
index c359cec..b275188 100644
--- a/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/RunnerPCollectionView.java
+++ b/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/RunnerPCollectionView.java
@@ -39,16 +39,19 @@ class RunnerPCollectionView<T> extends PValueBase implements PCollectionView<T>
   private final WindowMappingFn<?> windowMappingFn;
   private final WindowingStrategy<?, ?> windowingStrategy;
   private final Coder<Iterable<WindowedValue<?>>> coder;
+  private final transient PCollection<?> pCollection;
 
   /**
    * Create a new {@link RunnerPCollectionView} from the provided components.
    */
   RunnerPCollectionView(
+      PCollection<?> pCollection,
       TupleTag<Iterable<WindowedValue<?>>> tag,
       ViewFn<Iterable<WindowedValue<?>>, T> viewFn,
       WindowMappingFn<?> windowMappingFn,
       @Nullable WindowingStrategy<?, ?> windowingStrategy,
       @Nullable Coder<Iterable<WindowedValue<?>>> coder) {
+    this.pCollection = pCollection;
     this.tag = tag;
     this.viewFn = viewFn;
     this.windowMappingFn = windowMappingFn;
@@ -56,11 +59,9 @@ class RunnerPCollectionView<T> extends PValueBase implements PCollectionView<T>
     this.coder = coder;
   }
 
-  @Nullable
   @Override
   public PCollection<?> getPCollection() {
-    throw new IllegalStateException(
-        String.format("Cannot call getPCollection on a %s", getClass().getSimpleName()));
+    return pCollection;
   }
 
   @Override

http://git-wip-us.apache.org/repos/asf/beam/blob/bdece9d2/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/ParDoTranslationTest.java
----------------------------------------------------------------------
diff --git a/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/ParDoTranslationTest.java b/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/ParDoTranslationTest.java
index a8490bf..6fdf9d6 100644
--- a/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/ParDoTranslationTest.java
+++ b/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/ParDoTranslationTest.java
@@ -23,9 +23,9 @@ import static org.hamcrest.Matchers.instanceOf;
 import static org.junit.Assert.assertThat;
 
 import com.google.common.collect.ImmutableList;
-import java.util.Collections;
 import java.util.HashMap;
 import java.util.Map;
+import org.apache.beam.sdk.Pipeline;
 import org.apache.beam.sdk.coders.KvCoder;
 import org.apache.beam.sdk.coders.StringUtf8Coder;
 import org.apache.beam.sdk.coders.VarIntCoder;
@@ -143,22 +143,30 @@ public class ParDoTranslationTest {
       inputs.putAll(parDo.getAdditionalInputs());
       PCollectionTuple output = mainInput.apply(parDo);
 
-      SdkComponents components = SdkComponents.create();
-      String transformId =
-          components.registerPTransform(
+      SdkComponents sdkComponents = SdkComponents.create();
+
+      // Encode
+      RunnerApi.PTransform protoTransform =
+          PTransformTranslation.toProto(
               AppliedPTransform.<PCollection<KV<Long, String>>, PCollection<Void>, MultiOutput>of(
                   "foo", inputs, output.expand(), parDo, p),
-              Collections.<AppliedPTransform<?, ?, ?>>emptyList());
+              sdkComponents);
+      Components protoComponents = sdkComponents.toComponents();
+
+      // Decode
+      Pipeline rehydratedPipeline = Pipeline.create();
 
-      Components protoComponents = components.toComponents();
-      RunnerApi.PTransform protoTransform = protoComponents.getTransformsOrThrow(transformId);
       ParDoPayload parDoPayload =
           protoTransform.getSpec().getParameter().unpack(ParDoPayload.class);
       for (PCollectionView<?> view : parDo.getSideInputs()) {
         SideInput sideInput = parDoPayload.getSideInputsOrThrow(view.getTagInternal().getId());
         PCollectionView<?> restoredView =
-            ParDoTranslation.fromProto(
-                sideInput, view.getTagInternal().getId(), protoTransform, protoComponents);
+            ParDoTranslation.viewFromProto(
+                rehydratedPipeline,
+                sideInput,
+                view.getTagInternal().getId(),
+                protoTransform,
+                protoComponents);
         assertThat(restoredView.getTagInternal(), equalTo(view.getTagInternal()));
         assertThat(restoredView.getViewFn(), instanceOf(view.getViewFn().getClass()));
         assertThat(
@@ -169,7 +177,7 @@ public class ParDoTranslationTest {
                 view.getWindowingStrategyInternal().fixDefaults()));
         assertThat(restoredView.getCoderInternal(), equalTo(view.getCoderInternal()));
       }
-      String mainInputId = components.registerPCollection(mainInput);
+      String mainInputId = sdkComponents.registerPCollection(mainInput);
       assertThat(
           ParDoTranslation.getMainInput(protoTransform, protoComponents),
           equalTo(protoComponents.getPcollectionsOrThrow(mainInputId)));

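In isolation, the rehydration path exercised by the test reads as below; every argument is assumed to come from an already-decoded Runner API pipeline:

    PCollectionView<?> view =
        ParDoTranslation.viewFromProto(
            rehydratedPipeline,   // a fresh Pipeline.create() on the decoding side
            sideInput,            // the SideInput spec from the ParDoPayload
            localName,            // the transform-local input name
            parDoTransform,       // the RunnerApi.PTransform carrying the inputs map
            components);          // Components holding the referenced PCollections
    PCollection<?> rehydrated = view.getPCollection();  // non-null after this change
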

[24/50] [abbrv] beam git commit: Ignore processing time timers in expired windows

Posted by ta...@apache.org.
Ignore processing time timers in expired windows


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/951f3cab
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/951f3cab
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/951f3cab

Branch: refs/heads/DSL_SQL
Commit: 951f3cab3f6558524ee1146e0e3f347bcd02ecda
Parents: c167d10
Author: Kenneth Knowles <kl...@google.com>
Authored: Thu Jun 22 18:09:11 2017 -0700
Committer: Tyler Akidau <ta...@apache.org>
Committed: Wed Jul 12 20:01:00 2017 -0700

----------------------------------------------------------------------
 .../beam/runners/core/ReduceFnRunner.java       | 10 ++++++
 .../beam/runners/core/ReduceFnRunnerTest.java   | 32 ++++++++++++++++++++
 2 files changed, 42 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/951f3cab/runners/core-java/src/main/java/org/apache/beam/runners/core/ReduceFnRunner.java
----------------------------------------------------------------------
diff --git a/runners/core-java/src/main/java/org/apache/beam/runners/core/ReduceFnRunner.java b/runners/core-java/src/main/java/org/apache/beam/runners/core/ReduceFnRunner.java
index ef33bef..0632c05 100644
--- a/runners/core-java/src/main/java/org/apache/beam/runners/core/ReduceFnRunner.java
+++ b/runners/core-java/src/main/java/org/apache/beam/runners/core/ReduceFnRunner.java
@@ -693,6 +693,11 @@ public class ReduceFnRunner<K, InputT, OutputT, W extends BoundedWindow> {
       @SuppressWarnings("unchecked")
         WindowNamespace<W> windowNamespace = (WindowNamespace<W>) timer.getNamespace();
       W window = windowNamespace.getWindow();
+
+      if (TimeDomain.PROCESSING_TIME == timer.getDomain() && windowIsExpired(window)) {
+        continue;
+      }
+
       ReduceFn<K, InputT, OutputT, W>.Context directContext =
           contextFactory.base(window, StateStyle.DIRECT);
       ReduceFn<K, InputT, OutputT, W>.Context renamedContext =
@@ -1090,4 +1095,9 @@ public class ReduceFnRunner<K, InputT, OutputT, W extends BoundedWindow> {
     }
   }
 
+  private boolean windowIsExpired(BoundedWindow w) {
+    return timerInternals
+        .currentInputWatermarkTime()
+        .isAfter(w.maxTimestamp().plus(windowingStrategy.getAllowedLateness()));
+  }
 }

http://git-wip-us.apache.org/repos/asf/beam/blob/951f3cab/runners/core-java/src/test/java/org/apache/beam/runners/core/ReduceFnRunnerTest.java
----------------------------------------------------------------------
diff --git a/runners/core-java/src/test/java/org/apache/beam/runners/core/ReduceFnRunnerTest.java b/runners/core-java/src/test/java/org/apache/beam/runners/core/ReduceFnRunnerTest.java
index 3a2c220..79ee91b 100644
--- a/runners/core-java/src/test/java/org/apache/beam/runners/core/ReduceFnRunnerTest.java
+++ b/runners/core-java/src/test/java/org/apache/beam/runners/core/ReduceFnRunnerTest.java
@@ -286,6 +286,38 @@ public class ReduceFnRunnerTest {
 
   /**
    * Tests that when a processing time timer comes in after a window is expired
+   * it is just ignored.
+   */
+  @Test
+  public void testLateProcessingTimeTimer() throws Exception {
+    WindowingStrategy<?, IntervalWindow> strategy =
+        WindowingStrategy.of((WindowFn<?, IntervalWindow>) FixedWindows.of(Duration.millis(100)))
+            .withTimestampCombiner(TimestampCombiner.EARLIEST)
+            .withMode(AccumulationMode.ACCUMULATING_FIRED_PANES)
+            .withAllowedLateness(Duration.ZERO)
+            .withTrigger(
+                Repeatedly.forever(
+                    AfterProcessingTime.pastFirstElementInPane().plusDelayOf(Duration.millis(10))));
+
+    ReduceFnTester<Integer, Integer, IntervalWindow> tester =
+        ReduceFnTester.combining(strategy, Sum.ofIntegers(), VarIntCoder.of());
+
+    tester.advanceProcessingTime(new Instant(5000));
+    injectElement(tester, 2); // processing timer @ 5000 + 10; EOW timer @ 100
+    injectElement(tester, 5);
+
+    // After this advancement, the window is expired and only the GC process
+    // should be allowed to touch it
+    tester.advanceInputWatermarkNoTimers(new Instant(100));
+
+    // This should not output
+    tester.advanceProcessingTime(new Instant(6000));
+
+    assertThat(tester.extractOutput(), emptyIterable());
+  }
+
+  /**
+   * Tests that when a processing time timer comes in after a window is expired
    * but in the same bundle it does not cause a spurious output.
    */
   @Test

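The guard added above reduces to one comparison against the window's garbage-collection horizon. A sketch of the predicate as it sits inside the timer-processing loop, with 'window', 'timer', and the windowing strategy assumed in scope:

    Instant gcTime = window.maxTimestamp().plus(windowingStrategy.getAllowedLateness());
    boolean expired = timerInternals.currentInputWatermarkTime().isAfter(gcTime);
    if (expired && TimeDomain.PROCESSING_TIME == timer.getDomain()) {
      // Skip this timer: only the GC path may touch an expired window.
      continue;
    }
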

[02/50] [abbrv] beam git commit: Add timeout to initialization of partition in KafkaIO

Posted by ta...@apache.org.
Add timeout to initialization of partition in KafkaIO


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/c167d109
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/c167d109
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/c167d109

Branch: refs/heads/DSL_SQL
Commit: c167d10968b1bbd4f959f93ab3bcd4f76576c823
Parents: 4862703
Author: Raghu Angadi <ra...@google.com>
Authored: Mon Jul 3 23:54:10 2017 -0700
Committer: Tyler Akidau <ta...@apache.org>
Committed: Wed Jul 12 20:00:59 2017 -0700

----------------------------------------------------------------------
 .../org/apache/beam/sdk/io/kafka/KafkaIO.java   | 81 +++++++++++++++-----
 .../apache/beam/sdk/io/kafka/KafkaIOTest.java   | 30 ++++++++
 2 files changed, 92 insertions(+), 19 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/c167d109/sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/KafkaIO.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/KafkaIO.java b/sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/KafkaIO.java
index e520367..026313a 100644
--- a/sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/KafkaIO.java
+++ b/sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/KafkaIO.java
@@ -49,9 +49,11 @@ import java.util.Random;
 import java.util.Set;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
 import java.util.concurrent.ScheduledExecutorService;
 import java.util.concurrent.SynchronousQueue;
 import java.util.concurrent.TimeUnit;
+import java.util.concurrent.TimeoutException;
 import java.util.concurrent.atomic.AtomicBoolean;
 import javax.annotation.Nullable;
 import org.apache.beam.sdk.annotations.Experimental;
@@ -1061,8 +1063,32 @@ public class KafkaIO {
       curBatch = Iterators.cycle(nonEmpty);
     }
 
+    private void setupInitialOffset(PartitionState pState) {
+      Read<K, V> spec = source.spec;
+
+      if (pState.nextOffset != UNINITIALIZED_OFFSET) {
+        consumer.seek(pState.topicPartition, pState.nextOffset);
+      } else {
+        // nextOffset is uninitialized here, meaning start reading from the latest record as of now
+        // ('latest' is the default, and is configurable) or look up the offset by startReadTime.
+        // Remember the current position without waiting until the first record is read. This
+        // ensures checkpoint is accurate even if the reader is closed before reading any records.
+        Instant startReadTime = spec.getStartReadTime();
+        if (startReadTime != null) {
+          pState.nextOffset =
+              consumerSpEL.offsetForTime(consumer, pState.topicPartition, spec.getStartReadTime());
+          consumer.seek(pState.topicPartition, pState.nextOffset);
+        } else {
+          pState.nextOffset = consumer.position(pState.topicPartition);
+        }
+      }
+    }
+
     @Override
     public boolean start() throws IOException {
+      final int defaultPartitionInitTimeout = 60 * 1000;
+      final int kafkaRequestTimeoutMultiple = 2;
+
       Read<K, V> spec = source.spec;
       consumer = spec.getConsumerFactoryFn().apply(spec.getConsumerConfig());
       consumerSpEL.evaluateAssign(consumer, spec.getTopicPartitions());
@@ -1077,25 +1103,38 @@ public class KafkaIO {
       keyDeserializerInstance.configure(spec.getConsumerConfig(), true);
       valueDeserializerInstance.configure(spec.getConsumerConfig(), false);
 
-      for (PartitionState p : partitionStates) {
-        if (p.nextOffset != UNINITIALIZED_OFFSET) {
-          consumer.seek(p.topicPartition, p.nextOffset);
-        } else {
-          // nextOffset is unininitialized here, meaning start reading from latest record as of now
-          // ('latest' is the default, and is configurable) or 'look up offset by startReadTime.
-          // Remember the current position without waiting until the first record is read. This
-          // ensures checkpoint is accurate even if the reader is closed before reading any records.
-          Instant startReadTime = spec.getStartReadTime();
-          if (startReadTime != null) {
-            p.nextOffset =
-                consumerSpEL.offsetForTime(consumer, p.topicPartition, spec.getStartReadTime());
-            consumer.seek(p.topicPartition, p.nextOffset);
-          } else {
-            p.nextOffset = consumer.position(p.topicPartition);
+      // Seek to start offset for each partition. This is the first interaction with the server.
+      // Unfortunately it can block forever in case of network issues like incorrect ACLs.
+      // Initialize the partition in a separate thread and cancel it if it takes longer than a minute.
+      for (final PartitionState pState : partitionStates) {
+        Future<?> future =  consumerPollThread.submit(new Runnable() {
+          public void run() {
+            setupInitialOffset(pState);
           }
-        }
+        });
 
-        LOG.info("{}: reading from {} starting at offset {}", name, p.topicPartition, p.nextOffset);
+        try {
+          // Timeout: 1 minute, or 2 * the Kafka consumer request timeout if it is set.
+          Integer reqTimeout = (Integer) source.spec.getConsumerConfig().get(
+              ConsumerConfig.REQUEST_TIMEOUT_MS_CONFIG);
+          future.get(reqTimeout != null ? kafkaRequestTimeoutMultiple * reqTimeout
+                         : defaultPartitionInitTimeout,
+                     TimeUnit.MILLISECONDS);
+        } catch (TimeoutException e) {
+          consumer.wakeup(); // This unblocks a consumer stuck on network I/O.
+          // Likely reason: Kafka servers are configured to advertise internal IPs, but
+          // those IPs are not accessible from workers outside.
+          String msg = String.format(
+              "%s: Timeout while initializing partition '%s'. "
+                  + "Kafka client may not be able to connect to servers.",
+              this, pState.topicPartition);
+          LOG.error("{}", msg);
+          throw new IOException(msg);
+        } catch (Exception e) {
+          throw new IOException(e);
+        }
+        LOG.info("{}: reading from {} starting at offset {}",
+                 name, pState.topicPartition, pState.nextOffset);
       }
 
       // Start consumer read loop.
@@ -1329,8 +1368,12 @@ public class KafkaIO {
       // might block to enqueue right after availableRecordsQueue.poll() below.
       while (!isShutdown) {
 
-        consumer.wakeup();
-        offsetConsumer.wakeup();
+        if (consumer != null) {
+          consumer.wakeup();
+        }
+        if (offsetConsumer != null) {
+          offsetConsumer.wakeup();
+        }
         availableRecordsQueue.poll(); // drain unread batch, this unblocks consumer thread.
         try {
           isShutdown = consumerPollThread.awaitTermination(10, TimeUnit.SECONDS)

http://git-wip-us.apache.org/repos/asf/beam/blob/c167d109/sdks/java/io/kafka/src/test/java/org/apache/beam/sdk/io/kafka/KafkaIOTest.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/kafka/src/test/java/org/apache/beam/sdk/io/kafka/KafkaIOTest.java b/sdks/java/io/kafka/src/test/java/org/apache/beam/sdk/io/kafka/KafkaIOTest.java
index b69bc83..482f5a2 100644
--- a/sdks/java/io/kafka/src/test/java/org/apache/beam/sdk/io/kafka/KafkaIOTest.java
+++ b/sdks/java/io/kafka/src/test/java/org/apache/beam/sdk/io/kafka/KafkaIOTest.java
@@ -83,6 +83,7 @@ import org.apache.beam.sdk.values.KV;
 import org.apache.beam.sdk.values.PCollection;
 import org.apache.beam.sdk.values.PCollectionList;
 import org.apache.kafka.clients.consumer.Consumer;
+import org.apache.kafka.clients.consumer.ConsumerConfig;
 import org.apache.kafka.clients.consumer.ConsumerRecord;
 import org.apache.kafka.clients.consumer.MockConsumer;
 import org.apache.kafka.clients.consumer.OffsetResetStrategy;
@@ -364,6 +365,35 @@ public class KafkaIOTest {
   }
 
   @Test
+  public void testUnreachableKafkaBrokers() {
+    // Expect an exception when the Kafka brokers are not reachable on the workers.
+    // We specify partitions explicitly so that splitting does not involve server interaction.
+    // Set request timeout to 10ms so that test does not take long.
+
+    thrown.expect(Exception.class);
+    thrown.expectMessage("Reader-0: Timeout while initializing partition 'test-0'");
+
+    int numElements = 1000;
+    PCollection<Long> input = p
+        .apply(KafkaIO.<Integer, Long>read()
+            .withBootstrapServers("8.8.8.8:9092") // Google public DNS ip.
+            .withTopicPartitions(ImmutableList.of(new TopicPartition("test", 0)))
+            .withKeyDeserializer(IntegerDeserializer.class)
+            .withValueDeserializer(LongDeserializer.class)
+            .updateConsumerProperties(ImmutableMap.<String, Object>of(
+                ConsumerConfig.REQUEST_TIMEOUT_MS_CONFIG, 10,
+                ConsumerConfig.HEARTBEAT_INTERVAL_MS_CONFIG, 5,
+                ConsumerConfig.SESSION_TIMEOUT_MS_CONFIG, 8,
+                ConsumerConfig.FETCH_MAX_WAIT_MS_CONFIG, 8))
+            .withMaxNumRecords(10)
+            .withoutMetadata())
+        .apply(Values.<Long>create());
+
+    addCountingAsserts(input, numElements);
+    p.run();
+  }
+
+  @Test
   public void testUnboundedSourceWithSingleTopic() {
     // same as testUnboundedSource, but with single topic
 

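The fix is an instance of a general pattern: run a call that may block indefinitely on an executor, and bound the wait with Future.get. A generic sketch of the same pattern (the timeout value mirrors the default above; error handling simplified):

    ExecutorService executor = Executors.newSingleThreadExecutor();
    Future<?> init = executor.submit(new Runnable() {
      public void run() {
        consumer.position(topicPartition);  // first server interaction; may block on I/O
      }
    });
    try {
      init.get(60, TimeUnit.SECONDS);       // one-minute cap, as in the change above
    } catch (TimeoutException e) {
      consumer.wakeup();                    // unblocks a KafkaConsumer stuck on the network
      throw new IOException("Timeout initializing partition " + topicPartition, e);
    } catch (Exception e) {
      throw new IOException(e);
    }
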

[32/50] [abbrv] beam git commit: [BEAM-1348] Remove deprecated concepts in Fn API (now replaced with Runner API concepts).

Posted by ta...@apache.org.
[BEAM-1348] Remove deprecated concepts in Fn API (now replaced with Runner API concepts).


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/1f6117ff
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/1f6117ff
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/1f6117ff

Branch: refs/heads/DSL_SQL
Commit: 1f6117ffb23fc179a699cf11ebc2620af6cf2d4c
Parents: e014db6
Author: Luke Cwik <lc...@google.com>
Authored: Fri Jun 30 10:21:55 2017 -0700
Committer: Tyler Akidau <ta...@apache.org>
Committed: Wed Jul 12 20:01:01 2017 -0700

----------------------------------------------------------------------
 .../fn-api/src/main/proto/beam_fn_api.proto     | 151 +------------------
 .../harness/control/ProcessBundleHandler.java   |   4 +-
 .../fn/harness/control/RegisterHandler.java     |   2 +-
 .../fn/harness/control/RegisterHandlerTest.java |   8 +-
 .../apache_beam/runners/pipeline_context.py     |   2 +-
 .../runners/portability/fn_api_runner.py        |   2 +-
 .../apache_beam/runners/worker/sdk_worker.py    |   4 +-
 .../runners/worker/sdk_worker_test.py           |  16 +-
 8 files changed, 25 insertions(+), 164 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/1f6117ff/sdks/common/fn-api/src/main/proto/beam_fn_api.proto
----------------------------------------------------------------------
diff --git a/sdks/common/fn-api/src/main/proto/beam_fn_api.proto b/sdks/common/fn-api/src/main/proto/beam_fn_api.proto
index 8162bc5..9da5afe 100644
--- a/sdks/common/fn-api/src/main/proto/beam_fn_api.proto
+++ b/sdks/common/fn-api/src/main/proto/beam_fn_api.proto
@@ -38,7 +38,6 @@ option java_package = "org.apache.beam.fn.v1";
 option java_outer_classname = "BeamFnApi";
 
 import "beam_runner_api.proto";
-import "google/protobuf/any.proto";
 import "google/protobuf/timestamp.proto";
 
 /*
@@ -67,129 +66,6 @@ message Target {
   string name = 2;
 }
 
-// (Deprecated) Information defining a PCollection
-//
-// Migrate to Runner API.
-message PCollection {
-  // (Required) A reference to a coder.
-  string coder_reference = 1 [deprecated = true];
-
-  // TODO: Windowing strategy, ...
-}
-
-// (Deprecated) A primitive transform within Apache Beam.
-//
-// Migrate to Runner API.
-message PrimitiveTransform {
-  // (Required) A pipeline level unique id which can be used as a reference to
-  // refer to this.
-  string id = 1 [deprecated = true];
-
-  // (Required) A function spec that is used by this primitive
-  // transform to process data.
-  FunctionSpec function_spec = 2 [deprecated = true];
-
-  // A map of distinct input names to target definitions.
-  // For example, in CoGbk this represents the tag name associated with each
-  // distinct input name and a list of primitive transforms that are associated
-  // with the specified input.
-  map<string, Target.List> inputs = 3 [deprecated = true];
-
-  // A map from local output name to PCollection definitions. For example, in
-  // DoFn this represents the tag name associated with each distinct output.
-  map<string, PCollection> outputs = 4 [deprecated = true];
-
-  // TODO: Should we model side inputs as a special type of input for a
-  // primitive transform or should it be modeled as the relationship that
-  // the predecessor input will be a view primitive transform.
-  // A map of from side input names to side inputs.
-  map<string, SideInput> side_inputs = 5 [deprecated = true];
-
-  // The user name of this step.
-  // TODO: This should really be in display data and not at this level
-  string step_name = 6 [deprecated = true];
-}
-
-/*
- * User Definable Functions
- *
- * This is still unstable mainly due to how we model the side input.
- */
-
-// (Deprecated) Defines the common elements of user-definable functions,
-// to allow the SDK to express the information the runner needs to execute work.
-//
-// Migrate to Runner API.
-message FunctionSpec {
-  // (Required) A pipeline level unique id which can be used as a reference to
-  // refer to this.
-  string id = 1 [deprecated = true];
-
-  // (Required) A globally unique name representing this user definable
-  // function.
-  //
-  // User definable functions use the urn encodings registered such that another
-  // may implement the user definable function within another language.
-  //
-  // For example:
-  //    urn:org.apache.beam:coder:kv:1.0
-  string urn = 2 [deprecated = true];
-
-  // (Required) Reference to specification of execution environment required to
-  // invoke this function.
-  string environment_reference = 3 [deprecated = true];
-
-  // Data used to parameterize this function. Depending on the urn, this may be
-  // optional or required.
-  google.protobuf.Any data = 4 [deprecated = true];
-}
-
-// (Deprecated) Migrate to Runner API.
-message SideInput {
-  // TODO: Coder?
-
-  // For RunnerAPI.
-  Target input = 1 [deprecated = true];
-
-  // For FnAPI.
-  FunctionSpec view_fn = 2 [deprecated = true];
-}
-
-// (Deprecated) Defines how to encode values into byte streams and decode
-// values from byte streams. A coder can be parameterized by additional
-// properties which may or may not be language agnostic.
-//
-// Coders using the urn:org.apache.beam:coder namespace must have their
-// encodings registered such that another may implement the encoding within
-// another language.
-//
-// For example:
-//    urn:org.apache.beam:coder:kv:1.0
-//    urn:org.apache.beam:coder:iterable:1.0
-//
-// Migrate to Runner API.
-message Coder {
-  // TODO: This looks weird when compared to the other function specs
-  // which use URN to differentiate themselves. Should "Coder" be embedded
-  // inside the FunctionSpec data block.
-
-  // The data associated with this coder used to reconstruct it.
-  FunctionSpec function_spec = 1 [deprecated = true];
-
-  // A list of component coder references.
-  //
-  // For a key-value coder, there must be exactly two component coder references
-  // where the first reference represents the key coder and the second reference
-  // is the value coder.
-  //
-  // For an iterable coder, there must be exactly one component coder reference
-  // representing the value coder.
-  //
-  // TODO: Perhaps this is redundant with the data of the FunctionSpec
-  // for known coders?
-  repeated string component_coder_reference = 2 [deprecated = true];
-}
-
 // A descriptor for connecting to a remote port using the Beam Fn Data API.
 // Allows for communication between two environments (for example between the
 // runner and the SDK).
@@ -278,33 +154,20 @@ message ProcessBundleDescriptor {
   // refer to this.
   string id = 1;
 
-  // (Deprecated) A list of primitive transforms that should
-  // be used to construct the bundle processing graph.
-  //
-  // Migrate to Runner API definitions found within transforms field.
-  repeated PrimitiveTransform primitive_transform = 2 [deprecated = true];
-
-  // (Deprecated) The set of all coders referenced in this bundle.
-  //
-  // Migrate to Runner API defintions found within codersyyy field.
-  repeated Coder coders = 4 [deprecated = true];
-
   // (Required) A map from pipeline-scoped id to PTransform.
-  map<string, org.apache.beam.runner_api.v1.PTransform> transforms = 5;
+  map<string, org.apache.beam.runner_api.v1.PTransform> transforms = 2;
 
   // (Required) A map from pipeline-scoped id to PCollection.
-  map<string, org.apache.beam.runner_api.v1.PCollection> pcollections = 6;
+  map<string, org.apache.beam.runner_api.v1.PCollection> pcollections = 3;
 
   // (Required) A map from pipeline-scoped id to WindowingStrategy.
-  map<string, org.apache.beam.runner_api.v1.WindowingStrategy> windowing_strategies = 7;
+  map<string, org.apache.beam.runner_api.v1.WindowingStrategy> windowing_strategies = 4;
 
   // (Required) A map from pipeline-scoped id to Coder.
-  // TODO: Rename to "coders" once deprecated coders field is removed. Unique
-  // name is choosen to make it an easy search/replace
-  map<string, org.apache.beam.runner_api.v1.Coder> codersyyy = 8;
+  map<string, org.apache.beam.runner_api.v1.Coder> coders = 5;
 
   // (Required) A map from pipeline-scoped id to Environment.
-  map<string, org.apache.beam.runner_api.v1.Environment> environments = 9;
+  map<string, org.apache.beam.runner_api.v1.Environment> environments = 6;
 }
 
 // A request to process a given bundle.
@@ -385,14 +248,14 @@ message PrimitiveTransformSplit {
   //
   // For example, a remote GRPC source will have a specific urn and data
   // block containing an ElementCountRestriction.
-  FunctionSpec completed_restriction = 2;
+  org.apache.beam.runner_api.v1.FunctionSpec completed_restriction = 2;
 
   // (Required) A function specification describing the restriction
   // representing the remainder of work for the primitive transform.
   //
   // For example, a remote GRPC source will have a specific urn and data
   // block containing an ElementCountSkipRestriction.
-  FunctionSpec remaining_restriction = 3;
+  org.apache.beam.runner_api.v1.FunctionSpec remaining_restriction = 3;
 }
 
 message ProcessBundleSplitResponse {

http://git-wip-us.apache.org/repos/asf/beam/blob/1f6117ff/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/control/ProcessBundleHandler.java
----------------------------------------------------------------------
diff --git a/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/control/ProcessBundleHandler.java b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/control/ProcessBundleHandler.java
index 4c4f73d..2a9cef8 100644
--- a/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/control/ProcessBundleHandler.java
+++ b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/control/ProcessBundleHandler.java
@@ -49,7 +49,7 @@ import org.slf4j.LoggerFactory;
 
 /**
  * Processes {@link org.apache.beam.fn.v1.BeamFnApi.ProcessBundleRequest}s by materializing
- * the set of required runners for each {@link org.apache.beam.fn.v1.BeamFnApi.FunctionSpec},
+ * the set of required runners for each {@link RunnerApi.FunctionSpec},
  * wiring them together based upon the {@code input} and {@code output} map definitions.
  *
  * <p>Finally executes the DAG based graph by starting all runners in reverse topological order,
@@ -166,7 +166,7 @@ public class ProcessBundleHandler {
             pTransform,
             processBundleInstructionId,
             processBundleDescriptor.getPcollectionsMap(),
-            processBundleDescriptor.getCodersyyyMap(),
+            processBundleDescriptor.getCodersMap(),
             pCollectionIdsToConsumers,
             addStartFunction,
             addFinishFunction);

http://git-wip-us.apache.org/repos/asf/beam/blob/1f6117ff/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/control/RegisterHandler.java
----------------------------------------------------------------------
diff --git a/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/control/RegisterHandler.java b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/control/RegisterHandler.java
index 276a120..0e738ac 100644
--- a/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/control/RegisterHandler.java
+++ b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/control/RegisterHandler.java
@@ -79,7 +79,7 @@ public class RegisterHandler {
           processBundleDescriptor.getClass());
       computeIfAbsent(processBundleDescriptor.getId()).complete(processBundleDescriptor);
       for (Map.Entry<String, RunnerApi.Coder> entry
-          : processBundleDescriptor.getCodersyyyMap().entrySet()) {
+          : processBundleDescriptor.getCodersMap().entrySet()) {
         LOG.debug("Registering {} with type {}",
             entry.getKey(),
             entry.getValue().getClass());

http://git-wip-us.apache.org/repos/asf/beam/blob/1f6117ff/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/control/RegisterHandlerTest.java
----------------------------------------------------------------------
diff --git a/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/control/RegisterHandlerTest.java b/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/control/RegisterHandlerTest.java
index b1f4410..2b275af 100644
--- a/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/control/RegisterHandlerTest.java
+++ b/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/control/RegisterHandlerTest.java
@@ -44,14 +44,14 @@ public class RegisterHandlerTest {
       .setRegister(BeamFnApi.RegisterRequest.newBuilder()
           .addProcessBundleDescriptor(BeamFnApi.ProcessBundleDescriptor.newBuilder()
               .setId("1L")
-              .putCodersyyy("10L", RunnerApi.Coder.newBuilder()
+              .putCoders("10L", RunnerApi.Coder.newBuilder()
                   .setSpec(RunnerApi.SdkFunctionSpec.newBuilder()
                       .setSpec(RunnerApi.FunctionSpec.newBuilder().setUrn("urn:10L").build())
                       .build())
                   .build())
               .build())
           .addProcessBundleDescriptor(BeamFnApi.ProcessBundleDescriptor.newBuilder().setId("2L")
-              .putCodersyyy("20L", RunnerApi.Coder.newBuilder()
+              .putCoders("20L", RunnerApi.Coder.newBuilder()
                   .setSpec(RunnerApi.SdkFunctionSpec.newBuilder()
                       .setSpec(RunnerApi.FunctionSpec.newBuilder().setUrn("urn:20L").build())
                       .build())
@@ -82,10 +82,10 @@ public class RegisterHandlerTest {
     assertEquals(REGISTER_REQUEST.getRegister().getProcessBundleDescriptor(1),
         handler.getById("2L"));
     assertEquals(
-        REGISTER_REQUEST.getRegister().getProcessBundleDescriptor(0).getCodersyyyOrThrow("10L"),
+        REGISTER_REQUEST.getRegister().getProcessBundleDescriptor(0).getCodersOrThrow("10L"),
         handler.getById("10L"));
     assertEquals(
-        REGISTER_REQUEST.getRegister().getProcessBundleDescriptor(1).getCodersyyyOrThrow("20L"),
+        REGISTER_REQUEST.getRegister().getProcessBundleDescriptor(1).getCodersOrThrow("20L"),
         handler.getById("20L"));
     assertEquals(REGISTER_RESPONSE, responseFuture.get());
   }

http://git-wip-us.apache.org/repos/asf/beam/blob/1f6117ff/sdks/python/apache_beam/runners/pipeline_context.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/runners/pipeline_context.py b/sdks/python/apache_beam/runners/pipeline_context.py
index c2ae3f3..a40069b 100644
--- a/sdks/python/apache_beam/runners/pipeline_context.py
+++ b/sdks/python/apache_beam/runners/pipeline_context.py
@@ -84,7 +84,7 @@ class PipelineContext(object):
   def __init__(self, proto=None):
     if isinstance(proto, beam_fn_api_pb2.ProcessBundleDescriptor):
       proto = beam_runner_api_pb2.Components(
-          coders=dict(proto.codersyyy.items()),
+          coders=dict(proto.coders.items()),
           windowing_strategies=dict(proto.windowing_strategies.items()),
           environments=dict(proto.environments.items()))
     for name, cls in self._COMPONENT_TYPES.items():

http://git-wip-us.apache.org/repos/asf/beam/blob/1f6117ff/sdks/python/apache_beam/runners/portability/fn_api_runner.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/runners/portability/fn_api_runner.py b/sdks/python/apache_beam/runners/portability/fn_api_runner.py
index c5438ad..f522864 100644
--- a/sdks/python/apache_beam/runners/portability/fn_api_runner.py
+++ b/sdks/python/apache_beam/runners/portability/fn_api_runner.py
@@ -261,7 +261,7 @@ class FnApiRunner(maptask_executor_runner.MapTaskExecutorRunner):
         id=self._next_uid(),
         transforms=transform_protos,
         pcollections=pcollection_protos,
-        codersyyy=dict(context_proto.coders.items()),
+        coders=dict(context_proto.coders.items()),
         windowing_strategies=dict(context_proto.windowing_strategies.items()),
         environments=dict(context_proto.environments.items()))
     return input_data, side_input_data, runner_sinks, process_bundle_descriptor

http://git-wip-us.apache.org/repos/asf/beam/blob/1f6117ff/sdks/python/apache_beam/runners/worker/sdk_worker.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/runners/worker/sdk_worker.py b/sdks/python/apache_beam/runners/worker/sdk_worker.py
index e1ddfb7..ae86830 100644
--- a/sdks/python/apache_beam/runners/worker/sdk_worker.py
+++ b/sdks/python/apache_beam/runners/worker/sdk_worker.py
@@ -249,8 +249,6 @@ class SdkWorker(object):
   def register(self, request, unused_instruction_id=None):
     for process_bundle_descriptor in request.process_bundle_descriptor:
       self.fns[process_bundle_descriptor.id] = process_bundle_descriptor
-      for p_transform in list(process_bundle_descriptor.primitive_transform):
-        self.fns[p_transform.function_spec.id] = p_transform.function_spec
     return beam_fn_api_pb2.RegisterResponse()
 
   def create_execution_tree(self, descriptor):
@@ -355,7 +353,7 @@ class BeamTransformFactory(object):
     return creator(self, transform_id, transform_proto, parameter, consumers)
 
   def get_coder(self, coder_id):
-    coder_proto = self.descriptor.codersyyy[coder_id]
+    coder_proto = self.descriptor.coders[coder_id]
     if coder_proto.spec.spec.urn:
       return self.context.coders.get_by_id(coder_id)
     else:

http://git-wip-us.apache.org/repos/asf/beam/blob/1f6117ff/sdks/python/apache_beam/runners/worker/sdk_worker_test.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/runners/worker/sdk_worker_test.py b/sdks/python/apache_beam/runners/worker/sdk_worker_test.py
index 553d5b8..dc72a5f 100644
--- a/sdks/python/apache_beam/runners/worker/sdk_worker_test.py
+++ b/sdks/python/apache_beam/runners/worker/sdk_worker_test.py
@@ -28,6 +28,7 @@ from concurrent import futures
 import grpc
 
 from apache_beam.portability.api import beam_fn_api_pb2
+from apache_beam.portability.api import beam_runner_api_pb2
 from apache_beam.runners.worker import sdk_worker
 
 
@@ -61,13 +62,12 @@ class BeamFnControlServicer(beam_fn_api_pb2.BeamFnControlServicer):
 class SdkWorkerTest(unittest.TestCase):
 
   def test_fn_registration(self):
-    fns = [beam_fn_api_pb2.FunctionSpec(id=str(ix)) for ix in range(4)]
-
-    process_bundle_descriptors = [beam_fn_api_pb2.ProcessBundleDescriptor(
-        id=str(100+ix),
-        primitive_transform=[
-            beam_fn_api_pb2.PrimitiveTransform(function_spec=fn)])
-                                  for ix, fn in enumerate(fns)]
+    process_bundle_descriptors = [
+        beam_fn_api_pb2.ProcessBundleDescriptor(
+            id=str(100+ix),
+            transforms={
+                str(ix): beam_runner_api_pb2.PTransform(unique_name=str(ix))})
+        for ix in range(4)]
 
     test_controller = BeamFnControlServicer([beam_fn_api_pb2.InstructionRequest(
         register=beam_fn_api_pb2.RegisterRequest(
@@ -83,7 +83,7 @@ class SdkWorkerTest(unittest.TestCase):
     harness.run()
     self.assertEqual(
         harness.worker.fns,
-        {item.id: item for item in fns + process_bundle_descriptors})
+        {item.id: item for item in process_bundle_descriptors})
 
 
 if __name__ == "__main__":

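With the deprecated fields gone, descriptors are built and queried through the plain 'coders' map on every side. A Java sketch mirroring the updated test (the ids and the default coder instance are placeholders):

    BeamFnApi.ProcessBundleDescriptor descriptor =
        BeamFnApi.ProcessBundleDescriptor.newBuilder()
            .setId("1L")
            .putCoders("10L", RunnerApi.Coder.getDefaultInstance())
            .build();
    RunnerApi.Coder coder = descriptor.getCodersOrThrow("10L");
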

[31/50] [abbrv] beam git commit: Fix misleading comment in TransformHierarchy

Posted by ta...@apache.org.
Fix misleading comment in TransformHierarchy


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/fc06b798
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/fc06b798
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/fc06b798

Branch: refs/heads/DSL_SQL
Commit: fc06b798749144c908a86ba1d2d8addb2af05b16
Parents: 16d4a15
Author: Kenneth Knowles <kl...@google.com>
Authored: Mon Jun 12 15:11:49 2017 -0700
Committer: Tyler Akidau <ta...@apache.org>
Committed: Wed Jul 12 20:01:01 2017 -0700

----------------------------------------------------------------------
 .../main/java/org/apache/beam/sdk/runners/TransformHierarchy.java  | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/fc06b798/sdks/java/core/src/main/java/org/apache/beam/sdk/runners/TransformHierarchy.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/runners/TransformHierarchy.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/runners/TransformHierarchy.java
index 9c5f148..6f1ee94 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/runners/TransformHierarchy.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/runners/TransformHierarchy.java
@@ -406,7 +406,7 @@ public class TransformHierarchy {
       return fullName;
     }
 
-    /** Returns the transform input, in unexpanded form. */
+    /** Returns the transform input, in fully expanded form. */
     public Map<TupleTag<?>, PValue> getInputs() {
       return inputs == null ? Collections.<TupleTag<?>, PValue>emptyMap() : inputs;
     }

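Note on "fully expanded form": Node.getInputs() surfaces a composite input as the flat map of
its constituent values, not as the unexpanded composite. A minimal hedged sketch (the tags,
collections, and transform below are hypothetical):

    // A composite PValue such as a PCollectionTuple...
    PCollectionTuple tuple = PCollectionTuple.of(tagA, pcA).and(tagB, pcB);
    someTransformAppliedTo(tuple);  // hypothetical application
    // ...appears in the corresponding Node.getInputs() as its expanded
    // constituents, i.e. a Map<TupleTag<?>, PValue> of {tagA: pcA, tagB: pcB}.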

[46/50] [abbrv] beam git commit: Adds TextIO.readAll(), implemented rather naively

Posted by ta...@apache.org.
Adds TextIO.readAll(), implemented rather naively


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/fcb06f3b
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/fcb06f3b
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/fcb06f3b

Branch: refs/heads/DSL_SQL
Commit: fcb06f3bf5482dc3ae63a3c070592bae0c631c6d
Parents: 2e42ae4
Author: Eugene Kirpichov <ki...@google.com>
Authored: Fri Jun 23 18:02:10 2017 -0700
Committer: Tyler Akidau <ta...@apache.org>
Committed: Wed Jul 12 20:01:02 2017 -0700

----------------------------------------------------------------------
 ...ndedSplittableProcessElementInvokerTest.java |   2 +-
 .../core/SplittableParDoProcessFnTest.java      |   2 +-
 .../DataflowPipelineTranslatorTest.java         |   2 +-
 .../apache/beam/sdk/io/CompressedSource.java    |  40 ++--
 .../apache/beam/sdk/io/OffsetBasedSource.java   |  22 +-
 .../java/org/apache/beam/sdk/io/TextIO.java     | 230 +++++++++++++++++--
 .../apache/beam/sdk/io/range/OffsetRange.java   | 101 ++++++++
 .../beam/sdk/io/range/OffsetRangeTracker.java   |   3 +
 .../transforms/splittabledofn/OffsetRange.java  |  77 -------
 .../splittabledofn/OffsetRangeTracker.java      |   1 +
 .../java/org/apache/beam/sdk/io/TextIOTest.java |  62 +++--
 .../beam/sdk/transforms/SplittableDoFnTest.java |   2 +-
 .../splittabledofn/OffsetRangeTrackerTest.java  |   1 +
 13 files changed, 387 insertions(+), 158 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/fcb06f3b/runners/core-java/src/test/java/org/apache/beam/runners/core/OutputAndTimeBoundedSplittableProcessElementInvokerTest.java
----------------------------------------------------------------------
diff --git a/runners/core-java/src/test/java/org/apache/beam/runners/core/OutputAndTimeBoundedSplittableProcessElementInvokerTest.java b/runners/core-java/src/test/java/org/apache/beam/runners/core/OutputAndTimeBoundedSplittableProcessElementInvokerTest.java
index a2f6acc..b80a632 100644
--- a/runners/core-java/src/test/java/org/apache/beam/runners/core/OutputAndTimeBoundedSplittableProcessElementInvokerTest.java
+++ b/runners/core-java/src/test/java/org/apache/beam/runners/core/OutputAndTimeBoundedSplittableProcessElementInvokerTest.java
@@ -25,10 +25,10 @@ import static org.junit.Assert.assertThat;
 
 import java.util.Collection;
 import java.util.concurrent.Executors;
+import org.apache.beam.sdk.io.range.OffsetRange;
 import org.apache.beam.sdk.options.PipelineOptionsFactory;
 import org.apache.beam.sdk.transforms.DoFn;
 import org.apache.beam.sdk.transforms.reflect.DoFnInvokers;
-import org.apache.beam.sdk.transforms.splittabledofn.OffsetRange;
 import org.apache.beam.sdk.transforms.splittabledofn.OffsetRangeTracker;
 import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
 import org.apache.beam.sdk.transforms.windowing.GlobalWindow;

http://git-wip-us.apache.org/repos/asf/beam/blob/fcb06f3b/runners/core-java/src/test/java/org/apache/beam/runners/core/SplittableParDoProcessFnTest.java
----------------------------------------------------------------------
diff --git a/runners/core-java/src/test/java/org/apache/beam/runners/core/SplittableParDoProcessFnTest.java b/runners/core-java/src/test/java/org/apache/beam/runners/core/SplittableParDoProcessFnTest.java
index 9543de8..1cd1275 100644
--- a/runners/core-java/src/test/java/org/apache/beam/runners/core/SplittableParDoProcessFnTest.java
+++ b/runners/core-java/src/test/java/org/apache/beam/runners/core/SplittableParDoProcessFnTest.java
@@ -39,11 +39,11 @@ import org.apache.beam.sdk.coders.BigEndianIntegerCoder;
 import org.apache.beam.sdk.coders.Coder;
 import org.apache.beam.sdk.coders.InstantCoder;
 import org.apache.beam.sdk.coders.SerializableCoder;
+import org.apache.beam.sdk.io.range.OffsetRange;
 import org.apache.beam.sdk.testing.TestPipeline;
 import org.apache.beam.sdk.transforms.DoFn;
 import org.apache.beam.sdk.transforms.DoFnTester;
 import org.apache.beam.sdk.transforms.splittabledofn.HasDefaultTracker;
-import org.apache.beam.sdk.transforms.splittabledofn.OffsetRange;
 import org.apache.beam.sdk.transforms.splittabledofn.OffsetRangeTracker;
 import org.apache.beam.sdk.transforms.splittabledofn.RestrictionTracker;
 import org.apache.beam.sdk.transforms.windowing.BoundedWindow;

http://git-wip-us.apache.org/repos/asf/beam/blob/fcb06f3b/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/DataflowPipelineTranslatorTest.java
----------------------------------------------------------------------
diff --git a/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/DataflowPipelineTranslatorTest.java b/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/DataflowPipelineTranslatorTest.java
index 948af1c..43b2788 100644
--- a/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/DataflowPipelineTranslatorTest.java
+++ b/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/DataflowPipelineTranslatorTest.java
@@ -84,6 +84,7 @@ import org.apache.beam.sdk.extensions.gcp.auth.TestCredential;
 import org.apache.beam.sdk.extensions.gcp.storage.GcsPathValidator;
 import org.apache.beam.sdk.io.FileSystems;
 import org.apache.beam.sdk.io.TextIO;
+import org.apache.beam.sdk.io.range.OffsetRange;
 import org.apache.beam.sdk.options.PipelineOptions;
 import org.apache.beam.sdk.options.PipelineOptionsFactory;
 import org.apache.beam.sdk.options.ValueProvider;
@@ -98,7 +99,6 @@ import org.apache.beam.sdk.transforms.ParDo;
 import org.apache.beam.sdk.transforms.Sum;
 import org.apache.beam.sdk.transforms.View;
 import org.apache.beam.sdk.transforms.display.DisplayData;
-import org.apache.beam.sdk.transforms.splittabledofn.OffsetRange;
 import org.apache.beam.sdk.transforms.splittabledofn.OffsetRangeTracker;
 import org.apache.beam.sdk.transforms.windowing.FixedWindows;
 import org.apache.beam.sdk.transforms.windowing.Window;

http://git-wip-us.apache.org/repos/asf/beam/blob/fcb06f3b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/CompressedSource.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/CompressedSource.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/CompressedSource.java
index 6ab8dec..4baac36 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/CompressedSource.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/CompressedSource.java
@@ -96,12 +96,6 @@ public class CompressedSource<T> extends FileBasedSource<T> {
      */
     ReadableByteChannel createDecompressingChannel(String fileName, ReadableByteChannel channel)
         throws IOException;
-
-    /**
-     * Given a file name, returns true if the file name matches any supported compression
-     * scheme.
-     */
-    boolean isCompressed(String fileName);
   }
 
   /**
@@ -242,6 +236,16 @@ public class CompressedSource<T> extends FileBasedSource<T> {
     @Override
     public abstract ReadableByteChannel createDecompressingChannel(ReadableByteChannel channel)
         throws IOException;
+
+    /** Returns whether the file's extension matches one of the known compression formats. */
+    public static boolean isCompressed(String filename) {
+      for (CompressionMode type : CompressionMode.values()) {
+        if (type.matches(filename)) {
+          return true;
+        }
+      }
+      return false;
+    }
   }
 
   /**
@@ -273,16 +277,6 @@ public class CompressedSource<T> extends FileBasedSource<T> {
               ReadableByteChannel.class.getSimpleName(),
               ReadableByteChannel.class.getSimpleName()));
     }
-
-    @Override
-    public boolean isCompressed(String fileName) {
-      for (CompressionMode type : CompressionMode.values()) {
-        if  (type.matches(fileName)) {
-          return true;
-        }
-      }
-      return false;
-    }
   }
 
   private final FileBasedSource<T> sourceDelegate;
@@ -366,13 +360,9 @@ public class CompressedSource<T> extends FileBasedSource<T> {
    */
   @Override
   protected final boolean isSplittable() throws Exception {
-    if (channelFactory instanceof FileNameBasedDecompressingChannelFactory) {
-      FileNameBasedDecompressingChannelFactory fileNameBasedChannelFactory =
-          (FileNameBasedDecompressingChannelFactory) channelFactory;
-      return !fileNameBasedChannelFactory.isCompressed(getFileOrPatternSpec())
-          && sourceDelegate.isSplittable();
-    }
-    return false;
+    return channelFactory instanceof FileNameBasedDecompressingChannelFactory
+        && !CompressionMode.isCompressed(getFileOrPatternSpec())
+        && sourceDelegate.isSplittable();
   }
 
   /**
@@ -386,9 +376,7 @@ public class CompressedSource<T> extends FileBasedSource<T> {
   @Override
   protected final FileBasedReader<T> createSingleFileReader(PipelineOptions options) {
     if (channelFactory instanceof FileNameBasedDecompressingChannelFactory) {
-      FileNameBasedDecompressingChannelFactory fileNameBasedChannelFactory =
-          (FileNameBasedDecompressingChannelFactory) channelFactory;
-      if (!fileNameBasedChannelFactory.isCompressed(getFileOrPatternSpec())) {
+      if (!CompressionMode.isCompressed(getFileOrPatternSpec())) {
         return sourceDelegate.createSingleFileReader(options);
       }
     }

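With the check now a static method on CompressionMode, callers can test splittability without
downcasting the channel factory. A minimal hedged sketch (the file path is hypothetical):

    import org.apache.beam.sdk.io.CompressedSource.CompressionMode;

    String spec = "/tmp/input/part-00000.gz";  // hypothetical path
    // A file whose extension matches a known compression format cannot be split,
    // so it must be read sequentially by a single reader.
    boolean splittable = !CompressionMode.isCompressed(spec);
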
http://git-wip-us.apache.org/repos/asf/beam/blob/fcb06f3b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/OffsetBasedSource.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/OffsetBasedSource.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/OffsetBasedSource.java
index 05f0d97..c3687a9 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/OffsetBasedSource.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/OffsetBasedSource.java
@@ -23,6 +23,7 @@ import java.io.IOException;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.NoSuchElementException;
+import org.apache.beam.sdk.io.range.OffsetRange;
 import org.apache.beam.sdk.io.range.OffsetRangeTracker;
 import org.apache.beam.sdk.io.range.RangeTracker;
 import org.apache.beam.sdk.options.PipelineOptions;
@@ -110,8 +111,7 @@ public abstract class OffsetBasedSource<T> extends BoundedSource<T> {
   @Override
   public List<? extends OffsetBasedSource<T>> split(
       long desiredBundleSizeBytes, PipelineOptions options) throws Exception {
-    // Split the range into bundles based on the desiredBundleSizeBytes. Final bundle is adjusted to
-    // make sure that we do not end up with a too small bundle at the end. If the desired bundle
+    // Split the range into bundles based on the desiredBundleSizeBytes. If the desired bundle
     // size is smaller than the minBundleSize of the source then minBundleSize will be used instead.
 
     long desiredBundleSizeOffsetUnits = Math.max(
@@ -119,20 +119,10 @@ public abstract class OffsetBasedSource<T> extends BoundedSource<T> {
         minBundleSize);
 
     List<OffsetBasedSource<T>> subSources = new ArrayList<>();
-    long start = startOffset;
-    long maxEnd = Math.min(endOffset, getMaxEndOffset(options));
-
-    while (start < maxEnd) {
-      long end = start + desiredBundleSizeOffsetUnits;
-      end = Math.min(end, maxEnd);
-      // Avoid having a too small bundle at the end and ensure that we respect minBundleSize.
-      long remaining = maxEnd - end;
-      if ((remaining < desiredBundleSizeOffsetUnits / 4) || (remaining < minBundleSize)) {
-        end = maxEnd;
-      }
-      subSources.add(createSourceForSubrange(start, end));
-
-      start = end;
+    for (OffsetRange range :
+        new OffsetRange(startOffset, Math.min(endOffset, getMaxEndOffset(options)))
+            .split(desiredBundleSizeOffsetUnits, minBundleSize)) {
+      subSources.add(createSourceForSubrange(range.getFrom(), range.getTo()));
     }
     return subSources;
   }

http://git-wip-us.apache.org/repos/asf/beam/blob/fcb06f3b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/TextIO.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/TextIO.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/TextIO.java
index 5241589..78340f3 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/TextIO.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/TextIO.java
@@ -23,25 +23,37 @@ import static com.google.common.base.Preconditions.checkState;
 
 import com.google.auto.value.AutoValue;
 import com.google.common.annotations.VisibleForTesting;
+import java.io.IOException;
+import java.util.concurrent.ThreadLocalRandom;
 import javax.annotation.Nullable;
 import org.apache.beam.sdk.annotations.Experimental;
 import org.apache.beam.sdk.annotations.Experimental.Kind;
 import org.apache.beam.sdk.coders.Coder;
 import org.apache.beam.sdk.coders.StringUtf8Coder;
 import org.apache.beam.sdk.coders.VoidCoder;
+import org.apache.beam.sdk.io.CompressedSource.CompressionMode;
 import org.apache.beam.sdk.io.DefaultFilenamePolicy.Params;
 import org.apache.beam.sdk.io.FileBasedSink.DynamicDestinations;
 import org.apache.beam.sdk.io.FileBasedSink.FilenamePolicy;
 import org.apache.beam.sdk.io.FileBasedSink.WritableByteChannelFactory;
 import org.apache.beam.sdk.io.Read.Bounded;
+import org.apache.beam.sdk.io.fs.MatchResult;
+import org.apache.beam.sdk.io.fs.MatchResult.Metadata;
+import org.apache.beam.sdk.io.fs.MatchResult.Status;
 import org.apache.beam.sdk.io.fs.ResourceId;
+import org.apache.beam.sdk.io.range.OffsetRange;
 import org.apache.beam.sdk.options.ValueProvider;
 import org.apache.beam.sdk.options.ValueProvider.NestedValueProvider;
 import org.apache.beam.sdk.options.ValueProvider.StaticValueProvider;
+import org.apache.beam.sdk.transforms.DoFn;
 import org.apache.beam.sdk.transforms.PTransform;
+import org.apache.beam.sdk.transforms.ParDo;
+import org.apache.beam.sdk.transforms.Reshuffle;
 import org.apache.beam.sdk.transforms.SerializableFunction;
 import org.apache.beam.sdk.transforms.SerializableFunctions;
+import org.apache.beam.sdk.transforms.Values;
 import org.apache.beam.sdk.transforms.display.DisplayData;
+import org.apache.beam.sdk.values.KV;
 import org.apache.beam.sdk.values.PBegin;
 import org.apache.beam.sdk.values.PCollection;
 import org.apache.beam.sdk.values.PDone;
@@ -51,13 +63,14 @@ import org.apache.beam.sdk.values.PDone;
  *
  * <p>To read a {@link PCollection} from one or more text files, use {@code TextIO.read()} to
  * instantiate a transform and use {@link TextIO.Read#from(String)} to specify the path of the
- * file(s) to be read.
+ * file(s) to be read. Alternatively, if the filenames to be read are themselves in a
+ * {@link PCollection}, apply {@link TextIO#readAll()}.
  *
  * <p>{@link TextIO.Read} returns a {@link PCollection} of {@link String Strings}, each
  * corresponding to one line of an input UTF-8 text file (split into lines delimited by '\n', '\r',
  * or '\r\n').
  *
- * <p>Example:
+ * <p>Example 1: reading a file or filepattern.
  *
  * <pre>{@code
  * Pipeline p = ...;
@@ -66,6 +79,19 @@ import org.apache.beam.sdk.values.PDone;
  * PCollection<String> lines = p.apply(TextIO.read().from("/local/path/to/file.txt"));
  * }</pre>
  *
+ * <p>Example 2: reading a PCollection of filenames.
+ *
+ * <pre>{@code
+ * Pipeline p = ...;
+ *
+ * // E.g. the filenames might be computed from other data in the pipeline, or
+ * // read from a data source.
+ * PCollection<String> filenames = ...;
+ *
+ * // Read all files in the collection.
+ * PCollection<String> lines = filenames.apply(TextIO.readAll());
+ * }</pre>
+ *
  * <p>To write a {@link PCollection} to one or more text files, use {@code TextIO.write()}, using
  * {@link TextIO.Write#to(String)} to specify the output prefix of the files to write.
  *
@@ -132,6 +158,26 @@ public class TextIO {
   }
 
   /**
+   * A {@link PTransform} that works like {@link #read}, but reads each file in a {@link
+   * PCollection} of filepatterns.
+   *
+   * <p>Can be applied to both bounded and unbounded {@link PCollection PCollections}, so this is
+   * suitable for reading a {@link PCollection} of filepatterns arriving as a stream. However, every
+   * filepattern is expanded only once, at the moment it is processed, rather than being watched
+   * for newly matching files to appear. Likewise, every file is read once, rather than being
+   * watched for new entries.
+   */
+  public static ReadAll readAll() {
+    return new AutoValue_TextIO_ReadAll.Builder()
+        .setCompressionType(CompressionType.AUTO)
+        // 64MB is a reasonable value that helps amortize the cost of opening files,
+        // but is not so large as to exhaust a typical runner's maximum amount of output per
+        // ProcessElement call.
+        .setDesiredBundleSizeBytes(64 * 1024 * 1024L)
+        .build();
+  }
+
+  /**
    * A {@link PTransform} that writes a {@link PCollection} to a text file (or multiple text files
    * matching a sharding pattern), with each element of the input collection encoded into its own
    * line.
@@ -228,29 +274,34 @@ public class TextIO {
 
     // Helper to create a source specific to the requested compression type.
     protected FileBasedSource<String> getSource() {
-      switch (getCompressionType()) {
+      return wrapWithCompression(new TextSource(getFilepattern()), getCompressionType());
+    }
+
+    private static FileBasedSource<String> wrapWithCompression(
+        FileBasedSource<String> source, CompressionType compressionType) {
+      switch (compressionType) {
         case UNCOMPRESSED:
-          return new TextSource(getFilepattern());
+          return source;
         case AUTO:
-          return CompressedSource.from(new TextSource(getFilepattern()));
+          return CompressedSource.from(source);
         case BZIP2:
           return
-              CompressedSource.from(new TextSource(getFilepattern()))
-                  .withDecompression(CompressedSource.CompressionMode.BZIP2);
+              CompressedSource.from(source)
+                  .withDecompression(CompressionMode.BZIP2);
         case GZIP:
           return
-              CompressedSource.from(new TextSource(getFilepattern()))
-                  .withDecompression(CompressedSource.CompressionMode.GZIP);
+              CompressedSource.from(source)
+                  .withDecompression(CompressionMode.GZIP);
         case ZIP:
           return
-              CompressedSource.from(new TextSource(getFilepattern()))
-                  .withDecompression(CompressedSource.CompressionMode.ZIP);
+              CompressedSource.from(source)
+                  .withDecompression(CompressionMode.ZIP);
         case DEFLATE:
           return
-              CompressedSource.from(new TextSource(getFilepattern()))
-                  .withDecompression(CompressedSource.CompressionMode.DEFLATE);
+              CompressedSource.from(source)
+                  .withDecompression(CompressionMode.DEFLATE);
         default:
-          throw new IllegalArgumentException("Unknown compression type: " + getFilepattern());
+          throw new IllegalArgumentException("Unknown compression type: " + compressionType);
       }
     }
 
@@ -273,7 +324,156 @@ public class TextIO {
     }
   }
 
-  // ///////////////////////////////////////////////////////////////////////////
+  /////////////////////////////////////////////////////////////////////////////
+
+  /** Implementation of {@link #readAll}. */
+  @AutoValue
+  public abstract static class ReadAll
+      extends PTransform<PCollection<String>, PCollection<String>> {
+    abstract CompressionType getCompressionType();
+    abstract long getDesiredBundleSizeBytes();
+
+    abstract Builder toBuilder();
+
+    @AutoValue.Builder
+    abstract static class Builder {
+      abstract Builder setCompressionType(CompressionType compressionType);
+      abstract Builder setDesiredBundleSizeBytes(long desiredBundleSizeBytes);
+
+      abstract ReadAll build();
+    }
+
+    /** Same as {@link Read#withCompressionType(CompressionType)}. */
+    public ReadAll withCompressionType(CompressionType compressionType) {
+      return toBuilder().setCompressionType(compressionType).build();
+    }
+
+    @VisibleForTesting
+    ReadAll withDesiredBundleSizeBytes(long desiredBundleSizeBytes) {
+      return toBuilder().setDesiredBundleSizeBytes(desiredBundleSizeBytes).build();
+    }
+
+    @Override
+    public PCollection<String> expand(PCollection<String> input) {
+      return input
+          .apply("Expand glob", ParDo.of(new ExpandGlobFn()))
+          .apply(
+              "Split into ranges",
+              ParDo.of(new SplitIntoRangesFn(getCompressionType(), getDesiredBundleSizeBytes())))
+          .apply("Reshuffle", new ReshuffleWithUniqueKey<KV<Metadata, OffsetRange>>())
+          .apply("Read", ParDo.of(new ReadTextFn(this)));
+    }
+
+    private static class ReshuffleWithUniqueKey<T>
+        extends PTransform<PCollection<T>, PCollection<T>> {
+      @Override
+      public PCollection<T> expand(PCollection<T> input) {
+        return input
+            .apply("Unique key", ParDo.of(new AssignUniqueKeyFn<T>()))
+            .apply("Reshuffle", Reshuffle.<Integer, T>of())
+            .apply("Values", Values.<T>create());
+      }
+    }
+
+    private static class AssignUniqueKeyFn<T> extends DoFn<T, KV<Integer, T>> {
+      private int index;
+
+      @Setup
+      public void setup() {
+        this.index = ThreadLocalRandom.current().nextInt();
+      }
+
+      @ProcessElement
+      public void process(ProcessContext c) {
+        c.output(KV.of(++index, c.element()));
+      }
+    }
+
+    private static class ExpandGlobFn extends DoFn<String, Metadata> {
+      @ProcessElement
+      public void process(ProcessContext c) throws Exception {
+        MatchResult match = FileSystems.match(c.element());
+        checkArgument(
+            match.status().equals(Status.OK),
+            "Failed to match filepattern %s: %s",
+            c.element(),
+            match.status());
+        for (Metadata metadata : match.metadata()) {
+          c.output(metadata);
+        }
+      }
+    }
+
+    private static class SplitIntoRangesFn extends DoFn<Metadata, KV<Metadata, OffsetRange>> {
+      private final CompressionType compressionType;
+      private final long desiredBundleSize;
+
+      private SplitIntoRangesFn(CompressionType compressionType, long desiredBundleSize) {
+        this.compressionType = compressionType;
+        this.desiredBundleSize = desiredBundleSize;
+      }
+
+      @ProcessElement
+      public void process(ProcessContext c) {
+        Metadata metadata = c.element();
+        final boolean isSplittable = isSplittable(metadata, compressionType);
+        if (!isSplittable) {
+          c.output(KV.of(metadata, new OffsetRange(0, metadata.sizeBytes())));
+          return;
+        }
+        for (OffsetRange range :
+            new OffsetRange(0, metadata.sizeBytes()).split(desiredBundleSize, 0)) {
+          c.output(KV.of(metadata, range));
+        }
+      }
+
+      static boolean isSplittable(Metadata metadata, CompressionType compressionType) {
+        if (!metadata.isReadSeekEfficient()) {
+          return false;
+        }
+        switch (compressionType) {
+          case AUTO:
+            return !CompressionMode.isCompressed(metadata.resourceId().toString());
+          case UNCOMPRESSED:
+            return true;
+          case GZIP:
+          case BZIP2:
+          case ZIP:
+          case DEFLATE:
+            return false;
+          default:
+            throw new UnsupportedOperationException("Unknown compression type: " + compressionType);
+        }
+      }
+    }
+
+    private static class ReadTextFn extends DoFn<KV<Metadata, OffsetRange>, String> {
+      private final TextIO.ReadAll spec;
+
+      private ReadTextFn(ReadAll spec) {
+        this.spec = spec;
+      }
+
+      @ProcessElement
+      public void process(ProcessContext c) throws IOException {
+        Metadata metadata = c.element().getKey();
+        OffsetRange range = c.element().getValue();
+        FileBasedSource<String> source =
+            TextIO.Read.wrapWithCompression(
+                new TextSource(StaticValueProvider.of(metadata.toString())),
+                spec.getCompressionType());
+        BoundedSource.BoundedReader<String> reader =
+            source
+                .createForSubrangeOfFile(metadata, range.getFrom(), range.getTo())
+                .createReader(c.getPipelineOptions());
+        for (boolean more = reader.start(); more; more = reader.advance()) {
+          c.output(reader.getCurrent());
+        }
+      }
+    }
+  }
+
+  /////////////////////////////////////////////////////////////////////////////
 
   /** Implementation of {@link #write}. */
   @AutoValue

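A minimal usage sketch of the new transform, mirroring the javadoc above (paths and options
are hypothetical):

    Pipeline p = Pipeline.create(options);
    // The filepatterns could come from any upstream source; a hard-coded pair for illustration.
    PCollection<String> patterns =
        p.apply(Create.of("/data/logs/2017/06/*.gz", "/data/logs/2017/07/*.gz"));
    // Each pattern is expanded once; splittable matched files are further divided into
    // ~64MB ranges, reshuffled for parallelism, and then read range by range.
    PCollection<String> lines =
        patterns.apply(TextIO.readAll().withCompressionType(TextIO.CompressionType.AUTO));
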
http://git-wip-us.apache.org/repos/asf/beam/blob/fcb06f3b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/range/OffsetRange.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/range/OffsetRange.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/range/OffsetRange.java
new file mode 100644
index 0000000..d3bff37
--- /dev/null
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/range/OffsetRange.java
@@ -0,0 +1,101 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.sdk.io.range;
+
+import static com.google.common.base.Preconditions.checkArgument;
+
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.List;
+import org.apache.beam.sdk.transforms.splittabledofn.HasDefaultTracker;
+
+/** A restriction represented by a range of integers [from, to). */
+public class OffsetRange
+    implements Serializable,
+    HasDefaultTracker<
+                OffsetRange, org.apache.beam.sdk.transforms.splittabledofn.OffsetRangeTracker> {
+  private final long from;
+  private final long to;
+
+  public OffsetRange(long from, long to) {
+    checkArgument(from <= to, "Malformed range [%s, %s)", from, to);
+    this.from = from;
+    this.to = to;
+  }
+
+  public long getFrom() {
+    return from;
+  }
+
+  public long getTo() {
+    return to;
+  }
+
+  @Override
+  public org.apache.beam.sdk.transforms.splittabledofn.OffsetRangeTracker newTracker() {
+    return new org.apache.beam.sdk.transforms.splittabledofn.OffsetRangeTracker(this);
+  }
+
+  @Override
+  public String toString() {
+    return "[" + from + ", " + to + ')';
+  }
+
+  @Override
+  public boolean equals(Object o) {
+    if (this == o) {
+      return true;
+    }
+    if (o == null || getClass() != o.getClass()) {
+      return false;
+    }
+
+    OffsetRange that = (OffsetRange) o;
+
+    if (from != that.from) {
+      return false;
+    }
+    return to == that.to;
+  }
+
+  @Override
+  public int hashCode() {
+    int result = (int) (from ^ (from >>> 32));
+    result = 31 * result + (int) (to ^ (to >>> 32));
+    return result;
+  }
+
+  public List<OffsetRange> split(long desiredNumOffsetsPerSplit, long minNumOffsetPerSplit) {
+    List<OffsetRange> res = new ArrayList<>();
+    long start = getFrom();
+    long maxEnd = getTo();
+
+    while (start < maxEnd) {
+      long end = start + desiredNumOffsetsPerSplit;
+      end = Math.min(end, maxEnd);
+      // Avoid leaving too small a range at the end, and ensure we respect minNumOffsetPerSplit.
+      long remaining = maxEnd - end;
+      if ((remaining < desiredNumOffsetsPerSplit / 4) || (remaining < minNumOffsetPerSplit)) {
+        end = maxEnd;
+      }
+      res.add(new OffsetRange(start, end));
+      start = end;
+    }
+    return res;
+  }
+}

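The tail-merging rule in split() folds a final range smaller than a quarter of the desired
size (or smaller than the minimum) into its predecessor. A worked example with arbitrary
numbers:

    // Naively, [0, 105) with desired size 50 would yield [0,50), [50,100), [100,105);
    // the 5-offset tail is below 50/4, so it is merged into the previous range.
    List<OffsetRange> parts = new OffsetRange(0, 105).split(50, 0);
    // parts == [ [0, 50), [50, 105) ]
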
http://git-wip-us.apache.org/repos/asf/beam/blob/fcb06f3b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/range/OffsetRangeTracker.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/range/OffsetRangeTracker.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/range/OffsetRangeTracker.java
index 51e2b1a..8f0083e 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/range/OffsetRangeTracker.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/range/OffsetRangeTracker.java
@@ -26,6 +26,9 @@ import org.slf4j.LoggerFactory;
 
 /**
  * A {@link RangeTracker} for non-negative positions of type {@code long}.
+ *
+ * <p>Not to be confused with {@link
+ * org.apache.beam.sdk.transforms.splittabledofn.OffsetRangeTracker}.
  */
 public class OffsetRangeTracker implements RangeTracker<Long> {
   private static final Logger LOG = LoggerFactory.getLogger(OffsetRangeTracker.class);

http://git-wip-us.apache.org/repos/asf/beam/blob/fcb06f3b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/splittabledofn/OffsetRange.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/splittabledofn/OffsetRange.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/splittabledofn/OffsetRange.java
deleted file mode 100644
index 104f5f2..0000000
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/splittabledofn/OffsetRange.java
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.beam.sdk.transforms.splittabledofn;
-
-import static com.google.common.base.Preconditions.checkArgument;
-
-import java.io.Serializable;
-
-/** A restriction represented by a range of integers [from, to). */
-public class OffsetRange
-    implements Serializable, HasDefaultTracker<OffsetRange, OffsetRangeTracker> {
-  private final long from;
-  private final long to;
-
-  public OffsetRange(long from, long to) {
-    checkArgument(from <= to, "Malformed range [%s, %s)", from, to);
-    this.from = from;
-    this.to = to;
-  }
-
-  public long getFrom() {
-    return from;
-  }
-
-  public long getTo() {
-    return to;
-  }
-
-  @Override
-  public OffsetRangeTracker newTracker() {
-    return new OffsetRangeTracker(this);
-  }
-
-  @Override
-  public String toString() {
-    return "[" + from + ", " + to + ')';
-  }
-
-  @Override
-  public boolean equals(Object o) {
-    if (this == o) {
-      return true;
-    }
-    if (o == null || getClass() != o.getClass()) {
-      return false;
-    }
-
-    OffsetRange that = (OffsetRange) o;
-
-    if (from != that.from) {
-      return false;
-    }
-    return to == that.to;
-  }
-
-  @Override
-  public int hashCode() {
-    int result = (int) (from ^ (from >>> 32));
-    result = 31 * result + (int) (to ^ (to >>> 32));
-    return result;
-  }
-}

http://git-wip-us.apache.org/repos/asf/beam/blob/fcb06f3b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/splittabledofn/OffsetRangeTracker.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/splittabledofn/OffsetRangeTracker.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/splittabledofn/OffsetRangeTracker.java
index 0271a0d..62c10a7 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/splittabledofn/OffsetRangeTracker.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/splittabledofn/OffsetRangeTracker.java
@@ -21,6 +21,7 @@ import static com.google.common.base.Preconditions.checkArgument;
 import static com.google.common.base.Preconditions.checkNotNull;
 import static com.google.common.base.Preconditions.checkState;
 
+import org.apache.beam.sdk.io.range.OffsetRange;
 import org.apache.beam.sdk.transforms.DoFn;
 
 /**

http://git-wip-us.apache.org/repos/asf/beam/blob/fcb06f3b/sdks/java/core/src/test/java/org/apache/beam/sdk/io/TextIOTest.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/io/TextIOTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/io/TextIOTest.java
index 8797ff7..a6be4fb 100644
--- a/sdks/java/core/src/test/java/org/apache/beam/sdk/io/TextIOTest.java
+++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/io/TextIOTest.java
@@ -120,10 +120,10 @@ import org.junit.runners.JUnit4;
 public class TextIOTest {
   private static final String MY_HEADER = "myHeader";
   private static final String MY_FOOTER = "myFooter";
-  private static final String[] EMPTY = new String[] {};
-  private static final String[] TINY =
-      new String[] {"Irritable eagle", "Optimistic jay", "Fanciful hawk"};
-  private static final String[] LARGE = makeLines(1000);
+  private static final List<String> EMPTY = Collections.emptyList();
+  private static final List<String> TINY =
+      Arrays.asList("Irritable eagle", "Optimistic jay", "Fanciful hawk");
+  private static final List<String> LARGE = makeLines(1000);
 
   private static Path tempFolder;
   private static File emptyTxt;
@@ -148,7 +148,7 @@ public class TextIOTest {
   @Rule
   public ExpectedException expectedException = ExpectedException.none();
 
-  private static File writeToFile(String[] lines, String filename, CompressionType compression)
+  private static File writeToFile(List<String> lines, String filename, CompressionType compression)
       throws IOException {
     File file = tempFolder.resolve(filename).toFile();
     OutputStream output = new FileOutputStream(file);
@@ -791,7 +791,7 @@ public class TextIOTest {
    * Helper that writes the given lines (adding a newline in between) to a stream, then closes the
    * stream.
    */
-  private static void writeToStreamAndClose(String[] lines, OutputStream outputStream) {
+  private static void writeToStreamAndClose(List<String> lines, OutputStream outputStream) {
     try (PrintStream writer = new PrintStream(outputStream)) {
       for (String line : lines) {
         writer.println(line);
@@ -800,27 +800,33 @@ public class TextIOTest {
   }
 
   /**
-   * Helper method that runs TextIO.read().from(filename).withCompressionType(compressionType)
+   * Helper method that runs TextIO.read().from(filename).withCompressionType(compressionType) and
+   * TextIO.readAll().withCompressionType(compressionType) applied to the single filename,
    * and asserts that the results match the given expected output.
    */
   private void assertReadingCompressedFileMatchesExpected(
-      File file, CompressionType compressionType, String[] expected) {
-
-    TextIO.Read read =
-        TextIO.read().from(file.getPath()).withCompressionType(compressionType);
-    PCollection<String> output = p.apply("Read_" + file + "_" + compressionType.toString(), read);
-
-    PAssert.that(output).containsInAnyOrder(expected);
+      File file, CompressionType compressionType, List<String> expected) {
+
+    TextIO.Read read = TextIO.read().from(file.getPath()).withCompressionType(compressionType);
+    PAssert.that(p.apply("Read_" + file + "_" + compressionType.toString(), read))
+        .containsInAnyOrder(expected);
+
+    TextIO.ReadAll readAll =
+        TextIO.readAll().withCompressionType(compressionType).withDesiredBundleSizeBytes(10);
+    PAssert.that(
+            p.apply("Create_" + file, Create.of(file.getPath()))
+                .apply("Read_" + compressionType.toString(), readAll))
+        .containsInAnyOrder(expected);
     p.run();
   }
 
   /**
    * Helper to make an array of compressible strings. Returns ["word"i] for i in range(0,n).
    */
-  private static String[] makeLines(int n) {
-    String[] ret = new String[n];
+  private static List<String> makeLines(int n) {
+    List<String> ret = new ArrayList<>();
     for (int i = 0; i < n; ++i) {
-      ret[i] = "word" + i;
+      ret.add("word" + i);
     }
     return ret;
   }
@@ -1004,7 +1010,7 @@ public class TextIOTest {
 
     String filename = createZipFile(expected, "multiple entries", entry0, entry1, entry2);
     assertReadingCompressedFileMatchesExpected(
-        new File(filename), CompressionType.ZIP, expected.toArray(new String[]{}));
+        new File(filename), CompressionType.ZIP, expected);
   }
 
   /**
@@ -1023,7 +1029,7 @@ public class TextIOTest {
         new String[]{"dog"});
 
     assertReadingCompressedFileMatchesExpected(
-        new File(filename), CompressionType.ZIP, new String[] {"cat", "dog"});
+        new File(filename), CompressionType.ZIP, Arrays.asList("cat", "dog"));
   }
 
   @Test
@@ -1340,5 +1346,21 @@ public class TextIOTest {
     SourceTestUtils.assertSourcesEqualReferenceSource(source, splits, options);
   }
 
-}
 
+  @Test
+  @Category(NeedsRunner.class)
+  public void testReadAll() throws IOException {
+    writeToFile(TINY, "readAllTiny1.zip", ZIP);
+    writeToFile(TINY, "readAllTiny2.zip", ZIP);
+    writeToFile(LARGE, "readAllLarge1.zip", ZIP);
+    writeToFile(LARGE, "readAllLarge2.zip", ZIP);
+    PCollection<String> lines =
+        p.apply(
+                Create.of(
+                    tempFolder.resolve("readAllTiny*").toString(),
+                    tempFolder.resolve("readAllLarge*").toString()))
+            .apply(TextIO.readAll().withCompressionType(AUTO));
+    PAssert.that(lines).containsInAnyOrder(Iterables.concat(TINY, TINY, LARGE, LARGE));
+    p.run();
+  }
+}

http://git-wip-us.apache.org/repos/asf/beam/blob/fcb06f3b/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/SplittableDoFnTest.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/SplittableDoFnTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/SplittableDoFnTest.java
index 0c2bd1c..cb60f9a 100644
--- a/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/SplittableDoFnTest.java
+++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/SplittableDoFnTest.java
@@ -34,6 +34,7 @@ import org.apache.beam.sdk.coders.BigEndianIntegerCoder;
 import org.apache.beam.sdk.coders.KvCoder;
 import org.apache.beam.sdk.coders.StringUtf8Coder;
 import org.apache.beam.sdk.coders.VarIntCoder;
+import org.apache.beam.sdk.io.range.OffsetRange;
 import org.apache.beam.sdk.options.PipelineOptions;
 import org.apache.beam.sdk.options.StreamingOptions;
 import org.apache.beam.sdk.testing.PAssert;
@@ -44,7 +45,6 @@ import org.apache.beam.sdk.testing.UsesSplittableParDoWithWindowedSideInputs;
 import org.apache.beam.sdk.testing.UsesTestStream;
 import org.apache.beam.sdk.testing.ValidatesRunner;
 import org.apache.beam.sdk.transforms.DoFn.BoundedPerElement;
-import org.apache.beam.sdk.transforms.splittabledofn.OffsetRange;
 import org.apache.beam.sdk.transforms.splittabledofn.OffsetRangeTracker;
 import org.apache.beam.sdk.transforms.windowing.FixedWindows;
 import org.apache.beam.sdk.transforms.windowing.IntervalWindow;

http://git-wip-us.apache.org/repos/asf/beam/blob/fcb06f3b/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/splittabledofn/OffsetRangeTrackerTest.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/splittabledofn/OffsetRangeTrackerTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/splittabledofn/OffsetRangeTrackerTest.java
index 831894c..8aed6b9 100644
--- a/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/splittabledofn/OffsetRangeTrackerTest.java
+++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/splittabledofn/OffsetRangeTrackerTest.java
@@ -21,6 +21,7 @@ import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertTrue;
 
+import org.apache.beam.sdk.io.range.OffsetRange;
 import org.junit.Rule;
 import org.junit.Test;
 import org.junit.rules.ExpectedException;


[40/50] [abbrv] beam git commit: [BEAM-2447] Reintroduces DoFn.ProcessContinuation

Posted by ta...@apache.org.
[BEAM-2447] Reintroduces DoFn.ProcessContinuation


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/4f7f1699
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/4f7f1699
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/4f7f1699

Branch: refs/heads/DSL_SQL
Commit: 4f7f16990a8fc49a9b6ae199809f0ada7dc7448d
Parents: bd2a8cc
Author: Eugene Kirpichov <ki...@google.com>
Authored: Tue Jun 13 16:50:35 2017 -0700
Committer: Tyler Akidau <ta...@apache.org>
Committed: Wed Jul 12 20:01:02 2017 -0700

----------------------------------------------------------------------
 .../core/construction/SplittableParDoTest.java  |  10 +-
 ...eBoundedSplittableProcessElementInvoker.java |  35 ++++++-
 .../core/SplittableParDoViaKeyedWorkItems.java  |   9 +-
 .../core/SplittableProcessElementInvoker.java   |  25 ++++-
 ...ndedSplittableProcessElementInvokerTest.java |  45 +++++++--
 .../core/SplittableParDoProcessFnTest.java      |  99 ++++++++++++++++--
 .../org/apache/beam/sdk/transforms/DoFn.java    |  51 +++++++++-
 .../reflect/ByteBuddyDoFnInvokerFactory.java    |  19 +++-
 .../sdk/transforms/reflect/DoFnInvoker.java     |   4 +-
 .../sdk/transforms/reflect/DoFnSignature.java   |  10 +-
 .../sdk/transforms/reflect/DoFnSignatures.java  |  22 +++-
 .../splittabledofn/OffsetRangeTracker.java      |  10 ++
 .../splittabledofn/RestrictionTracker.java      |  11 +-
 .../beam/sdk/transforms/SplittableDoFnTest.java | 100 ++++++++-----------
 .../transforms/reflect/DoFnInvokersTest.java    |  93 +++++++++++++----
 .../DoFnSignaturesProcessElementTest.java       |   2 +-
 .../DoFnSignaturesSplittableDoFnTest.java       |  83 +++++++++++++--
 17 files changed, 487 insertions(+), 141 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/4f7f1699/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/SplittableParDoTest.java
----------------------------------------------------------------------
diff --git a/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/SplittableParDoTest.java b/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/SplittableParDoTest.java
index f4c596e..267232c 100644
--- a/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/SplittableParDoTest.java
+++ b/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/SplittableParDoTest.java
@@ -17,6 +17,7 @@
  */
 package org.apache.beam.runners.core.construction;
 
+import static org.apache.beam.sdk.transforms.DoFn.ProcessContinuation.stop;
 import static org.junit.Assert.assertEquals;
 
 import java.io.Serializable;
@@ -24,8 +25,6 @@ import org.apache.beam.sdk.Pipeline;
 import org.apache.beam.sdk.testing.TestPipeline;
 import org.apache.beam.sdk.transforms.Create;
 import org.apache.beam.sdk.transforms.DoFn;
-import org.apache.beam.sdk.transforms.DoFn.BoundedPerElement;
-import org.apache.beam.sdk.transforms.DoFn.UnboundedPerElement;
 import org.apache.beam.sdk.transforms.ParDo;
 import org.apache.beam.sdk.transforms.splittabledofn.HasDefaultTracker;
 import org.apache.beam.sdk.transforms.splittabledofn.RestrictionTracker;
@@ -70,7 +69,6 @@ public class SplittableParDoTest {
     public void checkDone() {}
   }
 
-  @BoundedPerElement
   private static class BoundedFakeFn extends DoFn<Integer, String> {
     @ProcessElement
     public void processElement(ProcessContext context, SomeRestrictionTracker tracker) {}
@@ -81,10 +79,12 @@ public class SplittableParDoTest {
     }
   }
 
-  @UnboundedPerElement
   private static class UnboundedFakeFn extends DoFn<Integer, String> {
     @ProcessElement
-    public void processElement(ProcessContext context, SomeRestrictionTracker tracker) {}
+    public ProcessContinuation processElement(
+        ProcessContext context, SomeRestrictionTracker tracker) {
+      return stop();
+    }
 
     @GetInitialRestriction
     public SomeRestriction getInitialRestriction(Integer element) {

http://git-wip-us.apache.org/repos/asf/beam/blob/4f7f1699/runners/core-java/src/main/java/org/apache/beam/runners/core/OutputAndTimeBoundedSplittableProcessElementInvoker.java
----------------------------------------------------------------------
diff --git a/runners/core-java/src/main/java/org/apache/beam/runners/core/OutputAndTimeBoundedSplittableProcessElementInvoker.java b/runners/core-java/src/main/java/org/apache/beam/runners/core/OutputAndTimeBoundedSplittableProcessElementInvoker.java
index 475abf2..0c956d5 100644
--- a/runners/core-java/src/main/java/org/apache/beam/runners/core/OutputAndTimeBoundedSplittableProcessElementInvoker.java
+++ b/runners/core-java/src/main/java/org/apache/beam/runners/core/OutputAndTimeBoundedSplittableProcessElementInvoker.java
@@ -96,7 +96,7 @@ public class OutputAndTimeBoundedSplittableProcessElementInvoker<
       final WindowedValue<InputT> element,
       final TrackerT tracker) {
     final ProcessContext processContext = new ProcessContext(element, tracker);
-    invoker.invokeProcessElement(
+    DoFn.ProcessContinuation cont = invoker.invokeProcessElement(
         new DoFnInvoker.ArgumentProvider<InputT, OutputT>() {
           @Override
           public DoFn<InputT, OutputT>.ProcessContext processContext(
@@ -155,10 +155,37 @@ public class OutputAndTimeBoundedSplittableProcessElementInvoker<
                 "Access to timers not supported in Splittable DoFn");
           }
         });
-
+    // TODO: verify that if there was a failed tryClaim() call, then cont.shouldResume() is false.
+    // Currently we can't verify this because there are no hooks into tryClaim().
+    // See https://issues.apache.org/jira/browse/BEAM-2607
+    RestrictionT residual = processContext.extractCheckpoint();
+    if (cont.shouldResume()) {
+      if (residual == null) {
+        // No checkpoint was taken by the runner while the ProcessElement call ran, but the call
+        // indicates that the whole restriction has not yet been processed. So we need to take
+        // a checkpoint now: checkpoint() guarantees that the primary restriction describes exactly
+        // the work that was done in the current ProcessElement call, and returns a residual
+        // restriction that describes exactly the work that wasn't done in the current call.
+        residual = tracker.checkpoint();
+      } else {
+        // A checkpoint was taken by the runner, and then the ProcessElement call returned resume()
+        // without making more tryClaim() calls (since no tryClaim() calls can succeed after
+        // checkpoint(), and since if it had made a failed tryClaim() call, it should have returned
+        // stop()).
+        // This means that the resulting primary restriction and the taken checkpoint already
+        // accurately describe respectively the work that was and wasn't done in the current
+        // ProcessElement call.
+        // In other words, if we took a checkpoint *after* ProcessElement completed (like in the
+        // branch above), it would have been equivalent to this one.
+      }
+    } else {
+      // The ProcessElement call returned stop() - that means the tracker's current restriction
+      // has been fully processed by the call. A checkpoint may or may not have been taken in
+      // "residual"; if it was, then we'll need to process it; if no, then we don't - nothing
+      // special needs to be done.
+    }
     tracker.checkDone();
-    return new Result(
-        processContext.extractCheckpoint(), processContext.getLastReportedWatermark());
+    return new Result(residual, cont, processContext.getLastReportedWatermark());
   }
 
   private class ProcessContext extends DoFn<InputT, OutputT>.ProcessContext {

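To make the resume/stop contract above concrete, a hedged sketch of a splittable
DoFn.ProcessElement that yields early (read() and outOfBudget() are hypothetical helpers):

    import static org.apache.beam.sdk.transforms.DoFn.ProcessContinuation.resume;
    import static org.apache.beam.sdk.transforms.DoFn.ProcessContinuation.stop;

    @ProcessElement
    public ProcessContinuation process(ProcessContext c, OffsetRangeTracker tracker) {
      for (long i = tracker.currentRestriction().getFrom(); tracker.tryClaim(i); ++i) {
        c.output(read(i));  // hypothetical: produce the output for offset i
        if (outOfBudget()) {
          // Unclaimed offsets become the residual restriction; the runner checkpoints
          // (if it has not already) and schedules a resumption after resumeDelay().
          return resume();
        }
      }
      // tryClaim() failed or the range is exhausted: this restriction is fully processed.
      return stop();
    }
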
http://git-wip-us.apache.org/repos/asf/beam/blob/4f7f1699/runners/core-java/src/main/java/org/apache/beam/runners/core/SplittableParDoViaKeyedWorkItems.java
----------------------------------------------------------------------
diff --git a/runners/core-java/src/main/java/org/apache/beam/runners/core/SplittableParDoViaKeyedWorkItems.java b/runners/core-java/src/main/java/org/apache/beam/runners/core/SplittableParDoViaKeyedWorkItems.java
index 09f3b15..6e97645 100644
--- a/runners/core-java/src/main/java/org/apache/beam/runners/core/SplittableParDoViaKeyedWorkItems.java
+++ b/runners/core-java/src/main/java/org/apache/beam/runners/core/SplittableParDoViaKeyedWorkItems.java
@@ -200,8 +200,8 @@ public class SplittableParDoViaKeyedWorkItems {
     /**
      * The state cell containing a watermark hold for the output of this {@link DoFn}. The hold is
      * acquired during the first {@link DoFn.ProcessElement} call for each element and restriction,
-     * and is released when the {@link DoFn.ProcessElement} call returns and there is no residual
-     * restriction captured by the {@link SplittableProcessElementInvoker}.
+     * and is released when the {@link DoFn.ProcessElement} call returns {@link
+     * ProcessContinuation#stop()}.
      *
      * <p>A hold is needed to avoid letting the output watermark immediately progress together with
      * the input watermark when the first {@link DoFn.ProcessElement} call for this element
@@ -365,11 +365,12 @@ public class SplittableParDoViaKeyedWorkItems {
       if (futureOutputWatermark == null) {
         futureOutputWatermark = elementAndRestriction.getKey().getTimestamp();
       }
+      Instant wakeupTime =
+          timerInternals.currentProcessingTime().plus(result.getContinuation().resumeDelay());
       holdState.add(futureOutputWatermark);
       // Set a timer to continue processing this element.
       timerInternals.setTimer(
-          TimerInternals.TimerData.of(
-              stateNamespace, timerInternals.currentProcessingTime(), TimeDomain.PROCESSING_TIME));
+          TimerInternals.TimerData.of(stateNamespace, wakeupTime, TimeDomain.PROCESSING_TIME));
     }
 
     private DoFn<InputT, OutputT>.StartBundleContext wrapContextAsStartBundle(

http://git-wip-us.apache.org/repos/asf/beam/blob/4f7f1699/runners/core-java/src/main/java/org/apache/beam/runners/core/SplittableProcessElementInvoker.java
----------------------------------------------------------------------
diff --git a/runners/core-java/src/main/java/org/apache/beam/runners/core/SplittableProcessElementInvoker.java b/runners/core-java/src/main/java/org/apache/beam/runners/core/SplittableProcessElementInvoker.java
index ced6c01..7732df3 100644
--- a/runners/core-java/src/main/java/org/apache/beam/runners/core/SplittableProcessElementInvoker.java
+++ b/runners/core-java/src/main/java/org/apache/beam/runners/core/SplittableProcessElementInvoker.java
@@ -17,6 +17,8 @@
  */
 package org.apache.beam.runners.core;
 
+import static com.google.common.base.Preconditions.checkNotNull;
+
 import javax.annotation.Nullable;
 import org.apache.beam.sdk.transforms.DoFn;
 import org.apache.beam.sdk.transforms.reflect.DoFnInvoker;
@@ -34,20 +36,35 @@ public abstract class SplittableProcessElementInvoker<
   public class Result {
     @Nullable
     private final RestrictionT residualRestriction;
+    private final DoFn.ProcessContinuation continuation;
     private final Instant futureOutputWatermark;
 
     public Result(
-        @Nullable RestrictionT residualRestriction, Instant futureOutputWatermark) {
+        @Nullable RestrictionT residualRestriction,
+        DoFn.ProcessContinuation continuation,
+        Instant futureOutputWatermark) {
+      this.continuation = checkNotNull(continuation);
+      if (continuation.shouldResume()) {
+        checkNotNull(residualRestriction);
+      }
       this.residualRestriction = residualRestriction;
       this.futureOutputWatermark = futureOutputWatermark;
     }
 
-    /** If {@code null}, means the call should not resume. */
+    /**
+     * Can be {@code null} only if {@link #getContinuation} specifies the call should not resume.
+     * However, the converse is not true: this can be non-null even if {@link #getContinuation}
+     * is {@link DoFn.ProcessContinuation#stop()}.
+     */
     @Nullable
     public RestrictionT getResidualRestriction() {
       return residualRestriction;
     }
 
+    public DoFn.ProcessContinuation getContinuation() {
+      return continuation;
+    }
+
     public Instant getFutureOutputWatermark() {
       return futureOutputWatermark;
     }
@@ -57,8 +74,8 @@ public abstract class SplittableProcessElementInvoker<
    * Invokes the {@link DoFn.ProcessElement} method using the given {@link DoFnInvoker} for the
    * original {@link DoFn}, on the given element and with the given {@link RestrictionTracker}.
    *
-   * @return Information on how to resume the call: residual restriction and a
-   * future output watermark.
+   * @return Information on how to resume the call: residual restriction, a {@link
+   *     DoFn.ProcessContinuation}, and a future output watermark.
    */
   public abstract Result invokeProcessElement(
       DoFnInvoker<InputT, OutputT> invoker, WindowedValue<InputT> element, TrackerT tracker);

http://git-wip-us.apache.org/repos/asf/beam/blob/4f7f1699/runners/core-java/src/test/java/org/apache/beam/runners/core/OutputAndTimeBoundedSplittableProcessElementInvokerTest.java
----------------------------------------------------------------------
diff --git a/runners/core-java/src/test/java/org/apache/beam/runners/core/OutputAndTimeBoundedSplittableProcessElementInvokerTest.java b/runners/core-java/src/test/java/org/apache/beam/runners/core/OutputAndTimeBoundedSplittableProcessElementInvokerTest.java
index b80a632..959909e 100644
--- a/runners/core-java/src/test/java/org/apache/beam/runners/core/OutputAndTimeBoundedSplittableProcessElementInvokerTest.java
+++ b/runners/core-java/src/test/java/org/apache/beam/runners/core/OutputAndTimeBoundedSplittableProcessElementInvokerTest.java
@@ -17,11 +17,15 @@
  */
 package org.apache.beam.runners.core;
 
+import static org.apache.beam.sdk.transforms.DoFn.ProcessContinuation.resume;
+import static org.apache.beam.sdk.transforms.DoFn.ProcessContinuation.stop;
 import static org.hamcrest.Matchers.greaterThan;
 import static org.hamcrest.Matchers.lessThan;
 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertNull;
 import static org.junit.Assert.assertThat;
+import static org.junit.Assert.assertTrue;
 
 import java.util.Collection;
 import java.util.concurrent.Executors;
@@ -42,19 +46,27 @@ import org.junit.Test;
 /** Tests for {@link OutputAndTimeBoundedSplittableProcessElementInvoker}. */
 public class OutputAndTimeBoundedSplittableProcessElementInvokerTest {
   private static class SomeFn extends DoFn<Integer, String> {
+    private final int numOutputsPerProcessCall;
     private final Duration sleepBeforeEachOutput;
 
-    private SomeFn(Duration sleepBeforeEachOutput) {
+    private SomeFn(int numOutputsPerProcessCall, Duration sleepBeforeEachOutput) {
+      this.numOutputsPerProcessCall = numOutputsPerProcessCall;
       this.sleepBeforeEachOutput = sleepBeforeEachOutput;
     }
 
     @ProcessElement
-    public void process(ProcessContext context, OffsetRangeTracker tracker)
+    public ProcessContinuation process(ProcessContext context, OffsetRangeTracker tracker)
         throws Exception {
-      for (long i = tracker.currentRestriction().getFrom(); tracker.tryClaim(i); ++i) {
+      for (long i = tracker.currentRestriction().getFrom(), numIterations = 1;
+          tracker.tryClaim(i);
+          ++i, ++numIterations) {
         Thread.sleep(sleepBeforeEachOutput.getMillis());
         context.output("" + i);
+        if (numIterations == numOutputsPerProcessCall) {
+          return resume();
+        }
       }
+      return stop();
     }
 
     @GetInitialRestriction
@@ -64,8 +76,8 @@ public class OutputAndTimeBoundedSplittableProcessElementInvokerTest {
   }
 
   private SplittableProcessElementInvoker<Integer, String, OffsetRange, OffsetRangeTracker>.Result
-      runTest(int count, Duration sleepPerElement) {
-    SomeFn fn = new SomeFn(sleepPerElement);
+      runTest(int totalNumOutputs, int numOutputsPerProcessCall, Duration sleepPerElement) {
+    SomeFn fn = new SomeFn(numOutputsPerProcessCall, sleepPerElement);
     SplittableProcessElementInvoker<Integer, String, OffsetRange, OffsetRangeTracker> invoker =
         new OutputAndTimeBoundedSplittableProcessElementInvoker<>(
             fn,
@@ -93,14 +105,15 @@ public class OutputAndTimeBoundedSplittableProcessElementInvokerTest {
 
     return invoker.invokeProcessElement(
         DoFnInvokers.invokerFor(fn),
-        WindowedValue.of(count, Instant.now(), GlobalWindow.INSTANCE, PaneInfo.NO_FIRING),
-        new OffsetRangeTracker(new OffsetRange(0, count)));
+        WindowedValue.of(totalNumOutputs, Instant.now(), GlobalWindow.INSTANCE, PaneInfo.NO_FIRING),
+        new OffsetRangeTracker(new OffsetRange(0, totalNumOutputs)));
   }
 
   @Test
   public void testInvokeProcessElementOutputBounded() throws Exception {
     SplittableProcessElementInvoker<Integer, String, OffsetRange, OffsetRangeTracker>.Result res =
-        runTest(10000, Duration.ZERO);
+        runTest(10000, Integer.MAX_VALUE, Duration.ZERO);
+    assertFalse(res.getContinuation().shouldResume());
     OffsetRange residualRange = res.getResidualRestriction();
     // Should process the first 1000 elements.
     assertEquals(1000, residualRange.getFrom());
@@ -110,7 +123,8 @@ public class OutputAndTimeBoundedSplittableProcessElementInvokerTest {
   @Test
   public void testInvokeProcessElementTimeBounded() throws Exception {
     SplittableProcessElementInvoker<Integer, String, OffsetRange, OffsetRangeTracker>.Result res =
-        runTest(10000, Duration.millis(100));
+        runTest(10000, Integer.MAX_VALUE, Duration.millis(100));
+    assertFalse(res.getContinuation().shouldResume());
     OffsetRange residualRange = res.getResidualRestriction();
     // Should process ideally around 30 elements - but due to timing flakiness, we can't enforce
     // that precisely. Just test that it's not egregiously off.
@@ -120,9 +134,18 @@ public class OutputAndTimeBoundedSplittableProcessElementInvokerTest {
   }
 
   @Test
-  public void testInvokeProcessElementVoluntaryReturn() throws Exception {
+  public void testInvokeProcessElementVoluntaryReturnStop() throws Exception {
     SplittableProcessElementInvoker<Integer, String, OffsetRange, OffsetRangeTracker>.Result res =
-        runTest(5, Duration.millis(100));
+        runTest(5, Integer.MAX_VALUE, Duration.millis(100));
+    assertFalse(res.getContinuation().shouldResume());
     assertNull(res.getResidualRestriction());
   }
+
+  @Test
+  public void testInvokeProcessElementVoluntaryReturnResume() throws Exception {
+    SplittableProcessElementInvoker<Integer, String, OffsetRange, OffsetRangeTracker>.Result res =
+        runTest(10, 5, Duration.millis(100));
+    assertTrue(res.getContinuation().shouldResume());
+    assertEquals(new OffsetRange(5, 10), res.getResidualRestriction());
+  }
 }

http://git-wip-us.apache.org/repos/asf/beam/blob/4f7f1699/runners/core-java/src/test/java/org/apache/beam/runners/core/SplittableParDoProcessFnTest.java
----------------------------------------------------------------------
diff --git a/runners/core-java/src/test/java/org/apache/beam/runners/core/SplittableParDoProcessFnTest.java b/runners/core-java/src/test/java/org/apache/beam/runners/core/SplittableParDoProcessFnTest.java
index 1cd1275..7449af3 100644
--- a/runners/core-java/src/test/java/org/apache/beam/runners/core/SplittableParDoProcessFnTest.java
+++ b/runners/core-java/src/test/java/org/apache/beam/runners/core/SplittableParDoProcessFnTest.java
@@ -17,6 +17,9 @@
  */
 package org.apache.beam.runners.core;
 
+import static org.apache.beam.sdk.transforms.DoFn.ProcessContinuation.resume;
+import static org.apache.beam.sdk.transforms.DoFn.ProcessContinuation.stop;
+import static org.hamcrest.Matchers.contains;
 import static org.hamcrest.Matchers.greaterThanOrEqualTo;
 import static org.hamcrest.Matchers.hasItem;
 import static org.hamcrest.Matchers.hasItems;
@@ -365,16 +368,71 @@ public class SplittableParDoProcessFnTest {
     assertEquals(null, tester.getWatermarkHold());
   }
 
-  /**
-   * A splittable {@link DoFn} that generates the sequence [init, init + total).
-   */
+  /** A simple splittable {@link DoFn} that outputs the given element every 5 seconds forever. */
+  private static class SelfInitiatedResumeFn extends DoFn<Integer, String> {
+    @ProcessElement
+    public ProcessContinuation process(ProcessContext c, SomeRestrictionTracker tracker) {
+      c.output(c.element().toString());
+      return resume().withResumeDelay(Duration.standardSeconds(5));
+    }
+
+    @GetInitialRestriction
+    public SomeRestriction getInitialRestriction(Integer elem) {
+      return new SomeRestriction();
+    }
+  }
+
+  @Test
+  public void testResumeSetsTimer() throws Exception {
+    DoFn<Integer, String> fn = new SelfInitiatedResumeFn();
+    Instant base = Instant.now();
+    ProcessFnTester<Integer, String, SomeRestriction, SomeRestrictionTracker> tester =
+        new ProcessFnTester<>(
+            base,
+            fn,
+            BigEndianIntegerCoder.of(),
+            SerializableCoder.of(SomeRestriction.class),
+            MAX_OUTPUTS_PER_BUNDLE,
+            MAX_BUNDLE_DURATION);
+
+    tester.startElement(42, new SomeRestriction());
+    assertThat(tester.takeOutputElements(), contains("42"));
+
+    // Should resume after 5 seconds: advancing by 3 seconds should have no effect.
+    assertFalse(tester.advanceProcessingTimeBy(Duration.standardSeconds(3)));
+    assertTrue(tester.takeOutputElements().isEmpty());
+
+    // 6 seconds should be enough - should invoke the fn again.
+    assertTrue(tester.advanceProcessingTimeBy(Duration.standardSeconds(3)));
+    assertThat(tester.takeOutputElements(), contains("42"));
+
+    // Should again resume after 5 seconds: advancing by 3 seconds should again have no effect.
+    assertFalse(tester.advanceProcessingTimeBy(Duration.standardSeconds(3)));
+    assertTrue(tester.takeOutputElements().isEmpty());
+
+    // 6 seconds should again be enough.
+    assertTrue(tester.advanceProcessingTimeBy(Duration.standardSeconds(3)));
+    assertThat(tester.takeOutputElements(), contains("42"));
+  }
+
+  /** A splittable {@link DoFn} that generates the sequence [init, init + total). */
   private static class CounterFn extends DoFn<Integer, String> {
+    private final int numOutputsPerCall;
+
+    public CounterFn(int numOutputsPerCall) {
+      this.numOutputsPerCall = numOutputsPerCall;
+    }
+
     @ProcessElement
-    public void process(ProcessContext c, OffsetRangeTracker tracker) {
-      for (long i = tracker.currentRestriction().getFrom();
-          tracker.tryClaim(i); ++i) {
+    public ProcessContinuation process(ProcessContext c, OffsetRangeTracker tracker) {
+      for (long i = tracker.currentRestriction().getFrom(), numIterations = 0;
+          tracker.tryClaim(i); ++i, ++numIterations) {
         c.output(String.valueOf(c.element() + i));
+        if (numIterations == numOutputsPerCall - 1) {
+          return resume();
+        }
       }
+      return stop();
     }
 
     @GetInitialRestriction
@@ -383,10 +441,35 @@ public class SplittableParDoProcessFnTest {
     }
   }
 
+  @Test
+  public void testResumeCarriesOverState() throws Exception {
+    DoFn<Integer, String> fn = new CounterFn(1);
+    Instant base = Instant.now();
+    ProcessFnTester<Integer, String, OffsetRange, OffsetRangeTracker> tester =
+        new ProcessFnTester<>(
+            base,
+            fn,
+            BigEndianIntegerCoder.of(),
+            SerializableCoder.of(OffsetRange.class),
+            MAX_OUTPUTS_PER_BUNDLE,
+            MAX_BUNDLE_DURATION);
+
+    tester.startElement(42, new OffsetRange(0, 3));
+    assertThat(tester.takeOutputElements(), contains("42"));
+    assertTrue(tester.advanceProcessingTimeBy(Duration.standardSeconds(1)));
+    assertThat(tester.takeOutputElements(), contains("43"));
+    assertTrue(tester.advanceProcessingTimeBy(Duration.standardSeconds(1)));
+    assertThat(tester.takeOutputElements(), contains("44"));
+    assertTrue(tester.advanceProcessingTimeBy(Duration.standardSeconds(1)));
+    // After outputting all 3 items, should not output anything more.
+    assertEquals(0, tester.takeOutputElements().size());
+    // Should also not ask to resume.
+    assertFalse(tester.advanceProcessingTimeBy(Duration.standardSeconds(1)));
+  }
+
   @Test
   public void testCheckpointsAfterNumOutputs() throws Exception {
     int max = 100;
-    DoFn<Integer, String> fn = new CounterFn();
+    DoFn<Integer, String> fn = new CounterFn(Integer.MAX_VALUE);
     Instant base = Instant.now();
     int baseIndex = 42;
 
@@ -428,7 +511,7 @@ public class SplittableParDoProcessFnTest {
     // But bound bundle duration - the bundle should terminate.
     Duration maxBundleDuration = Duration.standardSeconds(1);
     // Create an fn that attempts to 2x output more than checkpointing allows.
-    DoFn<Integer, String> fn = new CounterFn();
+    DoFn<Integer, String> fn = new CounterFn(Integer.MAX_VALUE);
     Instant base = Instant.now();
     int baseIndex = 42;
 

http://git-wip-us.apache.org/repos/asf/beam/blob/4f7f1699/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/DoFn.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/DoFn.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/DoFn.java
index a2e5c16..1b809c2 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/DoFn.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/DoFn.java
@@ -17,6 +17,7 @@
  */
 package org.apache.beam.sdk.transforms;
 
+import com.google.auto.value.AutoValue;
 import java.io.Serializable;
 import java.lang.annotation.Documented;
 import java.lang.annotation.ElementType;
@@ -545,11 +546,15 @@ public abstract class DoFn<InputT, OutputT> implements Serializable, HasDisplayD
    *     returned by {@link GetInitialRestriction} implements {@link HasDefaultTracker}.
    * <li>It <i>may</i> define a {@link GetRestrictionCoder} method.
    * <li>The type of restrictions used by all of these methods must be the same.
+   * <li>Its {@link ProcessElement} method <i>may</i> return a {@link ProcessContinuation} to
+   *     indicate whether there is more work to be done for the current element.
    * <li>Its {@link ProcessElement} method <i>must not</i> use any extra context parameters, such as
    *     {@link BoundedWindow}.
    * <li>The {@link DoFn} itself <i>may</i> be annotated with {@link BoundedPerElement} or
    *     {@link UnboundedPerElement}, but not both at the same time. If it's not annotated with
-   *     either of these, it's assumed to be {@link BoundedPerElement}.
+   *     either of these, it's assumed to be {@link BoundedPerElement} if its {@link
+   *     ProcessElement} method returns {@code void} and {@link UnboundedPerElement} if it
+   *     returns a {@link ProcessContinuation}.
    * </ul>
    *
    * <p>A non-splittable {@link DoFn} <i>must not</i> define any of these methods.
@@ -677,8 +682,48 @@ public abstract class DoFn<InputT, OutputT> implements Serializable, HasDisplayD
   @Experimental(Kind.SPLITTABLE_DO_FN)
   public @interface UnboundedPerElement {}
 
-  /** Temporary, do not use. See https://issues.apache.org/jira/browse/BEAM-1904 */
-  public class ProcessContinuation {}
+  // This can't be put into ProcessContinuation itself due to the following problem:
+  // http://ternarysearch.blogspot.com/2013/07/static-initialization-deadlock.html
+  private static final ProcessContinuation PROCESS_CONTINUATION_STOP =
+      new AutoValue_DoFn_ProcessContinuation(false, Duration.ZERO);
+
+  /**
+   * When used as a return value of {@link ProcessElement}, indicates whether there is more work to
+   * be done for the current element.
+   *
+   * <p>If the {@link ProcessElement} call completes because of a failed {@code tryClaim()} call
+   * on the {@link RestrictionTracker}, then the call MUST return {@link #stop()}.
+   */
+  @Experimental(Kind.SPLITTABLE_DO_FN)
+  @AutoValue
+  public abstract static class ProcessContinuation {
+    /** Indicates that there is no more work to be done for the current element. */
+    public static ProcessContinuation stop() {
+      return PROCESS_CONTINUATION_STOP;
+    }
+
+    /** Indicates that there is more work to be done for the current element. */
+    public static ProcessContinuation resume() {
+      return new AutoValue_DoFn_ProcessContinuation(true, Duration.ZERO);
+    }
+
+    /**
+     * If false, the {@link DoFn} promises that there is no more work remaining for the current
+     * element, so the runner should not resume the {@link ProcessElement} call.
+     */
+    public abstract boolean shouldResume();
+
+    /**
+     * A minimum duration that should elapse between the end of this {@link ProcessElement} call and
+     * the {@link ProcessElement} call continuing processing of the same element. By default, zero.
+     */
+    public abstract Duration resumeDelay();
+
+    /** Builder method to set the value of {@link #resumeDelay()}. */
+    public ProcessContinuation withResumeDelay(Duration resumeDelay) {
+      return new AutoValue_DoFn_ProcessContinuation(shouldResume(), resumeDelay);
+    }
+  }
 
   /**
    * Finalize the {@link DoFn} construction to prepare for processing.

http://git-wip-us.apache.org/repos/asf/beam/blob/4f7f1699/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/reflect/ByteBuddyDoFnInvokerFactory.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/reflect/ByteBuddyDoFnInvokerFactory.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/reflect/ByteBuddyDoFnInvokerFactory.java
index 8378204..cf96c9b 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/reflect/ByteBuddyDoFnInvokerFactory.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/reflect/ByteBuddyDoFnInvokerFactory.java
@@ -49,7 +49,6 @@ import net.bytebuddy.implementation.bytecode.Throw;
 import net.bytebuddy.implementation.bytecode.assign.Assigner;
 import net.bytebuddy.implementation.bytecode.assign.Assigner.Typing;
 import net.bytebuddy.implementation.bytecode.assign.TypeCasting;
-import net.bytebuddy.implementation.bytecode.constant.NullConstant;
 import net.bytebuddy.implementation.bytecode.constant.TextConstant;
 import net.bytebuddy.implementation.bytecode.member.FieldAccess;
 import net.bytebuddy.implementation.bytecode.member.MethodInvocation;
@@ -641,6 +640,17 @@ public class ByteBuddyDoFnInvokerFactory implements DoFnInvokerFactory {
    * {@link ProcessElement} method.
    */
   private static final class ProcessElementDelegation extends DoFnMethodDelegation {
+    private static final MethodDescription PROCESS_CONTINUATION_STOP_METHOD;
+
+    static {
+      try {
+        PROCESS_CONTINUATION_STOP_METHOD =
+            new MethodDescription.ForLoadedMethod(DoFn.ProcessContinuation.class.getMethod("stop"));
+      } catch (NoSuchMethodException e) {
+        throw new RuntimeException("Failed to locate ProcessContinuation.stop()", e);
+      }
+    }
+
     private final DoFnSignature.ProcessElementMethod signature;
 
     /** Implementation of {@link MethodDelegation} for the {@link ProcessElement} method. */
@@ -677,7 +687,12 @@ public class ByteBuddyDoFnInvokerFactory implements DoFnInvokerFactory {
 
     @Override
     protected StackManipulation afterDelegation(MethodDescription instrumentedMethod) {
-      return new StackManipulation.Compound(NullConstant.INSTANCE, MethodReturn.REFERENCE);
+      if (TypeDescription.VOID.equals(targetMethod.getReturnType().asErasure())) {
+        return new StackManipulation.Compound(
+            MethodInvocation.invoke(PROCESS_CONTINUATION_STOP_METHOD), MethodReturn.REFERENCE);
+      } else {
+        return MethodReturn.of(targetMethod.getReturnType().asErasure());
+      }
     }
   }
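
Behaviorally, the generated-code change above amounts to the following reflection-based
sketch: a void @ProcessElement method has ProcessContinuation.stop() synthesized as its
result, while any other method's own return value is passed through. (The real invoker
makes this decision once, at class-generation time, and emits it as bytecode; no per-call
reflection is involved.)

import java.lang.reflect.Method;
import org.apache.beam.sdk.transforms.DoFn;

final class ReturnDispatchSketch {
  static DoFn.ProcessContinuation invokeProcessElement(
      Object fn, Method processElement, Object... args) throws Exception {
    // Delegate to the user's @ProcessElement method, however it is declared.
    Object result = processElement.invoke(fn, args);
    return void.class.equals(processElement.getReturnType())
        ? DoFn.ProcessContinuation.stop()    // synthesize stop() for void methods
        : (DoFn.ProcessContinuation) result; // pass the fn's own continuation through
  }
}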
 

http://git-wip-us.apache.org/repos/asf/beam/blob/4f7f1699/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/reflect/DoFnInvoker.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/reflect/DoFnInvoker.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/reflect/DoFnInvoker.java
index 3b22fda..8b41fee 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/reflect/DoFnInvoker.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/reflect/DoFnInvoker.java
@@ -54,8 +54,8 @@ public interface DoFnInvoker<InputT, OutputT> {
    * Invoke the {@link DoFn.ProcessElement} method on the bound {@link DoFn}.
    *
    * @param extra Factory for producing extra parameter objects (such as window), if necessary.
-   * @return {@code null} - see <a href="https://issues.apache.org/jira/browse/BEAM-1904">JIRA</a>
-   *     tracking the complete removal of {@link DoFn.ProcessContinuation}.
+   * @return The {@link DoFn.ProcessContinuation} returned by the underlying method, or {@link
+   *     DoFn.ProcessContinuation#stop()} if it returns {@code void}.
    */
   DoFn.ProcessContinuation invokeProcessElement(ArgumentProvider<InputT, OutputT> extra);
 

http://git-wip-us.apache.org/repos/asf/beam/blob/4f7f1699/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/reflect/DoFnSignature.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/reflect/DoFnSignature.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/reflect/DoFnSignature.java
index 6eeed8e..bfad69e 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/reflect/DoFnSignature.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/reflect/DoFnSignature.java
@@ -33,6 +33,7 @@ import org.apache.beam.sdk.state.StateSpec;
 import org.apache.beam.sdk.state.Timer;
 import org.apache.beam.sdk.state.TimerSpec;
 import org.apache.beam.sdk.transforms.DoFn;
+import org.apache.beam.sdk.transforms.DoFn.ProcessContinuation;
 import org.apache.beam.sdk.transforms.DoFn.StateId;
 import org.apache.beam.sdk.transforms.DoFn.TimerId;
 import org.apache.beam.sdk.transforms.reflect.DoFnSignature.Parameter.RestrictionTrackerParameter;
@@ -433,16 +434,21 @@ public abstract class DoFnSignature {
     @Nullable
     public abstract TypeDescriptor<? extends BoundedWindow> windowT();
 
+    /** Whether this {@link DoFn} returns a {@link ProcessContinuation} or void. */
+    public abstract boolean hasReturnValue();
+
     static ProcessElementMethod create(
         Method targetMethod,
         List<Parameter> extraParameters,
         TypeDescriptor<?> trackerT,
-        @Nullable TypeDescriptor<? extends BoundedWindow> windowT) {
+        @Nullable TypeDescriptor<? extends BoundedWindow> windowT,
+        boolean hasReturnValue) {
       return new AutoValue_DoFnSignature_ProcessElementMethod(
           targetMethod,
           Collections.unmodifiableList(extraParameters),
           trackerT,
-          windowT);
+          windowT,
+          hasReturnValue);
     }
 
     /**

http://git-wip-us.apache.org/repos/asf/beam/blob/4f7f1699/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/reflect/DoFnSignatures.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/reflect/DoFnSignatures.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/reflect/DoFnSignatures.java
index 1b27e66..de57c3b 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/reflect/DoFnSignatures.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/reflect/DoFnSignatures.java
@@ -17,6 +17,8 @@
  */
 package org.apache.beam.sdk.transforms.reflect;
 
+import static com.google.common.base.Preconditions.checkState;
+
 import com.google.auto.value.AutoValue;
 import com.google.common.annotations.VisibleForTesting;
 import com.google.common.base.Predicates;
@@ -440,6 +442,8 @@ public class DoFnSignatures {
    * <li>If the {@link DoFn} (or any of its supertypes) is annotated as {@link
    *     DoFn.BoundedPerElement} or {@link DoFn.UnboundedPerElement}, use that. Only one of
    *     these must be specified.
+   * <li>If {@link DoFn.ProcessElement} returns {@link DoFn.ProcessContinuation}, assume it is
+   *     unbounded. Otherwise (if it returns {@code void}), assume it is bounded.
    * <li>If {@link DoFn.ProcessElement} returns {@code void}, but the {@link DoFn} is annotated
    *     {@link DoFn.UnboundedPerElement}, this is an error.
    * </ol>
@@ -465,7 +469,10 @@ public class DoFnSignatures {
     }
     if (processElement.isSplittable()) {
       if (isBounded == null) {
-        isBounded = PCollection.IsBounded.BOUNDED;
+        isBounded =
+            processElement.hasReturnValue()
+                ? PCollection.IsBounded.UNBOUNDED
+                : PCollection.IsBounded.BOUNDED;
       }
     } else {
       errors.checkArgument(
@@ -474,6 +481,7 @@ public class DoFnSignatures {
               + ((isBounded == PCollection.IsBounded.BOUNDED)
                   ? DoFn.BoundedPerElement.class.getSimpleName()
                   : DoFn.UnboundedPerElement.class.getSimpleName()));
+      checkState(!processElement.hasReturnValue(), "Should have been inferred splittable");
       isBounded = PCollection.IsBounded.BOUNDED;
     }
     return isBounded;
@@ -710,8 +718,10 @@ public class DoFnSignatures {
       TypeDescriptor<?> outputT,
       FnAnalysisContext fnContext) {
     errors.checkArgument(
-        void.class.equals(m.getReturnType()),
-        "Must return void");
+        void.class.equals(m.getReturnType())
+            || DoFn.ProcessContinuation.class.equals(m.getReturnType()),
+        "Must return void or %s",
+        DoFn.ProcessContinuation.class.getSimpleName());
 
 
     MethodAnalysisContext methodContext = MethodAnalysisContext.create();
@@ -751,7 +761,11 @@ public class DoFnSignatures {
     }
 
     return DoFnSignature.ProcessElementMethod.create(
-        m, methodContext.getExtraParameters(), trackerT, windowT);
+        m,
+        methodContext.getExtraParameters(),
+        trackerT,
+        windowT,
+        DoFn.ProcessContinuation.class.equals(m.getReturnType()));
   }
 
   private static void checkParameterOneOf(

http://git-wip-us.apache.org/repos/asf/beam/blob/4f7f1699/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/splittabledofn/OffsetRangeTracker.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/splittabledofn/OffsetRangeTracker.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/splittabledofn/OffsetRangeTracker.java
index 62c10a7..4987409 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/splittabledofn/OffsetRangeTracker.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/splittabledofn/OffsetRangeTracker.java
@@ -21,6 +21,7 @@ import static com.google.common.base.Preconditions.checkArgument;
 import static com.google.common.base.Preconditions.checkNotNull;
 import static com.google.common.base.Preconditions.checkState;
 
+import com.google.common.base.MoreObjects;
 import org.apache.beam.sdk.io.range.OffsetRange;
 import org.apache.beam.sdk.transforms.DoFn;
 
@@ -100,4 +101,13 @@ public class OffsetRangeTracker implements RestrictionTracker<OffsetRange> {
         lastAttemptedOffset + 1,
         range.getTo());
   }
+
+  @Override
+  public String toString() {
+    return MoreObjects.toStringHelper(this)
+        .add("range", range)
+        .add("lastClaimedOffset", lastClaimedOffset)
+        .add("lastAttemptedOffset", lastAttemptedOffset)
+        .toString();
+  }
 }

http://git-wip-us.apache.org/repos/asf/beam/blob/4f7f1699/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/splittabledofn/RestrictionTracker.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/splittabledofn/RestrictionTracker.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/splittabledofn/RestrictionTracker.java
index 27ef68f..8cb0a6b 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/splittabledofn/RestrictionTracker.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/splittabledofn/RestrictionTracker.java
@@ -31,10 +31,13 @@ public interface RestrictionTracker<RestrictionT> {
   RestrictionT currentRestriction();
 
   /**
-   * Signals that the current {@link DoFn.ProcessElement} call should terminate as soon as possible.
-   * Modifies {@link #currentRestriction}. Returns a restriction representing the rest of the work:
-   * the old value of {@link #currentRestriction} is equivalent to the new value and the return
-   * value of this method combined. Must be called at most once on a given object.
+   * Signals that the current {@link DoFn.ProcessElement} call should terminate as soon as possible:
+   * after this method returns, the tracker MUST refuse all future claim calls, and {@link
+   * #checkDone} MUST succeed.
+   *
+   * <p>Modifies {@link #currentRestriction}. Returns a restriction representing the rest of the
+   * work: the old value of {@link #currentRestriction} is equivalent to the new value and the
+   * return value of this method combined. Must be called at most once on a given object.
    */
   RestrictionT checkpoint();
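
The strengthened checkpoint() contract is easiest to see on OffsetRangeTracker, the
concrete tracker used throughout this commit. A sketch, assuming the residual begins
immediately after the last claimed offset:

import org.apache.beam.sdk.io.range.OffsetRange;
import org.apache.beam.sdk.transforms.splittabledofn.OffsetRangeTracker;

public class CheckpointContractSketch {
  public static void main(String[] args) {
    OffsetRangeTracker tracker = new OffsetRangeTracker(new OffsetRange(0, 100));
    tracker.tryClaim(0L);
    tracker.tryClaim(1L);

    OffsetRange residual = tracker.checkpoint();         // the rest of the work: [2, 100)
    OffsetRange primary = tracker.currentRestriction();  // shrunk in place to [0, 2)

    // Per the new contract, all further claims MUST be refused...
    assert !tracker.tryClaim(2L);
    // ...and checkDone() MUST now succeed for the shrunk primary restriction.
    tracker.checkDone();
    System.out.println(primary + " -> " + residual);
  }
}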
 

http://git-wip-us.apache.org/repos/asf/beam/blob/4f7f1699/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/SplittableDoFnTest.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/SplittableDoFnTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/SplittableDoFnTest.java
index cb60f9a..d2d2529 100644
--- a/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/SplittableDoFnTest.java
+++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/SplittableDoFnTest.java
@@ -19,10 +19,10 @@ package org.apache.beam.sdk.transforms;
 
 import static com.google.common.base.Preconditions.checkState;
 import static org.apache.beam.sdk.testing.TestPipeline.testingPipelineOptions;
-import static org.hamcrest.Matchers.greaterThan;
+import static org.apache.beam.sdk.transforms.DoFn.ProcessContinuation.resume;
+import static org.apache.beam.sdk.transforms.DoFn.ProcessContinuation.stop;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertThat;
 import static org.junit.Assert.assertTrue;
 
 import com.google.common.collect.Ordering;
@@ -33,7 +33,6 @@ import java.util.List;
 import org.apache.beam.sdk.coders.BigEndianIntegerCoder;
 import org.apache.beam.sdk.coders.KvCoder;
 import org.apache.beam.sdk.coders.StringUtf8Coder;
-import org.apache.beam.sdk.coders.VarIntCoder;
 import org.apache.beam.sdk.io.range.OffsetRange;
 import org.apache.beam.sdk.options.PipelineOptions;
 import org.apache.beam.sdk.options.StreamingOptions;
@@ -74,10 +73,16 @@ public class SplittableDoFnTest implements Serializable {
 
   static class PairStringWithIndexToLength extends DoFn<String, KV<String, Integer>> {
     @ProcessElement
-    public void process(ProcessContext c, OffsetRangeTracker tracker) {
-      for (long i = tracker.currentRestriction().getFrom(); tracker.tryClaim(i); ++i) {
+    public ProcessContinuation process(ProcessContext c, OffsetRangeTracker tracker) {
+      for (long i = tracker.currentRestriction().getFrom(), numIterations = 0;
+          tracker.tryClaim(i);
+          ++i, ++numIterations) {
         c.output(KV.of(c.element(), (int) i));
+        if (numIterations % 3 == 0) {
+          return resume();
+        }
       }
+      return stop();
     }
 
     @GetInitialRestriction
@@ -206,10 +211,10 @@ public class SplittableDoFnTest implements Serializable {
   private static class SDFWithMultipleOutputsPerBlock extends DoFn<String, Integer> {
     private static final int MAX_INDEX = 98765;
 
-    private final TupleTag<Integer> numProcessCalls;
+    private final int numClaimsPerCall;
 
-    private SDFWithMultipleOutputsPerBlock(TupleTag<Integer> numProcessCalls) {
-      this.numProcessCalls = numProcessCalls;
+    private SDFWithMultipleOutputsPerBlock(int numClaimsPerCall) {
+      this.numClaimsPerCall = numClaimsPerCall;
     }
 
     private static int snapToNextBlock(int index, int[] blockStarts) {
@@ -222,15 +227,20 @@ public class SplittableDoFnTest implements Serializable {
     }
 
     @ProcessElement
-    public void processElement(ProcessContext c, OffsetRangeTracker tracker) {
+    public ProcessContinuation processElement(ProcessContext c, OffsetRangeTracker tracker) {
       int[] blockStarts = {-1, 0, 12, 123, 1234, 12345, 34567, MAX_INDEX};
       int trueStart = snapToNextBlock((int) tracker.currentRestriction().getFrom(), blockStarts);
-      c.output(numProcessCalls, 1);
-      for (int i = trueStart; tracker.tryClaim(blockStarts[i]); ++i) {
+      for (int i = trueStart, numIterations = 1;
+          tracker.tryClaim(blockStarts[i]);
+          ++i, ++numIterations) {
         for (int index = blockStarts[i]; index < blockStarts[i + 1]; ++index) {
           c.output(index);
         }
+        if (numIterations == numClaimsPerCall) {
+          return resume();
+        }
       }
+      return stop();
     }
 
     @GetInitialRestriction
@@ -242,26 +252,10 @@ public class SplittableDoFnTest implements Serializable {
   @Test
   @Category({ValidatesRunner.class, UsesSplittableParDo.class})
   public void testOutputAfterCheckpoint() throws Exception {
-    TupleTag<Integer> main = new TupleTag<>();
-    TupleTag<Integer> numProcessCalls = new TupleTag<>();
-    PCollectionTuple outputs =
-        p.apply(Create.of("foo"))
-            .apply(
-                ParDo.of(new SDFWithMultipleOutputsPerBlock(numProcessCalls))
-                    .withOutputTags(main, TupleTagList.of(numProcessCalls)));
-    PAssert.thatSingleton(outputs.get(main).apply(Count.<Integer>globally()))
+    PCollection<Integer> outputs = p.apply(Create.of("foo"))
+        .apply(ParDo.of(new SDFWithMultipleOutputsPerBlock(3)));
+    PAssert.thatSingleton(outputs.apply(Count.<Integer>globally()))
         .isEqualTo((long) SDFWithMultipleOutputsPerBlock.MAX_INDEX);
-    // Verify that more than 1 process() call was involved, i.e. that there was checkpointing.
-    PAssert.thatSingleton(
-            outputs.get(numProcessCalls).setCoder(VarIntCoder.of()).apply(Sum.integersGlobally()))
-        .satisfies(
-            new SerializableFunction<Integer, Void>() {
-              @Override
-              public Void apply(Integer input) {
-                assertThat(input, greaterThan(1));
-                return null;
-              }
-            });
     p.run();
   }
 
@@ -341,12 +335,12 @@ public class SplittableDoFnTest implements Serializable {
       extends DoFn<Integer, KV<String, Integer>> {
     private static final int MAX_INDEX = 98765;
     private final PCollectionView<String> sideInput;
-    private final TupleTag<Integer> numProcessCalls;
+    private final int numClaimsPerCall;
 
     public SDFWithMultipleOutputsPerBlockAndSideInput(
-        PCollectionView<String> sideInput, TupleTag<Integer> numProcessCalls) {
+        PCollectionView<String> sideInput, int numClaimsPerCall) {
       this.sideInput = sideInput;
-      this.numProcessCalls = numProcessCalls;
+      this.numClaimsPerCall = numClaimsPerCall;
     }
 
     private static int snapToNextBlock(int index, int[] blockStarts) {
@@ -359,15 +353,20 @@ public class SplittableDoFnTest implements Serializable {
     }
 
     @ProcessElement
-    public void processElement(ProcessContext c, OffsetRangeTracker tracker) {
+    public ProcessContinuation processElement(ProcessContext c, OffsetRangeTracker tracker) {
       int[] blockStarts = {-1, 0, 12, 123, 1234, 12345, 34567, MAX_INDEX};
       int trueStart = snapToNextBlock((int) tracker.currentRestriction().getFrom(), blockStarts);
-      c.output(numProcessCalls, 1);
-      for (int i = trueStart; tracker.tryClaim(blockStarts[i]); ++i) {
+      for (int i = trueStart, numIterations = 1;
+          tracker.tryClaim(blockStarts[i]);
+          ++i, ++numIterations) {
         for (int index = blockStarts[i]; index < blockStarts[i + 1]; ++index) {
           c.output(KV.of(c.sideInput(sideInput) + ":" + c.element(), index));
         }
+        if (numIterations == numClaimsPerCall) {
+          return resume();
+        }
       }
+      return stop();
     }
 
     @GetInitialRestriction
@@ -400,15 +399,14 @@ public class SplittableDoFnTest implements Serializable {
             .apply("window 2", Window.<String>into(FixedWindows.of(Duration.millis(2))))
             .apply("singleton", View.<String>asSingleton());
 
-    TupleTag<KV<String, Integer>> main = new TupleTag<>();
-    TupleTag<Integer> numProcessCalls = new TupleTag<>();
-    PCollectionTuple res =
+    PCollection<KV<String, Integer>> res =
         mainInput.apply(
-            ParDo.of(new SDFWithMultipleOutputsPerBlockAndSideInput(sideInput, numProcessCalls))
-                .withSideInputs(sideInput)
-                .withOutputTags(main, TupleTagList.of(numProcessCalls)));
+            ParDo.of(
+                    new SDFWithMultipleOutputsPerBlockAndSideInput(
+                        sideInput, 3 /* numClaimsPerCall */))
+                .withSideInputs(sideInput));
     PCollection<KV<String, Iterable<Integer>>> grouped =
-        res.get(main).apply(GroupByKey.<String, Integer>create());
+        res.apply(GroupByKey.<String, Integer>create());
 
     PAssert.that(grouped.apply(Keys.<String>create()))
         .containsInAnyOrder("a:0", "a:1", "b:2", "b:3");
@@ -427,22 +425,6 @@ public class SplittableDoFnTest implements Serializable {
                 return null;
               }
             });
-
-    // Verify that more than 1 process() call was involved, i.e. that there was checkpointing.
-    PAssert.thatSingleton(
-            res.get(numProcessCalls)
-                .setCoder(VarIntCoder.of())
-                .apply(Sum.integersGlobally().withoutDefaults()))
-        // This should hold in all windows, but verifying a particular window is sufficient.
-        .inOnlyPane(new IntervalWindow(new Instant(0), new Instant(1)))
-        .satisfies(
-            new SerializableFunction<Integer, Void>() {
-              @Override
-              public Void apply(Integer input) {
-                assertThat(input, greaterThan(1));
-                return null;
-              }
-            });
     p.run();
 
     // TODO: also test coverage when some of the windows of the side input are not ready.

http://git-wip-us.apache.org/repos/asf/beam/blob/4f7f1699/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/reflect/DoFnInvokersTest.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/reflect/DoFnInvokersTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/reflect/DoFnInvokersTest.java
index 3edb194..2098c66 100644
--- a/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/reflect/DoFnInvokersTest.java
+++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/reflect/DoFnInvokersTest.java
@@ -17,6 +17,8 @@
  */
 package org.apache.beam.sdk.transforms.reflect;
 
+import static org.apache.beam.sdk.transforms.DoFn.ProcessContinuation.resume;
+import static org.apache.beam.sdk.transforms.DoFn.ProcessContinuation.stop;
 import static org.hamcrest.CoreMatchers.instanceOf;
 import static org.hamcrest.Matchers.equalTo;
 import static org.junit.Assert.assertEquals;
@@ -89,8 +91,8 @@ public class DoFnInvokersTest {
     when(mockArgumentProvider.processContext(Matchers.<DoFn>any())).thenReturn(mockProcessContext);
   }
 
-  private void invokeProcessElement(DoFn<String, String> fn) {
-    DoFnInvokers.invokerFor(fn).invokeProcessElement(mockArgumentProvider);
+  private DoFn.ProcessContinuation invokeProcessElement(DoFn<String, String> fn) {
+    return DoFnInvokers.invokerFor(fn).invokeProcessElement(mockArgumentProvider);
   }
 
   private void invokeOnTimer(String timerId, DoFn<String, String> fn) {
@@ -119,7 +121,7 @@ public class DoFnInvokersTest {
       public void processElement(ProcessContext c) throws Exception {}
     }
     MockFn mockFn = mock(MockFn.class);
-    invokeProcessElement(mockFn);
+    assertEquals(stop(), invokeProcessElement(mockFn));
     verify(mockFn).processElement(mockProcessContext);
   }
 
@@ -140,7 +142,7 @@ public class DoFnInvokersTest {
   public void testDoFnWithProcessElementInterface() throws Exception {
     IdentityUsingInterfaceWithProcessElement fn =
         mock(IdentityUsingInterfaceWithProcessElement.class);
-    invokeProcessElement(fn);
+    assertEquals(stop(), invokeProcessElement(fn));
     verify(fn).processElement(mockProcessContext);
   }
 
@@ -161,14 +163,14 @@ public class DoFnInvokersTest {
   @Test
   public void testDoFnWithMethodInSuperclass() throws Exception {
     IdentityChildWithoutOverride fn = mock(IdentityChildWithoutOverride.class);
-    invokeProcessElement(fn);
+    assertEquals(stop(), invokeProcessElement(fn));
     verify(fn).process(mockProcessContext);
   }
 
   @Test
   public void testDoFnWithMethodInSubclass() throws Exception {
     IdentityChildWithOverride fn = mock(IdentityChildWithOverride.class);
-    invokeProcessElement(fn);
+    assertEquals(stop(), invokeProcessElement(fn));
     verify(fn).process(mockProcessContext);
   }
 
@@ -179,7 +181,7 @@ public class DoFnInvokersTest {
       public void processElement(ProcessContext c, IntervalWindow w) throws Exception {}
     }
     MockFn fn = mock(MockFn.class);
-    invokeProcessElement(fn);
+    assertEquals(stop(), invokeProcessElement(fn));
     verify(fn).processElement(mockProcessContext, mockWindow);
   }
 
@@ -203,7 +205,7 @@ public class DoFnInvokersTest {
           throws Exception {}
     }
     MockFn fn = mock(MockFn.class);
-    invokeProcessElement(fn);
+    assertEquals(stop(), invokeProcessElement(fn));
     verify(fn).processElement(mockProcessContext, mockState);
   }
 
@@ -229,11 +231,35 @@ public class DoFnInvokersTest {
       public void onTimer() {}
     }
     MockFn fn = mock(MockFn.class);
-    invokeProcessElement(fn);
+    assertEquals(stop(), invokeProcessElement(fn));
     verify(fn).processElement(mockProcessContext, mockTimer);
   }
 
   @Test
+  public void testDoFnWithReturn() throws Exception {
+    class MockFn extends DoFn<String, String> {
+      @DoFn.ProcessElement
+      public ProcessContinuation processElement(ProcessContext c, SomeRestrictionTracker tracker)
+          throws Exception {
+        return null;
+      }
+
+      @GetInitialRestriction
+      public SomeRestriction getInitialRestriction(String element) {
+        return null;
+      }
+
+      @NewTracker
+      public SomeRestrictionTracker newTracker(SomeRestriction restriction) {
+        return null;
+      }
+    }
+    MockFn fn = mock(MockFn.class);
+    when(fn.processElement(mockProcessContext, null)).thenReturn(resume());
+    assertEquals(resume(), invokeProcessElement(fn));
+  }
+
+  @Test
   public void testDoFnWithStartBundleSetupTeardown() throws Exception {
     class MockFn extends DoFn<String, String> {
       @ProcessElement
@@ -288,7 +314,9 @@ public class DoFnInvokersTest {
   /** Public so Mockito can do "delegatesTo()" in the test below. */
   public static class MockFn extends DoFn<String, String> {
     @ProcessElement
-    public void processElement(ProcessContext c, SomeRestrictionTracker tracker) {}
+    public ProcessContinuation processElement(ProcessContext c, SomeRestrictionTracker tracker) {
+      return null;
+    }
 
     @GetInitialRestriction
     public SomeRestriction getInitialRestriction(String element) {
@@ -340,7 +368,7 @@ public class DoFnInvokersTest {
         .splitRestriction(
             eq("blah"), same(restriction), Mockito.<DoFn.OutputReceiver<SomeRestriction>>any());
     when(fn.newTracker(restriction)).thenReturn(tracker);
-    fn.processElement(mockProcessContext, tracker);
+    when(fn.processElement(mockProcessContext, tracker)).thenReturn(resume());
 
     assertEquals(coder, invoker.invokeGetRestrictionCoder(CoderRegistry.createDefault()));
     assertEquals(restriction, invoker.invokeGetInitialRestriction("blah"));
@@ -356,6 +384,8 @@ public class DoFnInvokersTest {
         });
     assertEquals(Arrays.asList(part1, part2, part3), outputs);
     assertEquals(tracker, invoker.invokeNewTracker(restriction));
+    assertEquals(
+        resume(),
         invoker.invokeProcessElement(
             new FakeArgumentProvider<String, String>() {
               @Override
@@ -367,7 +397,7 @@ public class DoFnInvokersTest {
               public RestrictionTracker<?> restrictionTracker() {
                 return tracker;
               }
-            });
+            }));
   }
 
   private static class RestrictionWithDefaultTracker
@@ -441,7 +471,7 @@ public class DoFnInvokersTest {
             assertEquals("foo", output);
           }
         });
-    invoker.invokeProcessElement(mockArgumentProvider);
+    assertEquals(stop(), invoker.invokeProcessElement(mockArgumentProvider));
     assertThat(
         invoker.invokeNewTracker(new RestrictionWithDefaultTracker()),
         instanceOf(DefaultTracker.class));
@@ -531,14 +561,14 @@ public class DoFnInvokersTest {
   @Test
   public void testLocalPrivateDoFnClass() throws Exception {
     PrivateDoFnClass fn = mock(PrivateDoFnClass.class);
-    invokeProcessElement(fn);
+    assertEquals(stop(), invokeProcessElement(fn));
     verify(fn).processThis(mockProcessContext);
   }
 
   @Test
   public void testStaticPackagePrivateDoFnClass() throws Exception {
     DoFn<String, String> fn = mock(DoFnInvokersTestHelper.newStaticPackagePrivateDoFn().getClass());
-    invokeProcessElement(fn);
+    assertEquals(stop(), invokeProcessElement(fn));
     DoFnInvokersTestHelper.verifyStaticPackagePrivateDoFn(fn, mockProcessContext);
   }
 
@@ -546,28 +576,28 @@ public class DoFnInvokersTest {
   public void testInnerPackagePrivateDoFnClass() throws Exception {
     DoFn<String, String> fn =
         mock(new DoFnInvokersTestHelper().newInnerPackagePrivateDoFn().getClass());
-    invokeProcessElement(fn);
+    assertEquals(stop(), invokeProcessElement(fn));
     DoFnInvokersTestHelper.verifyInnerPackagePrivateDoFn(fn, mockProcessContext);
   }
 
   @Test
   public void testStaticPrivateDoFnClass() throws Exception {
     DoFn<String, String> fn = mock(DoFnInvokersTestHelper.newStaticPrivateDoFn().getClass());
-    invokeProcessElement(fn);
+    assertEquals(stop(), invokeProcessElement(fn));
     DoFnInvokersTestHelper.verifyStaticPrivateDoFn(fn, mockProcessContext);
   }
 
   @Test
   public void testInnerPrivateDoFnClass() throws Exception {
     DoFn<String, String> fn = mock(new DoFnInvokersTestHelper().newInnerPrivateDoFn().getClass());
-    invokeProcessElement(fn);
+    assertEquals(stop(), invokeProcessElement(fn));
     DoFnInvokersTestHelper.verifyInnerPrivateDoFn(fn, mockProcessContext);
   }
 
   @Test
   public void testAnonymousInnerDoFn() throws Exception {
     DoFn<String, String> fn = mock(new DoFnInvokersTestHelper().newInnerAnonymousDoFn().getClass());
-    invokeProcessElement(fn);
+    assertEquals(stop(), invokeProcessElement(fn));
     DoFnInvokersTestHelper.verifyInnerAnonymousDoFn(fn, mockProcessContext);
   }
 
@@ -604,6 +634,31 @@ public class DoFnInvokersTest {
   }
 
   @Test
+  public void testProcessElementExceptionWithReturn() throws Exception {
+    thrown.expect(UserCodeException.class);
+    thrown.expectMessage("bogus");
+    DoFnInvokers.invokerFor(
+            new DoFn<Integer, Integer>() {
+              @ProcessElement
+              public ProcessContinuation processElement(
+                  @SuppressWarnings("unused") ProcessContext c, SomeRestrictionTracker tracker) {
+                throw new IllegalArgumentException("bogus");
+              }
+
+              @GetInitialRestriction
+              public SomeRestriction getInitialRestriction(Integer element) {
+                return null;
+              }
+
+              @NewTracker
+              public SomeRestrictionTracker newTracker(SomeRestriction restriction) {
+                return null;
+              }
+            })
+        .invokeProcessElement(new FakeArgumentProvider<Integer, Integer>());
+  }
+
+  @Test
   public void testStartBundleException() throws Exception {
     DoFnInvoker<Integer, Integer> invoker =
         DoFnInvokers.invokerFor(

http://git-wip-us.apache.org/repos/asf/beam/blob/4f7f1699/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/reflect/DoFnSignaturesProcessElementTest.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/reflect/DoFnSignaturesProcessElementTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/reflect/DoFnSignaturesProcessElementTest.java
index d321f54..44ae5c4 100644
--- a/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/reflect/DoFnSignaturesProcessElementTest.java
+++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/reflect/DoFnSignaturesProcessElementTest.java
@@ -50,7 +50,7 @@ public class DoFnSignaturesProcessElementTest {
   @Test
   public void testBadReturnType() throws Exception {
     thrown.expect(IllegalArgumentException.class);
-    thrown.expectMessage("Must return void");
+    thrown.expectMessage("Must return void or ProcessContinuation");
 
     analyzeProcessElementMethod(
         new AnonymousMethod() {

http://git-wip-us.apache.org/repos/asf/beam/blob/4f7f1699/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/reflect/DoFnSignaturesSplittableDoFnTest.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/reflect/DoFnSignaturesSplittableDoFnTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/reflect/DoFnSignaturesSplittableDoFnTest.java
index 07b3348..08af65e 100644
--- a/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/reflect/DoFnSignaturesSplittableDoFnTest.java
+++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/reflect/DoFnSignaturesSplittableDoFnTest.java
@@ -52,7 +52,8 @@ import org.junit.runners.JUnit4;
 public class DoFnSignaturesSplittableDoFnTest {
   @Rule public ExpectedException thrown = ExpectedException.none();
 
-  private static class SomeRestriction {}
+  private abstract static class SomeRestriction
+      implements HasDefaultTracker<SomeRestriction, SomeRestrictionTracker> {}
 
   private abstract static class SomeRestrictionTracker
       implements RestrictionTracker<SomeRestriction> {}
@@ -60,6 +61,20 @@ public class DoFnSignaturesSplittableDoFnTest {
   private abstract static class SomeRestrictionCoder extends StructuredCoder<SomeRestriction> {}
 
   @Test
+  public void testReturnsProcessContinuation() throws Exception {
+    DoFnSignature.ProcessElementMethod signature =
+        analyzeProcessElementMethod(
+            new AnonymousMethod() {
+              private DoFn.ProcessContinuation method(
+                  DoFn<Integer, String>.ProcessContext context) {
+                return null;
+              }
+            });
+
+    assertTrue(signature.hasReturnValue());
+  }
+
+  @Test
   public void testHasRestrictionTracker() throws Exception {
     DoFnSignature.ProcessElementMethod signature =
         analyzeProcessElementMethod(
@@ -100,11 +115,6 @@ public class DoFnSignaturesSplittableDoFnTest {
       public SomeRestriction getInitialRestriction(Integer element) {
         return null;
       }
-
-      @NewTracker
-      public SomeRestrictionTracker newTracker(SomeRestriction restriction) {
-        return null;
-      }
     }
 
     @BoundedPerElement
@@ -130,6 +140,55 @@ public class DoFnSignaturesSplittableDoFnTest {
             .isBoundedPerElement());
   }
 
+  private static class BaseFnWithoutContinuation extends DoFn<Integer, String> {
+    @ProcessElement
+    public void processElement(ProcessContext context, SomeRestrictionTracker tracker) {}
+
+    @GetInitialRestriction
+    public SomeRestriction getInitialRestriction(Integer element) {
+      return null;
+    }
+  }
+
+  private static class BaseFnWithContinuation extends DoFn<Integer, String> {
+    @ProcessElement
+    public ProcessContinuation processElement(
+        ProcessContext context, SomeRestrictionTracker tracker) {
+      return null;
+    }
+
+    @GetInitialRestriction
+    public SomeRestriction getInitialRestriction(Integer element) {
+      return null;
+    }
+  }
+
+  @Test
+  public void testSplittableBoundednessInferredFromReturnValue() throws Exception {
+    assertEquals(
+        PCollection.IsBounded.BOUNDED,
+        DoFnSignatures.getSignature(BaseFnWithoutContinuation.class).isBoundedPerElement());
+    assertEquals(
+        PCollection.IsBounded.UNBOUNDED,
+        DoFnSignatures.getSignature(BaseFnWithContinuation.class).isBoundedPerElement());
+  }
+
+  @Test
+  public void testSplittableRespectsBoundednessAnnotation() throws Exception {
+    @BoundedPerElement
+    class BoundedFnWithContinuation extends BaseFnWithContinuation {}
+
+    assertEquals(
+        PCollection.IsBounded.BOUNDED,
+        DoFnSignatures.getSignature(BoundedFnWithContinuation.class).isBoundedPerElement());
+
+    @UnboundedPerElement
+    class UnboundedFnWithContinuation extends BaseFnWithContinuation {}
+
+    assertEquals(
+        PCollection.IsBounded.UNBOUNDED,
+        DoFnSignatures.getSignature(UnboundedFnWithContinuation.class).isBoundedPerElement());
+  }
+
   @Test
   public void testUnsplittableIsBounded() throws Exception {
     class UnsplittableFn extends DoFn<Integer, String> {
@@ -172,8 +231,10 @@ public class DoFnSignaturesSplittableDoFnTest {
   public void testSplittableWithAllFunctions() throws Exception {
     class GoodSplittableDoFn extends DoFn<Integer, String> {
       @ProcessElement
-      public void processElement(
-          ProcessContext context, SomeRestrictionTracker tracker) {}
+      public ProcessContinuation processElement(
+          ProcessContext context, SomeRestrictionTracker tracker) {
+        return null;
+      }
 
       @GetInitialRestriction
       public SomeRestriction getInitialRestriction(Integer element) {
@@ -198,6 +259,7 @@ public class DoFnSignaturesSplittableDoFnTest {
     DoFnSignature signature = DoFnSignatures.getSignature(GoodSplittableDoFn.class);
     assertEquals(SomeRestrictionTracker.class, signature.processElement().trackerT().getRawType());
     assertTrue(signature.processElement().isSplittable());
+    assertTrue(signature.processElement().hasReturnValue());
     assertEquals(
         SomeRestriction.class, signature.getInitialRestriction().restrictionT().getRawType());
     assertEquals(SomeRestriction.class, signature.splitRestriction().restrictionT().getRawType());
@@ -214,7 +276,9 @@ public class DoFnSignaturesSplittableDoFnTest {
   public void testSplittableWithAllFunctionsGeneric() throws Exception {
     class GoodGenericSplittableDoFn<RestrictionT, TrackerT, CoderT> extends DoFn<Integer, String> {
       @ProcessElement
-      public void processElement(ProcessContext context, TrackerT tracker) {}
+      public ProcessContinuation processElement(ProcessContext context, TrackerT tracker) {
+        return null;
+      }
 
       @GetInitialRestriction
       public RestrictionT getInitialRestriction(Integer element) {
@@ -242,6 +306,7 @@ public class DoFnSignaturesSplittableDoFnTest {
                 SomeRestriction, SomeRestrictionTracker, SomeRestrictionCoder>() {}.getClass());
     assertEquals(SomeRestrictionTracker.class, signature.processElement().trackerT().getRawType());
     assertTrue(signature.processElement().isSplittable());
+    assertTrue(signature.processElement().hasReturnValue());
     assertEquals(
         SomeRestriction.class, signature.getInitialRestriction().restrictionT().getRawType());
     assertEquals(SomeRestriction.class, signature.splitRestriction().restrictionT().getRawType());


[43/50] [abbrv] beam git commit: Cleanup and fix ptransform_fn decorator.

Posted by ta...@apache.org.
Cleanup and fix ptransform_fn decorator.

Previously, CallablePTransform was being used both as the
factory and as the transform itself, which could result in
state being carried over between pipelines.


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/bd2a8cca
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/bd2a8cca
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/bd2a8cca

Branch: refs/heads/DSL_SQL
Commit: bd2a8cca8c64eba5e362ffd78a868e3deb3755e4
Parents: fcb06f3
Author: Robert Bradshaw <ro...@gmail.com>
Authored: Tue Jul 11 14:32:47 2017 -0700
Committer: Tyler Akidau <ta...@apache.org>
Committed: Wed Jul 12 20:01:02 2017 -0700

----------------------------------------------------------------------
 sdks/python/apache_beam/transforms/combiners.py |  8 ++++
 .../apache_beam/transforms/combiners_test.py    |  7 +---
 .../python/apache_beam/transforms/ptransform.py | 41 +++++++++-----------
 3 files changed, 28 insertions(+), 28 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/bd2a8cca/sdks/python/apache_beam/transforms/combiners.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/transforms/combiners.py b/sdks/python/apache_beam/transforms/combiners.py
index fa0742d..875306f 100644
--- a/sdks/python/apache_beam/transforms/combiners.py
+++ b/sdks/python/apache_beam/transforms/combiners.py
@@ -149,6 +149,7 @@ class Top(object):
   """Combiners for obtaining extremal elements."""
   # pylint: disable=no-self-argument
 
+  @staticmethod
   @ptransform.ptransform_fn
   def Of(pcoll, n, compare=None, *args, **kwargs):
     """Obtain a list of the compare-most N elements in a PCollection.
@@ -177,6 +178,7 @@ class Top(object):
     return pcoll | core.CombineGlobally(
         TopCombineFn(n, compare, key, reverse), *args, **kwargs)
 
+  @staticmethod
   @ptransform.ptransform_fn
   def PerKey(pcoll, n, compare=None, *args, **kwargs):
     """Identifies the compare-most N elements associated with each key.
@@ -210,21 +212,25 @@ class Top(object):
     return pcoll | core.CombinePerKey(
         TopCombineFn(n, compare, key, reverse), *args, **kwargs)
 
+  @staticmethod
   @ptransform.ptransform_fn
   def Largest(pcoll, n):
     """Obtain a list of the greatest N elements in a PCollection."""
     return pcoll | Top.Of(n)
 
+  @staticmethod
   @ptransform.ptransform_fn
   def Smallest(pcoll, n):
     """Obtain a list of the least N elements in a PCollection."""
     return pcoll | Top.Of(n, reverse=True)
 
+  @staticmethod
   @ptransform.ptransform_fn
   def LargestPerKey(pcoll, n):
     """Identifies the N greatest elements associated with each key."""
     return pcoll | Top.PerKey(n)
 
+  @staticmethod
   @ptransform.ptransform_fn
   def SmallestPerKey(pcoll, n, reverse=True):
     """Identifies the N least elements associated with each key."""
@@ -369,10 +375,12 @@ class Sample(object):
   """Combiners for sampling n elements without replacement."""
   # pylint: disable=no-self-argument
 
+  @staticmethod
   @ptransform.ptransform_fn
   def FixedSizeGlobally(pcoll, n):
     return pcoll | core.CombineGlobally(SampleCombineFn(n))
 
+  @staticmethod
   @ptransform.ptransform_fn
   def FixedSizePerKey(pcoll, n):
     return pcoll | core.CombinePerKey(SampleCombineFn(n))

http://git-wip-us.apache.org/repos/asf/beam/blob/bd2a8cca/sdks/python/apache_beam/transforms/combiners_test.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/transforms/combiners_test.py b/sdks/python/apache_beam/transforms/combiners_test.py
index c79fec8..cd2b595 100644
--- a/sdks/python/apache_beam/transforms/combiners_test.py
+++ b/sdks/python/apache_beam/transforms/combiners_test.py
@@ -156,14 +156,11 @@ class CombineTest(unittest.TestCase):
 
   def test_combine_sample_display_data(self):
     def individual_test_per_key_dd(sampleFn, args, kwargs):
-      trs = [beam.CombinePerKey(sampleFn(*args, **kwargs)),
-             beam.CombineGlobally(sampleFn(*args, **kwargs))]
+      trs = [sampleFn(*args, **kwargs)]
       for transform in trs:
         dd = DisplayData.create_from(transform)
         expected_items = [
-            DisplayDataItemMatcher('fn', sampleFn.fn.__name__),
-            DisplayDataItemMatcher('combine_fn',
-                                   transform.fn.__class__)]
+            DisplayDataItemMatcher('fn', transform._fn.__name__)]
         if args:
           expected_items.append(
               DisplayDataItemMatcher('args', str(args)))

http://git-wip-us.apache.org/repos/asf/beam/blob/bd2a8cca/sdks/python/apache_beam/transforms/ptransform.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/transforms/ptransform.py b/sdks/python/apache_beam/transforms/ptransform.py
index 6041353..cd84122 100644
--- a/sdks/python/apache_beam/transforms/ptransform.py
+++ b/sdks/python/apache_beam/transforms/ptransform.py
@@ -595,32 +595,23 @@ class PTransformWithSideInputs(PTransform):
     return '%s(%s)' % (self.__class__.__name__, self.fn.default_label())
 
 
-class CallablePTransform(PTransform):
+class _PTransformFnPTransform(PTransform):
   """A class wrapper for a function-based transform."""
 
-  def __init__(self, fn):
-    # pylint: disable=super-init-not-called
-    # This  is a helper class for a function decorator. Only when the class
-    # is called (and __call__ invoked) we will have all the information
-    # needed to initialize the super class.
-    self.fn = fn
-    self._args = ()
-    self._kwargs = {}
+  def __init__(self, fn, *args, **kwargs):
+    super(_PTransformFnPTransform, self).__init__()
+    self._fn = fn
+    self._args = args
+    self._kwargs = kwargs
 
   def display_data(self):
-    res = {'fn': (self.fn.__name__
-                  if hasattr(self.fn, '__name__')
-                  else self.fn.__class__),
+    res = {'fn': (self._fn.__name__
+                  if hasattr(self._fn, '__name__')
+                  else self._fn.__class__),
            'args': DisplayDataItem(str(self._args)).drop_if_default('()'),
            'kwargs': DisplayDataItem(str(self._kwargs)).drop_if_default('{}')}
     return res
 
-  def __call__(self, *args, **kwargs):
-    super(CallablePTransform, self).__init__()
-    self._args = args
-    self._kwargs = kwargs
-    return self
-
   def expand(self, pcoll):
     # Since the PTransform will be implemented entirely as a function
     # (once called), we need to pass through any type-hinting information that
@@ -629,18 +620,18 @@ class CallablePTransform(PTransform):
     kwargs = dict(self._kwargs)
     args = tuple(self._args)
     try:
-      if 'type_hints' in inspect.getargspec(self.fn).args:
+      if 'type_hints' in inspect.getargspec(self._fn).args:
         args = (self.get_type_hints(),) + args
     except TypeError:
       # Might not be a function.
       pass
-    return self.fn(pcoll, *args, **kwargs)
+    return self._fn(pcoll, *args, **kwargs)
 
   def default_label(self):
     if self._args:
       return '%s(%s)' % (
-          label_from_callable(self.fn), label_from_callable(self._args[0]))
-    return label_from_callable(self.fn)
+          label_from_callable(self._fn), label_from_callable(self._args[0]))
+    return label_from_callable(self._fn)
 
 
 def ptransform_fn(fn):
@@ -684,7 +675,11 @@ def ptransform_fn(fn):
   operator (i.e., `|`) will inject the pcoll argument in its proper place
   (first argument if no label was specified and second argument otherwise).
   """
-  return CallablePTransform(fn)
+  # TODO(robertwb): Consider removing staticmethod to allow for self parameter.
+
+  def callable_ptransform_factory(*args, **kwargs):
+    return _PTransformFnPTransform(fn, *args, **kwargs)
+  return callable_ptransform_factory
 
 
 def label_from_callable(fn):


[28/50] [abbrv] beam git commit: [BEAM-2570] Fix breakage after cloud-bigquery updated

Posted by ta...@apache.org.
[BEAM-2570] Fix breakage after cloud-bigquery updated


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/eee0c9c3
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/eee0c9c3
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/eee0c9c3

Branch: refs/heads/DSL_SQL
Commit: eee0c9c38ea33d123c9be6ee81fd8f31bcb44d14
Parents: b1313ff
Author: Mark Liu <ma...@google.com>
Authored: Fri Jul 7 15:20:12 2017 -0700
Committer: Tyler Akidau <ta...@apache.org>
Committed: Wed Jul 12 20:01:01 2017 -0700

----------------------------------------------------------------------
 sdks/python/apache_beam/io/gcp/tests/bigquery_matcher.py      | 6 +++---
 sdks/python/apache_beam/io/gcp/tests/bigquery_matcher_test.py | 2 +-
 sdks/python/setup.py                                          | 2 +-
 3 files changed, 5 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/eee0c9c3/sdks/python/apache_beam/io/gcp/tests/bigquery_matcher.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/io/gcp/tests/bigquery_matcher.py b/sdks/python/apache_beam/io/gcp/tests/bigquery_matcher.py
index 844cbc5..d6f0e97 100644
--- a/sdks/python/apache_beam/io/gcp/tests/bigquery_matcher.py
+++ b/sdks/python/apache_beam/io/gcp/tests/bigquery_matcher.py
@@ -92,9 +92,9 @@ class BigqueryMatcher(BaseMatcher):
     page_token = None
     results = []
     while True:
-      rows, _, page_token = query.fetch_data(page_token=page_token)
-      results.extend(rows)
-      if not page_token:
+      for row in query.fetch_data(page_token=page_token):
+        results.append(row)
+      if results:
         break
 
     return results

http://git-wip-us.apache.org/repos/asf/beam/blob/eee0c9c3/sdks/python/apache_beam/io/gcp/tests/bigquery_matcher_test.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/io/gcp/tests/bigquery_matcher_test.py b/sdks/python/apache_beam/io/gcp/tests/bigquery_matcher_test.py
index f12293e..5b72285 100644
--- a/sdks/python/apache_beam/io/gcp/tests/bigquery_matcher_test.py
+++ b/sdks/python/apache_beam/io/gcp/tests/bigquery_matcher_test.py
@@ -53,7 +53,7 @@ class BigqueryMatcherTest(unittest.TestCase):
     matcher = bq_verifier.BigqueryMatcher(
         'mock_project',
         'mock_query',
-        'da39a3ee5e6b4b0d3255bfef95601890afd80709')
+        '59f9d6bdee30d67ea73b8aded121c3a0280f9cd8')
     hc_assert_that(self._mock_result, matcher)
 
   @patch.object(bigquery, 'Client')

http://git-wip-us.apache.org/repos/asf/beam/blob/eee0c9c3/sdks/python/setup.py
----------------------------------------------------------------------
diff --git a/sdks/python/setup.py b/sdks/python/setup.py
index 8a0c9ae..da82466 100644
--- a/sdks/python/setup.py
+++ b/sdks/python/setup.py
@@ -122,7 +122,7 @@ GCP_REQUIREMENTS = [
   'googledatastore==7.0.1',
   'google-cloud-pubsub==0.26.0',
   # GCP packages required by tests
-  'google-cloud-bigquery>=0.23.0,<0.26.0',
+  'google-cloud-bigquery==0.25.0',
 ]
 
 


[12/50] [abbrv] beam git commit: Fix DoFn javadoc: StateSpec does not require a key

Posted by ta...@apache.org.
Fix DoFn javadoc: StateSpec does not require a key
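
For context, a minimal sketch of a stateful DoFn using the corrected
declaration (CountFn and its element types are hypothetical; this assumes
the state API as of this branch). The key type is implied by the KV input
element, so StateSpec is parameterized only by the state cell type:

    import org.apache.beam.sdk.state.StateSpec;
    import org.apache.beam.sdk.state.StateSpecs;
    import org.apache.beam.sdk.state.ValueState;
    import org.apache.beam.sdk.transforms.DoFn;
    import org.apache.beam.sdk.values.KV;

    class CountFn extends DoFn<KV<String, Long>, Long> {
      // No key type parameter on StateSpec; keying comes from the KV input.
      @StateId("count")
      private final StateSpec<ValueState<Long>> countSpec = StateSpecs.value();

      @ProcessElement
      public void processElement(
          ProcessContext c, @StateId("count") ValueState<Long> count) {
        Long current = count.read();
        long next = (current == null ? 0L : current) + 1;
        count.write(next);
        c.output(next);
      }
    }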


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/4bf16155
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/4bf16155
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/4bf16155

Branch: refs/heads/DSL_SQL
Commit: 4bf1615575338e7af64e4db1bfe11856495aa91d
Parents: c73e69a
Author: Kenneth Knowles <kl...@google.com>
Authored: Fri Jun 30 21:42:17 2017 -0700
Committer: Tyler Akidau <ta...@apache.org>
Committed: Wed Jul 12 20:00:59 2017 -0700

----------------------------------------------------------------------
 .../core/src/main/java/org/apache/beam/sdk/transforms/DoFn.java    | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/4bf16155/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/DoFn.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/DoFn.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/DoFn.java
index fb6d0ee..a2e5c16 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/DoFn.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/DoFn.java
@@ -385,7 +385,7 @@ public abstract class DoFn<InputT, OutputT> implements Serializable, HasDisplayD
    * <pre><code>{@literal new DoFn<KV<Key, Foo>, Baz>()} {
    *
    *  {@literal @StateId("my-state-id")}
-   *  {@literal private final StateSpec<K, ValueState<MyState>>} myStateSpec =
+   *  {@literal private final StateSpec<ValueState<MyState>>} myStateSpec =
    *       StateSpecs.value(new MyStateCoder());
    *
    *  {@literal @ProcessElement}


[22/50] [abbrv] beam git commit: [BEAM-1347] Add DoFnRunner specific to Fn Api.

Posted by ta...@apache.org.
[BEAM-1347] Add DoFnRunner specific to Fn Api.
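
At its core, the new runner drops the OutputManager indirection and pushes
outputs straight to per-tag downstream consumers. A simplified,
self-contained sketch of that shape (plain-Java stand-ins for TupleTag,
WindowedValue, and ThrowingConsumer; names are hypothetical):

    import java.util.ArrayList;
    import java.util.HashMap;
    import java.util.List;
    import java.util.Map;
    import java.util.function.Consumer;

    class TagFanOutSketch<T> {
      private final Map<String, List<Consumer<T>>> consumersByTag = new HashMap<>();

      void register(String tag, Consumer<T> consumer) {
        consumersByTag.computeIfAbsent(tag, t -> new ArrayList<>()).add(consumer);
      }

      void output(String tag, T value) {
        List<Consumer<T>> consumers = consumersByTag.get(tag);
        if (consumers == null) {
          // Mirrors the runner's "Unknown output tag" failure mode.
          throw new IllegalArgumentException("Unknown output tag " + tag);
        }
        try {
          for (Consumer<T> consumer : consumers) {
            consumer.accept(value);
          }
        } catch (RuntimeException e) {
          // The real runner wraps failures from user code (UserCodeException).
          throw new RuntimeException("User code failed for tag " + tag, e);
        }
      }
    }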


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/78a39bd5
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/78a39bd5
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/78a39bd5

Branch: refs/heads/DSL_SQL
Commit: 78a39bd54136ad29a0c8b7fab2dfe895c502e4f5
Parents: 513ccdf
Author: Luke Cwik <lc...@google.com>
Authored: Fri Jun 23 14:34:36 2017 -0700
Committer: Tyler Akidau <ta...@apache.org>
Committed: Wed Jul 12 20:01:00 2017 -0700

----------------------------------------------------------------------
 sdks/java/harness/pom.xml                       |  10 +
 .../beam/runners/core/FnApiDoFnRunner.java      | 483 ++++++++++++++++---
 .../beam/runners/core/FnApiDoFnRunnerTest.java  |   7 +-
 3 files changed, 438 insertions(+), 62 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/78a39bd5/sdks/java/harness/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/harness/pom.xml b/sdks/java/harness/pom.xml
index 9cfadc2..fe5c2f1 100644
--- a/sdks/java/harness/pom.xml
+++ b/sdks/java/harness/pom.xml
@@ -83,6 +83,11 @@
 
     <dependency>
       <groupId>org.apache.beam</groupId>
+      <artifactId>beam-runners-core-construction-java</artifactId>
+    </dependency>
+
+    <dependency>
+      <groupId>org.apache.beam</groupId>
       <artifactId>beam-runners-google-cloud-dataflow-java</artifactId>
     </dependency>
 
@@ -150,6 +155,11 @@
     </dependency>
 
     <dependency>
+      <groupId>joda-time</groupId>
+      <artifactId>joda-time</artifactId>
+    </dependency>
+
+    <dependency>
       <groupId>org.slf4j</groupId>
       <artifactId>slf4j-api</artifactId>
     </dependency>

http://git-wip-us.apache.org/repos/asf/beam/blob/78a39bd5/sdks/java/harness/src/main/java/org/apache/beam/runners/core/FnApiDoFnRunner.java
----------------------------------------------------------------------
diff --git a/sdks/java/harness/src/main/java/org/apache/beam/runners/core/FnApiDoFnRunner.java b/sdks/java/harness/src/main/java/org/apache/beam/runners/core/FnApiDoFnRunner.java
index adf735a..b3cf3a7 100644
--- a/sdks/java/harness/src/main/java/org/apache/beam/runners/core/FnApiDoFnRunner.java
+++ b/sdks/java/harness/src/main/java/org/apache/beam/runners/core/FnApiDoFnRunner.java
@@ -27,49 +27,59 @@ import com.google.common.collect.Multimap;
 import com.google.protobuf.ByteString;
 import com.google.protobuf.BytesValue;
 import com.google.protobuf.InvalidProtocolBufferException;
-import java.util.ArrayList;
 import java.util.Collection;
 import java.util.HashSet;
+import java.util.Iterator;
 import java.util.Map;
 import java.util.Objects;
 import java.util.function.Consumer;
 import java.util.function.Supplier;
 import org.apache.beam.fn.harness.data.BeamFnDataClient;
-import org.apache.beam.fn.harness.fake.FakeStepContext;
 import org.apache.beam.fn.harness.fn.ThrowingConsumer;
 import org.apache.beam.fn.harness.fn.ThrowingRunnable;
-import org.apache.beam.runners.core.DoFnRunners.OutputManager;
+import org.apache.beam.runners.core.construction.ParDoTranslation;
 import org.apache.beam.runners.dataflow.util.DoFnInfo;
 import org.apache.beam.sdk.common.runner.v1.RunnerApi;
 import org.apache.beam.sdk.options.PipelineOptions;
+import org.apache.beam.sdk.state.State;
+import org.apache.beam.sdk.state.TimeDomain;
+import org.apache.beam.sdk.state.Timer;
 import org.apache.beam.sdk.transforms.DoFn;
+import org.apache.beam.sdk.transforms.DoFn.OnTimerContext;
+import org.apache.beam.sdk.transforms.DoFn.ProcessContext;
+import org.apache.beam.sdk.transforms.reflect.DoFnInvoker;
+import org.apache.beam.sdk.transforms.reflect.DoFnInvokers;
+import org.apache.beam.sdk.transforms.splittabledofn.RestrictionTracker;
+import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
+import org.apache.beam.sdk.transforms.windowing.PaneInfo;
 import org.apache.beam.sdk.util.SerializableUtils;
+import org.apache.beam.sdk.util.UserCodeException;
 import org.apache.beam.sdk.util.WindowedValue;
+import org.apache.beam.sdk.values.PCollectionView;
 import org.apache.beam.sdk.values.TupleTag;
 import org.apache.beam.sdk.values.WindowingStrategy;
+import org.joda.time.Instant;
 
 /**
- * Classes associated with converting {@link RunnerApi.PTransform}s to {@link DoFnRunner}s.
- *
- * <p>TODO: Move DoFnRunners into SDK harness and merge the methods below into it removing this
- * class.
+ * A {@link DoFnRunner} specific to integrating with the Fn Api. This is to remove the layers
+ * of abstraction caused by StateInternals/TimerInternals since they model state and timer
+ * concepts differently.
  */
-public class FnApiDoFnRunner {
-
-  private static final String URN = "urn:org.apache.beam:dofn:java:0.1";
-
-  /** A registrar which provides a factory to handle Java {@link DoFn}s. */
+public class FnApiDoFnRunner<InputT, OutputT> implements DoFnRunner<InputT, OutputT> {
+  /**
+   * A registrar which provides a factory to handle Java {@link DoFn}s.
+   */
   @AutoService(PTransformRunnerFactory.Registrar.class)
   public static class Registrar implements
       PTransformRunnerFactory.Registrar {
 
     @Override
     public Map<String, PTransformRunnerFactory> getPTransformRunnerFactories() {
-      return ImmutableMap.of(URN, new Factory());
+      return ImmutableMap.of(ParDoTranslation.CUSTOM_JAVA_DO_FN_URN, new Factory());
     }
   }
 
-  /** A factory for {@link DoFnRunner}s. */
+  /** A factory for {@link FnApiDoFnRunner}. */
   static class Factory<InputT, OutputT>
       implements PTransformRunnerFactory<DoFnRunner<InputT, OutputT>> {
 
@@ -105,9 +115,9 @@ public class FnApiDoFnRunner {
         throw new IllegalArgumentException(
             String.format("Unable to unwrap DoFn %s", pTransform.getSpec()), e);
       }
-      DoFnInfo<?, ?> doFnInfo =
-          (DoFnInfo<?, ?>)
-              SerializableUtils.deserializeFromByteArray(serializedFn.toByteArray(), "DoFnInfo");
+      @SuppressWarnings({"unchecked", "rawtypes"})
+      DoFnInfo<InputT, OutputT> doFnInfo = (DoFnInfo) SerializableUtils.deserializeFromByteArray(
+          serializedFn.toByteArray(), "DoFnInfo");
 
       // Verify that the DoFnInfo tag to output map matches the output map on the PTransform.
       checkArgument(
@@ -119,54 +129,26 @@ public class FnApiDoFnRunner {
           doFnInfo.getOutputMap());
 
       ImmutableMultimap.Builder<TupleTag<?>,
-          ThrowingConsumer<WindowedValue<OutputT>>> tagToOutput =
+          ThrowingConsumer<WindowedValue<?>>> tagToOutputMapBuilder =
           ImmutableMultimap.builder();
       for (Map.Entry<Long, TupleTag<?>> entry : doFnInfo.getOutputMap().entrySet()) {
         @SuppressWarnings({"unchecked", "rawtypes"})
-        Collection<ThrowingConsumer<WindowedValue<OutputT>>> consumers =
-            (Collection) outputMap.get(Long.toString(entry.getKey()));
-        tagToOutput.putAll(entry.getValue(), consumers);
+        Collection<ThrowingConsumer<WindowedValue<?>>> consumers =
+            outputMap.get(Long.toString(entry.getKey()));
+        tagToOutputMapBuilder.putAll(entry.getValue(), consumers);
       }
 
+      ImmutableMultimap<TupleTag<?>, ThrowingConsumer<WindowedValue<?>>> tagToOutputMap =
+          tagToOutputMapBuilder.build();
+
       @SuppressWarnings({"unchecked", "rawtypes"})
-      Map<TupleTag<?>, Collection<ThrowingConsumer<WindowedValue<?>>>> tagBasedOutputMap =
-          (Map) tagToOutput.build().asMap();
-
-      OutputManager outputManager =
-          new OutputManager() {
-            Map<TupleTag<?>, Collection<ThrowingConsumer<WindowedValue<?>>>> tupleTagToOutput =
-                tagBasedOutputMap;
-
-            @Override
-            public <T> void output(TupleTag<T> tag, WindowedValue<T> output) {
-              try {
-                Collection<ThrowingConsumer<WindowedValue<?>>> consumers =
-                    tupleTagToOutput.get(tag);
-                if (consumers == null) {
-                    /* This is a normal case, e.g., if a DoFn has output but that output is not
-                     * consumed. Drop the output. */
-                  return;
-                }
-                for (ThrowingConsumer<WindowedValue<?>> consumer : consumers) {
-                  consumer.accept(output);
-                }
-              } catch (Throwable t) {
-                throw new RuntimeException(t);
-              }
-            }
-          };
-
-      @SuppressWarnings({"unchecked", "rawtypes", "deprecation"})
-      DoFnRunner<InputT, OutputT> runner =
-          DoFnRunners.simpleRunner(
-              pipelineOptions,
-              (DoFn) doFnInfo.getDoFn(),
-              NullSideInputReader.empty(), /* TODO */
-              outputManager,
-              (TupleTag) doFnInfo.getOutputMap().get(doFnInfo.getMainOutput()),
-              new ArrayList<>(doFnInfo.getOutputMap().values()),
-              new FakeStepContext(),
-              (WindowingStrategy) doFnInfo.getWindowingStrategy());
+      DoFnRunner<InputT, OutputT> runner = new FnApiDoFnRunner<>(
+          pipelineOptions,
+          doFnInfo.getDoFn(),
+          (Collection<ThrowingConsumer<WindowedValue<OutputT>>>) (Collection)
+              tagToOutputMap.get(doFnInfo.getOutputMap().get(doFnInfo.getMainOutput())),
+          tagToOutputMap,
+          doFnInfo.getWindowingStrategy());
 
       // Register the appropriate handlers.
       addStartFunction.accept(runner::startBundle);
@@ -179,4 +161,387 @@ public class FnApiDoFnRunner {
       return runner;
     }
   }
+
+  //////////////////////////////////////////////////////////////////////////////////////////////////
+
+  private final PipelineOptions pipelineOptions;
+  private final DoFn<InputT, OutputT> doFn;
+  private final Collection<ThrowingConsumer<WindowedValue<OutputT>>> mainOutputConsumers;
+  private final Multimap<TupleTag<?>, ThrowingConsumer<WindowedValue<?>>> outputMap;
+  private final DoFnInvoker<InputT, OutputT> doFnInvoker;
+  private final StartBundleContext startBundleContext;
+  private final ProcessBundleContext processBundleContext;
+  private final FinishBundleContext finishBundleContext;
+
+  /**
+   * The lifetime of this member is only valid during {@link #processElement(WindowedValue)}.
+   */
+  private WindowedValue<InputT> currentElement;
+
+  /**
+   * The lifetime of this member is only valid during {@link #processElement(WindowedValue)}.
+   */
+  private BoundedWindow currentWindow;
+
+  FnApiDoFnRunner(
+      PipelineOptions pipelineOptions,
+      DoFn<InputT, OutputT> doFn,
+      Collection<ThrowingConsumer<WindowedValue<OutputT>>> mainOutputConsumers,
+      Multimap<TupleTag<?>, ThrowingConsumer<WindowedValue<?>>> outputMap,
+      WindowingStrategy windowingStrategy) {
+    this.pipelineOptions = pipelineOptions;
+    this.doFn = doFn;
+    this.mainOutputConsumers = mainOutputConsumers;
+    this.outputMap = outputMap;
+    this.doFnInvoker = DoFnInvokers.invokerFor(doFn);
+    this.startBundleContext = new StartBundleContext();
+    this.processBundleContext = new ProcessBundleContext();
+    this.finishBundleContext = new FinishBundleContext();
+  }
+
+  @Override
+  public void startBundle() {
+    doFnInvoker.invokeStartBundle(startBundleContext);
+  }
+
+  @Override
+  public void processElement(WindowedValue<InputT> elem) {
+    currentElement = elem;
+    try {
+      Iterator<BoundedWindow> windowIterator =
+          (Iterator<BoundedWindow>) elem.getWindows().iterator();
+      while (windowIterator.hasNext()) {
+        currentWindow = windowIterator.next();
+        doFnInvoker.invokeProcessElement(processBundleContext);
+      }
+    } finally {
+      currentElement = null;
+      currentWindow = null;
+    }
+  }
+
+  @Override
+  public void onTimer(
+      String timerId,
+      BoundedWindow window,
+      Instant timestamp,
+      TimeDomain timeDomain) {
+    throw new UnsupportedOperationException("TODO: Add support for timers");
+  }
+
+  @Override
+  public void finishBundle() {
+    doFnInvoker.invokeFinishBundle(finishBundleContext);
+  }
+
+  /**
+   * Outputs the given element to the specified set of consumers wrapping any exceptions.
+   */
+  private <T> void outputTo(
+      Collection<ThrowingConsumer<WindowedValue<T>>> consumers,
+      WindowedValue<T> output) {
+    Iterator<ThrowingConsumer<WindowedValue<T>>> consumerIterator;
+    try {
+      for (ThrowingConsumer<WindowedValue<T>> consumer : consumers) {
+        consumer.accept(output);
+      }
+    } catch (Throwable t) {
+      throw UserCodeException.wrap(t);
+    }
+  }
+
+  /**
+   * Provides arguments for a {@link DoFnInvoker} for {@link DoFn.StartBundle @StartBundle}.
+   */
+  private class StartBundleContext
+      extends DoFn<InputT, OutputT>.StartBundleContext
+      implements DoFnInvoker.ArgumentProvider<InputT, OutputT> {
+
+    private StartBundleContext() {
+      doFn.super();
+    }
+
+    @Override
+    public PipelineOptions getPipelineOptions() {
+      return pipelineOptions;
+    }
+
+    @Override
+    public PipelineOptions pipelineOptions() {
+      return pipelineOptions;
+    }
+
+    @Override
+    public BoundedWindow window() {
+      throw new UnsupportedOperationException(
+          "Cannot access window outside of @ProcessElement and @OnTimer methods.");
+    }
+
+    @Override
+    public DoFn<InputT, OutputT>.StartBundleContext startBundleContext(
+        DoFn<InputT, OutputT> doFn) {
+      return this;
+    }
+
+    @Override
+    public DoFn<InputT, OutputT>.FinishBundleContext finishBundleContext(
+        DoFn<InputT, OutputT> doFn) {
+      throw new UnsupportedOperationException(
+          "Cannot access FinishBundleContext outside of @FinishBundle method.");
+    }
+
+    @Override
+    public DoFn<InputT, OutputT>.ProcessContext processContext(DoFn<InputT, OutputT> doFn) {
+      throw new UnsupportedOperationException(
+          "Cannot access ProcessContext outside of @ProcessElement method.");
+    }
+
+    @Override
+    public DoFn<InputT, OutputT>.OnTimerContext onTimerContext(DoFn<InputT, OutputT> doFn) {
+      throw new UnsupportedOperationException(
+          "Cannot access OnTimerContext outside of @OnTimer methods.");
+    }
+
+    @Override
+    public RestrictionTracker<?> restrictionTracker() {
+      throw new UnsupportedOperationException(
+          "Cannot access RestrictionTracker outside of @ProcessElement method.");
+    }
+
+    @Override
+    public State state(String stateId) {
+      throw new UnsupportedOperationException(
+          "Cannot access state outside of @ProcessElement and @OnTimer methods.");
+    }
+
+    @Override
+    public Timer timer(String timerId) {
+      throw new UnsupportedOperationException(
+          "Cannot access timers outside of @ProcessElement and @OnTimer methods.");
+    }
+  }
+
+  /**
+   * Provides arguments for a {@link DoFnInvoker} for {@link DoFn.ProcessElement @ProcessElement}.
+   */
+  private class ProcessBundleContext
+      extends DoFn<InputT, OutputT>.ProcessContext
+      implements DoFnInvoker.ArgumentProvider<InputT, OutputT> {
+
+    private ProcessBundleContext() {
+      doFn.super();
+    }
+
+    @Override
+    public BoundedWindow window() {
+      return currentWindow;
+    }
+
+    @Override
+    public DoFn.StartBundleContext startBundleContext(DoFn<InputT, OutputT> doFn) {
+      throw new UnsupportedOperationException(
+          "Cannot access StartBundleContext outside of @StartBundle method.");
+    }
+
+    @Override
+    public DoFn.FinishBundleContext finishBundleContext(DoFn<InputT, OutputT> doFn) {
+      throw new UnsupportedOperationException(
+          "Cannot access FinishBundleContext outside of @FinishBundle method.");
+    }
+
+    @Override
+    public ProcessContext processContext(DoFn<InputT, OutputT> doFn) {
+      return this;
+    }
+
+    @Override
+    public OnTimerContext onTimerContext(DoFn<InputT, OutputT> doFn) {
+      throw new UnsupportedOperationException("TODO: Add support for timers");
+    }
+
+    @Override
+    public RestrictionTracker<?> restrictionTracker() {
+      throw new UnsupportedOperationException("TODO: Add support for SplittableDoFn");
+    }
+
+    @Override
+    public State state(String stateId) {
+      throw new UnsupportedOperationException("TODO: Add support for state");
+    }
+
+    @Override
+    public Timer timer(String timerId) {
+      throw new UnsupportedOperationException("TODO: Add support for timers");
+    }
+
+    @Override
+    public PipelineOptions getPipelineOptions() {
+      return pipelineOptions;
+    }
+
+    @Override
+    public PipelineOptions pipelineOptions() {
+      return pipelineOptions;
+    }
+
+    @Override
+    public void output(OutputT output) {
+      outputTo(mainOutputConsumers,
+          WindowedValue.of(
+              output,
+              currentElement.getTimestamp(),
+              currentWindow,
+              currentElement.getPane()));
+    }
+
+    @Override
+    public void outputWithTimestamp(OutputT output, Instant timestamp) {
+      outputTo(mainOutputConsumers,
+          WindowedValue.of(
+              output,
+              timestamp,
+              currentWindow,
+              currentElement.getPane()));
+    }
+
+    @Override
+    public <T> void output(TupleTag<T> tag, T output) {
+      Collection<ThrowingConsumer<WindowedValue<T>>> consumers = (Collection) outputMap.get(tag);
+      if (consumers == null) {
+        throw new IllegalArgumentException(String.format("Unknown output tag %s", tag));
+      }
+      outputTo(consumers,
+          WindowedValue.of(
+              output,
+              currentElement.getTimestamp(),
+              currentWindow,
+              currentElement.getPane()));
+    }
+
+    @Override
+    public <T> void outputWithTimestamp(TupleTag<T> tag, T output, Instant timestamp) {
+      Collection<ThrowingConsumer<WindowedValue<T>>> consumers = (Collection) outputMap.get(tag);
+      if (consumers == null) {
+        throw new IllegalArgumentException(String.format("Unknown output tag %s", tag));
+      }
+      outputTo(consumers,
+          WindowedValue.of(
+              output,
+              timestamp,
+              currentWindow,
+              currentElement.getPane()));
+    }
+
+    @Override
+    public InputT element() {
+      return currentElement.getValue();
+    }
+
+    @Override
+    public <T> T sideInput(PCollectionView<T> view) {
+      throw new UnsupportedOperationException("TODO: Support side inputs");
+    }
+
+    @Override
+    public Instant timestamp() {
+      return currentElement.getTimestamp();
+    }
+
+    @Override
+    public PaneInfo pane() {
+      return currentElement.getPane();
+    }
+
+    @Override
+    public void updateWatermark(Instant watermark) {
+      throw new UnsupportedOperationException("TODO: Add support for SplittableDoFn");
+    }
+  }
+
+  /**
+   * Provides arguments for a {@link DoFnInvoker} for {@link DoFn.FinishBundle @FinishBundle}.
+   */
+  private class FinishBundleContext
+      extends DoFn<InputT, OutputT>.FinishBundleContext
+      implements DoFnInvoker.ArgumentProvider<InputT, OutputT> {
+
+    private FinishBundleContext() {
+      doFn.super();
+    }
+
+    @Override
+    public PipelineOptions getPipelineOptions() {
+      return pipelineOptions;
+    }
+
+    @Override
+    public PipelineOptions pipelineOptions() {
+      return pipelineOptions;
+    }
+
+    @Override
+    public BoundedWindow window() {
+      throw new UnsupportedOperationException(
+          "Cannot access window outside of @ProcessElement and @OnTimer methods.");
+    }
+
+    @Override
+    public DoFn<InputT, OutputT>.StartBundleContext startBundleContext(
+        DoFn<InputT, OutputT> doFn) {
+      throw new UnsupportedOperationException(
+          "Cannot access StartBundleContext outside of @StartBundle method.");
+    }
+
+    @Override
+    public DoFn<InputT, OutputT>.FinishBundleContext finishBundleContext(
+        DoFn<InputT, OutputT> doFn) {
+      return this;
+    }
+
+    @Override
+    public DoFn<InputT, OutputT>.ProcessContext processContext(DoFn<InputT, OutputT> doFn) {
+      throw new UnsupportedOperationException(
+          "Cannot access ProcessContext outside of @ProcessElement method.");
+    }
+
+    @Override
+    public DoFn<InputT, OutputT>.OnTimerContext onTimerContext(DoFn<InputT, OutputT> doFn) {
+      throw new UnsupportedOperationException(
+          "Cannot access OnTimerContext outside of @OnTimer methods.");
+    }
+
+    @Override
+    public RestrictionTracker<?> restrictionTracker() {
+      throw new UnsupportedOperationException(
+          "Cannot access RestrictionTracker outside of @ProcessElement method.");
+    }
+
+    @Override
+    public State state(String stateId) {
+      throw new UnsupportedOperationException(
+          "Cannot access state outside of @ProcessElement and @OnTimer methods.");
+    }
+
+    @Override
+    public Timer timer(String timerId) {
+      throw new UnsupportedOperationException(
+          "Cannot access timers outside of @ProcessElement and @OnTimer methods.");
+    }
+
+    @Override
+    public void output(OutputT output, Instant timestamp, BoundedWindow window) {
+      outputTo(mainOutputConsumers,
+          WindowedValue.of(output, timestamp, window, PaneInfo.NO_FIRING));
+    }
+
+    @Override
+    public <T> void output(TupleTag<T> tag, T output, Instant timestamp, BoundedWindow window) {
+      Collection<ThrowingConsumer<WindowedValue<T>>> consumers = (Collection) outputMap.get(tag);
+      if (consumers == null) {
+        throw new IllegalArgumentException(String.format("Unknown output tag %s", tag));
+      }
+      outputTo(consumers,
+          WindowedValue.of(output, timestamp, window, PaneInfo.NO_FIRING));
+    }
+  }
 }

http://git-wip-us.apache.org/repos/asf/beam/blob/78a39bd5/sdks/java/harness/src/test/java/org/apache/beam/runners/core/FnApiDoFnRunnerTest.java
----------------------------------------------------------------------
diff --git a/sdks/java/harness/src/test/java/org/apache/beam/runners/core/FnApiDoFnRunnerTest.java b/sdks/java/harness/src/test/java/org/apache/beam/runners/core/FnApiDoFnRunnerTest.java
index ae5cbac..c4df77a 100644
--- a/sdks/java/harness/src/test/java/org/apache/beam/runners/core/FnApiDoFnRunnerTest.java
+++ b/sdks/java/harness/src/test/java/org/apache/beam/runners/core/FnApiDoFnRunnerTest.java
@@ -44,6 +44,7 @@ import java.util.ServiceLoader;
 import org.apache.beam.fn.harness.fn.ThrowingConsumer;
 import org.apache.beam.fn.harness.fn.ThrowingRunnable;
 import org.apache.beam.runners.core.PTransformRunnerFactory.Registrar;
+import org.apache.beam.runners.core.construction.ParDoTranslation;
 import org.apache.beam.runners.dataflow.util.CloudObjects;
 import org.apache.beam.runners.dataflow.util.DoFnInfo;
 import org.apache.beam.sdk.coders.Coder;
@@ -71,7 +72,6 @@ public class FnApiDoFnRunnerTest {
       WindowedValue.getFullCoder(StringUtf8Coder.of(), GlobalWindow.Coder.INSTANCE);
   private static final String STRING_CODER_SPEC_ID = "999L";
   private static final RunnerApi.Coder STRING_CODER_SPEC;
-  private static final String URN = "urn:org.apache.beam:dofn:java:0.1";
 
   static {
     try {
@@ -132,7 +132,7 @@ public class FnApiDoFnRunnerTest {
             Long.parseLong(mainOutputId), TestDoFn.mainOutput,
             Long.parseLong(additionalOutputId), TestDoFn.additionalOutput));
     RunnerApi.FunctionSpec functionSpec = RunnerApi.FunctionSpec.newBuilder()
-        .setUrn("urn:org.apache.beam:dofn:java:0.1")
+        .setUrn(ParDoTranslation.CUSTOM_JAVA_DO_FN_URN)
         .setParameter(Any.pack(BytesValue.newBuilder()
             .setValue(ByteString.copyFrom(SerializableUtils.serializeToByteArray(doFnInfo)))
             .build()))
@@ -200,7 +200,8 @@ public class FnApiDoFnRunnerTest {
     for (Registrar registrar :
         ServiceLoader.load(Registrar.class)) {
       if (registrar instanceof FnApiDoFnRunner.Registrar) {
-        assertThat(registrar.getPTransformRunnerFactories(), IsMapContaining.hasKey(URN));
+        assertThat(registrar.getPTransformRunnerFactories(),
+            IsMapContaining.hasKey(ParDoTranslation.CUSTOM_JAVA_DO_FN_URN));
         return;
       }
     }


[16/50] [abbrv] beam git commit: Simplified ByteBuddyOnTimerInvokerFactory

Posted by ta...@apache.org.
Simplified ByteBuddyOnTimerInvokerFactory
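
The simplification collapses the previous two-level cache (DoFn class ->
timer id -> constructor) into a single LoadingCache keyed by a value type.
A minimal sketch of the pattern with Guava (the String payload stands in
for the generated invoker constructor, and MethodKey hand-writes the
equals/hashCode that AutoValue generates in the actual commit):

    import com.google.common.cache.CacheBuilder;
    import com.google.common.cache.CacheLoader;
    import com.google.common.cache.LoadingCache;

    final class MethodKey {
      final Class<?> fnClass;
      final String timerId;

      MethodKey(Class<?> fnClass, String timerId) {
        this.fnClass = fnClass;
        this.timerId = timerId;
      }

      @Override
      public boolean equals(Object o) {
        return o instanceof MethodKey
            && ((MethodKey) o).fnClass.equals(fnClass)
            && ((MethodKey) o).timerId.equals(timerId);
      }

      @Override
      public int hashCode() {
        return 31 * fnClass.hashCode() + timerId.hashCode();
      }
    }

    class InvokerCacheSketch {
      private final LoadingCache<MethodKey, String> cache =
          CacheBuilder.newBuilder()
              .build(
                  new CacheLoader<MethodKey, String>() {
                    @Override
                    public String load(MethodKey key) {
                      // The expensive generation runs once per (class, timerId).
                      return key.fnClass.getName() + "#" + key.timerId;
                    }
                  });

      String invokerFor(Class<?> fnClass, String timerId) {
        return cache.getUnchecked(new MethodKey(fnClass, timerId));
      }
    }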


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/8512153b
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/8512153b
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/8512153b

Branch: refs/heads/DSL_SQL
Commit: 8512153b24bf13f5ba6e0298eeb8629ab2875da4
Parents: 02774b9
Author: Innocent Djiofack <dj...@gmail.com>
Authored: Wed Jun 28 22:15:11 2017 -0400
Committer: Tyler Akidau <ta...@apache.org>
Committed: Wed Jul 12 20:01:00 2017 -0700

----------------------------------------------------------------------
 .../reflect/ByteBuddyOnTimerInvokerFactory.java | 73 ++++++++------------
 .../reflect/OnTimerMethodSpecifier.java         | 37 ++++++++++
 2 files changed, 65 insertions(+), 45 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/8512153b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/reflect/ByteBuddyOnTimerInvokerFactory.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/reflect/ByteBuddyOnTimerInvokerFactory.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/reflect/ByteBuddyOnTimerInvokerFactory.java
index e031337..5e31f2e 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/reflect/ByteBuddyOnTimerInvokerFactory.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/reflect/ByteBuddyOnTimerInvokerFactory.java
@@ -17,6 +17,7 @@
  */
 package org.apache.beam.sdk.transforms.reflect;
 
+
 import com.google.common.base.CharMatcher;
 import com.google.common.cache.CacheBuilder;
 import com.google.common.cache.CacheLoader;
@@ -61,13 +62,14 @@ class ByteBuddyOnTimerInvokerFactory implements OnTimerInvokerFactory {
 
     @SuppressWarnings("unchecked")
     Class<? extends DoFn<?, ?>> fnClass = (Class<? extends DoFn<?, ?>>) fn.getClass();
-
     try {
-      Constructor<?> constructor = constructorCache.get(fnClass).get(timerId);
-      @SuppressWarnings("unchecked")
-      OnTimerInvoker<InputT, OutputT> invoker =
+        OnTimerMethodSpecifier onTimerMethodSpecifier =
+                OnTimerMethodSpecifier.forClassAndTimerId(fnClass, timerId);
+        Constructor<?> constructor = constructorCache.get(onTimerMethodSpecifier);
+
+        OnTimerInvoker<InputT, OutputT> invoker =
           (OnTimerInvoker<InputT, OutputT>) constructor.newInstance(fn);
-      return invoker;
+        return invoker;
     } catch (InstantiationException
         | IllegalAccessException
         | IllegalArgumentException
@@ -97,50 +99,31 @@ class ByteBuddyOnTimerInvokerFactory implements OnTimerInvokerFactory {
   private static final String FN_DELEGATE_FIELD_NAME = "delegate";
 
   /**
-   * A cache of constructors of generated {@link OnTimerInvoker} classes, keyed by {@link DoFn}
-   * class and then by {@link TimerId}.
+   * A cache of constructors of generated {@link OnTimerInvoker} classes,
+   * keyed by {@link OnTimerMethodSpecifier}.
    *
    * <p>Needed because generating an invoker class is expensive, and to avoid generating an
    * excessive number of classes consuming PermGen memory in Java versions that still have PermGen.
    */
-  private final LoadingCache<Class<? extends DoFn<?, ?>>, LoadingCache<String, Constructor<?>>>
-      constructorCache =
-          CacheBuilder.newBuilder()
-              .build(
-                  new CacheLoader<
-                      Class<? extends DoFn<?, ?>>, LoadingCache<String, Constructor<?>>>() {
-                    @Override
-                    public LoadingCache<String, Constructor<?>> load(
-                        final Class<? extends DoFn<?, ?>> fnClass) throws Exception {
-                      return CacheBuilder.newBuilder().build(new OnTimerConstructorLoader(fnClass));
-                    }
-                  });
-
-  /**
-   * A cache loader fixed to a particular {@link DoFn} class that loads constructors for the
-   * invokers for its {@link OnTimer @OnTimer} methods.
-   */
-  private static class OnTimerConstructorLoader extends CacheLoader<String, Constructor<?>> {
-
-    private final DoFnSignature signature;
-
-    public OnTimerConstructorLoader(Class<? extends DoFn<?, ?>> clazz) {
-      this.signature = DoFnSignatures.getSignature(clazz);
-    }
-
-    @Override
-    public Constructor<?> load(String timerId) throws Exception {
-      Class<? extends OnTimerInvoker<?, ?>> invokerClass =
-          generateOnTimerInvokerClass(signature, timerId);
-      try {
-        return invokerClass.getConstructor(signature.fnClass());
-      } catch (IllegalArgumentException | NoSuchMethodException | SecurityException e) {
-        throw new RuntimeException(e);
-      }
-    }
-  }
-
-  /**
+  private final LoadingCache<OnTimerMethodSpecifier, Constructor<?>> constructorCache =
+          CacheBuilder.newBuilder().build(
+          new CacheLoader<OnTimerMethodSpecifier, Constructor<?>>() {
+              @Override
+              public Constructor<?> load(final OnTimerMethodSpecifier onTimerMethodSpecifier)
+                      throws Exception {
+                  DoFnSignature signature =
+                          DoFnSignatures.getSignature(onTimerMethodSpecifier.fnClass());
+                  Class<? extends OnTimerInvoker<?, ?>> invokerClass =
+                          generateOnTimerInvokerClass(signature, onTimerMethodSpecifier.timerId());
+                  try {
+                      return invokerClass.getConstructor(signature.fnClass());
+                  } catch (IllegalArgumentException | NoSuchMethodException | SecurityException e) {
+                      throw new RuntimeException(e);
+                  }
+
+              }
+          });
+    /**
    * Generates a {@link OnTimerInvoker} class for the given {@link DoFnSignature} and {@link
    * TimerId}.
    */

http://git-wip-us.apache.org/repos/asf/beam/blob/8512153b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/reflect/OnTimerMethodSpecifier.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/reflect/OnTimerMethodSpecifier.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/reflect/OnTimerMethodSpecifier.java
new file mode 100644
index 0000000..edf7e3c
--- /dev/null
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/reflect/OnTimerMethodSpecifier.java
@@ -0,0 +1,37 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.sdk.transforms.reflect;
+
+import com.google.auto.value.AutoValue;
+import org.apache.beam.sdk.transforms.DoFn;
+
+/**
+ * Used by {@link ByteBuddyOnTimerInvokerFactory} to dynamically generate
+ * {@link OnTimerInvoker} instances for invoking a particular
+ * {@link DoFn.TimerId} on a particular {@link DoFn}.
+ */
+
+@AutoValue
+abstract class OnTimerMethodSpecifier {
+    public abstract Class<? extends DoFn<?, ?>> fnClass();
+    public abstract String timerId();
+    public static OnTimerMethodSpecifier
+    forClassAndTimerId(Class<? extends DoFn<?, ?>> fnClass, String timerId){
+        return  new AutoValue_OnTimerMethodSpecifier(fnClass, timerId);
+    }
+}


[06/50] [abbrv] beam git commit: Website Mergebot Job

Posted by ta...@apache.org.
Website Mergebot Job

Signed-off-by: Jason Kuster <ja...@google.com>


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/f2c337cc
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/f2c337cc
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/f2c337cc

Branch: refs/heads/DSL_SQL
Commit: f2c337cc006101de050781a50ee70ad940dbf28e
Parents: a32db07
Author: Jason Kuster <ja...@google.com>
Authored: Fri Jun 9 01:39:15 2017 -0700
Committer: Tyler Akidau <ta...@apache.org>
Committed: Wed Jul 12 20:00:59 2017 -0700

----------------------------------------------------------------------
 .../jenkins/common_job_properties.groovy        |  5 +-
 .../job_beam_PreCommit_Website_Merge.groovy     | 59 ++++++++++++++++++++
 2 files changed, 62 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/f2c337cc/.test-infra/jenkins/common_job_properties.groovy
----------------------------------------------------------------------
diff --git a/.test-infra/jenkins/common_job_properties.groovy b/.test-infra/jenkins/common_job_properties.groovy
index 0e047ea..70534c6 100644
--- a/.test-infra/jenkins/common_job_properties.groovy
+++ b/.test-infra/jenkins/common_job_properties.groovy
@@ -23,11 +23,12 @@
 class common_job_properties {
 
   // Sets common top-level job properties for website repository jobs.
-  static void setTopLevelWebsiteJobProperties(context) {
+  static void setTopLevelWebsiteJobProperties(context,
+                                              String branch = 'asf-site') {
     setTopLevelJobProperties(
             context,
             'beam-site',
-            'asf-site',
+            branch,
             'beam',
             30)
   }

http://git-wip-us.apache.org/repos/asf/beam/blob/f2c337cc/.test-infra/jenkins/job_beam_PreCommit_Website_Merge.groovy
----------------------------------------------------------------------
diff --git a/.test-infra/jenkins/job_beam_PreCommit_Website_Merge.groovy b/.test-infra/jenkins/job_beam_PreCommit_Website_Merge.groovy
new file mode 100644
index 0000000..0e2ae3f
--- /dev/null
+++ b/.test-infra/jenkins/job_beam_PreCommit_Website_Merge.groovy
@@ -0,0 +1,59 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import common_job_properties
+
+// Defines a job.
+job('beam_PreCommit_Website_Merge') {
+  description('Runs website tests for mergebot.')
+
+  // Set common parameters.
+  common_job_properties.setTopLevelWebsiteJobProperties(delegate, 'mergebot')
+
+  triggers {
+    githubPush()
+  }
+
+  steps {
+    // Run the following shell script as a build step.
+    shell '''
+        # Install RVM per instructions at https://rvm.io/rvm/install.
+        RVM_GPG_KEY=409B6B1796C275462A1703113804BB82D39DC0E3
+        gpg --keyserver hkp://keys.gnupg.net --recv-keys $RVM_GPG_KEY
+            
+        \\curl -sSL https://get.rvm.io | bash
+        source /home/jenkins/.rvm/scripts/rvm
+
+        # Install Ruby.
+        RUBY_VERSION_NUM=2.3.0
+        rvm install ruby $RUBY_VERSION_NUM --autolibs=read-only
+
+        # Install Bundler gem
+        PATH=~/.gem/ruby/$RUBY_VERSION_NUM/bin:$PATH
+        GEM_PATH=~/.gem/ruby/$RUBY_VERSION_NUM/:$GEM_PATH
+        gem install bundler --user-install
+
+        # Install all needed gems.
+        bundle install --path ~/.gem/
+
+        # Build the new site and test it.
+        rm -fr ./content/
+        bundle exec rake test
+    '''.stripIndent().trim()
+  }
+}


[03/50] [abbrv] beam git commit: [BEAM-2534] Handle offset gaps in Kafka messages.

Posted by ta...@apache.org.
[BEAM-2534] Handle offset gaps in Kafka messages.

KafkaIO logged a warning whenever there was a gap in the offsets of
consumed messages. Kafka also supports 'KV store' style (log-compacted)
topics in which some messages are deleted, leading to gaps in offsets.
This PR removes the log statement and accounts for offset gaps in the
backlog estimate.
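
A worked sketch of the new backlog arithmetic (the numbers are
hypothetical): with log compaction, offsets advance by more than one per
record, so the raw offset difference overcounts the pending messages.

    public class BacklogSketch {
      public static void main(String[] args) {
        long latestOffset = 1_000;
        long nextOffset = 400;
        // Average gap observed between consecutive consumed offsets;
        // greater than 0 only when log compaction has deleted messages.
        double avgOffsetGap = 1.0;

        // The raw difference would report 600 pending messages; dividing
        // by the average stride (1 + gap) estimates the ~300 actually left.
        double remaining = (latestOffset - nextOffset) / (1 + avgOffsetGap);
        long backlogMessageCount = Math.max(0, (long) Math.ceil(remaining));
        System.out.println(backlogMessageCount);  // 300
      }
    }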


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/48627038
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/48627038
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/48627038

Branch: refs/heads/DSL_SQL
Commit: 48627038a331a4f142d260ebf347693941113b75
Parents: da3206c
Author: Raghu Angadi <ra...@google.com>
Authored: Wed Jun 28 12:07:06 2017 -0700
Committer: Tyler Akidau <ta...@apache.org>
Committed: Wed Jul 12 20:00:59 2017 -0700

----------------------------------------------------------------------
 .../org/apache/beam/sdk/io/kafka/KafkaIO.java   | 49 ++++++++++++--------
 1 file changed, 29 insertions(+), 20 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/48627038/sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/KafkaIO.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/KafkaIO.java b/sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/KafkaIO.java
index 702bdd3..e520367 100644
--- a/sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/KafkaIO.java
+++ b/sdks/java/io/kafka/src/main/java/org/apache/beam/sdk/io/kafka/KafkaIO.java
@@ -904,6 +904,22 @@ public class KafkaIO {
       return name;
     }
 
+    // Maintains approximate average over last 1000 elements
+    private static class MovingAvg {
+      private static final int MOVING_AVG_WINDOW = 1000;
+      private double avg = 0;
+      private long numUpdates = 0;
+
+      void update(double quantity) {
+        numUpdates++;
+        avg += (quantity - avg) / Math.min(MOVING_AVG_WINDOW, numUpdates);
+      }
+
+      double get() {
+        return avg;
+      }
+    }
+
     // maintains state of each assigned partition (buffered records, consumed offset, etc)
     private static class PartitionState {
       private final TopicPartition topicPartition;
@@ -911,9 +927,8 @@ public class KafkaIO {
       private long latestOffset;
       private Iterator<ConsumerRecord<byte[], byte[]>> recordIter = Collections.emptyIterator();
 
-      // simple moving average for size of each record in bytes
-      private double avgRecordSize = 0;
-      private static final int movingAvgWindow = 1000; // very roughly avg of last 1000 elements
+      private MovingAvg avgRecordSize = new MovingAvg();
+      private MovingAvg avgOffsetGap = new MovingAvg(); // > 0 only when log compaction is enabled.
 
       PartitionState(TopicPartition partition, long nextOffset) {
         this.topicPartition = partition;
@@ -921,17 +936,13 @@ public class KafkaIO {
         this.latestOffset = UNINITIALIZED_OFFSET;
       }
 
-      // update consumedOffset and avgRecordSize
-      void recordConsumed(long offset, int size) {
+      // Update consumedOffset, avgRecordSize, and avgOffsetGap
+      void recordConsumed(long offset, int size, long offsetGap) {
         nextOffset = offset + 1;
 
-        // this is always updated from single thread. probably not worth making it an AtomicDouble
-        if (avgRecordSize <= 0) {
-          avgRecordSize = size;
-        } else {
-          // initially, first record heavily contributes to average.
-          avgRecordSize += ((size - avgRecordSize) / movingAvgWindow);
-        }
+        // This is always updated from single thread. Probably not worth making atomic.
+        avgRecordSize.update(size);
+        avgOffsetGap.update(offsetGap);
       }
 
       synchronized void setLatestOffset(long latestOffset) {
@@ -944,14 +955,15 @@ public class KafkaIO {
         if (backlogMessageCount == UnboundedReader.BACKLOG_UNKNOWN) {
           return UnboundedReader.BACKLOG_UNKNOWN;
         }
-        return (long) (backlogMessageCount * avgRecordSize);
+        return (long) (backlogMessageCount * avgRecordSize.get());
       }
 
       synchronized long backlogMessageCount() {
         if (latestOffset < 0 || nextOffset < 0) {
           return UnboundedReader.BACKLOG_UNKNOWN;
         }
-        return Math.max(0, (latestOffset - nextOffset));
+        double remaining = (latestOffset - nextOffset) / (1 + avgOffsetGap.get());
+        return Math.max(0, (long) Math.ceil(remaining));
       }
     }
 
@@ -1154,14 +1166,11 @@ public class KafkaIO {
             continue;
           }
 
-          // sanity check
-          if (offset != expected) {
-            LOG.warn("{}: gap in offsets for {} at {}. {} records missing.",
-                this, pState.topicPartition, expected, offset - expected);
-          }
+          long offsetGap = offset - expected; // could be > 0 when Kafka log compaction is enabled.
 
           if (curRecord == null) {
             LOG.info("{}: first record offset {}", name, offset);
+            offsetGap = 0;
           }
 
           curRecord = null; // user coders below might throw.
@@ -1182,7 +1191,7 @@ public class KafkaIO {
 
           int recordSize = (rawRecord.key() == null ? 0 : rawRecord.key().length)
               + (rawRecord.value() == null ? 0 : rawRecord.value().length);
-          pState.recordConsumed(offset, recordSize);
+          pState.recordConsumed(offset, recordSize, offsetGap);
           bytesRead.inc(recordSize);
           bytesReadBySplit.inc(recordSize);
           return true;


[15/50] [abbrv] beam git commit: Made DataflowRunner TransformTranslator public

Posted by ta...@apache.org.
Made DataflowRunner TransformTranslator public


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/5e0f2587
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/5e0f2587
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/5e0f2587

Branch: refs/heads/DSL_SQL
Commit: 5e0f2587b95016d0bbf0a1adcebf55ceb7cbc111
Parents: 7b4fa89
Author: Jeremie Lenfant-Engelmann <je...@google.com>
Authored: Wed Jun 28 16:11:21 2017 -0700
Committer: Tyler Akidau <ta...@apache.org>
Committed: Wed Jul 12 20:01:00 2017 -0700

----------------------------------------------------------------------
 .../org/apache/beam/runners/dataflow/TransformTranslator.java     | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/5e0f2587/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/TransformTranslator.java
----------------------------------------------------------------------
diff --git a/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/TransformTranslator.java b/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/TransformTranslator.java
index a7452b2..7f61b6c 100644
--- a/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/TransformTranslator.java
+++ b/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/TransformTranslator.java
@@ -36,7 +36,8 @@ import org.apache.beam.sdk.values.TupleTag;
  * A {@link TransformTranslator} knows how to translate a particular subclass of {@link PTransform}
  * for the Cloud Dataflow service. It does so by mutating the {@link TranslationContext}.
  */
-interface TransformTranslator<TransformT extends PTransform> {
+@Internal
+public interface TransformTranslator<TransformT extends PTransform> {
   void translate(TransformT transform, TranslationContext context);
 
   /**