Posted to commits@beam.apache.org by dh...@apache.org on 2017/04/19 19:14:35 UTC
[01/50] [abbrv] beam git commit: Changed snappy version to 1.1.4-M3
Repository: beam
Updated Branches:
refs/heads/DSL_SQL ca8760373 -> aa07a1d41
Changed snappy version to 1.1.4-M3
Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/09e0f776
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/09e0f776
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/09e0f776
Branch: refs/heads/DSL_SQL
Commit: 09e0f77657d2673b8f5a78022c8f90ded51799ff
Parents: d988150
Author: Vassil Kolarov <va...@vas.io>
Authored: Wed Mar 29 15:02:36 2017 +0100
Committer: Dan Halperin <dh...@google.com>
Committed: Tue Apr 18 17:57:56 2017 -0700
----------------------------------------------------------------------
sdks/java/core/pom.xml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/beam/blob/09e0f776/sdks/java/core/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/core/pom.xml b/sdks/java/core/pom.xml
index 4ba8e3b..2b12481 100644
--- a/sdks/java/core/pom.xml
+++ b/sdks/java/core/pom.xml
@@ -241,7 +241,7 @@
<dependency>
<groupId>org.xerial.snappy</groupId>
<artifactId>snappy-java</artifactId>
- <version>1.1.2.1</version>
+ <version>1.1.4-M3</version>
</dependency>
<dependency>
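
Note: the only change in this commit is the snappy-java version bump shown in the pom.xml hunk above. As an illustrative, hedged aside (not part of the commit), a compress/uncompress round trip is one quick way to sanity-check the upgraded artifact on the classpath; Snappy.compress and Snappy.uncompress are standard snappy-java API calls, and the class name here is made up for the example.

import java.nio.charset.StandardCharsets;
import org.xerial.snappy.Snappy;

public class SnappyRoundTripCheck {
  public static void main(String[] args) throws Exception {
    byte[] input = "beam snappy upgrade check".getBytes(StandardCharsets.UTF_8);
    // Compress and then uncompress with the upgraded snappy-java artifact.
    byte[] compressed = Snappy.compress(input);
    byte[] restored = Snappy.uncompress(compressed);
    if (!new String(restored, StandardCharsets.UTF_8).equals("beam snappy upgrade check")) {
      throw new AssertionError("snappy round trip failed");
    }
    System.out.println("snappy-java round trip OK, compressed size = " + compressed.length);
  }
}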
[06/50] [abbrv] beam git commit: Extracts interface from PushbackSideInputDoFnRunner
Posted by dh...@apache.org.
Extracts interface from PushbackSideInputDoFnRunner
Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/7e1a2675
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/7e1a2675
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/7e1a2675
Branch: refs/heads/DSL_SQL
Commit: 7e1a2675699ef14291e8c112010be66fff4b8581
Parents: 1cc16b0
Author: Eugene Kirpichov <ki...@google.com>
Authored: Mon Apr 17 14:41:53 2017 -0700
Committer: Eugene Kirpichov <ki...@google.com>
Committed: Tue Apr 18 18:02:06 2017 -0700
----------------------------------------------------------------------
.../operators/ApexParDoOperator.java | 3 +-
.../core/PushbackSideInputDoFnRunner.java | 106 +------
.../core/SimplePushbackSideInputDoFnRunner.java | 115 ++++++++
.../core/PushbackSideInputDoFnRunnerTest.java | 282 -------------------
.../SimplePushbackSideInputDoFnRunnerTest.java | 282 +++++++++++++++++++
.../beam/runners/direct/ParDoEvaluator.java | 3 +-
.../wrappers/streaming/DoFnOperator.java | 12 +-
.../streaming/SplittableDoFnOperator.java | 2 +-
.../wrappers/streaming/WindowDoFnOperator.java | 2 +-
9 files changed, 424 insertions(+), 383 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/beam/blob/7e1a2675/runners/apex/src/main/java/org/apache/beam/runners/apex/translation/operators/ApexParDoOperator.java
----------------------------------------------------------------------
diff --git a/runners/apex/src/main/java/org/apache/beam/runners/apex/translation/operators/ApexParDoOperator.java b/runners/apex/src/main/java/org/apache/beam/runners/apex/translation/operators/ApexParDoOperator.java
index bad5be2..52d1d43 100644
--- a/runners/apex/src/main/java/org/apache/beam/runners/apex/translation/operators/ApexParDoOperator.java
+++ b/runners/apex/src/main/java/org/apache/beam/runners/apex/translation/operators/ApexParDoOperator.java
@@ -48,6 +48,7 @@ import org.apache.beam.runners.core.InMemoryTimerInternals;
import org.apache.beam.runners.core.KeyedWorkItem;
import org.apache.beam.runners.core.PushbackSideInputDoFnRunner;
import org.apache.beam.runners.core.SideInputHandler;
+import org.apache.beam.runners.core.SimplePushbackSideInputDoFnRunner;
import org.apache.beam.runners.core.StateInternals;
import org.apache.beam.runners.core.StateNamespace;
import org.apache.beam.runners.core.StatefulDoFnRunner;
@@ -368,7 +369,7 @@ public class ApexParDoOperator<InputT, OutputT> extends BaseOperator implements
}
pushbackDoFnRunner =
- PushbackSideInputDoFnRunner.create(doFnRunner, sideInputs, sideInputHandler);
+ SimplePushbackSideInputDoFnRunner.create(doFnRunner, sideInputs, sideInputHandler);
}
http://git-wip-us.apache.org/repos/asf/beam/blob/7e1a2675/runners/core-java/src/main/java/org/apache/beam/runners/core/PushbackSideInputDoFnRunner.java
----------------------------------------------------------------------
diff --git a/runners/core-java/src/main/java/org/apache/beam/runners/core/PushbackSideInputDoFnRunner.java b/runners/core-java/src/main/java/org/apache/beam/runners/core/PushbackSideInputDoFnRunner.java
index 4ad20b5..bab1dc7 100644
--- a/runners/core-java/src/main/java/org/apache/beam/runners/core/PushbackSideInputDoFnRunner.java
+++ b/runners/core-java/src/main/java/org/apache/beam/runners/core/PushbackSideInputDoFnRunner.java
@@ -17,113 +17,35 @@
*/
package org.apache.beam.runners.core;
-import com.google.common.collect.ImmutableList;
-import com.google.common.collect.Iterables;
-import java.util.Collection;
-import java.util.Collections;
-import java.util.HashSet;
-import java.util.Set;
+import org.apache.beam.sdk.transforms.DoFn;
import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
-import org.apache.beam.sdk.util.ReadyCheckingSideInputReader;
import org.apache.beam.sdk.util.TimeDomain;
import org.apache.beam.sdk.util.WindowedValue;
-import org.apache.beam.sdk.values.PCollectionView;
import org.joda.time.Instant;
/**
- * A {@link DoFnRunner} that can refuse to process elements that are not ready, instead returning
- * them via the {@link #processElementInReadyWindows(WindowedValue)}.
+ * Interface for runners of {@link DoFn}'s that support pushback when reading side inputs,
+ * i.e. return elements that could not be processed because they require reading a side input
+ * window that is not ready.
*/
-public class PushbackSideInputDoFnRunner<InputT, OutputT> implements DoFnRunner<InputT, OutputT> {
- private final DoFnRunner<InputT, OutputT> underlying;
- private final Collection<PCollectionView<?>> views;
- private final ReadyCheckingSideInputReader sideInputReader;
-
- private Set<BoundedWindow> notReadyWindows;
-
- public static <InputT, OutputT> PushbackSideInputDoFnRunner<InputT, OutputT> create(
- DoFnRunner<InputT, OutputT> underlying,
- Collection<PCollectionView<?>> views,
- ReadyCheckingSideInputReader sideInputReader) {
- return new PushbackSideInputDoFnRunner<>(underlying, views, sideInputReader);
- }
-
- private PushbackSideInputDoFnRunner(
- DoFnRunner<InputT, OutputT> underlying,
- Collection<PCollectionView<?>> views,
- ReadyCheckingSideInputReader sideInputReader) {
- this.underlying = underlying;
- this.views = views;
- this.sideInputReader = sideInputReader;
- }
-
- @Override
- public void startBundle() {
- notReadyWindows = new HashSet<>();
- underlying.startBundle();
- }
+public interface PushbackSideInputDoFnRunner<InputT, OutputT> {
+ /** Calls the underlying {@link DoFn.StartBundle} method. */
+ void startBundle();
/**
- * Call the underlying {@link DoFnRunner#processElement(WindowedValue)} for the provided element
+ * Call the underlying {@link DoFn.ProcessElement} method for the provided element
* for each window the element is in that is ready.
*
* @param elem the element to process in all ready windows
* @return each element that could not be processed because it requires a side input window
* that is not ready.
*/
- public Iterable<WindowedValue<InputT>> processElementInReadyWindows(WindowedValue<InputT> elem) {
- if (views.isEmpty()) {
- // When there are no side inputs, we can preserve the compressed representation.
- processElement(elem);
- return Collections.emptyList();
- }
- ImmutableList.Builder<WindowedValue<InputT>> pushedBack = ImmutableList.builder();
- for (WindowedValue<InputT> windowElem : elem.explodeWindows()) {
- BoundedWindow mainInputWindow = Iterables.getOnlyElement(windowElem.getWindows());
- if (isReady(mainInputWindow)) {
- // When there are any side inputs, we have to process the element in each window
- // individually, to disambiguate access to per-window side inputs.
- processElement(windowElem);
- } else {
- notReadyWindows.add(mainInputWindow);
- pushedBack.add(windowElem);
- }
- }
- return pushedBack.build();
- }
-
- private boolean isReady(BoundedWindow mainInputWindow) {
- if (notReadyWindows.contains(mainInputWindow)) {
- return false;
- }
- for (PCollectionView<?> view : views) {
- BoundedWindow sideInputWindow =
- view.getWindowMappingFn().getSideInputWindow(mainInputWindow);
- if (!sideInputReader.isReady(view, sideInputWindow)) {
- return false;
- }
- }
- return true;
- }
+ Iterable<WindowedValue<InputT>> processElementInReadyWindows(WindowedValue<InputT> elem);
- @Override
- public void processElement(WindowedValue<InputT> elem) {
- underlying.processElement(elem);
- }
+ /** Calls the underlying {@link DoFn.OnTimer} method. */
+ void onTimer(String timerId, BoundedWindow window, Instant timestamp,
+ TimeDomain timeDomain);
- @Override
- public void onTimer(String timerId, BoundedWindow window, Instant timestamp,
- TimeDomain timeDomain) {
- underlying.onTimer(timerId, window, timestamp, timeDomain);
- }
-
- /**
- * Call the underlying {@link DoFnRunner#finishBundle()}.
- */
- @Override
- public void finishBundle() {
- notReadyWindows = null;
- underlying.finishBundle();
- }
+ /** Calls the underlying {@link DoFn.FinishBundle} method. */
+ void finishBundle();
}
-
http://git-wip-us.apache.org/repos/asf/beam/blob/7e1a2675/runners/core-java/src/main/java/org/apache/beam/runners/core/SimplePushbackSideInputDoFnRunner.java
----------------------------------------------------------------------
diff --git a/runners/core-java/src/main/java/org/apache/beam/runners/core/SimplePushbackSideInputDoFnRunner.java b/runners/core-java/src/main/java/org/apache/beam/runners/core/SimplePushbackSideInputDoFnRunner.java
new file mode 100644
index 0000000..50d301b
--- /dev/null
+++ b/runners/core-java/src/main/java/org/apache/beam/runners/core/SimplePushbackSideInputDoFnRunner.java
@@ -0,0 +1,115 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.runners.core;
+
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.Iterables;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.Set;
+import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
+import org.apache.beam.sdk.util.ReadyCheckingSideInputReader;
+import org.apache.beam.sdk.util.TimeDomain;
+import org.apache.beam.sdk.util.WindowedValue;
+import org.apache.beam.sdk.values.PCollectionView;
+import org.joda.time.Instant;
+
+/**
+ * A {@link DoFnRunner} that can refuse to process elements that are not ready, instead returning
+ * them via the {@link #processElementInReadyWindows(WindowedValue)}.
+ */
+public class SimplePushbackSideInputDoFnRunner<InputT, OutputT>
+ implements PushbackSideInputDoFnRunner<InputT, OutputT> {
+ private final DoFnRunner<InputT, OutputT> underlying;
+ private final Collection<PCollectionView<?>> views;
+ private final ReadyCheckingSideInputReader sideInputReader;
+
+ private Set<BoundedWindow> notReadyWindows;
+
+ public static <InputT, OutputT> SimplePushbackSideInputDoFnRunner<InputT, OutputT> create(
+ DoFnRunner<InputT, OutputT> underlying,
+ Collection<PCollectionView<?>> views,
+ ReadyCheckingSideInputReader sideInputReader) {
+ return new SimplePushbackSideInputDoFnRunner<>(underlying, views, sideInputReader);
+ }
+
+ private SimplePushbackSideInputDoFnRunner(
+ DoFnRunner<InputT, OutputT> underlying,
+ Collection<PCollectionView<?>> views,
+ ReadyCheckingSideInputReader sideInputReader) {
+ this.underlying = underlying;
+ this.views = views;
+ this.sideInputReader = sideInputReader;
+ }
+
+ @Override
+ public void startBundle() {
+ notReadyWindows = new HashSet<>();
+ underlying.startBundle();
+ }
+
+ @Override
+ public Iterable<WindowedValue<InputT>> processElementInReadyWindows(WindowedValue<InputT> elem) {
+ if (views.isEmpty()) {
+ // When there are no side inputs, we can preserve the compressed representation.
+ underlying.processElement(elem);
+ return Collections.emptyList();
+ }
+ ImmutableList.Builder<WindowedValue<InputT>> pushedBack = ImmutableList.builder();
+ for (WindowedValue<InputT> windowElem : elem.explodeWindows()) {
+ BoundedWindow mainInputWindow = Iterables.getOnlyElement(windowElem.getWindows());
+ if (isReady(mainInputWindow)) {
+ // When there are any side inputs, we have to process the element in each window
+ // individually, to disambiguate access to per-window side inputs.
+ underlying.processElement(windowElem);
+ } else {
+ notReadyWindows.add(mainInputWindow);
+ pushedBack.add(windowElem);
+ }
+ }
+ return pushedBack.build();
+ }
+
+ private boolean isReady(BoundedWindow mainInputWindow) {
+ if (notReadyWindows.contains(mainInputWindow)) {
+ return false;
+ }
+ for (PCollectionView<?> view : views) {
+ BoundedWindow sideInputWindow =
+ view.getWindowMappingFn().getSideInputWindow(mainInputWindow);
+ if (!sideInputReader.isReady(view, sideInputWindow)) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ @Override
+ public void onTimer(String timerId, BoundedWindow window, Instant timestamp,
+ TimeDomain timeDomain) {
+ underlying.onTimer(timerId, window, timestamp, timeDomain);
+ }
+
+ @Override
+ public void finishBundle() {
+ notReadyWindows = null;
+ underlying.finishBundle();
+ }
+}
+
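
To summarize the refactoring above: PushbackSideInputDoFnRunner is now only the interface, and SimplePushbackSideInputDoFnRunner is its implementation, created through the same static create() factory as before. The following sketch (illustrative only, not part of the commit) shows roughly how a hosting runner drives the new interface per bundle and buffers pushed-back elements for retry once the side input windows become ready; PushbackBundleSketch and runBundle are hypothetical names introduced just for this example.

import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import org.apache.beam.runners.core.DoFnRunner;
import org.apache.beam.runners.core.PushbackSideInputDoFnRunner;
import org.apache.beam.runners.core.SimplePushbackSideInputDoFnRunner;
import org.apache.beam.sdk.util.ReadyCheckingSideInputReader;
import org.apache.beam.sdk.util.WindowedValue;
import org.apache.beam.sdk.values.PCollectionView;

/** Illustrative sketch only; not part of this commit. */
class PushbackBundleSketch<InputT, OutputT> {
  /** Runs one bundle and returns the elements that must be retried in a later bundle. */
  List<WindowedValue<InputT>> runBundle(
      DoFnRunner<InputT, OutputT> doFnRunner,
      Collection<PCollectionView<?>> views,
      ReadyCheckingSideInputReader sideInputReader,
      Iterable<WindowedValue<InputT>> bundleElements) {
    PushbackSideInputDoFnRunner<InputT, OutputT> runner =
        SimplePushbackSideInputDoFnRunner.create(doFnRunner, views, sideInputReader);
    List<WindowedValue<InputT>> pushedBack = new ArrayList<>();
    runner.startBundle();
    for (WindowedValue<InputT> elem : bundleElements) {
      // Windows whose side inputs are not yet ready are returned here and must be
      // reprocessed by the hosting runner once those side inputs become available.
      for (WindowedValue<InputT> notReady : runner.processElementInReadyWindows(elem)) {
        pushedBack.add(notReady);
      }
    }
    runner.finishBundle();
    return pushedBack;
  }
}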
http://git-wip-us.apache.org/repos/asf/beam/blob/7e1a2675/runners/core-java/src/test/java/org/apache/beam/runners/core/PushbackSideInputDoFnRunnerTest.java
----------------------------------------------------------------------
diff --git a/runners/core-java/src/test/java/org/apache/beam/runners/core/PushbackSideInputDoFnRunnerTest.java b/runners/core-java/src/test/java/org/apache/beam/runners/core/PushbackSideInputDoFnRunnerTest.java
deleted file mode 100644
index cb057b8..0000000
--- a/runners/core-java/src/test/java/org/apache/beam/runners/core/PushbackSideInputDoFnRunnerTest.java
+++ /dev/null
@@ -1,282 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.beam.runners.core;
-
-import static org.hamcrest.Matchers.contains;
-import static org.hamcrest.Matchers.containsInAnyOrder;
-import static org.hamcrest.Matchers.emptyIterable;
-import static org.hamcrest.Matchers.equalTo;
-import static org.hamcrest.Matchers.is;
-import static org.junit.Assert.assertThat;
-import static org.mockito.Mockito.when;
-
-import com.google.common.collect.ImmutableList;
-import java.util.ArrayList;
-import java.util.List;
-import org.apache.beam.runners.core.TimerInternals.TimerData;
-import org.apache.beam.sdk.testing.TestPipeline;
-import org.apache.beam.sdk.transforms.Create;
-import org.apache.beam.sdk.transforms.Sum;
-import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
-import org.apache.beam.sdk.transforms.windowing.GlobalWindow;
-import org.apache.beam.sdk.transforms.windowing.IntervalWindow;
-import org.apache.beam.sdk.transforms.windowing.PaneInfo;
-import org.apache.beam.sdk.transforms.windowing.Window;
-import org.apache.beam.sdk.util.IdentitySideInputWindowFn;
-import org.apache.beam.sdk.util.ReadyCheckingSideInputReader;
-import org.apache.beam.sdk.util.TimeDomain;
-import org.apache.beam.sdk.util.WindowedValue;
-import org.apache.beam.sdk.values.PCollection;
-import org.apache.beam.sdk.values.PCollectionView;
-import org.hamcrest.Matchers;
-import org.joda.time.Instant;
-import org.junit.Before;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.runner.RunWith;
-import org.junit.runners.JUnit4;
-import org.mockito.Mock;
-import org.mockito.Mockito;
-import org.mockito.MockitoAnnotations;
-
-/**
- * Tests for {@link PushbackSideInputDoFnRunner}.
- */
-@RunWith(JUnit4.class)
-public class PushbackSideInputDoFnRunnerTest {
- @Mock private ReadyCheckingSideInputReader reader;
- private TestDoFnRunner<Integer, Integer> underlying;
- private PCollectionView<Integer> singletonView;
-
- @Rule
- public TestPipeline p = TestPipeline.create().enableAbandonedNodeEnforcement(false);
-
- @Before
- public void setup() {
- MockitoAnnotations.initMocks(this);
- PCollection<Integer> created = p.apply(Create.of(1, 2, 3));
- singletonView =
- created
- .apply(Window.into(new IdentitySideInputWindowFn()))
- .apply(Sum.integersGlobally().asSingletonView());
-
- underlying = new TestDoFnRunner<>();
- }
-
- private PushbackSideInputDoFnRunner<Integer, Integer> createRunner(
- ImmutableList<PCollectionView<?>> views) {
- PushbackSideInputDoFnRunner<Integer, Integer> runner =
- PushbackSideInputDoFnRunner.create(underlying, views, reader);
- runner.startBundle();
- return runner;
- }
-
- @Test
- public void startFinishBundleDelegates() {
- PushbackSideInputDoFnRunner runner =
- createRunner(ImmutableList.<PCollectionView<?>>of(singletonView));
-
- assertThat(underlying.started, is(true));
- assertThat(underlying.finished, is(false));
- runner.finishBundle();
- assertThat(underlying.finished, is(true));
- }
-
- @Test
- public void processElementSideInputNotReady() {
- when(reader.isReady(Mockito.eq(singletonView), Mockito.any(BoundedWindow.class)))
- .thenReturn(false);
-
- PushbackSideInputDoFnRunner<Integer, Integer> runner =
- createRunner(ImmutableList.<PCollectionView<?>>of(singletonView));
-
- WindowedValue<Integer> oneWindow =
- WindowedValue.of(
- 2,
- new Instant(-2),
- new IntervalWindow(new Instant(-500L), new Instant(0L)),
- PaneInfo.ON_TIME_AND_ONLY_FIRING);
- Iterable<WindowedValue<Integer>> oneWindowPushback =
- runner.processElementInReadyWindows(oneWindow);
- assertThat(oneWindowPushback, containsInAnyOrder(oneWindow));
- assertThat(underlying.inputElems, Matchers.<WindowedValue<Integer>>emptyIterable());
- }
-
- @Test
- public void processElementSideInputNotReadyMultipleWindows() {
- when(reader.isReady(Mockito.eq(singletonView), Mockito.any(BoundedWindow.class)))
- .thenReturn(false);
-
- PushbackSideInputDoFnRunner<Integer, Integer> runner =
- createRunner(ImmutableList.<PCollectionView<?>>of(singletonView));
-
- WindowedValue<Integer> multiWindow =
- WindowedValue.of(
- 2,
- new Instant(-2),
- ImmutableList.of(
- new IntervalWindow(new Instant(-500L), new Instant(0L)),
- new IntervalWindow(BoundedWindow.TIMESTAMP_MIN_VALUE, new Instant(250L)),
- GlobalWindow.INSTANCE),
- PaneInfo.ON_TIME_AND_ONLY_FIRING);
- Iterable<WindowedValue<Integer>> multiWindowPushback =
- runner.processElementInReadyWindows(multiWindow);
- assertThat(multiWindowPushback, equalTo(multiWindow.explodeWindows()));
- assertThat(underlying.inputElems, Matchers.<WindowedValue<Integer>>emptyIterable());
- }
-
- @Test
- public void processElementSideInputNotReadySomeWindows() {
- when(reader.isReady(Mockito.eq(singletonView), Mockito.eq(GlobalWindow.INSTANCE)))
- .thenReturn(false);
- when(
- reader.isReady(
- Mockito.eq(singletonView),
- org.mockito.AdditionalMatchers.not(Mockito.eq(GlobalWindow.INSTANCE))))
- .thenReturn(true);
-
- PushbackSideInputDoFnRunner<Integer, Integer> runner =
- createRunner(ImmutableList.<PCollectionView<?>>of(singletonView));
-
- IntervalWindow littleWindow = new IntervalWindow(new Instant(-500L), new Instant(0L));
- IntervalWindow bigWindow =
- new IntervalWindow(BoundedWindow.TIMESTAMP_MIN_VALUE, new Instant(250L));
- WindowedValue<Integer> multiWindow =
- WindowedValue.of(
- 2,
- new Instant(-2),
- ImmutableList.of(littleWindow, bigWindow, GlobalWindow.INSTANCE),
- PaneInfo.NO_FIRING);
- Iterable<WindowedValue<Integer>> multiWindowPushback =
- runner.processElementInReadyWindows(multiWindow);
- assertThat(
- multiWindowPushback,
- containsInAnyOrder(WindowedValue.timestampedValueInGlobalWindow(2, new Instant(-2L))));
- assertThat(
- underlying.inputElems,
- containsInAnyOrder(
- WindowedValue.of(
- 2, new Instant(-2), ImmutableList.of(littleWindow), PaneInfo.NO_FIRING),
- WindowedValue.of(2, new Instant(-2), ImmutableList.of(bigWindow), PaneInfo.NO_FIRING)));
- }
-
- @Test
- public void processElementSideInputReadyAllWindows() {
- when(reader.isReady(Mockito.eq(singletonView), Mockito.any(BoundedWindow.class)))
- .thenReturn(true);
-
- ImmutableList<PCollectionView<?>> views = ImmutableList.<PCollectionView<?>>of(singletonView);
- PushbackSideInputDoFnRunner<Integer, Integer> runner = createRunner(views);
-
- WindowedValue<Integer> multiWindow =
- WindowedValue.of(
- 2,
- new Instant(-2),
- ImmutableList.of(
- new IntervalWindow(new Instant(-500L), new Instant(0L)),
- new IntervalWindow(BoundedWindow.TIMESTAMP_MIN_VALUE, new Instant(250L)),
- GlobalWindow.INSTANCE),
- PaneInfo.ON_TIME_AND_ONLY_FIRING);
- Iterable<WindowedValue<Integer>> multiWindowPushback =
- runner.processElementInReadyWindows(multiWindow);
- assertThat(multiWindowPushback, emptyIterable());
- assertThat(
- underlying.inputElems,
- containsInAnyOrder(ImmutableList.copyOf(multiWindow.explodeWindows()).toArray()));
- }
-
- @Test
- public void processElementNoSideInputs() {
- PushbackSideInputDoFnRunner<Integer, Integer> runner =
- createRunner(ImmutableList.<PCollectionView<?>>of());
-
- WindowedValue<Integer> multiWindow =
- WindowedValue.of(
- 2,
- new Instant(-2),
- ImmutableList.of(
- new IntervalWindow(new Instant(-500L), new Instant(0L)),
- new IntervalWindow(BoundedWindow.TIMESTAMP_MIN_VALUE, new Instant(250L)),
- GlobalWindow.INSTANCE),
- PaneInfo.ON_TIME_AND_ONLY_FIRING);
- Iterable<WindowedValue<Integer>> multiWindowPushback =
- runner.processElementInReadyWindows(multiWindow);
- assertThat(multiWindowPushback, emptyIterable());
- // Should preserve the compressed representation when there's no side inputs.
- assertThat(underlying.inputElems, containsInAnyOrder(multiWindow));
- }
-
- /** Tests that a call to onTimer gets delegated. */
- @Test
- public void testOnTimerCalled() {
- PushbackSideInputDoFnRunner<Integer, Integer> runner =
- createRunner(ImmutableList.<PCollectionView<?>>of());
-
- String timerId = "fooTimer";
- IntervalWindow window = new IntervalWindow(new Instant(4), new Instant(16));
- Instant timestamp = new Instant(72);
-
- // Mocking is not easily compatible with annotation analysis, so we manually record
- // the method call.
- runner.onTimer(timerId, window, new Instant(timestamp), TimeDomain.EVENT_TIME);
-
- assertThat(
- underlying.firedTimers,
- contains(
- TimerData.of(
- timerId,
- StateNamespaces.window(IntervalWindow.getCoder(), window),
- timestamp,
- TimeDomain.EVENT_TIME)));
- }
-
- private static class TestDoFnRunner<InputT, OutputT> implements DoFnRunner<InputT, OutputT> {
- List<WindowedValue<InputT>> inputElems;
- List<TimerData> firedTimers;
- private boolean started = false;
- private boolean finished = false;
-
- @Override
- public void startBundle() {
- started = true;
- inputElems = new ArrayList<>();
- firedTimers = new ArrayList<>();
- }
-
- @Override
- public void processElement(WindowedValue<InputT> elem) {
- inputElems.add(elem);
- }
-
- @Override
- public void onTimer(String timerId, BoundedWindow window, Instant timestamp,
- TimeDomain timeDomain) {
- firedTimers.add(
- TimerData.of(
- timerId,
- StateNamespaces.window(IntervalWindow.getCoder(), (IntervalWindow) window),
- timestamp,
- timeDomain));
- }
-
- @Override
- public void finishBundle() {
- finished = true;
- }
- }
-}
http://git-wip-us.apache.org/repos/asf/beam/blob/7e1a2675/runners/core-java/src/test/java/org/apache/beam/runners/core/SimplePushbackSideInputDoFnRunnerTest.java
----------------------------------------------------------------------
diff --git a/runners/core-java/src/test/java/org/apache/beam/runners/core/SimplePushbackSideInputDoFnRunnerTest.java b/runners/core-java/src/test/java/org/apache/beam/runners/core/SimplePushbackSideInputDoFnRunnerTest.java
new file mode 100644
index 0000000..ba3f926
--- /dev/null
+++ b/runners/core-java/src/test/java/org/apache/beam/runners/core/SimplePushbackSideInputDoFnRunnerTest.java
@@ -0,0 +1,282 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.runners.core;
+
+import static org.hamcrest.Matchers.contains;
+import static org.hamcrest.Matchers.containsInAnyOrder;
+import static org.hamcrest.Matchers.emptyIterable;
+import static org.hamcrest.Matchers.equalTo;
+import static org.hamcrest.Matchers.is;
+import static org.junit.Assert.assertThat;
+import static org.mockito.Mockito.when;
+
+import com.google.common.collect.ImmutableList;
+import java.util.ArrayList;
+import java.util.List;
+import org.apache.beam.runners.core.TimerInternals.TimerData;
+import org.apache.beam.sdk.testing.TestPipeline;
+import org.apache.beam.sdk.transforms.Create;
+import org.apache.beam.sdk.transforms.Sum;
+import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
+import org.apache.beam.sdk.transforms.windowing.GlobalWindow;
+import org.apache.beam.sdk.transforms.windowing.IntervalWindow;
+import org.apache.beam.sdk.transforms.windowing.PaneInfo;
+import org.apache.beam.sdk.transforms.windowing.Window;
+import org.apache.beam.sdk.util.IdentitySideInputWindowFn;
+import org.apache.beam.sdk.util.ReadyCheckingSideInputReader;
+import org.apache.beam.sdk.util.TimeDomain;
+import org.apache.beam.sdk.util.WindowedValue;
+import org.apache.beam.sdk.values.PCollection;
+import org.apache.beam.sdk.values.PCollectionView;
+import org.hamcrest.Matchers;
+import org.joda.time.Instant;
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+import org.mockito.Mock;
+import org.mockito.Mockito;
+import org.mockito.MockitoAnnotations;
+
+/**
+ * Tests for {@link SimplePushbackSideInputDoFnRunner}.
+ */
+@RunWith(JUnit4.class)
+public class SimplePushbackSideInputDoFnRunnerTest {
+ @Mock private ReadyCheckingSideInputReader reader;
+ private TestDoFnRunner<Integer, Integer> underlying;
+ private PCollectionView<Integer> singletonView;
+
+ @Rule
+ public TestPipeline p = TestPipeline.create().enableAbandonedNodeEnforcement(false);
+
+ @Before
+ public void setup() {
+ MockitoAnnotations.initMocks(this);
+ PCollection<Integer> created = p.apply(Create.of(1, 2, 3));
+ singletonView =
+ created
+ .apply(Window.into(new IdentitySideInputWindowFn()))
+ .apply(Sum.integersGlobally().asSingletonView());
+
+ underlying = new TestDoFnRunner<>();
+ }
+
+ private SimplePushbackSideInputDoFnRunner<Integer, Integer> createRunner(
+ ImmutableList<PCollectionView<?>> views) {
+ SimplePushbackSideInputDoFnRunner<Integer, Integer> runner =
+ SimplePushbackSideInputDoFnRunner.create(underlying, views, reader);
+ runner.startBundle();
+ return runner;
+ }
+
+ @Test
+ public void startFinishBundleDelegates() {
+ PushbackSideInputDoFnRunner runner =
+ createRunner(ImmutableList.<PCollectionView<?>>of(singletonView));
+
+ assertThat(underlying.started, is(true));
+ assertThat(underlying.finished, is(false));
+ runner.finishBundle();
+ assertThat(underlying.finished, is(true));
+ }
+
+ @Test
+ public void processElementSideInputNotReady() {
+ when(reader.isReady(Mockito.eq(singletonView), Mockito.any(BoundedWindow.class)))
+ .thenReturn(false);
+
+ SimplePushbackSideInputDoFnRunner<Integer, Integer> runner =
+ createRunner(ImmutableList.<PCollectionView<?>>of(singletonView));
+
+ WindowedValue<Integer> oneWindow =
+ WindowedValue.of(
+ 2,
+ new Instant(-2),
+ new IntervalWindow(new Instant(-500L), new Instant(0L)),
+ PaneInfo.ON_TIME_AND_ONLY_FIRING);
+ Iterable<WindowedValue<Integer>> oneWindowPushback =
+ runner.processElementInReadyWindows(oneWindow);
+ assertThat(oneWindowPushback, containsInAnyOrder(oneWindow));
+ assertThat(underlying.inputElems, Matchers.<WindowedValue<Integer>>emptyIterable());
+ }
+
+ @Test
+ public void processElementSideInputNotReadyMultipleWindows() {
+ when(reader.isReady(Mockito.eq(singletonView), Mockito.any(BoundedWindow.class)))
+ .thenReturn(false);
+
+ SimplePushbackSideInputDoFnRunner<Integer, Integer> runner =
+ createRunner(ImmutableList.<PCollectionView<?>>of(singletonView));
+
+ WindowedValue<Integer> multiWindow =
+ WindowedValue.of(
+ 2,
+ new Instant(-2),
+ ImmutableList.of(
+ new IntervalWindow(new Instant(-500L), new Instant(0L)),
+ new IntervalWindow(BoundedWindow.TIMESTAMP_MIN_VALUE, new Instant(250L)),
+ GlobalWindow.INSTANCE),
+ PaneInfo.ON_TIME_AND_ONLY_FIRING);
+ Iterable<WindowedValue<Integer>> multiWindowPushback =
+ runner.processElementInReadyWindows(multiWindow);
+ assertThat(multiWindowPushback, equalTo(multiWindow.explodeWindows()));
+ assertThat(underlying.inputElems, Matchers.<WindowedValue<Integer>>emptyIterable());
+ }
+
+ @Test
+ public void processElementSideInputNotReadySomeWindows() {
+ when(reader.isReady(Mockito.eq(singletonView), Mockito.eq(GlobalWindow.INSTANCE)))
+ .thenReturn(false);
+ when(
+ reader.isReady(
+ Mockito.eq(singletonView),
+ org.mockito.AdditionalMatchers.not(Mockito.eq(GlobalWindow.INSTANCE))))
+ .thenReturn(true);
+
+ SimplePushbackSideInputDoFnRunner<Integer, Integer> runner =
+ createRunner(ImmutableList.<PCollectionView<?>>of(singletonView));
+
+ IntervalWindow littleWindow = new IntervalWindow(new Instant(-500L), new Instant(0L));
+ IntervalWindow bigWindow =
+ new IntervalWindow(BoundedWindow.TIMESTAMP_MIN_VALUE, new Instant(250L));
+ WindowedValue<Integer> multiWindow =
+ WindowedValue.of(
+ 2,
+ new Instant(-2),
+ ImmutableList.of(littleWindow, bigWindow, GlobalWindow.INSTANCE),
+ PaneInfo.NO_FIRING);
+ Iterable<WindowedValue<Integer>> multiWindowPushback =
+ runner.processElementInReadyWindows(multiWindow);
+ assertThat(
+ multiWindowPushback,
+ containsInAnyOrder(WindowedValue.timestampedValueInGlobalWindow(2, new Instant(-2L))));
+ assertThat(
+ underlying.inputElems,
+ containsInAnyOrder(
+ WindowedValue.of(
+ 2, new Instant(-2), ImmutableList.of(littleWindow), PaneInfo.NO_FIRING),
+ WindowedValue.of(2, new Instant(-2), ImmutableList.of(bigWindow), PaneInfo.NO_FIRING)));
+ }
+
+ @Test
+ public void processElementSideInputReadyAllWindows() {
+ when(reader.isReady(Mockito.eq(singletonView), Mockito.any(BoundedWindow.class)))
+ .thenReturn(true);
+
+ ImmutableList<PCollectionView<?>> views = ImmutableList.<PCollectionView<?>>of(singletonView);
+ SimplePushbackSideInputDoFnRunner<Integer, Integer> runner = createRunner(views);
+
+ WindowedValue<Integer> multiWindow =
+ WindowedValue.of(
+ 2,
+ new Instant(-2),
+ ImmutableList.of(
+ new IntervalWindow(new Instant(-500L), new Instant(0L)),
+ new IntervalWindow(BoundedWindow.TIMESTAMP_MIN_VALUE, new Instant(250L)),
+ GlobalWindow.INSTANCE),
+ PaneInfo.ON_TIME_AND_ONLY_FIRING);
+ Iterable<WindowedValue<Integer>> multiWindowPushback =
+ runner.processElementInReadyWindows(multiWindow);
+ assertThat(multiWindowPushback, emptyIterable());
+ assertThat(
+ underlying.inputElems,
+ containsInAnyOrder(ImmutableList.copyOf(multiWindow.explodeWindows()).toArray()));
+ }
+
+ @Test
+ public void processElementNoSideInputs() {
+ SimplePushbackSideInputDoFnRunner<Integer, Integer> runner =
+ createRunner(ImmutableList.<PCollectionView<?>>of());
+
+ WindowedValue<Integer> multiWindow =
+ WindowedValue.of(
+ 2,
+ new Instant(-2),
+ ImmutableList.of(
+ new IntervalWindow(new Instant(-500L), new Instant(0L)),
+ new IntervalWindow(BoundedWindow.TIMESTAMP_MIN_VALUE, new Instant(250L)),
+ GlobalWindow.INSTANCE),
+ PaneInfo.ON_TIME_AND_ONLY_FIRING);
+ Iterable<WindowedValue<Integer>> multiWindowPushback =
+ runner.processElementInReadyWindows(multiWindow);
+ assertThat(multiWindowPushback, emptyIterable());
+ // Should preserve the compressed representation when there's no side inputs.
+ assertThat(underlying.inputElems, containsInAnyOrder(multiWindow));
+ }
+
+ /** Tests that a call to onTimer gets delegated. */
+ @Test
+ public void testOnTimerCalled() {
+ PushbackSideInputDoFnRunner<Integer, Integer> runner =
+ createRunner(ImmutableList.<PCollectionView<?>>of());
+
+ String timerId = "fooTimer";
+ IntervalWindow window = new IntervalWindow(new Instant(4), new Instant(16));
+ Instant timestamp = new Instant(72);
+
+ // Mocking is not easily compatible with annotation analysis, so we manually record
+ // the method call.
+ runner.onTimer(timerId, window, new Instant(timestamp), TimeDomain.EVENT_TIME);
+
+ assertThat(
+ underlying.firedTimers,
+ contains(
+ TimerData.of(
+ timerId,
+ StateNamespaces.window(IntervalWindow.getCoder(), window),
+ timestamp,
+ TimeDomain.EVENT_TIME)));
+ }
+
+ private static class TestDoFnRunner<InputT, OutputT> implements DoFnRunner<InputT, OutputT> {
+ List<WindowedValue<InputT>> inputElems;
+ List<TimerData> firedTimers;
+ private boolean started = false;
+ private boolean finished = false;
+
+ @Override
+ public void startBundle() {
+ started = true;
+ inputElems = new ArrayList<>();
+ firedTimers = new ArrayList<>();
+ }
+
+ @Override
+ public void processElement(WindowedValue<InputT> elem) {
+ inputElems.add(elem);
+ }
+
+ @Override
+ public void onTimer(String timerId, BoundedWindow window, Instant timestamp,
+ TimeDomain timeDomain) {
+ firedTimers.add(
+ TimerData.of(
+ timerId,
+ StateNamespaces.window(IntervalWindow.getCoder(), (IntervalWindow) window),
+ timestamp,
+ timeDomain));
+ }
+
+ @Override
+ public void finishBundle() {
+ finished = true;
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/beam/blob/7e1a2675/runners/direct-java/src/main/java/org/apache/beam/runners/direct/ParDoEvaluator.java
----------------------------------------------------------------------
diff --git a/runners/direct-java/src/main/java/org/apache/beam/runners/direct/ParDoEvaluator.java b/runners/direct-java/src/main/java/org/apache/beam/runners/direct/ParDoEvaluator.java
index 131716f..bab7b2c 100644
--- a/runners/direct-java/src/main/java/org/apache/beam/runners/direct/ParDoEvaluator.java
+++ b/runners/direct-java/src/main/java/org/apache/beam/runners/direct/ParDoEvaluator.java
@@ -26,6 +26,7 @@ import org.apache.beam.runners.core.DoFnRunner;
import org.apache.beam.runners.core.DoFnRunners;
import org.apache.beam.runners.core.DoFnRunners.OutputManager;
import org.apache.beam.runners.core.PushbackSideInputDoFnRunner;
+import org.apache.beam.runners.core.SimplePushbackSideInputDoFnRunner;
import org.apache.beam.runners.core.TimerInternals.TimerData;
import org.apache.beam.runners.direct.DirectExecutionContext.DirectStepContext;
import org.apache.beam.runners.direct.DirectRunner.UncommittedBundle;
@@ -85,7 +86,7 @@ class ParDoEvaluator<InputT> implements TransformEvaluator<InputT> {
aggregatorChanges,
windowingStrategy);
PushbackSideInputDoFnRunner<InputT, OutputT> runner =
- PushbackSideInputDoFnRunner.create(underlying, sideInputs, sideInputReader);
+ SimplePushbackSideInputDoFnRunner.create(underlying, sideInputs, sideInputReader);
try {
runner.startBundle();
http://git-wip-us.apache.org/repos/asf/beam/blob/7e1a2675/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/DoFnOperator.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/DoFnOperator.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/DoFnOperator.java
index 5496f71..8a09286 100644
--- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/DoFnOperator.java
+++ b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/DoFnOperator.java
@@ -37,6 +37,7 @@ import org.apache.beam.runners.core.ExecutionContext;
import org.apache.beam.runners.core.GroupAlsoByWindowViaWindowSetNewDoFn;
import org.apache.beam.runners.core.PushbackSideInputDoFnRunner;
import org.apache.beam.runners.core.SideInputHandler;
+import org.apache.beam.runners.core.SimplePushbackSideInputDoFnRunner;
import org.apache.beam.runners.core.StateInternals;
import org.apache.beam.runners.core.StateNamespace;
import org.apache.beam.runners.core.StateNamespaces;
@@ -119,6 +120,7 @@ public class DoFnOperator<InputT, FnOutputT, OutputT>
protected final OutputManagerFactory<OutputT> outputManagerFactory;
+ protected transient DoFnRunner<InputT, FnOutputT> doFnRunner;
protected transient PushbackSideInputDoFnRunner<InputT, FnOutputT> pushbackDoFnRunner;
protected transient SideInputHandler sideInputHandler;
@@ -269,7 +271,7 @@ public class DoFnOperator<InputT, FnOutputT, OutputT>
ExecutionContext.StepContext stepContext = createStepContext();
- DoFnRunner<InputT, FnOutputT> doFnRunner = DoFnRunners.simpleRunner(
+ doFnRunner = DoFnRunners.simpleRunner(
serializedOptions.getPipelineOptions(),
doFn,
sideInputReader,
@@ -320,7 +322,7 @@ public class DoFnOperator<InputT, FnOutputT, OutputT>
}
pushbackDoFnRunner =
- PushbackSideInputDoFnRunner.create(doFnRunner, sideInputs, sideInputHandler);
+ SimplePushbackSideInputDoFnRunner.create(doFnRunner, sideInputs, sideInputHandler);
}
@Override
@@ -362,9 +364,9 @@ public class DoFnOperator<InputT, FnOutputT, OutputT>
@Override
public final void processElement(
StreamRecord<WindowedValue<InputT>> streamRecord) throws Exception {
- pushbackDoFnRunner.startBundle();
- pushbackDoFnRunner.processElement(streamRecord.getValue());
- pushbackDoFnRunner.finishBundle();
+ doFnRunner.startBundle();
+ doFnRunner.processElement(streamRecord.getValue());
+ doFnRunner.finishBundle();
}
private void setPushedBackWatermark(long watermark) {
http://git-wip-us.apache.org/repos/asf/beam/blob/7e1a2675/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/SplittableDoFnOperator.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/SplittableDoFnOperator.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/SplittableDoFnOperator.java
index 1a636c9..40f70e4 100644
--- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/SplittableDoFnOperator.java
+++ b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/SplittableDoFnOperator.java
@@ -142,7 +142,7 @@ public class SplittableDoFnOperator<
@Override
public void fireTimer(InternalTimer<?, TimerInternals.TimerData> timer) {
- pushbackDoFnRunner.processElement(WindowedValue.valueInGlobalWindow(
+ doFnRunner.processElement(WindowedValue.valueInGlobalWindow(
KeyedWorkItems.<String, ElementAndRestriction<InputT, RestrictionT>>timersWorkItem(
(String) stateInternals.getKey(),
Collections.singletonList(timer.getNamespace()))));
http://git-wip-us.apache.org/repos/asf/beam/blob/7e1a2675/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/WindowDoFnOperator.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/WindowDoFnOperator.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/WindowDoFnOperator.java
index 7b899f4..9b2136c 100644
--- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/WindowDoFnOperator.java
+++ b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/WindowDoFnOperator.java
@@ -108,7 +108,7 @@ public class WindowDoFnOperator<K, InputT, OutputT>
@Override
public void fireTimer(InternalTimer<?, TimerData> timer) {
- pushbackDoFnRunner.processElement(WindowedValue.valueInGlobalWindow(
+ doFnRunner.processElement(WindowedValue.valueInGlobalWindow(
KeyedWorkItems.<K, InputT>timersWorkItem(
(K) stateInternals.getKey(),
Collections.singletonList(timer.getNamespace()))));
[32/50] [abbrv] beam git commit: [BEAM-1994] Remove Flink examples package
Posted by dh...@apache.org.
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/streaming/TestCountingSource.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/streaming/TestCountingSource.java b/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/streaming/TestCountingSource.java
deleted file mode 100644
index 3a08088..0000000
--- a/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/streaming/TestCountingSource.java
+++ /dev/null
@@ -1,254 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.beam.runners.flink.streaming;
-
-import static org.apache.beam.sdk.util.CoderUtils.encodeToByteArray;
-
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.concurrent.ThreadLocalRandom;
-import javax.annotation.Nullable;
-import org.apache.beam.sdk.coders.Coder;
-import org.apache.beam.sdk.coders.DelegateCoder;
-import org.apache.beam.sdk.coders.KvCoder;
-import org.apache.beam.sdk.coders.VarIntCoder;
-import org.apache.beam.sdk.io.UnboundedSource;
-import org.apache.beam.sdk.options.PipelineOptions;
-import org.apache.beam.sdk.values.KV;
-import org.joda.time.Instant;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-/**
- * An unbounded source for testing the unbounded sources framework code.
- *
- * <p>Each split of this sources produces records of the form KV(split_id, i),
- * where i counts up from 0. Each record has a timestamp of i, and the watermark
- * accurately tracks these timestamps. The reader will occasionally return false
- * from {@code advance}, in order to simulate a source where not all the data is
- * available immediately.
- */
-public class TestCountingSource
- extends UnboundedSource<KV<Integer, Integer>, TestCountingSource.CounterMark> {
- private static final Logger LOG = LoggerFactory.getLogger(TestCountingSource.class);
-
- private static List<Integer> finalizeTracker;
- private final int numMessagesPerShard;
- private final int shardNumber;
- private final boolean dedup;
- private final boolean throwOnFirstSnapshot;
- private final boolean allowSplitting;
-
- /**
- * We only allow an exception to be thrown from getCheckpointMark
- * at most once. This must be static since the entire TestCountingSource
- * instance may re-serialized when the pipeline recovers and retries.
- */
- private static boolean thrown = false;
-
- public static void setFinalizeTracker(List<Integer> finalizeTracker) {
- TestCountingSource.finalizeTracker = finalizeTracker;
- }
-
- public TestCountingSource(int numMessagesPerShard) {
- this(numMessagesPerShard, 0, false, false, true);
- }
-
- public TestCountingSource withDedup() {
- return new TestCountingSource(
- numMessagesPerShard, shardNumber, true, throwOnFirstSnapshot, true);
- }
-
- private TestCountingSource withShardNumber(int shardNumber) {
- return new TestCountingSource(
- numMessagesPerShard, shardNumber, dedup, throwOnFirstSnapshot, true);
- }
-
- public TestCountingSource withThrowOnFirstSnapshot(boolean throwOnFirstSnapshot) {
- return new TestCountingSource(
- numMessagesPerShard, shardNumber, dedup, throwOnFirstSnapshot, true);
- }
-
- public TestCountingSource withoutSplitting() {
- return new TestCountingSource(
- numMessagesPerShard, shardNumber, dedup, throwOnFirstSnapshot, false);
- }
-
- private TestCountingSource(int numMessagesPerShard, int shardNumber, boolean dedup,
- boolean throwOnFirstSnapshot, boolean allowSplitting) {
- this.numMessagesPerShard = numMessagesPerShard;
- this.shardNumber = shardNumber;
- this.dedup = dedup;
- this.throwOnFirstSnapshot = throwOnFirstSnapshot;
- this.allowSplitting = allowSplitting;
- }
-
- public int getShardNumber() {
- return shardNumber;
- }
-
- @Override
- public List<TestCountingSource> split(
- int desiredNumSplits, PipelineOptions options) {
- List<TestCountingSource> splits = new ArrayList<>();
- int numSplits = allowSplitting ? desiredNumSplits : 1;
- for (int i = 0; i < numSplits; i++) {
- splits.add(withShardNumber(i));
- }
- return splits;
- }
-
- class CounterMark implements UnboundedSource.CheckpointMark {
- int current;
-
- public CounterMark(int current) {
- this.current = current;
- }
-
- @Override
- public void finalizeCheckpoint() {
- if (finalizeTracker != null) {
- finalizeTracker.add(current);
- }
- }
- }
-
- @Override
- public Coder<CounterMark> getCheckpointMarkCoder() {
- return DelegateCoder.of(
- VarIntCoder.of(),
- new DelegateCoder.CodingFunction<CounterMark, Integer>() {
- @Override
- public Integer apply(CounterMark input) {
- return input.current;
- }
- },
- new DelegateCoder.CodingFunction<Integer, CounterMark>() {
- @Override
- public CounterMark apply(Integer input) {
- return new CounterMark(input);
- }
- });
- }
-
- @Override
- public boolean requiresDeduping() {
- return dedup;
- }
-
- /**
- * Public only so that the checkpoint can be conveyed from {@link #getCheckpointMark()} to
- * {@link TestCountingSource#createReader(PipelineOptions, CounterMark)} without cast.
- */
- public class CountingSourceReader extends UnboundedReader<KV<Integer, Integer>> {
- private int current;
-
- public CountingSourceReader(int startingPoint) {
- this.current = startingPoint;
- }
-
- @Override
- public boolean start() {
- return advance();
- }
-
- @Override
- public boolean advance() {
- if (current >= numMessagesPerShard - 1) {
- return false;
- }
- // If testing dedup, occasionally insert a duplicate value;
- if (current >= 0 && dedup && ThreadLocalRandom.current().nextInt(5) == 0) {
- return true;
- }
- current++;
- return true;
- }
-
- @Override
- public KV<Integer, Integer> getCurrent() {
- return KV.of(shardNumber, current);
- }
-
- @Override
- public Instant getCurrentTimestamp() {
- return new Instant(current);
- }
-
- @Override
- public byte[] getCurrentRecordId() {
- try {
- return encodeToByteArray(KvCoder.of(VarIntCoder.of(), VarIntCoder.of()), getCurrent());
- } catch (IOException e) {
- throw new RuntimeException(e);
- }
- }
-
- @Override
- public void close() {}
-
- @Override
- public TestCountingSource getCurrentSource() {
- return TestCountingSource.this;
- }
-
- @Override
- public Instant getWatermark() {
- // The watermark is a promise about future elements, and the timestamps of elements are
- // strictly increasing for this source.
- return new Instant(current + 1);
- }
-
- @Override
- public CounterMark getCheckpointMark() {
- if (throwOnFirstSnapshot && !thrown) {
- thrown = true;
- LOG.error("Throwing exception while checkpointing counter");
- throw new RuntimeException("failed during checkpoint");
- }
- // The checkpoint can assume all records read, including the current, have
- // been commited.
- return new CounterMark(current);
- }
-
- @Override
- public long getSplitBacklogBytes() {
- return 7L;
- }
- }
-
- @Override
- public CountingSourceReader createReader(
- PipelineOptions options, @Nullable CounterMark checkpointMark) {
- if (checkpointMark == null) {
- LOG.debug("creating reader");
- } else {
- LOG.debug("restoring reader from checkpoint with current = {}", checkpointMark.current);
- }
- return new CountingSourceReader(checkpointMark != null ? checkpointMark.current : -1);
- }
-
- @Override
- public void validate() {}
-
- @Override
- public Coder<KV<Integer, Integer>> getDefaultOutputCoder() {
- return KvCoder.of(VarIntCoder.of(), VarIntCoder.of());
- }
-}
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/streaming/TopWikipediaSessionsITCase.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/streaming/TopWikipediaSessionsITCase.java b/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/streaming/TopWikipediaSessionsITCase.java
deleted file mode 100644
index 9e6bba8..0000000
--- a/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/streaming/TopWikipediaSessionsITCase.java
+++ /dev/null
@@ -1,133 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.beam.runners.flink.streaming;
-
-import com.google.api.services.bigquery.model.TableRow;
-import com.google.common.base.Joiner;
-import java.io.Serializable;
-import java.util.Arrays;
-import org.apache.beam.runners.flink.FlinkTestPipeline;
-import org.apache.beam.sdk.Pipeline;
-import org.apache.beam.sdk.io.TextIO;
-import org.apache.beam.sdk.transforms.Count;
-import org.apache.beam.sdk.transforms.Create;
-import org.apache.beam.sdk.transforms.DoFn;
-import org.apache.beam.sdk.transforms.ParDo;
-import org.apache.beam.sdk.transforms.windowing.Sessions;
-import org.apache.beam.sdk.transforms.windowing.Window;
-import org.apache.beam.sdk.values.KV;
-import org.apache.beam.sdk.values.PCollection;
-import org.apache.flink.streaming.util.StreamingProgramTestBase;
-import org.joda.time.Duration;
-import org.joda.time.Instant;
-
-
-/**
- * Session window test.
- */
-public class TopWikipediaSessionsITCase extends StreamingProgramTestBase implements Serializable {
- protected String resultPath;
-
- public TopWikipediaSessionsITCase(){
- }
-
- static final String[] EXPECTED_RESULT = new String[] {
- "user: user1 value:3",
- "user: user1 value:1",
- "user: user2 value:4",
- "user: user2 value:6",
- "user: user3 value:7",
- "user: user3 value:2"
- };
-
- @Override
- protected void preSubmit() throws Exception {
- resultPath = getTempDirPath("result");
- }
-
- @Override
- protected void postSubmit() throws Exception {
- compareResultsByLinesInMemory(Joiner.on('\n').join(EXPECTED_RESULT), resultPath);
- }
-
- @Override
- protected void testProgram() throws Exception {
-
- Pipeline p = FlinkTestPipeline.createForStreaming();
-
- Long now = (System.currentTimeMillis() + 10000) / 1000;
-
- PCollection<KV<String, Long>> output =
- p.apply(Create.of(Arrays.asList(new TableRow().set("timestamp", now).set
- ("contributor_username", "user1"), new TableRow().set("timestamp", now + 10).set
- ("contributor_username", "user3"), new TableRow().set("timestamp", now).set
- ("contributor_username", "user2"), new TableRow().set("timestamp", now).set
- ("contributor_username", "user1"), new TableRow().set("timestamp", now + 2).set
- ("contributor_username", "user1"), new TableRow().set("timestamp", now).set
- ("contributor_username", "user2"), new TableRow().set("timestamp", now + 1).set
- ("contributor_username", "user2"), new TableRow().set("timestamp", now + 5).set
- ("contributor_username", "user2"), new TableRow().set("timestamp", now + 7).set
- ("contributor_username", "user2"), new TableRow().set("timestamp", now + 8).set
- ("contributor_username", "user2"), new TableRow().set("timestamp", now + 200).set
- ("contributor_username", "user2"), new TableRow().set("timestamp", now + 230).set
- ("contributor_username", "user1"), new TableRow().set("timestamp", now + 230).set
- ("contributor_username", "user2"), new TableRow().set("timestamp", now + 240).set
- ("contributor_username", "user2"), new TableRow().set("timestamp", now + 245).set
- ("contributor_username", "user3"), new TableRow().set("timestamp", now + 235).set
- ("contributor_username", "user3"), new TableRow().set("timestamp", now + 236).set
- ("contributor_username", "user3"), new TableRow().set("timestamp", now + 237).set
- ("contributor_username", "user3"), new TableRow().set("timestamp", now + 238).set
- ("contributor_username", "user3"), new TableRow().set("timestamp", now + 239).set
- ("contributor_username", "user3"), new TableRow().set("timestamp", now + 240).set
- ("contributor_username", "user3"), new TableRow().set("timestamp", now + 241).set
- ("contributor_username", "user2"), new TableRow().set("timestamp", now)
- .set("contributor_username", "user3"))))
-
-
-
- .apply(ParDo.of(new DoFn<TableRow, String>() {
- @ProcessElement
- public void processElement(ProcessContext c) throws Exception {
- TableRow row = c.element();
- long timestamp = (Integer) row.get("timestamp");
- String userName = (String) row.get("contributor_username");
- if (userName != null) {
- // Sets the timestamp field to be used in windowing.
- c.outputWithTimestamp(userName, new Instant(timestamp * 1000L));
- }
- }
- }))
-
- .apply(Window.<String>into(Sessions.withGapDuration(Duration.standardMinutes(1))))
-
- .apply(Count.<String>perElement());
-
- PCollection<String> format = output.apply(ParDo.of(new DoFn<KV<String, Long>, String>() {
- @ProcessElement
- public void processElement(ProcessContext c) throws Exception {
- KV<String, Long> el = c.element();
- String out = "user: " + el.getKey() + " value:" + el.getValue();
- c.output(out);
- }
- }));
-
- format.apply(TextIO.Write.to(resultPath));
-
- p.run();
- }
-}
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/streaming/UnboundedSourceWrapperTest.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/streaming/UnboundedSourceWrapperTest.java b/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/streaming/UnboundedSourceWrapperTest.java
deleted file mode 100644
index 90f95d6..0000000
--- a/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/streaming/UnboundedSourceWrapperTest.java
+++ /dev/null
@@ -1,464 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.beam.runners.flink.streaming;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
-import static org.junit.Assert.fail;
-import static org.mockito.Mockito.mock;
-import static org.mockito.Mockito.when;
-
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collection;
-import java.util.Collections;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Set;
-import org.apache.beam.runners.flink.translation.wrappers.streaming.io.UnboundedSourceWrapper;
-import org.apache.beam.sdk.coders.Coder;
-import org.apache.beam.sdk.io.UnboundedSource;
-import org.apache.beam.sdk.options.PipelineOptions;
-import org.apache.beam.sdk.options.PipelineOptionsFactory;
-import org.apache.beam.sdk.util.WindowedValue;
-import org.apache.beam.sdk.values.KV;
-import org.apache.flink.api.common.ExecutionConfig;
-import org.apache.flink.api.common.accumulators.Accumulator;
-import org.apache.flink.api.common.state.ListState;
-import org.apache.flink.api.common.state.ListStateDescriptor;
-import org.apache.flink.api.common.state.OperatorStateStore;
-import org.apache.flink.configuration.Configuration;
-import org.apache.flink.runtime.execution.Environment;
-import org.apache.flink.runtime.operators.testutils.DummyEnvironment;
-import org.apache.flink.runtime.state.StateInitializationContext;
-import org.apache.flink.runtime.state.StateSnapshotContextSynchronousImpl;
-import org.apache.flink.streaming.api.TimeCharacteristic;
-import org.apache.flink.streaming.api.graph.StreamConfig;
-import org.apache.flink.streaming.api.operators.Output;
-import org.apache.flink.streaming.api.operators.StreamSource;
-import org.apache.flink.streaming.api.watermark.Watermark;
-import org.apache.flink.streaming.runtime.streamrecord.LatencyMarker;
-import org.apache.flink.streaming.runtime.streamrecord.StreamRecord;
-import org.apache.flink.streaming.runtime.tasks.StreamTask;
-import org.apache.flink.streaming.runtime.tasks.TestProcessingTimeService;
-import org.apache.flink.util.InstantiationUtil;
-import org.junit.Test;
-import org.junit.experimental.runners.Enclosed;
-import org.junit.runner.RunWith;
-import org.junit.runners.Parameterized;
-import org.mockito.Matchers;
-
-/**
- * Tests for {@link UnboundedSourceWrapper}.
- */
-@RunWith(Enclosed.class)
-public class UnboundedSourceWrapperTest {
-
- /**
- * Parameterized tests.
- */
- @RunWith(Parameterized.class)
- public static class UnboundedSourceWrapperTestWithParams {
- private final int numTasks;
- private final int numSplits;
-
- public UnboundedSourceWrapperTestWithParams(int numTasks, int numSplits) {
- this.numTasks = numTasks;
- this.numSplits = numSplits;
- }
-
- @Parameterized.Parameters
- public static Collection<Object[]> data() {
- /*
- * Parameters for initializing the tests:
- * {numTasks, numSplits}
- * The test currently assumes powers of two for some assertions.
- */
- return Arrays.asList(new Object[][]{
- {1, 1}, {1, 2}, {1, 4},
- {2, 1}, {2, 2}, {2, 4},
- {4, 1}, {4, 2}, {4, 4}
- });
- }
-
- /**
- * Creates a {@link UnboundedSourceWrapper} that has one or multiple readers per source.
- * If numSplits > numTasks, one source will manage multiple readers.
- */
- @Test
- public void testReaders() throws Exception {
- final int numElements = 20;
- final Object checkpointLock = new Object();
- PipelineOptions options = PipelineOptionsFactory.create();
-
- // this source will emit exactly NUM_ELEMENTS across all parallel readers,
- // afterwards it will stall. We check whether we also receive NUM_ELEMENTS
- // elements later.
- TestCountingSource source = new TestCountingSource(numElements);
- UnboundedSourceWrapper<KV<Integer, Integer>, TestCountingSource.CounterMark> flinkWrapper =
- new UnboundedSourceWrapper<>(options, source, numSplits);
-
- assertEquals(numSplits, flinkWrapper.getSplitSources().size());
-
- StreamSource<WindowedValue<
- KV<Integer, Integer>>,
- UnboundedSourceWrapper<
- KV<Integer, Integer>,
- TestCountingSource.CounterMark>> sourceOperator = new StreamSource<>(flinkWrapper);
-
- setupSourceOperator(sourceOperator, numTasks);
-
- try {
- sourceOperator.open();
- sourceOperator.run(checkpointLock,
- new Output<StreamRecord<WindowedValue<KV<Integer, Integer>>>>() {
- private int count = 0;
-
- @Override
- public void emitWatermark(Watermark watermark) {
- }
-
- @Override
- public void emitLatencyMarker(LatencyMarker latencyMarker) {
- }
-
- @Override
- public void collect(
- StreamRecord<WindowedValue<KV<Integer, Integer>>> windowedValueStreamRecord) {
-
- count++;
- if (count >= numElements) {
- throw new SuccessException();
- }
- }
-
- @Override
- public void close() {
-
- }
- });
- } catch (SuccessException e) {
-
- assertEquals(Math.max(1, numSplits / numTasks), flinkWrapper.getLocalSplitSources().size());
-
- // success
- return;
- }
- fail("Read terminated without producing expected number of outputs");
- }
-
- /**
- * Verify that snapshot/restore work as expected. We bring up a source and cancel
- * after seeing a certain number of elements. Then we snapshot that source,
- * bring up a completely new source that we restore from the snapshot and verify
- * that we see all expected elements in the end.
- */
- @Test
- public void testRestore() throws Exception {
- final int numElements = 20;
- final Object checkpointLock = new Object();
- PipelineOptions options = PipelineOptionsFactory.create();
-
- // this source will emit exactly NUM_ELEMENTS across all parallel readers,
- // afterwards it will stall. We check whether we also receive NUM_ELEMENTS
- // elements later.
- TestCountingSource source = new TestCountingSource(numElements);
- UnboundedSourceWrapper<KV<Integer, Integer>, TestCountingSource.CounterMark> flinkWrapper =
- new UnboundedSourceWrapper<>(options, source, numSplits);
-
- assertEquals(numSplits, flinkWrapper.getSplitSources().size());
-
- StreamSource<
- WindowedValue<KV<Integer, Integer>>,
- UnboundedSourceWrapper<
- KV<Integer, Integer>,
- TestCountingSource.CounterMark>> sourceOperator = new StreamSource<>(flinkWrapper);
-
-
- OperatorStateStore backend = mock(OperatorStateStore.class);
-
- TestingListState<KV<UnboundedSource, TestCountingSource.CounterMark>>
- listState = new TestingListState<>();
-
- when(backend.getOperatorState(Matchers.any(ListStateDescriptor.class)))
- .thenReturn(listState);
-
- StateInitializationContext initializationContext = mock(StateInitializationContext.class);
-
- when(initializationContext.getOperatorStateStore()).thenReturn(backend);
- when(initializationContext.isRestored()).thenReturn(false, true);
-
- flinkWrapper.initializeState(initializationContext);
-
- setupSourceOperator(sourceOperator, numTasks);
-
- final Set<KV<Integer, Integer>> emittedElements = new HashSet<>();
-
- boolean readFirstBatchOfElements = false;
-
- try {
- sourceOperator.open();
- sourceOperator.run(checkpointLock,
- new Output<StreamRecord<WindowedValue<KV<Integer, Integer>>>>() {
- private int count = 0;
-
- @Override
- public void emitWatermark(Watermark watermark) {
- }
-
- @Override
- public void emitLatencyMarker(LatencyMarker latencyMarker) {
- }
-
- @Override
- public void collect(
- StreamRecord<WindowedValue<KV<Integer, Integer>>> windowedValueStreamRecord) {
-
- emittedElements.add(windowedValueStreamRecord.getValue().getValue());
- count++;
- if (count >= numElements / 2) {
- throw new SuccessException();
- }
- }
-
- @Override
- public void close() {
-
- }
- });
- } catch (SuccessException e) {
- // success
- readFirstBatchOfElements = true;
- }
-
- assertTrue("Did not successfully read first batch of elements.", readFirstBatchOfElements);
-
- // draw a snapshot
- flinkWrapper.snapshotState(new StateSnapshotContextSynchronousImpl(0, 0));
-
- // test snapshot offsets
- assertEquals(flinkWrapper.getLocalSplitSources().size(),
- listState.getList().size());
- int totalEmit = 0;
- for (KV<UnboundedSource, TestCountingSource.CounterMark> kv : listState.get()) {
- totalEmit += kv.getValue().current + 1;
- }
- assertEquals(numElements / 2, totalEmit);
-
- // test that finalizeCheckpoint on CheckpointMark is called
- final ArrayList<Integer> finalizeList = new ArrayList<>();
- TestCountingSource.setFinalizeTracker(finalizeList);
- flinkWrapper.notifyCheckpointComplete(0);
- assertEquals(flinkWrapper.getLocalSplitSources().size(), finalizeList.size());
-
- // create a completely new source but restore from the snapshot
- TestCountingSource restoredSource = new TestCountingSource(numElements);
- UnboundedSourceWrapper<
- KV<Integer, Integer>, TestCountingSource.CounterMark> restoredFlinkWrapper =
- new UnboundedSourceWrapper<>(options, restoredSource, numSplits);
-
- assertEquals(numSplits, restoredFlinkWrapper.getSplitSources().size());
-
- StreamSource<
- WindowedValue<KV<Integer, Integer>>,
- UnboundedSourceWrapper<
- KV<Integer, Integer>,
- TestCountingSource.CounterMark>> restoredSourceOperator =
- new StreamSource<>(restoredFlinkWrapper);
-
- setupSourceOperator(restoredSourceOperator, numTasks);
-
- // restore snapshot
- restoredFlinkWrapper.initializeState(initializationContext);
-
- boolean readSecondBatchOfElements = false;
-
- // run again and verify that we see the other elements
- try {
- restoredSourceOperator.open();
- restoredSourceOperator.run(checkpointLock,
- new Output<StreamRecord<WindowedValue<KV<Integer, Integer>>>>() {
- private int count = 0;
-
- @Override
- public void emitWatermark(Watermark watermark) {
- }
-
- @Override
- public void emitLatencyMarker(LatencyMarker latencyMarker) {
- }
-
- @Override
- public void collect(
- StreamRecord<WindowedValue<KV<Integer, Integer>>> windowedValueStreamRecord) {
- emittedElements.add(windowedValueStreamRecord.getValue().getValue());
- count++;
- if (count >= numElements / 2) {
- throw new SuccessException();
- }
- }
-
- @Override
- public void close() {
-
- }
- });
- } catch (SuccessException e) {
- // success
- readSecondBatchOfElements = true;
- }
-
- assertEquals(Math.max(1, numSplits / numTasks), flinkWrapper.getLocalSplitSources().size());
-
- assertTrue("Did not successfully read second batch of elements.", readSecondBatchOfElements);
-
- // verify that we saw all NUM_ELEMENTS elements
- assertTrue(emittedElements.size() == numElements);
- }
-
- @Test
- public void testNullCheckpoint() throws Exception {
- final int numElements = 20;
- PipelineOptions options = PipelineOptionsFactory.create();
-
- TestCountingSource source = new TestCountingSource(numElements) {
- @Override
- public Coder<CounterMark> getCheckpointMarkCoder() {
- return null;
- }
- };
- UnboundedSourceWrapper<KV<Integer, Integer>, TestCountingSource.CounterMark> flinkWrapper =
- new UnboundedSourceWrapper<>(options, source, numSplits);
-
- OperatorStateStore backend = mock(OperatorStateStore.class);
-
- TestingListState<KV<UnboundedSource, TestCountingSource.CounterMark>>
- listState = new TestingListState<>();
-
- when(backend.getOperatorState(Matchers.any(ListStateDescriptor.class)))
- .thenReturn(listState);
-
- StateInitializationContext initializationContext = mock(StateInitializationContext.class);
-
- when(initializationContext.getOperatorStateStore()).thenReturn(backend);
- when(initializationContext.isRestored()).thenReturn(false, true);
-
- flinkWrapper.initializeState(initializationContext);
-
- StreamSource sourceOperator = new StreamSource<>(flinkWrapper);
- setupSourceOperator(sourceOperator, numTasks);
- sourceOperator.open();
-
- flinkWrapper.snapshotState(new StateSnapshotContextSynchronousImpl(0, 0));
-
- assertEquals(0, listState.getList().size());
-
- UnboundedSourceWrapper<
- KV<Integer, Integer>, TestCountingSource.CounterMark> restoredFlinkWrapper =
- new UnboundedSourceWrapper<>(options, new TestCountingSource(numElements),
- numSplits);
-
- StreamSource restoredSourceOperator = new StreamSource<>(flinkWrapper);
- setupSourceOperator(restoredSourceOperator, numTasks);
- sourceOperator.open();
-
- restoredFlinkWrapper.initializeState(initializationContext);
-
- assertEquals(Math.max(1, numSplits / numTasks), flinkWrapper.getLocalSplitSources().size());
-
- }
-
- @SuppressWarnings("unchecked")
- private static <T> void setupSourceOperator(StreamSource<T, ?> operator, int numSubTasks) {
- ExecutionConfig executionConfig = new ExecutionConfig();
- StreamConfig cfg = new StreamConfig(new Configuration());
-
- cfg.setTimeCharacteristic(TimeCharacteristic.EventTime);
-
- Environment env = new DummyEnvironment("MockTwoInputTask", numSubTasks, 0);
-
- StreamTask<?, ?> mockTask = mock(StreamTask.class);
- when(mockTask.getName()).thenReturn("Mock Task");
- when(mockTask.getCheckpointLock()).thenReturn(new Object());
- when(mockTask.getConfiguration()).thenReturn(cfg);
- when(mockTask.getEnvironment()).thenReturn(env);
- when(mockTask.getExecutionConfig()).thenReturn(executionConfig);
- when(mockTask.getAccumulatorMap())
- .thenReturn(Collections.<String, Accumulator<?, ?>>emptyMap());
- TestProcessingTimeService testProcessingTimeService = new TestProcessingTimeService();
- when(mockTask.getProcessingTimeService()).thenReturn(testProcessingTimeService);
-
- operator.setup(mockTask, cfg, (Output<StreamRecord<T>>) mock(Output.class));
- }
-
- /**
- * A special {@link RuntimeException} that we throw to signal that the test was successful.
- */
- private static class SuccessException extends RuntimeException {
- }
- }
-
- /**
- * Not parameterized tests.
- */
- public static class BasicTest {
-
- /**
- * Check serialization of a {@link UnboundedSourceWrapper}.
- */
- @Test
- public void testSerialization() throws Exception {
- final int parallelism = 1;
- final int numElements = 20;
- PipelineOptions options = PipelineOptionsFactory.create();
-
- TestCountingSource source = new TestCountingSource(numElements);
- UnboundedSourceWrapper<KV<Integer, Integer>, TestCountingSource.CounterMark> flinkWrapper =
- new UnboundedSourceWrapper<>(options, source, parallelism);
-
- InstantiationUtil.serializeObject(flinkWrapper);
- }
-
- }
-
- private static final class TestingListState<T> implements ListState<T> {
-
- private final List<T> list = new ArrayList<>();
-
- @Override
- public void clear() {
- list.clear();
- }
-
- @Override
- public Iterable<T> get() throws Exception {
- return list;
- }
-
- @Override
- public void add(T value) throws Exception {
- list.add(value);
- }
-
- public List<T> getList() {
- return list;
- }
-
- }
-
-}
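
The snapshot/restore interplay that testRestore verifies can be summarized in a few calls; the sketch below is lifted from the test flow above, with the surrounding setup (mocks, outputs, counters) omitted.

    // Checkpoint the running wrapper and finalize the checkpoint mark.
    flinkWrapper.snapshotState(new StateSnapshotContextSynchronousImpl(0, 0));
    flinkWrapper.notifyCheckpointComplete(0);

    // Bring up a completely new wrapper and restore it from the snapshotted state.
    UnboundedSourceWrapper<KV<Integer, Integer>, TestCountingSource.CounterMark> restored =
        new UnboundedSourceWrapper<>(options, new TestCountingSource(numElements), numSplits);
    restored.initializeState(initializationContext);
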
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/streaming/package-info.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/streaming/package-info.java b/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/streaming/package-info.java
deleted file mode 100644
index 08a1e03..0000000
--- a/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/streaming/package-info.java
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * Internal implementation of the Beam runner for Apache Flink.
- */
-package org.apache.beam.runners.flink.streaming;
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/runner/src/test/resources/log4j-test.properties
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/test/resources/log4j-test.properties b/runners/flink/runner/src/test/resources/log4j-test.properties
deleted file mode 100644
index 4c74d85..0000000
--- a/runners/flink/runner/src/test/resources/log4j-test.properties
+++ /dev/null
@@ -1,27 +0,0 @@
-################################################################################
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-################################################################################
-
-# Set root logger level to OFF to not flood build logs
-# set manually to INFO for debugging purposes
-log4j.rootLogger=OFF, testlogger
-
-# A1 is set to be a ConsoleAppender.
-log4j.appender.testlogger=org.apache.log4j.ConsoleAppender
-log4j.appender.testlogger.target = System.err
-log4j.appender.testlogger.layout=org.apache.log4j.PatternLayout
-log4j.appender.testlogger.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/src/main/java/org/apache/beam/runners/flink/DefaultParallelismFactory.java
----------------------------------------------------------------------
diff --git a/runners/flink/src/main/java/org/apache/beam/runners/flink/DefaultParallelismFactory.java b/runners/flink/src/main/java/org/apache/beam/runners/flink/DefaultParallelismFactory.java
new file mode 100644
index 0000000..b745f0b
--- /dev/null
+++ b/runners/flink/src/main/java/org/apache/beam/runners/flink/DefaultParallelismFactory.java
@@ -0,0 +1,39 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.runners.flink;
+
+import org.apache.beam.sdk.options.DefaultValueFactory;
+import org.apache.beam.sdk.options.PipelineOptions;
+import org.apache.flink.configuration.ConfigConstants;
+import org.apache.flink.configuration.GlobalConfiguration;
+
+/**
+ * {@link DefaultValueFactory} for getting a default value for the parallelism option
+ * on {@link FlinkPipelineOptions}.
+ *
+ * <p>This will return either the default value from {@link GlobalConfiguration} or {@code 1}.
+ * A valid {@link GlobalConfiguration} is only available if the program is executed by the Flink
+ * run scripts.
+ */
+public class DefaultParallelismFactory implements DefaultValueFactory<Integer> {
+ @Override
+ public Integer create(PipelineOptions options) {
+ return GlobalConfiguration.loadConfiguration()
+ .getInteger(ConfigConstants.DEFAULT_PARALLELISM_KEY, 1);
+ }
+}
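
A DefaultValueFactory like this is normally attached to a pipeline option through the SDK's @Default.InstanceFactory annotation. The declaration below is a sketch of how FlinkPipelineOptions might wire it up; the exact option interface is assumed here and is not part of this diff.

    // Sketch (assumed declaration): the factory supplies the parallelism default
    // when the user does not pass --parallelism on the command line.
    public interface FlinkPipelineOptions extends PipelineOptions {

      @Description("The degree of parallelism to be used when executing the job.")
      @Default.InstanceFactory(DefaultParallelismFactory.class)
      Integer getParallelism();

      void setParallelism(Integer value);
    }
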
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/src/main/java/org/apache/beam/runners/flink/FlinkBatchPipelineTranslator.java
----------------------------------------------------------------------
diff --git a/runners/flink/src/main/java/org/apache/beam/runners/flink/FlinkBatchPipelineTranslator.java b/runners/flink/src/main/java/org/apache/beam/runners/flink/FlinkBatchPipelineTranslator.java
new file mode 100644
index 0000000..854b674
--- /dev/null
+++ b/runners/flink/src/main/java/org/apache/beam/runners/flink/FlinkBatchPipelineTranslator.java
@@ -0,0 +1,139 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.runners.flink;
+
+import org.apache.beam.sdk.Pipeline;
+import org.apache.beam.sdk.options.PipelineOptions;
+import org.apache.beam.sdk.runners.TransformHierarchy;
+import org.apache.beam.sdk.transforms.PTransform;
+import org.apache.flink.api.java.DataSet;
+import org.apache.flink.api.java.ExecutionEnvironment;
+import org.apache.flink.api.java.io.DiscardingOutputFormat;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * {@link Pipeline.PipelineVisitor} for executing a {@link Pipeline} as a
+ * Flink batch job.
+ */
+class FlinkBatchPipelineTranslator extends FlinkPipelineTranslator {
+
+ private static final Logger LOG = LoggerFactory.getLogger(FlinkBatchPipelineTranslator.class);
+
+ /**
+ * The necessary context in the case of a batch job.
+ */
+ private final FlinkBatchTranslationContext batchContext;
+
+ private int depth = 0;
+
+ public FlinkBatchPipelineTranslator(ExecutionEnvironment env, PipelineOptions options) {
+ this.batchContext = new FlinkBatchTranslationContext(env, options);
+ }
+
+ @Override
+ @SuppressWarnings("rawtypes, unchecked")
+ public void translate(Pipeline pipeline) {
+ super.translate(pipeline);
+
+ // terminate dangling DataSets
+ for (DataSet<?> dataSet: batchContext.getDanglingDataSets().values()) {
+ dataSet.output(new DiscardingOutputFormat());
+ }
+ }
+
+ // --------------------------------------------------------------------------------------------
+ // Pipeline Visitor Methods
+ // --------------------------------------------------------------------------------------------
+
+ @Override
+ public CompositeBehavior enterCompositeTransform(TransformHierarchy.Node node) {
+ LOG.info("{} enterCompositeTransform- {}", genSpaces(this.depth), node.getFullName());
+ this.depth++;
+
+ BatchTransformTranslator<?> translator = getTranslator(node);
+
+ if (translator != null) {
+ applyBatchTransform(node.getTransform(), node, translator);
+ LOG.info("{} translated- {}", genSpaces(this.depth), node.getFullName());
+ return CompositeBehavior.DO_NOT_ENTER_TRANSFORM;
+ } else {
+ return CompositeBehavior.ENTER_TRANSFORM;
+ }
+ }
+
+ @Override
+ public void leaveCompositeTransform(TransformHierarchy.Node node) {
+ this.depth--;
+ LOG.info("{} leaveCompositeTransform- {}", genSpaces(this.depth), node.getFullName());
+ }
+
+ @Override
+ public void visitPrimitiveTransform(TransformHierarchy.Node node) {
+ LOG.info("{} visitPrimitiveTransform- {}", genSpaces(this.depth), node.getFullName());
+
+ // get the transformation corresponding to the node we are
+ // currently visiting and translate it into its Flink alternative.
+ PTransform<?, ?> transform = node.getTransform();
+ BatchTransformTranslator<?> translator =
+ FlinkBatchTransformTranslators.getTranslator(transform);
+ if (translator == null) {
+ LOG.info(node.getTransform().getClass().toString());
+ throw new UnsupportedOperationException("The transform " + transform
+ + " is currently not supported.");
+ }
+ applyBatchTransform(transform, node, translator);
+ }
+
+ private <T extends PTransform<?, ?>> void applyBatchTransform(
+ PTransform<?, ?> transform,
+ TransformHierarchy.Node node,
+ BatchTransformTranslator<?> translator) {
+
+ @SuppressWarnings("unchecked")
+ T typedTransform = (T) transform;
+
+ @SuppressWarnings("unchecked")
+ BatchTransformTranslator<T> typedTranslator = (BatchTransformTranslator<T>) translator;
+
+ // create the applied PTransform on the batchContext
+ batchContext.setCurrentTransform(node.toAppliedPTransform());
+ typedTranslator.translateNode(typedTransform, batchContext);
+ }
+
+ /**
+ * A translator of a {@link PTransform}.
+ */
+ public interface BatchTransformTranslator<TransformT extends PTransform> {
+ void translateNode(TransformT transform, FlinkBatchTranslationContext context);
+ }
+
+ /**
+ * Returns a translator for the given node if one exists, otherwise null.
+ */
+ private static BatchTransformTranslator<?> getTranslator(TransformHierarchy.Node node) {
+ PTransform<?, ?> transform = node.getTransform();
+
+ // Root of the graph is null
+ if (transform == null) {
+ return null;
+ }
+
+ return FlinkBatchTransformTranslators.getTranslator(transform);
+ }
+}
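
How this translator is driven is not shown in this file; the sketch below illustrates the assumed call sequence: build a Flink ExecutionEnvironment, run the visitor over the Beam pipeline, then execute the resulting plan.

    // Sketch of driving the batch translator (environment setup is assumed,
    // not part of this diff; in the runner this happens during job submission).
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    FlinkBatchPipelineTranslator translator = new FlinkBatchPipelineTranslator(env, options);

    // Walks the transform hierarchy, translating each node into DataSet operations
    // and terminating any dangling DataSets with a discarding sink.
    translator.translate(pipeline);

    // The ExecutionEnvironment now holds the translated batch plan.
    env.execute("beam-flink-batch-job");
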
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/src/main/java/org/apache/beam/runners/flink/FlinkBatchTransformTranslators.java
----------------------------------------------------------------------
diff --git a/runners/flink/src/main/java/org/apache/beam/runners/flink/FlinkBatchTransformTranslators.java b/runners/flink/src/main/java/org/apache/beam/runners/flink/FlinkBatchTransformTranslators.java
new file mode 100644
index 0000000..ff9521c
--- /dev/null
+++ b/runners/flink/src/main/java/org/apache/beam/runners/flink/FlinkBatchTransformTranslators.java
@@ -0,0 +1,723 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.runners.flink;
+
+import static com.google.common.base.Preconditions.checkArgument;
+import static com.google.common.base.Preconditions.checkState;
+
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import org.apache.beam.runners.flink.translation.functions.FlinkAssignWindows;
+import org.apache.beam.runners.flink.translation.functions.FlinkDoFnFunction;
+import org.apache.beam.runners.flink.translation.functions.FlinkMergingNonShuffleReduceFunction;
+import org.apache.beam.runners.flink.translation.functions.FlinkMergingPartialReduceFunction;
+import org.apache.beam.runners.flink.translation.functions.FlinkMergingReduceFunction;
+import org.apache.beam.runners.flink.translation.functions.FlinkMultiOutputPruningFunction;
+import org.apache.beam.runners.flink.translation.functions.FlinkPartialReduceFunction;
+import org.apache.beam.runners.flink.translation.functions.FlinkReduceFunction;
+import org.apache.beam.runners.flink.translation.functions.FlinkStatefulDoFnFunction;
+import org.apache.beam.runners.flink.translation.types.CoderTypeInformation;
+import org.apache.beam.runners.flink.translation.types.KvKeySelector;
+import org.apache.beam.runners.flink.translation.wrappers.SourceInputFormat;
+import org.apache.beam.sdk.coders.CannotProvideCoderException;
+import org.apache.beam.sdk.coders.Coder;
+import org.apache.beam.sdk.coders.CoderRegistry;
+import org.apache.beam.sdk.coders.KvCoder;
+import org.apache.beam.sdk.coders.ListCoder;
+import org.apache.beam.sdk.coders.VoidCoder;
+import org.apache.beam.sdk.io.BoundedSource;
+import org.apache.beam.sdk.io.Read;
+import org.apache.beam.sdk.transforms.Combine;
+import org.apache.beam.sdk.transforms.CombineFnBase;
+import org.apache.beam.sdk.transforms.DoFn;
+import org.apache.beam.sdk.transforms.Flatten;
+import org.apache.beam.sdk.transforms.GroupByKey;
+import org.apache.beam.sdk.transforms.PTransform;
+import org.apache.beam.sdk.transforms.ParDo;
+import org.apache.beam.sdk.transforms.View;
+import org.apache.beam.sdk.transforms.join.RawUnionValue;
+import org.apache.beam.sdk.transforms.join.UnionCoder;
+import org.apache.beam.sdk.transforms.reflect.DoFnSignature;
+import org.apache.beam.sdk.transforms.reflect.DoFnSignatures;
+import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
+import org.apache.beam.sdk.transforms.windowing.GlobalWindow;
+import org.apache.beam.sdk.transforms.windowing.IntervalWindow;
+import org.apache.beam.sdk.transforms.windowing.Window;
+import org.apache.beam.sdk.transforms.windowing.WindowFn;
+import org.apache.beam.sdk.util.Reshuffle;
+import org.apache.beam.sdk.util.WindowedValue;
+import org.apache.beam.sdk.util.WindowingStrategy;
+import org.apache.beam.sdk.values.KV;
+import org.apache.beam.sdk.values.PCollection;
+import org.apache.beam.sdk.values.PCollectionView;
+import org.apache.beam.sdk.values.PValue;
+import org.apache.beam.sdk.values.TupleTag;
+import org.apache.flink.api.common.functions.FilterFunction;
+import org.apache.flink.api.common.functions.FlatMapFunction;
+import org.apache.flink.api.common.typeinfo.TypeInformation;
+import org.apache.flink.api.java.DataSet;
+import org.apache.flink.api.java.operators.DataSource;
+import org.apache.flink.api.java.operators.FlatMapOperator;
+import org.apache.flink.api.java.operators.GroupCombineOperator;
+import org.apache.flink.api.java.operators.GroupReduceOperator;
+import org.apache.flink.api.java.operators.Grouping;
+import org.apache.flink.api.java.operators.MapPartitionOperator;
+import org.apache.flink.api.java.operators.SingleInputUdfOperator;
+import org.apache.flink.util.Collector;
+
+/**
+ * Translators for transforming {@link PTransform PTransforms} to
+ * Flink {@link DataSet DataSets}.
+ */
+class FlinkBatchTransformTranslators {
+
+ // --------------------------------------------------------------------------------------------
+ // Transform Translator Registry
+ // --------------------------------------------------------------------------------------------
+
+ @SuppressWarnings("rawtypes")
+ private static final Map<
+ Class<? extends PTransform>,
+ FlinkBatchPipelineTranslator.BatchTransformTranslator> TRANSLATORS = new HashMap<>();
+
+ static {
+ TRANSLATORS.put(View.CreatePCollectionView.class, new CreatePCollectionViewTranslatorBatch());
+
+ TRANSLATORS.put(Combine.PerKey.class, new CombinePerKeyTranslatorBatch());
+ TRANSLATORS.put(GroupByKey.class, new GroupByKeyTranslatorBatch());
+ TRANSLATORS.put(Reshuffle.class, new ReshuffleTranslatorBatch());
+
+ TRANSLATORS.put(Flatten.PCollections.class, new FlattenPCollectionTranslatorBatch());
+
+ TRANSLATORS.put(Window.Assign.class, new WindowAssignTranslatorBatch());
+
+ TRANSLATORS.put(ParDo.MultiOutput.class, new ParDoTranslatorBatch());
+
+ TRANSLATORS.put(Read.Bounded.class, new ReadSourceTranslatorBatch());
+ }
+
+
+ static FlinkBatchPipelineTranslator.BatchTransformTranslator<?> getTranslator(
+ PTransform<?, ?> transform) {
+ return TRANSLATORS.get(transform.getClass());
+ }
+
+ private static class ReadSourceTranslatorBatch<T>
+ implements FlinkBatchPipelineTranslator.BatchTransformTranslator<Read.Bounded<T>> {
+
+ @Override
+ public void translateNode(Read.Bounded<T> transform, FlinkBatchTranslationContext context) {
+ String name = transform.getName();
+ BoundedSource<T> source = transform.getSource();
+ PCollection<T> output = context.getOutput(transform);
+
+ TypeInformation<WindowedValue<T>> typeInformation = context.getTypeInfo(output);
+
+ DataSource<WindowedValue<T>> dataSource = new DataSource<>(
+ context.getExecutionEnvironment(),
+ new SourceInputFormat<>(source, context.getPipelineOptions()),
+ typeInformation,
+ name);
+
+ context.setOutputDataSet(output, dataSource);
+ }
+ }
+
+ private static class WindowAssignTranslatorBatch<T>
+ implements FlinkBatchPipelineTranslator.BatchTransformTranslator<Window.Assign<T>> {
+
+ @Override
+ public void translateNode(Window.Assign<T> transform, FlinkBatchTranslationContext context) {
+ PValue input = context.getInput(transform);
+
+ TypeInformation<WindowedValue<T>> resultTypeInfo =
+ context.getTypeInfo(context.getOutput(transform));
+
+ DataSet<WindowedValue<T>> inputDataSet = context.getInputDataSet(input);
+
+ @SuppressWarnings("unchecked")
+ final WindowingStrategy<T, ? extends BoundedWindow> windowingStrategy =
+ (WindowingStrategy<T, ? extends BoundedWindow>)
+ context.getOutput(transform).getWindowingStrategy();
+
+ WindowFn<T, ? extends BoundedWindow> windowFn = windowingStrategy.getWindowFn();
+
+ FlinkAssignWindows<T, ? extends BoundedWindow> assignWindowsFunction =
+ new FlinkAssignWindows<>(windowFn);
+
+ DataSet<WindowedValue<T>> resultDataSet = inputDataSet
+ .flatMap(assignWindowsFunction)
+ .name(context.getOutput(transform).getName())
+ .returns(resultTypeInfo);
+
+ context.setOutputDataSet(context.getOutput(transform), resultDataSet);
+ }
+ }
+
+ private static class GroupByKeyTranslatorBatch<K, InputT>
+ implements FlinkBatchPipelineTranslator.BatchTransformTranslator<GroupByKey<K, InputT>> {
+
+ @Override
+ public void translateNode(
+ GroupByKey<K, InputT> transform,
+ FlinkBatchTranslationContext context) {
+
+ // for now, this is copied from the Combine.PerKey translator. Once we have the new runner API
+ // we can replace GroupByKey by a Combine.PerKey with the Concatenate CombineFn
+
+ DataSet<WindowedValue<KV<K, InputT>>> inputDataSet =
+ context.getInputDataSet(context.getInput(transform));
+
+ Combine.KeyedCombineFn<K, InputT, List<InputT>, List<InputT>> combineFn =
+ new Concatenate<InputT>().asKeyedFn();
+
+ KvCoder<K, InputT> inputCoder =
+ (KvCoder<K, InputT>) context.getInput(transform).getCoder();
+
+ Coder<List<InputT>> accumulatorCoder;
+
+ try {
+ accumulatorCoder =
+ combineFn.getAccumulatorCoder(
+ context.getInput(transform).getPipeline().getCoderRegistry(),
+ inputCoder.getKeyCoder(),
+ inputCoder.getValueCoder());
+ } catch (CannotProvideCoderException e) {
+ throw new RuntimeException(e);
+ }
+
+ WindowingStrategy<?, ?> windowingStrategy =
+ context.getInput(transform).getWindowingStrategy();
+
+ TypeInformation<WindowedValue<KV<K, List<InputT>>>> partialReduceTypeInfo =
+ new CoderTypeInformation<>(
+ WindowedValue.getFullCoder(
+ KvCoder.of(inputCoder.getKeyCoder(), accumulatorCoder),
+ windowingStrategy.getWindowFn().windowCoder()));
+
+
+ Grouping<WindowedValue<KV<K, InputT>>> inputGrouping =
+ inputDataSet.groupBy(new KvKeySelector<InputT, K>(inputCoder.getKeyCoder()));
+
+ FlinkPartialReduceFunction<K, InputT, List<InputT>, ?> partialReduceFunction;
+ FlinkReduceFunction<K, List<InputT>, List<InputT>, ?> reduceFunction;
+
+ if (windowingStrategy.getWindowFn().isNonMerging()) {
+ @SuppressWarnings("unchecked")
+ WindowingStrategy<?, BoundedWindow> boundedStrategy =
+ (WindowingStrategy<?, BoundedWindow>) windowingStrategy;
+
+ partialReduceFunction = new FlinkPartialReduceFunction<>(
+ combineFn,
+ boundedStrategy,
+ Collections.<PCollectionView<?>, WindowingStrategy<?, ?>>emptyMap(),
+ context.getPipelineOptions());
+
+ reduceFunction = new FlinkReduceFunction<>(
+ combineFn,
+ boundedStrategy,
+ Collections.<PCollectionView<?>, WindowingStrategy<?, ?>>emptyMap(),
+ context.getPipelineOptions());
+
+ } else {
+ if (!windowingStrategy.getWindowFn().windowCoder().equals(IntervalWindow.getCoder())) {
+ throw new UnsupportedOperationException(
+ "Merging WindowFn with windows other than IntervalWindow are not supported.");
+ }
+
+ @SuppressWarnings("unchecked")
+ WindowingStrategy<?, IntervalWindow> intervalStrategy =
+ (WindowingStrategy<?, IntervalWindow>) windowingStrategy;
+
+ partialReduceFunction = new FlinkMergingPartialReduceFunction<>(
+ combineFn,
+ intervalStrategy,
+ Collections.<PCollectionView<?>, WindowingStrategy<?, ?>>emptyMap(),
+ context.getPipelineOptions());
+
+ reduceFunction = new FlinkMergingReduceFunction<>(
+ combineFn,
+ intervalStrategy,
+ Collections.<PCollectionView<?>, WindowingStrategy<?, ?>>emptyMap(),
+ context.getPipelineOptions());
+ }
+
+ // Partially GroupReduce the values into the intermediate format AccumT (combine)
+ GroupCombineOperator<
+ WindowedValue<KV<K, InputT>>,
+ WindowedValue<KV<K, List<InputT>>>> groupCombine =
+ new GroupCombineOperator<>(
+ inputGrouping,
+ partialReduceTypeInfo,
+ partialReduceFunction,
+ "GroupCombine: " + transform.getName());
+
+ Grouping<WindowedValue<KV<K, List<InputT>>>> intermediateGrouping =
+ groupCombine.groupBy(new KvKeySelector<List<InputT>, K>(inputCoder.getKeyCoder()));
+
+ // Fully reduce the values and create output format VO
+ GroupReduceOperator<
+ WindowedValue<KV<K, List<InputT>>>, WindowedValue<KV<K, List<InputT>>>> outputDataSet =
+ new GroupReduceOperator<>(
+ intermediateGrouping, partialReduceTypeInfo, reduceFunction, transform.getName());
+
+ context.setOutputDataSet(context.getOutput(transform), outputDataSet);
+
+ }
+
+ }
+
+ private static class ReshuffleTranslatorBatch<K, InputT>
+ implements FlinkBatchPipelineTranslator.BatchTransformTranslator<Reshuffle<K, InputT>> {
+
+ @Override
+ public void translateNode(
+ Reshuffle<K, InputT> transform,
+ FlinkBatchTranslationContext context) {
+
+ DataSet<WindowedValue<KV<K, InputT>>> inputDataSet =
+ context.getInputDataSet(context.getInput(transform));
+
+ context.setOutputDataSet(context.getOutput(transform), inputDataSet.rebalance());
+
+ }
+
+ }
+
+ /**
+ * Combiner that combines {@code T}s into a single {@code List<T>} containing all inputs.
+ *
+ * <p>For internal use to translate {@link GroupByKey}. For a large {@link PCollection} this
+ * is expected to crash!
+ *
+ * <p>This is copied from the dataflow runner code.
+ *
+ * @param <T> the type of elements to concatenate.
+ */
+ private static class Concatenate<T> extends Combine.CombineFn<T, List<T>, List<T>> {
+ @Override
+ public List<T> createAccumulator() {
+ return new ArrayList<>();
+ }
+
+ @Override
+ public List<T> addInput(List<T> accumulator, T input) {
+ accumulator.add(input);
+ return accumulator;
+ }
+
+ @Override
+ public List<T> mergeAccumulators(Iterable<List<T>> accumulators) {
+ List<T> result = createAccumulator();
+ for (List<T> accumulator : accumulators) {
+ result.addAll(accumulator);
+ }
+ return result;
+ }
+
+ @Override
+ public List<T> extractOutput(List<T> accumulator) {
+ return accumulator;
+ }
+
+ @Override
+ public Coder<List<T>> getAccumulatorCoder(CoderRegistry registry, Coder<T> inputCoder) {
+ return ListCoder.of(inputCoder);
+ }
+
+ @Override
+ public Coder<List<T>> getDefaultOutputCoder(CoderRegistry registry, Coder<T> inputCoder) {
+ return ListCoder.of(inputCoder);
+ }
+ }
+
+
+ private static class CombinePerKeyTranslatorBatch<K, InputT, AccumT, OutputT>
+ implements FlinkBatchPipelineTranslator.BatchTransformTranslator<
+ Combine.PerKey<K, InputT, OutputT>> {
+
+ @Override
+ @SuppressWarnings("unchecked")
+ public void translateNode(
+ Combine.PerKey<K, InputT, OutputT> transform,
+ FlinkBatchTranslationContext context) {
+ DataSet<WindowedValue<KV<K, InputT>>> inputDataSet =
+ context.getInputDataSet(context.getInput(transform));
+
+ CombineFnBase.PerKeyCombineFn<K, InputT, AccumT, OutputT> combineFn =
+ (CombineFnBase.PerKeyCombineFn<K, InputT, AccumT, OutputT>) transform.getFn();
+
+ KvCoder<K, InputT> inputCoder =
+ (KvCoder<K, InputT>) context.getInput(transform).getCoder();
+
+ Coder<AccumT> accumulatorCoder;
+
+ try {
+ accumulatorCoder =
+ combineFn.getAccumulatorCoder(
+ context.getInput(transform).getPipeline().getCoderRegistry(),
+ inputCoder.getKeyCoder(),
+ inputCoder.getValueCoder());
+ } catch (CannotProvideCoderException e) {
+ throw new RuntimeException(e);
+ }
+
+ WindowingStrategy<?, ?> windowingStrategy =
+ context.getInput(transform).getWindowingStrategy();
+
+ TypeInformation<WindowedValue<KV<K, AccumT>>> partialReduceTypeInfo =
+ context.getTypeInfo(
+ KvCoder.of(inputCoder.getKeyCoder(), accumulatorCoder),
+ windowingStrategy);
+
+ Grouping<WindowedValue<KV<K, InputT>>> inputGrouping =
+ inputDataSet.groupBy(new KvKeySelector<InputT, K>(inputCoder.getKeyCoder()));
+
+ // construct a map from side input to WindowingStrategy so that
+ // the DoFn runner can map main-input windows to side input windows
+ Map<PCollectionView<?>, WindowingStrategy<?, ?>> sideInputStrategies = new HashMap<>();
+ for (PCollectionView<?> sideInput: transform.getSideInputs()) {
+ sideInputStrategies.put(sideInput, sideInput.getWindowingStrategyInternal());
+ }
+
+ if (windowingStrategy.getWindowFn().isNonMerging()) {
+ WindowingStrategy<?, BoundedWindow> boundedStrategy =
+ (WindowingStrategy<?, BoundedWindow>) windowingStrategy;
+
+ FlinkPartialReduceFunction<K, InputT, AccumT, ?> partialReduceFunction =
+ new FlinkPartialReduceFunction<>(
+ combineFn,
+ boundedStrategy,
+ sideInputStrategies,
+ context.getPipelineOptions());
+
+ FlinkReduceFunction<K, AccumT, OutputT, ?> reduceFunction =
+ new FlinkReduceFunction<>(
+ combineFn,
+ boundedStrategy,
+ sideInputStrategies,
+ context.getPipelineOptions());
+
+ // Partially GroupReduce the values into the intermediate format AccumT (combine)
+ GroupCombineOperator<
+ WindowedValue<KV<K, InputT>>,
+ WindowedValue<KV<K, AccumT>>> groupCombine =
+ new GroupCombineOperator<>(
+ inputGrouping,
+ partialReduceTypeInfo,
+ partialReduceFunction,
+ "GroupCombine: " + transform.getName());
+
+ transformSideInputs(transform.getSideInputs(), groupCombine, context);
+
+ TypeInformation<WindowedValue<KV<K, OutputT>>> reduceTypeInfo =
+ context.getTypeInfo(context.getOutput(transform));
+
+ Grouping<WindowedValue<KV<K, AccumT>>> intermediateGrouping =
+ groupCombine.groupBy(new KvKeySelector<AccumT, K>(inputCoder.getKeyCoder()));
+
+ // Fully reduce the values and create output format OutputT
+ GroupReduceOperator<
+ WindowedValue<KV<K, AccumT>>, WindowedValue<KV<K, OutputT>>> outputDataSet =
+ new GroupReduceOperator<>(
+ intermediateGrouping, reduceTypeInfo, reduceFunction, transform.getName());
+
+ transformSideInputs(transform.getSideInputs(), outputDataSet, context);
+
+ context.setOutputDataSet(context.getOutput(transform), outputDataSet);
+
+ } else {
+ if (!windowingStrategy.getWindowFn().windowCoder().equals(IntervalWindow.getCoder())) {
+ throw new UnsupportedOperationException(
+ "Merging WindowFn with windows other than IntervalWindow are not supported.");
+ }
+
+ // for merging windows we can't do a pre-shuffle combine step since
+ // elements would not be in their correct windows for side-input access
+
+ WindowingStrategy<?, IntervalWindow> intervalStrategy =
+ (WindowingStrategy<?, IntervalWindow>) windowingStrategy;
+
+ FlinkMergingNonShuffleReduceFunction<K, InputT, AccumT, OutputT, ?> reduceFunction =
+ new FlinkMergingNonShuffleReduceFunction<>(
+ combineFn,
+ intervalStrategy,
+ sideInputStrategies,
+ context.getPipelineOptions());
+
+ TypeInformation<WindowedValue<KV<K, OutputT>>> reduceTypeInfo =
+ context.getTypeInfo(context.getOutput(transform));
+
+ Grouping<WindowedValue<KV<K, InputT>>> grouping =
+ inputDataSet.groupBy(new KvKeySelector<InputT, K>(inputCoder.getKeyCoder()));
+
+ // Fully reduce the values and create output format OutputT
+ GroupReduceOperator<
+ WindowedValue<KV<K, InputT>>, WindowedValue<KV<K, OutputT>>> outputDataSet =
+ new GroupReduceOperator<>(
+ grouping, reduceTypeInfo, reduceFunction, transform.getName());
+
+ transformSideInputs(transform.getSideInputs(), outputDataSet, context);
+
+ context.setOutputDataSet(context.getOutput(transform), outputDataSet);
+ }
+
+
+ }
+ }
+
+ private static void rejectSplittable(DoFn<?, ?> doFn) {
+ DoFnSignature signature = DoFnSignatures.getSignature(doFn.getClass());
+ if (signature.processElement().isSplittable()) {
+ throw new UnsupportedOperationException(
+ String.format(
+ "%s does not currently support splittable DoFn: %s",
+ FlinkRunner.class.getSimpleName(), doFn));
+ }
+ }
+
+ private static class ParDoTranslatorBatch<InputT, OutputT>
+ implements FlinkBatchPipelineTranslator.BatchTransformTranslator<
+ ParDo.MultiOutput<InputT, OutputT>> {
+
+ @Override
+ @SuppressWarnings("unchecked")
+ public void translateNode(
+ ParDo.MultiOutput<InputT, OutputT> transform,
+ FlinkBatchTranslationContext context) {
+ DoFn<InputT, OutputT> doFn = transform.getFn();
+ rejectSplittable(doFn);
+ DataSet<WindowedValue<InputT>> inputDataSet =
+ context.getInputDataSet(context.getInput(transform));
+
+ Map<TupleTag<?>, PValue> outputs = context.getOutputs(transform);
+
+ Map<TupleTag<?>, Integer> outputMap = Maps.newHashMap();
+ // put the main output at index 0, FlinkMultiOutputDoFnFunction expects this
+ outputMap.put(transform.getMainOutputTag(), 0);
+ int count = 1;
+ for (TupleTag<?> tag : outputs.keySet()) {
+ if (!outputMap.containsKey(tag)) {
+ outputMap.put(tag, count++);
+ }
+ }
+
+ // assume that the windowing strategy is the same for all outputs
+ WindowingStrategy<?, ?> windowingStrategy = null;
+
+ // collect all output Coders and create a UnionCoder for our tagged outputs
+ List<Coder<?>> outputCoders = Lists.newArrayList();
+ for (PValue taggedValue : outputs.values()) {
+ checkState(
+ taggedValue instanceof PCollection,
+ "Within ParDo, got a non-PCollection output %s of type %s",
+ taggedValue,
+ taggedValue.getClass().getSimpleName());
+ PCollection<?> coll = (PCollection<?>) taggedValue;
+ outputCoders.add(coll.getCoder());
+ windowingStrategy = coll.getWindowingStrategy();
+ }
+
+ if (windowingStrategy == null) {
+ throw new IllegalStateException("No outputs defined.");
+ }
+
+ UnionCoder unionCoder = UnionCoder.of(outputCoders);
+
+ TypeInformation<WindowedValue<RawUnionValue>> typeInformation =
+ new CoderTypeInformation<>(
+ WindowedValue.getFullCoder(
+ unionCoder,
+ windowingStrategy.getWindowFn().windowCoder()));
+
+ List<PCollectionView<?>> sideInputs = transform.getSideInputs();
+
+ // construct a map from side input to WindowingStrategy so that
+ // the DoFn runner can map main-input windows to side input windows
+ Map<PCollectionView<?>, WindowingStrategy<?, ?>> sideInputStrategies = new HashMap<>();
+ for (PCollectionView<?> sideInput: sideInputs) {
+ sideInputStrategies.put(sideInput, sideInput.getWindowingStrategyInternal());
+ }
+
+ SingleInputUdfOperator<WindowedValue<InputT>, WindowedValue<RawUnionValue>, ?> outputDataSet;
+ DoFnSignature signature = DoFnSignatures.getSignature(transform.getFn().getClass());
+ if (signature.stateDeclarations().size() > 0
+ || signature.timerDeclarations().size() > 0) {
+
+ // Based on the fact that the signature is stateful, DoFnSignatures ensures
+ // that it is also keyed
+ KvCoder<?, InputT> inputCoder =
+ (KvCoder<?, InputT>) context.getInput(transform).getCoder();
+
+ FlinkStatefulDoFnFunction<?, ?, OutputT> doFnWrapper = new FlinkStatefulDoFnFunction<>(
+ (DoFn) doFn, windowingStrategy, sideInputStrategies, context.getPipelineOptions(),
+ outputMap, transform.getMainOutputTag()
+ );
+
+ Grouping<WindowedValue<InputT>> grouping =
+ inputDataSet.groupBy(new KvKeySelector(inputCoder.getKeyCoder()));
+
+ outputDataSet =
+ new GroupReduceOperator(grouping, typeInformation, doFnWrapper, transform.getName());
+
+ } else {
+ FlinkDoFnFunction<InputT, RawUnionValue> doFnWrapper =
+ new FlinkDoFnFunction(
+ doFn,
+ windowingStrategy,
+ sideInputStrategies,
+ context.getPipelineOptions(),
+ outputMap,
+ transform.getMainOutputTag());
+
+ outputDataSet = new MapPartitionOperator<>(
+ inputDataSet, typeInformation,
+ doFnWrapper, transform.getName());
+
+ }
+
+ transformSideInputs(sideInputs, outputDataSet, context);
+
+ for (Entry<TupleTag<?>, PValue> output : outputs.entrySet()) {
+ pruneOutput(
+ outputDataSet,
+ context,
+ outputMap.get(output.getKey()),
+ (PCollection) output.getValue());
+ }
+
+ }
+
+ private <T> void pruneOutput(
+ DataSet<WindowedValue<RawUnionValue>> taggedDataSet,
+ FlinkBatchTranslationContext context,
+ int integerTag,
+ PCollection<T> collection) {
+ TypeInformation<WindowedValue<T>> outputType = context.getTypeInfo(collection);
+
+ FlinkMultiOutputPruningFunction<T> pruningFunction =
+ new FlinkMultiOutputPruningFunction<>(integerTag);
+
+ FlatMapOperator<WindowedValue<RawUnionValue>, WindowedValue<T>> pruningOperator =
+ new FlatMapOperator<>(
+ taggedDataSet,
+ outputType,
+ pruningFunction,
+ collection.getName());
+
+ context.setOutputDataSet(collection, pruningOperator);
+ }
+ }
+
+ private static class FlattenPCollectionTranslatorBatch<T>
+ implements FlinkBatchPipelineTranslator.BatchTransformTranslator<
+ Flatten.PCollections<T>> {
+
+ @Override
+ @SuppressWarnings("unchecked")
+ public void translateNode(
+ Flatten.PCollections<T> transform,
+ FlinkBatchTranslationContext context) {
+
+ Map<TupleTag<?>, PValue> allInputs = context.getInputs(transform);
+ DataSet<WindowedValue<T>> result = null;
+
+ if (allInputs.isEmpty()) {
+
+ // create an empty dummy source to satisfy downstream operations
+ // we cannot create an empty source in Flink, therefore we have to
+ // add the flatMap that simply never forwards the single element
+ DataSource<String> dummySource =
+ context.getExecutionEnvironment().fromElements("dummy");
+ result = dummySource.flatMap(new FlatMapFunction<String, WindowedValue<T>>() {
+ @Override
+ public void flatMap(String s, Collector<WindowedValue<T>> collector) throws Exception {
+ // never return anything
+ }
+ }).returns(
+ new CoderTypeInformation<>(
+ WindowedValue.getFullCoder(
+ (Coder<T>) VoidCoder.of(),
+ GlobalWindow.Coder.INSTANCE)));
+ } else {
+ for (PValue taggedPc : allInputs.values()) {
+ checkArgument(
+ taggedPc instanceof PCollection,
+ "Got non-PCollection input to flatten: %s of type %s",
+ taggedPc,
+ taggedPc.getClass().getSimpleName());
+ PCollection<T> collection = (PCollection<T>) taggedPc;
+ DataSet<WindowedValue<T>> current = context.getInputDataSet(collection);
+ if (result == null) {
+ result = current;
+ } else {
+ result = result.union(current);
+ }
+ }
+ }
+
+ // insert a dummy filter, there seems to be a bug in Flink
+ // that produces duplicate elements after the union in some cases
+ // if we don't
+ result = result.filter(new FilterFunction<WindowedValue<T>>() {
+ @Override
+ public boolean filter(WindowedValue<T> tWindowedValue) throws Exception {
+ return true;
+ }
+ }).name("UnionFixFilter");
+ context.setOutputDataSet(context.getOutput(transform), result);
+ }
+ }
+
+ private static class CreatePCollectionViewTranslatorBatch<ElemT, ViewT>
+ implements FlinkBatchPipelineTranslator.BatchTransformTranslator<
+ View.CreatePCollectionView<ElemT, ViewT>> {
+
+ @Override
+ public void translateNode(
+ View.CreatePCollectionView<ElemT, ViewT> transform,
+ FlinkBatchTranslationContext context) {
+ DataSet<WindowedValue<ElemT>> inputDataSet =
+ context.getInputDataSet(context.getInput(transform));
+
+ PCollectionView<ViewT> input = transform.getView();
+
+ context.setSideInputDataSet(input, inputDataSet);
+ }
+ }
+
+ private static void transformSideInputs(
+ List<PCollectionView<?>> sideInputs,
+ SingleInputUdfOperator<?, ?, ?> outputDataSet,
+ FlinkBatchTranslationContext context) {
+ // get corresponding Flink broadcast DataSets
+ for (PCollectionView<?> input : sideInputs) {
+ DataSet<?> broadcastSet = context.getSideInputDataSet(input);
+ outputDataSet.withBroadcastSet(broadcastSet, input.getTagInternal().getId());
+ }
+ }
+
+ private FlinkBatchTransformTranslators() {}
+
+}
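
Each translator above follows the same shape: fetch the input DataSet from the context, build the equivalent Flink operation, and register the result for downstream transforms. The sketch below shows that shape for a hypothetical MyTransform; both the transform and its registration line are illustrative assumptions, not part of this diff.

    // Hypothetical translator, same structure as the built-in ones above.
    private static class MyTransformTranslatorBatch<T>
        implements FlinkBatchPipelineTranslator.BatchTransformTranslator<MyTransform<T>> {

      @Override
      public void translateNode(MyTransform<T> transform, FlinkBatchTranslationContext context) {
        // Look up the Flink DataSet produced for the input PCollection.
        DataSet<WindowedValue<T>> input = context.getInputDataSet(context.getInput(transform));

        // Apply the equivalent Flink operation; an identity map stands in for real logic.
        DataSet<WindowedValue<T>> result = input
            .map(new MapFunction<WindowedValue<T>, WindowedValue<T>>() {
              @Override
              public WindowedValue<T> map(WindowedValue<T> value) {
                return value;
              }
            })
            .returns(context.getTypeInfo(context.getOutput(transform)))
            .name(transform.getName());

        // Register the result so downstream translators can find it.
        context.setOutputDataSet(context.getOutput(transform), result);
      }
    }

    // Registered alongside the built-in translators in the static block:
    //   TRANSLATORS.put(MyTransform.class, new MyTransformTranslatorBatch());
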
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/src/main/java/org/apache/beam/runners/flink/FlinkBatchTranslationContext.java
----------------------------------------------------------------------
diff --git a/runners/flink/src/main/java/org/apache/beam/runners/flink/FlinkBatchTranslationContext.java b/runners/flink/src/main/java/org/apache/beam/runners/flink/FlinkBatchTranslationContext.java
new file mode 100644
index 0000000..98dd0fb
--- /dev/null
+++ b/runners/flink/src/main/java/org/apache/beam/runners/flink/FlinkBatchTranslationContext.java
@@ -0,0 +1,153 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.runners.flink;
+
+import com.google.common.collect.Iterables;
+import java.util.HashMap;
+import java.util.Map;
+import org.apache.beam.runners.flink.translation.types.CoderTypeInformation;
+import org.apache.beam.sdk.coders.Coder;
+import org.apache.beam.sdk.options.PipelineOptions;
+import org.apache.beam.sdk.transforms.AppliedPTransform;
+import org.apache.beam.sdk.transforms.PTransform;
+import org.apache.beam.sdk.util.WindowedValue;
+import org.apache.beam.sdk.util.WindowingStrategy;
+import org.apache.beam.sdk.values.PCollection;
+import org.apache.beam.sdk.values.PCollectionView;
+import org.apache.beam.sdk.values.PValue;
+import org.apache.beam.sdk.values.TupleTag;
+import org.apache.flink.api.common.typeinfo.TypeInformation;
+import org.apache.flink.api.java.DataSet;
+import org.apache.flink.api.java.ExecutionEnvironment;
+
+/**
+ * Helper for {@link FlinkBatchPipelineTranslator} and translators in
+ * {@link FlinkBatchTransformTranslators}.
+ */
+class FlinkBatchTranslationContext {
+
+ private final Map<PValue, DataSet<?>> dataSets;
+ private final Map<PCollectionView<?>, DataSet<?>> broadcastDataSets;
+
+ /**
+ * For keeping track of which DataSets don't have a successor. We
+ * need to terminate these with a discarding sink because the Beam
+ * model allows dangling operations.
+ */
+ private final Map<PValue, DataSet<?>> danglingDataSets;
+
+ private final ExecutionEnvironment env;
+ private final PipelineOptions options;
+
+ private AppliedPTransform<?, ?, ?> currentTransform;
+
+ // ------------------------------------------------------------------------
+
+ public FlinkBatchTranslationContext(ExecutionEnvironment env, PipelineOptions options) {
+ this.env = env;
+ this.options = options;
+ this.dataSets = new HashMap<>();
+ this.broadcastDataSets = new HashMap<>();
+
+ this.danglingDataSets = new HashMap<>();
+ }
+
+ // ------------------------------------------------------------------------
+
+ public Map<PValue, DataSet<?>> getDanglingDataSets() {
+ return danglingDataSets;
+ }
+
+ public ExecutionEnvironment getExecutionEnvironment() {
+ return env;
+ }
+
+ public PipelineOptions getPipelineOptions() {
+ return options;
+ }
+
+ @SuppressWarnings("unchecked")
+ public <T> DataSet<WindowedValue<T>> getInputDataSet(PValue value) {
+ // assume that the DataSet is used as an input if retrieved here
+ danglingDataSets.remove(value);
+ return (DataSet<WindowedValue<T>>) dataSets.get(value);
+ }
+
+ public <T> void setOutputDataSet(PValue value, DataSet<WindowedValue<T>> set) {
+ if (!dataSets.containsKey(value)) {
+ dataSets.put(value, set);
+ danglingDataSets.put(value, set);
+ }
+ }
+
+ /**
+ * Sets the AppliedPTransform which carries input/output.
+ * @param currentTransform the transform currently being translated
+ */
+ public void setCurrentTransform(AppliedPTransform<?, ?, ?> currentTransform) {
+ this.currentTransform = currentTransform;
+ }
+
+ @SuppressWarnings("unchecked")
+ public <T> DataSet<T> getSideInputDataSet(PCollectionView<?> value) {
+ return (DataSet<T>) broadcastDataSets.get(value);
+ }
+
+ public <ViewT, ElemT> void setSideInputDataSet(
+ PCollectionView<ViewT> value,
+ DataSet<WindowedValue<ElemT>> set) {
+ if (!broadcastDataSets.containsKey(value)) {
+ broadcastDataSets.put(value, set);
+ }
+ }
+
+ @SuppressWarnings("unchecked")
+ public <T> TypeInformation<WindowedValue<T>> getTypeInfo(PCollection<T> collection) {
+ return getTypeInfo(collection.getCoder(), collection.getWindowingStrategy());
+ }
+
+ @SuppressWarnings("unchecked")
+ public <T> TypeInformation<WindowedValue<T>> getTypeInfo(
+ Coder<T> coder,
+ WindowingStrategy<?, ?> windowingStrategy) {
+ WindowedValue.FullWindowedValueCoder<T> windowedValueCoder =
+ WindowedValue.getFullCoder(
+ coder,
+ windowingStrategy.getWindowFn().windowCoder());
+
+ return new CoderTypeInformation<>(windowedValueCoder);
+ }
+
+ Map<TupleTag<?>, PValue> getInputs(PTransform<?, ?> transform) {
+ return currentTransform.getInputs();
+ }
+
+ @SuppressWarnings("unchecked")
+ <T extends PValue> T getInput(PTransform<T, ?> transform) {
+ return (T) Iterables.getOnlyElement(currentTransform.getInputs().values());
+ }
+
+ Map<TupleTag<?>, PValue> getOutputs(PTransform<?, ?> transform) {
+ return currentTransform.getOutputs();
+ }
+
+ @SuppressWarnings("unchecked")
+ <T extends PValue> T getOutput(PTransform<?, T> transform) {
+ return (T) Iterables.getOnlyElement(currentTransform.getOutputs().values());
+ }
+}
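
A minimal, hypothetical sketch of how a batch transform translator might drive this
context: look up the input DataSet, apply a Flink operation, and register the result
for downstream translators. The translateIdentity method and the identity MapFunction
are illustrative only; the context methods used are the ones defined above, and
org.apache.flink.api.common.functions.MapFunction is assumed to be imported.

static <T> void translateIdentity(
    PTransform<PCollection<T>, PCollection<T>> transform,
    FlinkBatchTranslationContext context) {
  DataSet<WindowedValue<T>> inputDataSet =
      context.getInputDataSet(context.getInput(transform));
  TypeInformation<WindowedValue<T>> outputTypeInfo =
      context.getTypeInfo(context.getOutput(transform));

  DataSet<WindowedValue<T>> outputDataSet =
      inputDataSet
          .map(new MapFunction<WindowedValue<T>, WindowedValue<T>>() {
            @Override
            public WindowedValue<T> map(WindowedValue<T> value) {
              // A real translator would run the user's function here;
              // this sketch simply forwards each element.
              return value;
            }
          })
          .returns(outputTypeInfo)
          .name(transform.getName());

  // If no downstream translator ever calls getInputDataSet on this value, it
  // stays in danglingDataSets and is terminated with a discarding sink.
  context.setOutputDataSet(context.getOutput(transform), outputDataSet);
}
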
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/src/main/java/org/apache/beam/runners/flink/FlinkDetachedRunnerResult.java
----------------------------------------------------------------------
diff --git a/runners/flink/src/main/java/org/apache/beam/runners/flink/FlinkDetachedRunnerResult.java b/runners/flink/src/main/java/org/apache/beam/runners/flink/FlinkDetachedRunnerResult.java
new file mode 100644
index 0000000..bf4395f
--- /dev/null
+++ b/runners/flink/src/main/java/org/apache/beam/runners/flink/FlinkDetachedRunnerResult.java
@@ -0,0 +1,75 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.runners.flink;
+
+import java.io.IOException;
+
+import org.apache.beam.sdk.AggregatorRetrievalException;
+import org.apache.beam.sdk.AggregatorValues;
+import org.apache.beam.sdk.PipelineResult;
+import org.apache.beam.sdk.metrics.MetricResults;
+import org.apache.beam.sdk.transforms.Aggregator;
+import org.joda.time.Duration;
+
+
+/**
+ * Result of a detached execution of a {@link org.apache.beam.sdk.Pipeline} with Flink.
+ * In detached execution, results and job execution are currently unavailable.
+ */
+public class FlinkDetachedRunnerResult implements PipelineResult {
+
+ FlinkDetachedRunnerResult() {}
+
+ @Override
+ public State getState() {
+ return State.UNKNOWN;
+ }
+
+ @Override
+ public <T> AggregatorValues<T> getAggregatorValues(final Aggregator<?, T> aggregator)
+ throws AggregatorRetrievalException {
+ throw new AggregatorRetrievalException(
+ "Accumulators can't be retrieved for detached Job executions.",
+ new UnsupportedOperationException());
+ }
+
+ @Override
+ public MetricResults metrics() {
+ throw new UnsupportedOperationException("The FlinkRunner does not currently support metrics.");
+ }
+
+ @Override
+ public State cancel() throws IOException {
+ throw new UnsupportedOperationException("Cancelling is not yet supported.");
+ }
+
+ @Override
+ public State waitUntilFinish() {
+ return State.UNKNOWN;
+ }
+
+ @Override
+ public State waitUntilFinish(Duration duration) {
+ return State.UNKNOWN;
+ }
+
+ @Override
+ public String toString() {
+ return "FlinkDetachedRunnerResult{}";
+ }
+}
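
A small caller-side sketch (hypothetical, not from this commit): because a detached
result reports UNKNOWN state and rejects metrics and cancellation, submission code
should avoid blocking on it. The pipeline variable is assumed to be a constructed
Beam Pipeline.

PipelineResult result = pipeline.run();
if (result instanceof FlinkDetachedRunnerResult) {
  // Nothing to wait for: getState() stays UNKNOWN, and metrics()/cancel() throw.
  System.out.println("Submitted detached Flink job: " + result);
} else {
  result.waitUntilFinish();
}
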
[09/50] [abbrv] beam git commit: This closes #2556
Posted by dh...@apache.org.
This closes #2556
Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/e0df7d85
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/e0df7d85
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/e0df7d85
Branch: refs/heads/DSL_SQL
Commit: e0df7d85e80eac71f875663512bc293a0529460f
Parents: a9bcc8b 6ac3ac5
Author: Eugene Kirpichov <ki...@google.com>
Authored: Tue Apr 18 18:02:25 2017 -0700
Committer: Eugene Kirpichov <ki...@google.com>
Committed: Tue Apr 18 18:02:25 2017 -0700
----------------------------------------------------------------------
.../operators/ApexParDoOperator.java | 3 +-
.../apache/beam/runners/core/DoFnRunners.java | 32 +++
.../beam/runners/core/ProcessFnRunner.java | 127 +++++++++
.../core/PushbackSideInputDoFnRunner.java | 106 +------
.../core/SimplePushbackSideInputDoFnRunner.java | 115 ++++++++
.../beam/runners/core/SplittableParDo.java | 110 +++++---
.../core/PushbackSideInputDoFnRunnerTest.java | 282 -------------------
.../SimplePushbackSideInputDoFnRunnerTest.java | 282 +++++++++++++++++++
.../beam/runners/core/SplittableParDoTest.java | 90 +++---
...ecycleManagerRemovingTransformEvaluator.java | 6 +-
.../beam/runners/direct/ParDoEvaluator.java | 127 ++++++---
.../runners/direct/ParDoEvaluatorFactory.java | 13 +-
...littableProcessElementsEvaluatorFactory.java | 106 +++++--
.../direct/StatefulParDoEvaluatorFactory.java | 4 +-
.../direct/TransformEvaluatorRegistry.java | 4 +-
...leManagerRemovingTransformEvaluatorTest.java | 8 +-
.../beam/runners/direct/ParDoEvaluatorTest.java | 7 +-
runners/flink/runner/pom.xml | 3 +-
.../wrappers/streaming/DoFnOperator.java | 12 +-
.../streaming/SplittableDoFnOperator.java | 2 +-
.../wrappers/streaming/WindowDoFnOperator.java | 2 +-
...esSplittableParDoWithWindowedSideInputs.java | 26 ++
.../beam/sdk/transforms/SplittableDoFnTest.java | 104 +++++--
23 files changed, 993 insertions(+), 578 deletions(-)
----------------------------------------------------------------------
[15/50] [abbrv] beam git commit: Separate streaming writes into two
pluggable components - CreateTables,
and StreamingWriteTables. Also address many code review comments. Also merge
with master.
Posted by dh...@apache.org.
Separate streaming writes into two pluggable components - CreateTables, and StreamingWriteTables.
Also address many code review comments.
Also merge with master.
Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/7d13061c
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/7d13061c
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/7d13061c
Branch: refs/heads/DSL_SQL
Commit: 7d13061cc36466c502bbc1f61d391743dd3739af
Parents: b486137
Author: Reuven Lax <re...@google.com>
Authored: Sun Apr 2 21:39:50 2017 -0700
Committer: Eugene Kirpichov <ki...@google.com>
Committed: Tue Apr 18 21:12:50 2017 -0700
----------------------------------------------------------------------
.../beam/sdk/io/gcp/bigquery/BatchLoads.java | 176 ++++++++++---------
.../sdk/io/gcp/bigquery/BigQueryHelpers.java | 13 ++
.../beam/sdk/io/gcp/bigquery/BigQueryIO.java | 21 ++-
.../io/gcp/bigquery/BigQueryTableSource.java | 4 +-
.../beam/sdk/io/gcp/bigquery/CreateTables.java | 95 ++++++----
.../io/gcp/bigquery/GenerateShardedTable.java | 3 +-
.../beam/sdk/io/gcp/bigquery/PrepareWrite.java | 80 +++++----
.../beam/sdk/io/gcp/bigquery/ShardedKey.java | 1 +
.../sdk/io/gcp/bigquery/StreamingInserts.java | 44 +----
.../io/gcp/bigquery/StreamingWriteTables.java | 86 +++++++++
.../sdk/io/gcp/bigquery/TableDestination.java | 1 +
.../io/gcp/bigquery/TableDestinationCoder.java | 62 +++----
.../sdk/io/gcp/bigquery/TableRowWriter.java | 14 +-
.../sdk/io/gcp/bigquery/TagWithUniqueIds.java | 14 +-
.../io/gcp/bigquery/WriteBundlesToFiles.java | 25 +--
.../sdk/io/gcp/bigquery/WritePartition.java | 127 ++++++++-----
.../beam/sdk/io/gcp/bigquery/WriteRename.java | 5 +-
.../beam/sdk/io/gcp/bigquery/WriteTables.java | 17 +-
.../sdk/io/gcp/bigquery/BigQueryIOTest.java | 66 ++++---
19 files changed, 516 insertions(+), 338 deletions(-)
----------------------------------------------------------------------
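
For orientation before the diffs below, the split described in the commit message
composes roughly as follows. This is a sketch using the transforms introduced in
this commit; the method name, create disposition, and schema function are
placeholders, and coder wiring is omitted for brevity.

static WriteResult streamingWrite(
    PCollection<KV<TableDestination, TableRow>> rows,
    SerializableFunction<TableDestination, TableSchema> schemaFunction) {
  return rows
      // Stage 1: create any missing destination tables (side effect only; the
      // rows pass through unchanged).
      .apply("CreateTables",
          new CreateTables(BigQueryIO.Write.CreateDisposition.CREATE_IF_NEEDED,
              schemaFunction))
      // Stage 2: shard, tag with unique ids, reshuffle, and stream the rows
      // using BigQuery's best-effort de-dup mechanism.
      .apply("StreamingWriteTables", new StreamingWriteTables());
}
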
http://git-wip-us.apache.org/repos/asf/beam/blob/7d13061c/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BatchLoads.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BatchLoads.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BatchLoads.java
index 06fdfce..236b234 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BatchLoads.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BatchLoads.java
@@ -18,7 +18,6 @@
package org.apache.beam.sdk.io.gcp.bigquery;
-import com.google.api.services.bigquery.model.TableReference;
import com.google.api.services.bigquery.model.TableRow;
import com.google.api.services.bigquery.model.TableSchema;
import java.io.IOException;
@@ -35,7 +34,6 @@ import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.WriteDisposition;
import org.apache.beam.sdk.options.BigQueryOptions;
import org.apache.beam.sdk.options.ValueProvider;
import org.apache.beam.sdk.transforms.Create;
-import org.apache.beam.sdk.transforms.GroupByKey;
import org.apache.beam.sdk.transforms.MapElements;
import org.apache.beam.sdk.transforms.PTransform;
import org.apache.beam.sdk.transforms.ParDo;
@@ -47,6 +45,7 @@ import org.apache.beam.sdk.transforms.windowing.GlobalWindows;
import org.apache.beam.sdk.transforms.windowing.Window;
import org.apache.beam.sdk.util.IOChannelFactory;
import org.apache.beam.sdk.util.IOChannelUtils;
+import org.apache.beam.sdk.util.Reshuffle;
import org.apache.beam.sdk.values.KV;
import org.apache.beam.sdk.values.PCollection;
import org.apache.beam.sdk.values.PCollectionTuple;
@@ -54,17 +53,13 @@ import org.apache.beam.sdk.values.PCollectionView;
import org.apache.beam.sdk.values.TupleTag;
import org.apache.beam.sdk.values.TupleTagList;
-
-/**
- * PTransform that uses BigQuery batch-load jobs to write a PCollection to BigQuery.
- */
+/** PTransform that uses BigQuery batch-load jobs to write a PCollection to BigQuery. */
class BatchLoads extends PTransform<PCollection<KV<TableDestination, TableRow>>, WriteResult> {
BigQueryIO.Write<?> write;
- private static class ConstantSchemaFunction implements
- SerializableFunction<TableDestination, TableSchema> {
- private final @Nullable
- ValueProvider<String> jsonSchema;
+ private static class ConstantSchemaFunction
+ implements SerializableFunction<TableDestination, TableSchema> {
+ private final @Nullable ValueProvider<String> jsonSchema;
ConstantSchemaFunction(ValueProvider<String> jsonSchema) {
this.jsonSchema = jsonSchema;
@@ -86,7 +81,6 @@ class BatchLoads extends PTransform<PCollection<KV<TableDestination, TableRow>>,
public WriteResult expand(PCollection<KV<TableDestination, TableRow>> input) {
Pipeline p = input.getPipeline();
BigQueryOptions options = p.getOptions().as(BigQueryOptions.class);
- ValueProvider<TableReference> table = write.getTableWithDefaultProject(options);
final String stepUuid = BigQueryHelpers.randomUUIDString();
@@ -94,40 +88,41 @@ class BatchLoads extends PTransform<PCollection<KV<TableDestination, TableRow>>,
String tempFilePrefix;
try {
IOChannelFactory factory = IOChannelUtils.getFactory(tempLocation);
- tempFilePrefix = factory.resolve(
- factory.resolve(tempLocation, "BigQueryWriteTemp"),
- stepUuid);
+ tempFilePrefix =
+ factory.resolve(factory.resolve(tempLocation, "BigQueryWriteTemp"), stepUuid);
} catch (IOException e) {
throw new RuntimeException(
- String.format("Failed to resolve BigQuery temp location in %s", tempLocation),
- e);
+ String.format("Failed to resolve BigQuery temp location in %s", tempLocation), e);
}
// Create a singleton job ID token at execution time. This will be used as the base for all
// load jobs issued from this instance of the transform.
PCollection<String> singleton = p.apply("Create", Create.of(tempFilePrefix));
- PCollectionView<String> jobIdTokenView = p
- .apply("TriggerIdCreation", Create.of("ignored"))
- .apply("CreateJobId", MapElements.via(
- new SimpleFunction<String, String>() {
- @Override
- public String apply(String input) {
- return stepUuid;
- }
- }))
- .apply(View.<String>asSingleton());
+ PCollectionView<String> jobIdTokenView =
+ p.apply("TriggerIdCreation", Create.of("ignored"))
+ .apply(
+ "CreateJobId",
+ MapElements.via(
+ new SimpleFunction<String, String>() {
+ @Override
+ public String apply(String input) {
+ return stepUuid;
+ }
+ }))
+ .apply(View.<String>asSingleton());
PCollection<KV<TableDestination, TableRow>> inputInGlobalWindow =
- input.apply("rewindowIntoGlobal",
+ input.apply(
+ "rewindowIntoGlobal",
Window.<KV<TableDestination, TableRow>>into(new GlobalWindows())
.triggering(DefaultTrigger.of())
.discardingFiredPanes());
// PCollection of filename, file byte size, and table destination.
- PCollection<WriteBundlesToFiles.Result> results = inputInGlobalWindow
- .apply("WriteBundlesToFiles",
- ParDo.of(new WriteBundlesToFiles(tempFilePrefix)))
- .setCoder(WriteBundlesToFiles.ResultCoder.of());
+ PCollection<WriteBundlesToFiles.Result> results =
+ inputInGlobalWindow
+ .apply("WriteBundlesToFiles", ParDo.of(new WriteBundlesToFiles(tempFilePrefix)))
+ .setCoder(WriteBundlesToFiles.ResultCoder.of());
TupleTag<KV<ShardedKey<TableDestination>, List<String>>> multiPartitionsTag =
new TupleTag<KV<ShardedKey<TableDestination>, List<String>>>("multiPartitionsTag") {};
@@ -136,20 +131,23 @@ class BatchLoads extends PTransform<PCollection<KV<TableDestination, TableRow>>,
// Turn the list of files and record counts in a PCollectionView that can be used as a
// side input.
- PCollectionView<Iterable<WriteBundlesToFiles.Result>> resultsView = results
- .apply("ResultsView", View.<WriteBundlesToFiles.Result>asIterable());
+ PCollectionView<Iterable<WriteBundlesToFiles.Result>> resultsView =
+ results.apply("ResultsView", View.<WriteBundlesToFiles.Result>asIterable());
// This transform will look at the set of files written for each table, and if any table has
// too many files or bytes, will partition that table's files into multiple partitions for
// loading.
- PCollectionTuple partitions = singleton.apply("WritePartition",
- ParDo.of(new WritePartition(
- write.getJsonTableRef(),
- write.getTableDescription(),
- resultsView,
- multiPartitionsTag,
- singlePartitionTag))
- .withSideInputs(resultsView)
- .withOutputTags(multiPartitionsTag, TupleTagList.of(singlePartitionTag)));
+ PCollectionTuple partitions =
+ singleton.apply(
+ "WritePartition",
+ ParDo.of(
+ new WritePartition(
+ write.getJsonTableRef(),
+ write.getTableDescription(),
+ resultsView,
+ multiPartitionsTag,
+ singlePartitionTag))
+ .withSideInputs(resultsView)
+ .withOutputTags(multiPartitionsTag, TupleTagList.of(singlePartitionTag)));
// Since BigQueryIO.java does not yet have support for per-table schemas, inject a constant
// schema function here. If no schema is specified, this function will return null.
@@ -158,55 +156,69 @@ class BatchLoads extends PTransform<PCollection<KV<TableDestination, TableRow>>,
new ConstantSchemaFunction(write.getJsonSchema());
Coder<KV<ShardedKey<TableDestination>, List<String>>> partitionsCoder =
- KvCoder.of(ShardedKeyCoder.of(TableDestinationCoder.of()),
- ListCoder.of(StringUtf8Coder.of()));
+ KvCoder.of(
+ ShardedKeyCoder.of(TableDestinationCoder.of()), ListCoder.of(StringUtf8Coder.of()));
// If WriteBundlesToFiles produced more than MAX_NUM_FILES files or MAX_SIZE_BYTES bytes, then
// the import needs to be split into multiple partitions, and those partitions will be
// specified in multiPartitionsTag.
- PCollection<KV<TableDestination, String>> tempTables = partitions.get(multiPartitionsTag)
- .setCoder(partitionsCoder)
- // What's this GroupByKey for? Is this so we have a deterministic temp tables? If so, maybe
- // Reshuffle is better here.
- .apply("MultiPartitionsGroupByKey",
- GroupByKey.<ShardedKey<TableDestination>, List<String>>create())
- .apply("MultiPartitionsWriteTables", ParDo.of(new WriteTables(
- false,
- write.getBigQueryServices(),
- jobIdTokenView,
- tempFilePrefix,
- WriteDisposition.WRITE_EMPTY,
- CreateDisposition.CREATE_IF_NEEDED,
- schemaFunction))
- .withSideInputs(jobIdTokenView));
+ PCollection<KV<TableDestination, String>> tempTables =
+ partitions
+ .get(multiPartitionsTag)
+ .setCoder(partitionsCoder)
+ // Reshuffle will distribute this among multiple workers, and also guard against
+ // reexecution of the WritePartitions step once WriteTables has begun.
+ .apply(
+ "MultiPartitionsReshuffle",
+ Reshuffle.<ShardedKey<TableDestination>, List<String>>of())
+ .apply(
+ "MultiPartitionsWriteTables",
+ ParDo.of(
+ new WriteTables(
+ false,
+ write.getBigQueryServices(),
+ jobIdTokenView,
+ tempFilePrefix,
+ WriteDisposition.WRITE_EMPTY,
+ CreateDisposition.CREATE_IF_NEEDED,
+ schemaFunction))
+ .withSideInputs(jobIdTokenView));
// This view maps each final table destination to the set of temporary partitioned tables
// the PCollection was loaded into.
- PCollectionView<Map<TableDestination, Iterable<String>>> tempTablesView = tempTables
- .apply("TempTablesView", View.<TableDestination, String>asMultimap());
-
- singleton.apply("WriteRename", ParDo
- .of(new WriteRename(
- write.getBigQueryServices(),
- jobIdTokenView,
- write.getWriteDisposition(),
- write.getCreateDisposition(),
- tempTablesView))
- .withSideInputs(tempTablesView, jobIdTokenView));
+ PCollectionView<Map<TableDestination, Iterable<String>>> tempTablesView =
+ tempTables.apply("TempTablesView", View.<TableDestination, String>asMultimap());
+
+ singleton.apply(
+ "WriteRename",
+ ParDo.of(
+ new WriteRename(
+ write.getBigQueryServices(),
+ jobIdTokenView,
+ write.getWriteDisposition(),
+ write.getCreateDisposition(),
+ tempTablesView))
+ .withSideInputs(tempTablesView, jobIdTokenView));
// Write single partition to final table
- partitions.get(singlePartitionTag)
+ partitions
+ .get(singlePartitionTag)
.setCoder(partitionsCoder)
- .apply("SinglePartitionGroupByKey",
- GroupByKey.<ShardedKey<TableDestination>, List<String>>create())
- .apply("SinglePartitionWriteTables", ParDo.of(new WriteTables(
- true,
- write.getBigQueryServices(),
- jobIdTokenView,
- tempFilePrefix,
- write.getWriteDisposition(),
- write.getCreateDisposition(),
- schemaFunction))
- .withSideInputs(jobIdTokenView));
+ // Reshuffle will distribute this among multiple workers, and also guard against
+ // reexecution of the WritePartitions step once WriteTables has begun.
+ .apply(
+ "SinglePartitionsReshuffle", Reshuffle.<ShardedKey<TableDestination>, List<String>>of())
+ .apply(
+ "SinglePartitionWriteTables",
+ ParDo.of(
+ new WriteTables(
+ true,
+ write.getBigQueryServices(),
+ jobIdTokenView,
+ tempFilePrefix,
+ write.getWriteDisposition(),
+ write.getCreateDisposition(),
+ schemaFunction))
+ .withSideInputs(jobIdTokenView));
return WriteResult.in(input.getPipeline());
}
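
The Reshuffle applications above act as a stable-input barrier in front of the
side-effecting WriteTables/WriteRename steps. A minimal sketch of that general
pattern, assuming the usual Beam imports; the step names and the no-op DoFn body
are placeholders.

static void writeStably(PCollection<KV<String, Long>> partitioned) {
  partitioned
      // Materializes the keyed values, so retries of the downstream step see a
      // stable input even if the upstream step is nondeterministic or re-executed.
      .apply("Checkpoint", Reshuffle.<String, Long>of())
      .apply("SideEffect", ParDo.of(
          new DoFn<KV<String, Long>, Void>() {
            @ProcessElement
            public void processElement(ProcessContext c) {
              // A real pipeline would issue the load job (or other external
              // call) here, protected by the checkpoint above.
            }
          }));
}
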
http://git-wip-us.apache.org/repos/asf/beam/blob/7d13061c/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryHelpers.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryHelpers.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryHelpers.java
index 846103d..e04361c 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryHelpers.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryHelpers.java
@@ -26,6 +26,7 @@ import com.google.api.services.bigquery.model.TableReference;
import com.google.api.services.bigquery.model.TableSchema;
import com.google.cloud.hadoop.util.ApiErrorExtractor;
import com.google.common.annotations.VisibleForTesting;
+import com.google.common.hash.Hashing;
import java.io.IOException;
import java.util.ArrayList;
@@ -234,6 +235,18 @@ public class BigQueryHelpers {
}
}
+ // Create a unique job id for a table load.
+ static String createJobId(String prefix, TableDestination tableDestination, int partition) {
+ // Job ID must be different for each partition of each table.
+ String destinationHash =
+ Hashing.murmur3_128().hashUnencodedChars(tableDestination.toString()).toString();
+ if (partition >= 0) {
+ return String.format("%s_%s_%05d", prefix, destinationHash, partition);
+ } else {
+ return String.format("%s_%s", prefix, destinationHash);
+ }
+ }
+
@VisibleForTesting
static class JsonSchemaToTableSchema
implements SerializableFunction<String, TableSchema> {
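
A worked sketch of the job-id scheme implemented by createJobId above. The prefix
and table spec literal are invented for illustration; the literal stands in for
tableDestination.toString().

String prefix = "beam_load_20170418";
String destinationHash =
    Hashing.murmur3_128()
        .hashUnencodedChars("myproject:mydataset.mytable")
        .toString();                                   // 32 lowercase hex chars

// partition >= 0: one id per (table, partition) pair
String partitionedId = String.format("%s_%s_%05d", prefix, destinationHash, 3);
// e.g. beam_load_20170418_<32-hex-hash>_00003

// partition < 0: one id per table
String singleId = String.format("%s_%s", prefix, destinationHash);
// e.g. beam_load_20170418_<32-hex-hash>
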
http://git-wip-us.apache.org/repos/asf/beam/blob/7d13061c/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.java
index 54a25c7..3f5947e 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.java
@@ -61,7 +61,6 @@ import org.apache.beam.sdk.options.ValueProvider.NestedValueProvider;
import org.apache.beam.sdk.options.ValueProvider.StaticValueProvider;
import org.apache.beam.sdk.runners.PipelineRunner;
import org.apache.beam.sdk.transforms.PTransform;
-import org.apache.beam.sdk.transforms.ParDo;
import org.apache.beam.sdk.transforms.SerializableFunction;
import org.apache.beam.sdk.transforms.display.DisplayData;
import org.apache.beam.sdk.util.IOChannelFactory;
@@ -445,7 +444,8 @@ public class BigQueryIO {
// Note that a table or query check can fail if the table or dataset are created by
// earlier stages of the pipeline or if a query depends on earlier stages of a pipeline.
// For these cases the withoutValidation method can be used to disable the check.
- if (getValidate() && table != null) {
+ if (getValidate() && table != null && table.isAccessible() && table.get().getProjectId()
+ != null) {
checkState(table.isAccessible(), "Cannot call validate if table is dynamically set.");
// Check for source table presence for early failure notification.
DatasetService datasetService = getBigQueryServices().getDatasetService(bqOptions);
@@ -650,6 +650,7 @@ public class BigQueryIO {
public static <T> Write<T> write() {
return new AutoValue_BigQueryIO_Write.Builder<T>()
.setValidate(true)
+ .setTableDescription("")
.setBigQueryServices(new BigQueryServicesImpl())
.setCreateDisposition(Write.CreateDisposition.CREATE_IF_NEEDED)
.setWriteDisposition(Write.WriteDisposition.WRITE_EMPTY)
@@ -690,7 +691,8 @@ public class BigQueryIO {
@Nullable abstract ValueProvider<String> getJsonSchema();
abstract CreateDisposition getCreateDisposition();
abstract WriteDisposition getWriteDisposition();
- @Nullable abstract String getTableDescription();
+ /** Table description. Default is empty. */
+ abstract String getTableDescription();
/** An option to indicate if table validation is desired. Default is true. */
abstract boolean getValidate();
abstract BigQueryServices getBigQueryServices();
@@ -805,9 +807,6 @@ public class BigQueryIO {
public Write<T> to(ValueProvider<String> tableSpec) {
ensureToNotCalledYet();
String tableDescription = getTableDescription();
- if (tableDescription == null) {
- tableDescription = "";
- }
return toBuilder()
.setJsonTableRef(
NestedValueProvider.of(
@@ -911,7 +910,7 @@ public class BigQueryIO {
public void validate(PCollection<T> input) {
BigQueryOptions options = input.getPipeline().getOptions().as(BigQueryOptions.class);
- // Exactly one of the table and table reference can be configured.
+ // We must have a destination to write to!
checkState(getTableFunction() != null,
"must set the table reference of a BigQueryIO.Write transform");
@@ -972,8 +971,8 @@ public class BigQueryIO {
@Override
public WriteResult expand(PCollection<T> input) {
PCollection<KV<TableDestination, TableRow>> rowsWithDestination =
- input.apply("PrepareWrite", ParDo.of(
- new PrepareWrite<T>(getTableFunction(), getFormatFunction())))
+ input.apply("PrepareWrite", new PrepareWrite<T>(
+ getTableFunction(), getFormatFunction()))
.setCoder(KvCoder.of(TableDestinationCoder.of(), TableRowJsonCoder.of()));
@@ -1013,8 +1012,8 @@ public class BigQueryIO {
.withLabel("Table WriteDisposition"))
.addIfNotDefault(DisplayData.item("validation", getValidate())
.withLabel("Validation Enabled"), true)
- .addIfNotNull(DisplayData.item("tableDescription", getTableDescription())
- .withLabel("Table Description"));
+ .addIfNotDefault(DisplayData.item("tableDescription", getTableDescription())
+ .withLabel("Table Description"), "");
}
/** Returns the table schema. */
http://git-wip-us.apache.org/repos/asf/beam/blob/7d13061c/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryTableSource.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryTableSource.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryTableSource.java
index 22aba64..a28da92 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryTableSource.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryTableSource.java
@@ -109,8 +109,8 @@ class BigQueryTableSource extends BigQuerySourceBase {
@Override
public synchronized long getEstimatedSizeBytes(PipelineOptions options) throws Exception {
if (tableSizeBytes.get() == null) {
- TableReference table = BigQueryIO.JSON_FACTORY.fromString(jsonTable.get(),
- TableReference.class);
+ TableReference table = setDefaultProjectIfAbsent(options.as(BigQueryOptions.class),
+ BigQueryIO.JSON_FACTORY.fromString(jsonTable.get(), TableReference.class));
Long numBytes = bqServices.getDatasetService(options.as(BigQueryOptions.class))
.getTable(table).getNumBytes();
http://git-wip-us.apache.org/repos/asf/beam/blob/7d13061c/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/CreateTables.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/CreateTables.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/CreateTables.java
index e216553..a78f32d 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/CreateTables.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/CreateTables.java
@@ -1,68 +1,94 @@
/*
-* Licensed to the Apache Software Foundation (ASF) under one
-* or more contributor license agreements. See the NOTICE file
-* distributed with this work for additional information
-* regarding copyright ownership. The ASF licenses this file
-* to you under the Apache License, Version 2.0 (the
-* "License"); you may not use this file except in compliance
-* with the License. You may obtain a copy of the License at
-*
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing, software
-* distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions and
-* limitations under the License.
-*/
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
package org.apache.beam.sdk.io.gcp.bigquery;
import com.google.api.services.bigquery.model.Table;
import com.google.api.services.bigquery.model.TableReference;
import com.google.api.services.bigquery.model.TableRow;
import com.google.api.services.bigquery.model.TableSchema;
+import com.google.common.annotations.VisibleForTesting;
import java.io.IOException;
import java.util.Collections;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
+
import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.CreateDisposition;
import org.apache.beam.sdk.io.gcp.bigquery.BigQueryServices.DatasetService;
import org.apache.beam.sdk.options.BigQueryOptions;
import org.apache.beam.sdk.transforms.DoFn;
+import org.apache.beam.sdk.transforms.PTransform;
+import org.apache.beam.sdk.transforms.ParDo;
import org.apache.beam.sdk.transforms.SerializableFunction;
import org.apache.beam.sdk.values.KV;
-
+import org.apache.beam.sdk.values.PCollection;
/**
- * Creates any tables needed before performing streaming writes to the tables. This is a
- * side-effect {l@ink DoFn}, and returns the original collection unchanged.
+ * Creates any tables needed before performing streaming writes to the tables. This is a
+ * side-effecting {@link PTransform} that returns the original collection unchanged.
*/
-public class CreateTables extends DoFn<KV<TableDestination, TableRow>,
- KV<TableDestination, TableRow>> {
+public class CreateTables
+ extends PTransform<
+ PCollection<KV<TableDestination, TableRow>>, PCollection<KV<TableDestination, TableRow>>> {
private final CreateDisposition createDisposition;
private final BigQueryServices bqServices;
private final SerializableFunction<TableDestination, TableSchema> schemaFunction;
-
- /** The list of tables created so far, so we don't try the creation
- each time.
- * TODO: We should put a bound on memory usage of this. Use guava cache instead.
+ /**
+ * The list of tables created so far, so we don't try the creation each time.
+ *
+ * <p>TODO: We should put a bound on memory usage of this. Use guava cache instead.
*/
private static Set<String> createdTables =
Collections.newSetFromMap(new ConcurrentHashMap<String, Boolean>());
- public CreateTables(CreateDisposition createDisposition, BigQueryServices bqServices,
- SerializableFunction<TableDestination, TableSchema> schemaFunction) {
+ public CreateTables(
+ CreateDisposition createDisposition,
+ SerializableFunction<TableDestination, TableSchema> schemaFunction) {
+ this(createDisposition, new BigQueryServicesImpl(), schemaFunction);
+ }
+
+ private CreateTables(
+ CreateDisposition createDisposition,
+ BigQueryServices bqServices,
+ SerializableFunction<TableDestination, TableSchema> schemaFunction) {
this.createDisposition = createDisposition;
this.bqServices = bqServices;
this.schemaFunction = schemaFunction;
}
- @ProcessElement
- public void processElement(ProcessContext context) throws InterruptedException, IOException {
- BigQueryOptions options = context.getPipelineOptions().as(BigQueryOptions.class);
- possibleCreateTable(options, context.element().getKey());
- context.output(context.element());
+ CreateTables withTestServices(BigQueryServices bqServices) {
+ return new CreateTables(createDisposition, bqServices, schemaFunction);
+ }
+
+ @Override
+ public PCollection<KV<TableDestination, TableRow>> expand(
+ PCollection<KV<TableDestination, TableRow>> input) {
+ return input.apply(
+ ParDo.of(
+ new DoFn<KV<TableDestination, TableRow>, KV<TableDestination, TableRow>>() {
+ @ProcessElement
+ public void processElement(ProcessContext context)
+ throws InterruptedException, IOException {
+ BigQueryOptions options = context.getPipelineOptions().as(BigQueryOptions.class);
+ possibleCreateTable(options, context.element().getKey());
+ context.output(context.element());
+ }
+ }));
}
private void possibleCreateTable(BigQueryOptions options, TableDestination tableDestination)
@@ -70,8 +96,7 @@ public class CreateTables extends DoFn<KV<TableDestination, TableRow>,
String tableSpec = tableDestination.getTableSpec();
TableReference tableReference = tableDestination.getTableReference();
String tableDescription = tableDestination.getTableDescription();
- if (createDisposition != createDisposition.CREATE_NEVER
- && !createdTables.contains(tableSpec)) {
+ if (createDisposition != createDisposition.CREATE_NEVER && !createdTables.contains(tableSpec)) {
synchronized (createdTables) {
// Another thread may have succeeded in creating the table in the meanwhile, so
// check again. This check isn't needed for correctness, but we add it to prevent
@@ -92,6 +117,8 @@ public class CreateTables extends DoFn<KV<TableDestination, TableRow>,
}
}
+ /** This method is used by the testing fake to clear static state. */
+ @VisibleForTesting
static void clearCreatedTables() {
synchronized (createdTables) {
createdTables.clear();
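
The creation guard above does a cheap unsynchronized check and then re-checks under
a lock, so concurrent bundles in one JVM don't issue duplicate create calls. A
generic sketch of the same idiom with placeholder names:

import java.util.Collections;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;

class OncePerKey {
  private static final Set<String> done =
      Collections.newSetFromMap(new ConcurrentHashMap<String, Boolean>());

  static void ensure(String key, Runnable createAction) {
    if (!done.contains(key)) {          // fast path, no lock
      synchronized (done) {
        if (!done.contains(key)) {      // re-check while holding the lock
          createAction.run();           // e.g. issue the CreateTable RPC
          done.add(key);
        }
      }
    }
  }
}
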
http://git-wip-us.apache.org/repos/asf/beam/blob/7d13061c/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/GenerateShardedTable.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/GenerateShardedTable.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/GenerateShardedTable.java
index da3a70a..90d41a0 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/GenerateShardedTable.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/GenerateShardedTable.java
@@ -39,8 +39,7 @@ class GenerateShardedTable extends DoFn<KV<TableDestination, TableRow>,
@ProcessElement
public void processElement(ProcessContext context, BoundedWindow window) throws IOException {
ThreadLocalRandom randomGenerator = ThreadLocalRandom.current();
- // We output on keys 0-50 to ensure that there's enough batching for
- // BigQuery.
+ // We output on keys in [0, numShards).
String tableSpec = context.element().getKey().getTableSpec();
context.output(KV.of(ShardedKey.of(tableSpec, randomGenerator.nextInt(0, numShards)),
context.element().getValue()));
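
A tiny illustrative sketch of the key produced above (table spec and shard count
are made up; ThreadLocalRandom comes from java.util.concurrent):

int numShards = 50;
String tableSpec = "myproject:mydataset.mytable";
int shard = ThreadLocalRandom.current().nextInt(0, numShards);   // in [0, numShards)
ShardedKey<String> key = ShardedKey.of(tableSpec, shard);
// At most numShards distinct keys exist per table, which keeps downstream
// batching effective while still spreading the load across workers.
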
http://git-wip-us.apache.org/repos/asf/beam/blob/7d13061c/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/PrepareWrite.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/PrepareWrite.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/PrepareWrite.java
index 7712417..a8bdb43 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/PrepareWrite.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/PrepareWrite.java
@@ -1,20 +1,20 @@
/*
-* Licensed to the Apache Software Foundation (ASF) under one
-* or more contributor license agreements. See the NOTICE file
-* distributed with this work for additional information
-* regarding copyright ownership. The ASF licenses this file
-* to you under the Apache License, Version 2.0 (the
-* "License"); you may not use this file except in compliance
-* with the License. You may obtain a copy of the License at
-*
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing, software
-* distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions and
-* limitations under the License.
-*/
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
package org.apache.beam.sdk.io.gcp.bigquery;
import com.google.api.services.bigquery.model.TableReference;
@@ -23,6 +23,8 @@ import com.google.common.base.Strings;
import java.io.IOException;
import org.apache.beam.sdk.options.BigQueryOptions;
import org.apache.beam.sdk.transforms.DoFn;
+import org.apache.beam.sdk.transforms.PTransform;
+import org.apache.beam.sdk.transforms.ParDo;
import org.apache.beam.sdk.transforms.SerializableFunction;
import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
import org.apache.beam.sdk.values.KV;
@@ -30,37 +32,49 @@ import org.apache.beam.sdk.values.PCollection;
import org.apache.beam.sdk.values.ValueInSingleWindow;
/**
- * Prepare an input {@link PCollection} for writing to BigQuery. Use the table-reference
- * function to determine which tables each element is written to, and format the element into a
- * {@link TableRow} using the user-supplied format function.
+ * Prepare an input {@link PCollection} for writing to BigQuery. Use the table function to determine
+ * which tables each element is written to, and format the element into a {@link TableRow} using the
+ * user-supplied format function.
*/
-public class PrepareWrite<T> extends DoFn<T, KV<TableDestination, TableRow>> {
+public class PrepareWrite<T>
+ extends PTransform<PCollection<T>, PCollection<KV<TableDestination, TableRow>>> {
private SerializableFunction<ValueInSingleWindow<T>, TableDestination> tableFunction;
private SerializableFunction<T, TableRow> formatFunction;
- public PrepareWrite(SerializableFunction<ValueInSingleWindow<T>, TableDestination> tableFunction,
- SerializableFunction<T, TableRow> formatFunction) {
+ public PrepareWrite(
+ SerializableFunction<ValueInSingleWindow<T>, TableDestination> tableFunction,
+ SerializableFunction<T, TableRow> formatFunction) {
this.tableFunction = tableFunction;
this.formatFunction = formatFunction;
}
- @ProcessElement
- public void processElement(ProcessContext context, BoundedWindow window) throws IOException {
- TableDestination tableDestination = tableSpecFromWindowedValue(
- context.getPipelineOptions().as(BigQueryOptions.class),
- ValueInSingleWindow.of(context.element(), context.timestamp(), window, context.pane()));
- TableRow tableRow = formatFunction.apply(context.element());
- context.output(KV.of(tableDestination, tableRow));
+ @Override
+ public PCollection<KV<TableDestination, TableRow>> expand(PCollection<T> input) {
+ return input.apply(
+ ParDo.of(
+ new DoFn<T, KV<TableDestination, TableRow>>() {
+ @ProcessElement
+ public void processElement(ProcessContext context, BoundedWindow window)
+ throws IOException {
+ TableDestination tableDestination =
+ tableSpecFromWindowedValue(
+ context.getPipelineOptions().as(BigQueryOptions.class),
+ ValueInSingleWindow.of(
+ context.element(), context.timestamp(), window, context.pane()));
+ TableRow tableRow = formatFunction.apply(context.element());
+ context.output(KV.of(tableDestination, tableRow));
+ }
+ }));
}
- private TableDestination tableSpecFromWindowedValue(BigQueryOptions options,
- ValueInSingleWindow<T> value) {
+ private TableDestination tableSpecFromWindowedValue(
+ BigQueryOptions options, ValueInSingleWindow<T> value) {
TableDestination tableDestination = tableFunction.apply(value);
TableReference tableReference = tableDestination.getTableReference();
if (Strings.isNullOrEmpty(tableReference.getProjectId())) {
tableReference.setProjectId(options.getProject());
- tableDestination = new TableDestination(tableReference,
- tableDestination.getTableDescription());
+ tableDestination =
+ new TableDestination(tableReference, tableDestination.getTableDescription());
}
return tableDestination;
}
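
A hypothetical usage sketch of PrepareWrite, assuming a PCollection<String> named
lines: every element is routed to one fixed table and formatted as a single-column
row. The table spec, field name, and description are placeholders; BigQueryIO
itself applies this transform internally, as the BigQueryIO.java diff above shows.

PCollection<KV<TableDestination, TableRow>> routed =
    lines
        .apply(
            "PrepareWrite",
            new PrepareWrite<String>(
                new SerializableFunction<ValueInSingleWindow<String>, TableDestination>() {
                  @Override
                  public TableDestination apply(ValueInSingleWindow<String> value) {
                    return new TableDestination("myproject:mydataset.lines", "example table");
                  }
                },
                new SerializableFunction<String, TableRow>() {
                  @Override
                  public TableRow apply(String line) {
                    return new TableRow().set("line", line);
                  }
                }))
        .setCoder(KvCoder.of(TableDestinationCoder.of(), TableRowJsonCoder.of()));
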
http://git-wip-us.apache.org/repos/asf/beam/blob/7d13061c/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/ShardedKey.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/ShardedKey.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/ShardedKey.java
index 09b4fbf..c2b739f 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/ShardedKey.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/ShardedKey.java
@@ -25,6 +25,7 @@ import java.util.Objects;
* A key and a shard number.
*/
class ShardedKey<K> implements Serializable {
+ private static final long serialVersionUID = 1L;
private final K key;
private final int shardNumber;
http://git-wip-us.apache.org/repos/asf/beam/blob/7d13061c/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/StreamingInserts.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/StreamingInserts.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/StreamingInserts.java
index ced1d66..efd9c31 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/StreamingInserts.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/StreamingInserts.java
@@ -22,15 +22,10 @@ import com.google.api.services.bigquery.model.TableRow;
import com.google.api.services.bigquery.model.TableSchema;
import javax.annotation.Nullable;
import org.apache.beam.sdk.coders.Coder;
-import org.apache.beam.sdk.coders.KvCoder;
-import org.apache.beam.sdk.coders.StringUtf8Coder;
-import org.apache.beam.sdk.coders.TableRowJsonCoder;
import org.apache.beam.sdk.coders.VoidCoder;
import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write;
import org.apache.beam.sdk.transforms.PTransform;
-import org.apache.beam.sdk.transforms.ParDo;
import org.apache.beam.sdk.transforms.SerializableFunction;
-import org.apache.beam.sdk.util.Reshuffle;
import org.apache.beam.sdk.values.KV;
import org.apache.beam.sdk.values.PCollection;
@@ -38,8 +33,8 @@ import org.apache.beam.sdk.values.PCollection;
* PTransform that performs streaming BigQuery write. To increase consistency,
* it leverages BigQuery best effort de-dup mechanism.
*/
-class StreamingInserts extends PTransform<PCollection<KV<TableDestination, TableRow>>,
- WriteResult> {
+public class StreamingInserts extends
+ PTransform<PCollection<KV<TableDestination, TableRow>>, WriteResult> {
private final Write<?> write;
private static class ConstantSchemaFunction implements
@@ -74,36 +69,11 @@ class StreamingInserts extends PTransform<PCollection<KV<TableDestination, Table
SerializableFunction<TableDestination, TableSchema> schemaFunction =
new ConstantSchemaFunction(write.getSchema());
- // A naive implementation would be to simply stream data directly to BigQuery.
- // However, this could occasionally lead to duplicated data, e.g., when
- // a VM that runs this code is restarted and the code is re-run.
+ PCollection<KV<TableDestination, TableRow>> writes = input
+ .apply("CreateTables", new CreateTables(write.getCreateDisposition(), schemaFunction)
+ .withTestServices(write.getBigQueryServices()));
- // The above risk is mitigated in this implementation by relying on
- // BigQuery built-in best effort de-dup mechanism.
-
- // To use this mechanism, each input TableRow is tagged with a generated
- // unique id, which is then passed to BigQuery and used to ignore duplicates.
- PCollection<KV<ShardedKey<String>, TableRowInfo>> tagged = input
- .apply("CreateTables", ParDo.of(new CreateTables(write.getCreateDisposition(),
- write.getBigQueryServices(), schemaFunction)))
- // We create 50 keys per BigQuery table to generate output on. This is few enough that we
- // get good batching into BigQuery's insert calls, and enough that we can max out the
- // streaming insert quota.
- .apply("ShardTableWrites", ParDo.of(new GenerateShardedTable(50)))
- .setCoder(KvCoder.of(ShardedKeyCoder.of(StringUtf8Coder.of()), TableRowJsonCoder.of()))
- .apply("TagWithUniqueIds", ParDo.of(new TagWithUniqueIds()));
-
- // To prevent having the same TableRow processed more than once with regenerated
- // different unique ids, this implementation relies on "checkpointing", which is
- // achieved as a side effect of having StreamingWriteFn immediately follow a GBK,
- // performed by Reshuffle.
- tagged
- .setCoder(KvCoder.of(ShardedKeyCoder.of(StringUtf8Coder.of()), TableRowInfoCoder.of()))
- .apply(Reshuffle.<ShardedKey<String>, TableRowInfo>of())
- .apply("StreamingWrite",
- ParDo.of(
- new StreamingWriteFn(write.getBigQueryServices())));
-
- return WriteResult.in(input.getPipeline());
+ return writes.apply(new StreamingWriteTables()
+ .withTestServices(write.getBigQueryServices()));
}
}
http://git-wip-us.apache.org/repos/asf/beam/blob/7d13061c/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/StreamingWriteTables.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/StreamingWriteTables.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/StreamingWriteTables.java
new file mode 100644
index 0000000..4ddc1df
--- /dev/null
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/StreamingWriteTables.java
@@ -0,0 +1,86 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.sdk.io.gcp.bigquery;
+
+import com.google.api.services.bigquery.model.TableRow;
+import org.apache.beam.sdk.coders.KvCoder;
+import org.apache.beam.sdk.coders.StringUtf8Coder;
+import org.apache.beam.sdk.coders.TableRowJsonCoder;
+import org.apache.beam.sdk.transforms.PTransform;
+import org.apache.beam.sdk.transforms.ParDo;
+import org.apache.beam.sdk.util.Reshuffle;
+import org.apache.beam.sdk.values.KV;
+import org.apache.beam.sdk.values.PCollection;
+
+/**
+ * This transform takes in key-value pairs of {@link TableRow} entries and the
+ * {@link TableDestination} it should be written to. The BigQuery streaming-write service is used
+ * to stream these writes to the appropriate table.
+ *
+ * <p>This transform assumes that all destination tables already exist by the time it sees a write
+ * for that table.
+ */
+public class StreamingWriteTables extends PTransform<
+ PCollection<KV<TableDestination, TableRow>>, WriteResult> {
+ private BigQueryServices bigQueryServices;
+
+ public StreamingWriteTables() {
+ this(new BigQueryServicesImpl());
+ }
+
+ private StreamingWriteTables(BigQueryServices bigQueryServices) {
+ this.bigQueryServices = bigQueryServices;
+ }
+
+ StreamingWriteTables withTestServices(BigQueryServices bigQueryServices) {
+ return new StreamingWriteTables(bigQueryServices);
+ }
+
+ @Override
+ public WriteResult expand(PCollection<KV<TableDestination, TableRow>> input) {
+ // A naive implementation would be to simply stream data directly to BigQuery.
+ // However, this could occasionally lead to duplicated data, e.g., when
+ // a VM that runs this code is restarted and the code is re-run.
+
+ // The above risk is mitigated in this implementation by relying on
+ // BigQuery built-in best effort de-dup mechanism.
+
+ // To use this mechanism, each input TableRow is tagged with a generated
+ // unique id, which is then passed to BigQuery and used to ignore duplicates.
+ // We create 50 keys per BigQuery table to generate output on. This is few enough that we
+ // get good batching into BigQuery's insert calls, and enough that we can max out the
+ // streaming insert quota.
+ PCollection<KV<ShardedKey<String>, TableRowInfo>> tagged =
+ input.apply("ShardTableWrites", ParDo.of
+ (new GenerateShardedTable(50)))
+ .setCoder(KvCoder.of(ShardedKeyCoder.of(StringUtf8Coder.of()), TableRowJsonCoder.of()))
+ .apply("TagWithUniqueIds", ParDo.of(new TagWithUniqueIds()));
+
+ // To prevent having the same TableRow processed more than once with regenerated
+ // different unique ids, this implementation relies on "checkpointing", which is
+ // achieved as a side effect of having StreamingWriteFn immediately follow a GBK,
+ // performed by Reshuffle.
+ tagged
+ .setCoder(KvCoder.of(ShardedKeyCoder.of(StringUtf8Coder.of()), TableRowInfoCoder.of()))
+ .apply(Reshuffle.<ShardedKey<String>, TableRowInfo>of())
+ .apply("StreamingWrite",
+ ParDo.of(
+ new StreamingWriteFn(bigQueryServices)));
+ return WriteResult.in(input.getPipeline());
+ }
+}
http://git-wip-us.apache.org/repos/asf/beam/blob/7d13061c/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/TableDestination.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/TableDestination.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/TableDestination.java
index 36e1401..962e2cd 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/TableDestination.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/TableDestination.java
@@ -27,6 +27,7 @@ import java.util.Objects;
* Encapsulates a BigQuery table destination.
*/
public class TableDestination implements Serializable {
+ private static final long serialVersionUID = 1L;
private final String tableSpec;
private final String tableDescription;
http://git-wip-us.apache.org/repos/asf/beam/blob/7d13061c/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/TableDestinationCoder.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/TableDestinationCoder.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/TableDestinationCoder.java
index fa24700..262a00d 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/TableDestinationCoder.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/TableDestinationCoder.java
@@ -1,20 +1,20 @@
/*
-* Licensed to the Apache Software Foundation (ASF) under one
-* or more contributor license agreements. See the NOTICE file
-* distributed with this work for additional information
-* regarding copyright ownership. The ASF licenses this file
-* to you under the Apache License, Version 2.0 (the
-* "License"); you may not use this file except in compliance
-* with the License. You may obtain a copy of the License at
-*
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing, software
-* distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions and
-* limitations under the License.
-*/
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
package org.apache.beam.sdk.io.gcp.bigquery;
@@ -26,20 +26,18 @@ import org.apache.beam.sdk.coders.AtomicCoder;
import org.apache.beam.sdk.coders.CoderException;
import org.apache.beam.sdk.coders.StringUtf8Coder;
-/**
- * A coder for {@link TableDestination} objects.
- */
+/** A coder for {@link TableDestination} objects. */
public class TableDestinationCoder extends AtomicCoder<TableDestination> {
private static final TableDestinationCoder INSTANCE = new TableDestinationCoder();
-
+ private static final StringUtf8Coder stringCoder = StringUtf8Coder.of();
@JsonCreator
public static TableDestinationCoder of() {
- return INSTANCE;
- }
+ return INSTANCE;
+ }
@Override
- public void encode(TableDestination value, OutputStream outStream, Context context)
+ public void encode(TableDestination value, OutputStream outStream, Context context)
throws IOException {
if (value == null) {
throw new CoderException("cannot encode a null value");
@@ -50,15 +48,13 @@ public class TableDestinationCoder extends AtomicCoder<TableDestination> {
@Override
public TableDestination decode(InputStream inStream, Context context) throws IOException {
- return new TableDestination(
- stringCoder.decode(inStream, context.nested()),
- stringCoder.decode(inStream, context.nested()));
- }
-
- @Override
- public void verifyDeterministic() throws NonDeterministicException {
- return;
- }
+ return new TableDestination(
+ stringCoder.decode(inStream, context.nested()),
+ stringCoder.decode(inStream, context.nested()));
+ }
- StringUtf8Coder stringCoder = StringUtf8Coder.of();
+ @Override
+ public void verifyDeterministic() throws NonDeterministicException {
+ return;
+ }
}
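
As the diff above shows, TableDestinationCoder serializes a TableDestination as two nested UTF-8 strings (the table spec and the description) through a shared static StringUtf8Coder. A minimal round-trip sketch, assuming the two-argument TableDestination constructor and a getTableSpec() accessor that are not shown in this excerpt:

import org.apache.beam.sdk.io.gcp.bigquery.TableDestination;
import org.apache.beam.sdk.io.gcp.bigquery.TableDestinationCoder;
import org.apache.beam.sdk.util.CoderUtils;

public class TableDestinationCoderRoundTrip {
  public static void main(String[] args) throws Exception {
    TableDestination original =
        new TableDestination("project-id:dataset_id.table_id", "example description");
    // Encode to bytes and back; both fields travel as nested UTF-8 strings.
    byte[] bytes = CoderUtils.encodeToByteArray(TableDestinationCoder.of(), original);
    TableDestination copy = CoderUtils.decodeFromByteArray(TableDestinationCoder.of(), bytes);
    System.out.println(copy.getTableSpec());
  }
}
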
http://git-wip-us.apache.org/repos/asf/beam/blob/7d13061c/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/TableRowWriter.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/TableRowWriter.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/TableRowWriter.java
index ee8f466..91ef404 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/TableRowWriter.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/TableRowWriter.java
@@ -32,9 +32,7 @@ import org.apache.beam.sdk.util.MimeTypes;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-/**
- * Writes {@TableRow} objects out to a file. Used when doing batch load jobs into BigQuery.
- */
+/** Writes {@link TableRow} objects out to a file. Used when doing batch load jobs into BigQuery. */
class TableRowWriter {
private static final Logger LOG = LoggerFactory.getLogger(BigQueryIO.class);
@@ -47,16 +45,18 @@ class TableRowWriter {
protected String mimeType = MimeTypes.TEXT;
private CountingOutputStream out;
- public class Result {
- String filename;
- long byteSize;
+ public static final class Result {
+ final String filename;
+ final long byteSize;
+
public Result(String filename, long byteSize) {
this.filename = filename;
this.byteSize = byteSize;
}
}
+
TableRowWriter(String basename) {
- this.tempFilePrefix = basename;
+ this.tempFilePrefix = basename;
}
public final void open(String uId) throws Exception {
http://git-wip-us.apache.org/repos/asf/beam/blob/7d13061c/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/TagWithUniqueIds.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/TagWithUniqueIds.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/TagWithUniqueIds.java
index 7379784..284691e 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/TagWithUniqueIds.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/TagWithUniqueIds.java
@@ -28,15 +28,14 @@ import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
import org.apache.beam.sdk.values.KV;
/**
- * Fn that tags each table row with a unique id and destination table.
- * To avoid calling UUID.randomUUID() for each element, which can be costly,
- * a randomUUID is generated only once per bucket of data. The actual unique
- * id is created by concatenating this randomUUID with a sequential number.
+ * Fn that tags each table row with a unique id and destination table. To avoid calling
+ * UUID.randomUUID() for each element, which can be costly, a randomUUID is generated only once per
+ * bucket of data. The actual unique id is created by concatenating this randomUUID with a
+ * sequential number.
*/
@VisibleForTesting
class TagWithUniqueIds
extends DoFn<KV<ShardedKey<String>, TableRow>, KV<ShardedKey<String>, TableRowInfo>> {
-
private transient String randomUUID;
private transient long sequenceNo = 0L;
@@ -51,8 +50,9 @@ class TagWithUniqueIds
String uniqueId = randomUUID + sequenceNo++;
// We output on keys 0-50 to ensure that there's enough batching for
// BigQuery.
- context.output(KV.of(context.element().getKey(),
- new TableRowInfo(context.element().getValue(), uniqueId)));
+ context.output(
+ KV.of(
+ context.element().getKey(), new TableRowInfo(context.element().getValue(), uniqueId)));
}
@Override
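
The javadoc above describes the id scheme: one random UUID per bundle plus a per-element sequence number, so UUID.randomUUID() is not called for every element. A standalone sketch of that pattern (the class and method names here are invented for illustration; the real DoFn is the one in the diff):

import java.util.UUID;

class BundleScopedIdGenerator {
  private String bundlePrefix;  // regenerated once at the start of each bundle
  private long sequenceNo = 0L;

  void startBundle() {
    bundlePrefix = UUID.randomUUID().toString();
    sequenceNo = 0L;
  }

  String nextId() {
    // Cheap per-element id: shared random prefix plus a sequential suffix.
    return bundlePrefix + sequenceNo++;
  }
}
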
http://git-wip-us.apache.org/repos/asf/beam/blob/7d13061c/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/WriteBundlesToFiles.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/WriteBundlesToFiles.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/WriteBundlesToFiles.java
index 869e68a..a25cc90 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/WriteBundlesToFiles.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/WriteBundlesToFiles.java
@@ -51,10 +51,11 @@ class WriteBundlesToFiles extends DoFn<KV<TableDestination, TableRow>, WriteBund
* The result of the {@link WriteBundlesToFiles} transform. Corresponds to a single output file,
* and encapsulates the table it is destined to as well as the file byte size.
*/
- public static class Result implements Serializable {
- public String filename;
- public Long fileByteSize;
- public TableDestination tableDestination;
+ public static final class Result implements Serializable {
+ private static final long serialVersionUID = 1L;
+ public final String filename;
+ public final Long fileByteSize;
+ public final TableDestination tableDestination;
public Result(String filename, Long fileByteSize, TableDestination tableDestination) {
this.filename = filename;
@@ -68,6 +69,9 @@ class WriteBundlesToFiles extends DoFn<KV<TableDestination, TableRow>, WriteBund
*/
public static class ResultCoder extends AtomicCoder<Result> {
private static final ResultCoder INSTANCE = new ResultCoder();
+ private static final StringUtf8Coder stringCoder = StringUtf8Coder.of();
+ private static final VarLongCoder longCoder = VarLongCoder.of();
+ private static final TableDestinationCoder tableDestinationCoder = TableDestinationCoder.of();
public static ResultCoder of() {
return INSTANCE;
@@ -87,18 +91,15 @@ class WriteBundlesToFiles extends DoFn<KV<TableDestination, TableRow>, WriteBund
@Override
public Result decode(InputStream inStream, Context context)
throws IOException {
- return new Result(stringCoder.decode(inStream, context.nested()),
- longCoder.decode(inStream, context.nested()),
- tableDestinationCoder.decode(inStream, context.nested()));
+ String filename = stringCoder.decode(inStream, context.nested());
+ long fileByteSize = longCoder.decode(inStream, context.nested());
+ TableDestination tableDestination = tableDestinationCoder.decode(inStream, context.nested());
+ return new Result(filename, fileByteSize, tableDestination);
}
@Override
public void verifyDeterministic() throws NonDeterministicException {
}
-
- StringUtf8Coder stringCoder = StringUtf8Coder.of();
- VarLongCoder longCoder = VarLongCoder.of();
- TableDestinationCoder tableDestinationCoder = TableDestinationCoder.of();
}
WriteBundlesToFiles(String tempFilePrefix) {
@@ -107,6 +108,8 @@ class WriteBundlesToFiles extends DoFn<KV<TableDestination, TableRow>, WriteBund
@StartBundle
public void startBundle(Context c) {
+ // This must be done for each bundle, as by default the {@link DoFn} might be reused between
+ // bundles.
this.writers = Maps.newHashMap();
}
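
The new comment above is about DoFn instance reuse: per-bundle state (here, the writers map) has to be re-created in @StartBundle rather than in the constructor. A small illustrative DoFn following the same rule, with hypothetical names that are not part of the commit:

import java.util.HashMap;
import java.util.Map;
import org.apache.beam.sdk.transforms.DoFn;

class PerBundleCountFn extends DoFn<String, String> {
  // Rebuilt in every bundle; the same DoFn instance may serve many bundles.
  private transient Map<String, Integer> counts;

  @StartBundle
  public void startBundle(Context c) {
    counts = new HashMap<>();
  }

  @ProcessElement
  public void processElement(ProcessContext c) {
    Integer current = counts.get(c.element());
    counts.put(c.element(), current == null ? 1 : current + 1);
    c.output(c.element());
  }
}
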
http://git-wip-us.apache.org/repos/asf/beam/blob/7d13061c/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/WritePartition.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/WritePartition.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/WritePartition.java
index 9c48b82..9414909 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/WritePartition.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/WritePartition.java
@@ -44,7 +44,65 @@ class WritePartition extends DoFn<String, KV<ShardedKey<TableDestination>, List<
private TupleTag<KV<ShardedKey<TableDestination>, List<String>>> multiPartitionsTag;
private TupleTag<KV<ShardedKey<TableDestination>, List<String>>> singlePartitionTag;
- public WritePartition(
+ private static class PartitionData {
+ private int numFiles = 0;
+ private long byteSize = 0;
+ private List<String> filenames = Lists.newArrayList();
+
+ int getNumFiles() {
+ return numFiles;
+ }
+
+ void addFiles(int numFiles) {
+ this.numFiles += numFiles;
+ }
+
+ long getByteSize() {
+ return byteSize;
+ }
+
+ void addBytes(long numBytes) {
+ this.byteSize += numBytes;
+ }
+
+ List<String> getFilenames() {
+ return filenames;
+ }
+
+ void addFilename(String filename) {
+ filenames.add(filename);
+ }
+
+ // Check to see whether we can add to this partition without exceeding the maximum partition
+ // size.
+ boolean canAccept(int numFiles, long numBytes) {
+ return this.numFiles + numFiles <= Write.MAX_NUM_FILES
+ && this.byteSize + numBytes <= Write.MAX_SIZE_BYTES;
+ }
+ }
+
+ private static class DestinationData {
+ private List<PartitionData> partitions = Lists.newArrayList();
+
+ DestinationData() {
+ // Always start out with a single empty partition.
+ partitions.add(new PartitionData());
+ }
+
+ List<PartitionData> getPartitions() {
+ return partitions;
+ }
+
+ PartitionData getLatestPartition() {
+ return partitions.get(partitions.size() - 1);
+ }
+
+ void addPartition(PartitionData partition) {
+ partitions.add(partition);
+ }
+ }
+
+ WritePartition(
ValueProvider<String> singletonOutputJsonTableRef,
String singletonOutputTableDescription,
PCollectionView<Iterable<WriteBundlesToFiles.Result>> resultsView,
@@ -76,54 +134,41 @@ class WritePartition extends DoFn<String, KV<ShardedKey<TableDestination>, List<
}
- long partitionId = 0;
- Map<TableDestination, Integer> currNumFilesMap = Maps.newHashMap();
- Map<TableDestination, Long> currSizeBytesMap = Maps.newHashMap();
- Map<TableDestination, List<List<String>>> currResultsMap = Maps.newHashMap();
- for (int i = 0; i < results.size(); ++i) {
- WriteBundlesToFiles.Result fileResult = results.get(i);
+ Map<TableDestination, DestinationData> currentResults = Maps.newHashMap();
+ for (WriteBundlesToFiles.Result fileResult : results) {
TableDestination tableDestination = fileResult.tableDestination;
- List<List<String>> partitions = currResultsMap.get(tableDestination);
- if (partitions == null) {
- partitions = Lists.newArrayList();
- partitions.add(Lists.<String>newArrayList());
- currResultsMap.put(tableDestination, partitions);
+ DestinationData destinationData = currentResults.get(tableDestination);
+ if (destinationData == null) {
+ destinationData = new DestinationData();
+ currentResults.put(tableDestination, destinationData);
}
- int currNumFiles = getOrDefault(currNumFilesMap, tableDestination, 0);
- long currSizeBytes = getOrDefault(currSizeBytesMap, tableDestination, 0L);
- if (currNumFiles + 1 > Write.MAX_NUM_FILES
- || currSizeBytes + fileResult.fileByteSize > Write.MAX_SIZE_BYTES) {
- // Add a new partition for this table.
- partitions.add(Lists.<String>newArrayList());
- // c.sideOutput(multiPartitionsTag, KV.of(++partitionId, currResults));
- currNumFiles = 0;
- currSizeBytes = 0;
- currNumFilesMap.remove(tableDestination);
- currSizeBytesMap.remove(tableDestination);
+
+ PartitionData latestPartition = destinationData.getLatestPartition();
+ if (!latestPartition.canAccept(1, fileResult.fileByteSize)) {
+ // Too much data, roll over to a new partition.
+ latestPartition = new PartitionData();
+ destinationData.addPartition(latestPartition);
}
- currNumFilesMap.put(tableDestination, currNumFiles + 1);
- currSizeBytesMap.put(tableDestination, currSizeBytes + fileResult.fileByteSize);
- // Always add to the most recent partition for this table.
- partitions.get(partitions.size() - 1).add(fileResult.filename);
+ latestPartition.addFilename(fileResult.filename);
+ latestPartition.addFiles(1);
+ latestPartition.addBytes(fileResult.fileByteSize);
}
- for (Map.Entry<TableDestination, List<List<String>>> entry : currResultsMap.entrySet()) {
+ // Now that we've figured out which tables and partitions to write out, emit this information
+ // to the next stage.
+ for (Map.Entry<TableDestination, DestinationData> entry : currentResults.entrySet()) {
TableDestination tableDestination = entry.getKey();
- List<List<String>> partitions = entry.getValue();
+ DestinationData destinationData = entry.getValue();
+ // In the fast-path case where we only output one table, the transform loads it directly
+ // to the final table. In this case, we output on a special TupleTag so the enclosing
+ // transform knows to skip the rename step.
TupleTag<KV<ShardedKey<TableDestination>, List<String>>> outputTag =
- (partitions.size() == 1) ? singlePartitionTag : multiPartitionsTag;
- for (int i = 0; i < partitions.size(); ++i) {
- c.output(outputTag, KV.of(ShardedKey.of(tableDestination, i + 1), partitions.get(i)));
+ (destinationData.getPartitions().size() == 1) ? singlePartitionTag : multiPartitionsTag;
+ for (int i = 0; i < destinationData.getPartitions().size(); ++i) {
+ PartitionData partitionData = destinationData.getPartitions().get(i);
+ c.output(outputTag, KV.of(ShardedKey.of(tableDestination, i + 1),
+ partitionData.getFilenames()));
}
}
}
-
- private <T> T getOrDefault(Map<TableDestination, T> map, TableDestination tableDestination,
- T defaultValue) {
- if (map.containsKey(tableDestination)) {
- return map.get(tableDestination);
- } else {
- return defaultValue;
- }
- }
}
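
The rewritten loop above rolls over to a new PartitionData whenever adding another file would exceed Write.MAX_NUM_FILES or Write.MAX_SIZE_BYTES. A tiny standalone simulation of that rollover rule, using made-up limits (the real constants are not shown in this excerpt):

import java.util.ArrayList;
import java.util.List;

public class PartitionRolloverDemo {
  static final int MAX_NUM_FILES = 3;
  static final long MAX_SIZE_BYTES = 100L;

  public static void main(String[] args) {
    long[] fileSizes = {40, 40, 40, 10, 10};  // the third file would exceed the byte limit
    List<List<Long>> partitions = new ArrayList<>();
    partitions.add(new ArrayList<Long>());
    int numFiles = 0;
    long byteSize = 0;
    for (long size : fileSizes) {
      if (numFiles + 1 > MAX_NUM_FILES || byteSize + size > MAX_SIZE_BYTES) {
        partitions.add(new ArrayList<Long>());  // roll over to a new partition
        numFiles = 0;
        byteSize = 0;
      }
      partitions.get(partitions.size() - 1).add(size);
      numFiles++;
      byteSize += size;
    }
    System.out.println(partitions);  // [[40, 40], [40, 10, 10]]
  }
}
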
http://git-wip-us.apache.org/repos/asf/beam/blob/7d13061c/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/WriteRename.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/WriteRename.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/WriteRename.java
index 752e7d3..9b1c989 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/WriteRename.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/WriteRename.java
@@ -89,8 +89,9 @@ class WriteRename extends DoFn<String, Void> {
}
// Make sure each destination table gets a unique job id.
- String jobIdPrefix = String.format(
- c.sideInput(jobIdToken) + "0x%08x", finalTableDestination.hashCode());
+ String jobIdPrefix = BigQueryHelpers.createJobId(
+ c.sideInput(jobIdToken), finalTableDestination, -1);
+
copy(
bqServices.getJobService(c.getPipelineOptions().as(BigQueryOptions.class)),
bqServices.getDatasetService(c.getPipelineOptions().as(BigQueryOptions.class)),
http://git-wip-us.apache.org/repos/asf/beam/blob/7d13061c/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/WriteTables.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/WriteTables.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/WriteTables.java
index f7fe87b..4a6cd2b 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/WriteTables.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/WriteTables.java
@@ -57,11 +57,15 @@ import org.slf4j.LoggerFactory;
/**
* Writes partitions to BigQuery tables.
*
- * <p>The input is a list of files corresponding to a partition of a table. These files are
+ * <p>The input is a list of files corresponding to each partition of a table. These files are
loaded into a temporary table (or into the final table if there is only one partition). The output
- * is a {@link KV} mapping the final table to the temporary tables for each partition of that table.
+ * is a {@link KV} mapping each final table to a list of the temporary tables containing its data.
+ *
+ * <p>In the case where all the data in the files fit into a single load job, this transform loads
+ * the data directly into the final table, skipping temporary tables. In this case, the output
+ * {@link KV} maps the final table to itself.
*/
-class WriteTables extends DoFn<KV<ShardedKey<TableDestination>, Iterable<List<String>>>,
+class WriteTables extends DoFn<KV<ShardedKey<TableDestination>, List<String>>,
KV<TableDestination, String>> {
private static final Logger LOG = LoggerFactory.getLogger(WriteTables.class);
@@ -94,10 +98,9 @@ class WriteTables extends DoFn<KV<ShardedKey<TableDestination>, Iterable<List<St
public void processElement(ProcessContext c) throws Exception {
TableDestination tableDestination = c.element().getKey().getKey();
Integer partition = c.element().getKey().getShardNumber();
- List<String> partitionFiles = Lists.newArrayList(c.element().getValue()).get(0);
- // Job ID must be different for each partition of each table.
- String jobIdPrefix = String.format(
- c.sideInput(jobIdToken) + "_0x%08x_%05d", tableDestination.hashCode(), partition);
+ List<String> partitionFiles = Lists.newArrayList(c.element().getValue());
+ String jobIdPrefix = BigQueryHelpers.createJobId(
+ c.sideInput(jobIdToken), tableDestination, partition);
TableReference ref = tableDestination.getTableReference();
if (!singlePartition) {
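
Both this hunk and the WriteRename hunk above replace an inline String.format with BigQueryHelpers.createJobId, so each (table, partition) pair gets its own deterministic job id. The helper's body is not part of this mail; the following is only a guess at its shape, mirroring the format string it replaces:

import org.apache.beam.sdk.io.gcp.bigquery.TableDestination;

class JobIdSketch {
  // Hypothetical sketch only; the real BigQueryHelpers.createJobId is not shown in this excerpt.
  static String createJobId(String prefix, TableDestination tableDestination, int partition) {
    // Deterministic per (table, partition), so retried work reuses the same BigQuery job id.
    return String.format("%s_0x%08x_%05d", prefix, tableDestination.hashCode(), partition);
  }
}
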
http://git-wip-us.apache.org/repos/asf/beam/blob/7d13061c/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOTest.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOTest.java b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOTest.java
index f10be13..d0004e4 100644
--- a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOTest.java
+++ b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOTest.java
@@ -71,6 +71,7 @@ import org.apache.beam.sdk.coders.AtomicCoder;
import org.apache.beam.sdk.coders.Coder;
import org.apache.beam.sdk.coders.Coder.Context;
import org.apache.beam.sdk.coders.CoderException;
+import org.apache.beam.sdk.coders.IterableCoder;
import org.apache.beam.sdk.coders.KvCoder;
import org.apache.beam.sdk.coders.StringUtf8Coder;
import org.apache.beam.sdk.coders.TableRowJsonCoder;
@@ -122,7 +123,6 @@ import org.apache.beam.sdk.util.WindowingStrategy;
import org.apache.beam.sdk.values.KV;
import org.apache.beam.sdk.values.PCollection;
import org.apache.beam.sdk.values.PCollectionView;
-import org.apache.beam.sdk.values.PDone;
import org.apache.beam.sdk.values.TupleTag;
import org.apache.beam.sdk.values.TypeDescriptor;
import org.apache.beam.sdk.values.ValueInSingleWindow;
@@ -607,13 +607,11 @@ public class BigQueryIOTest implements Serializable {
}
@Test
- @Category(NeedsRunner.class)
public void testStreamingWriteWithDynamicTables() throws Exception {
testWriteWithDynamicTables(true);
}
@Test
- @Category(NeedsRunner.class)
public void testBatchWriteWithDynamicTables() throws Exception {
testWriteWithDynamicTables(false);
}
@@ -842,7 +840,7 @@ public class BigQueryIOTest implements Serializable {
BigQueryIO.writeTableRows().to("foo.com:project:somedataset.sometable");
checkWriteObject(
write, "foo.com:project", "somedataset", "sometable",
- null, CreateDisposition.CREATE_IF_NEEDED, WriteDisposition.WRITE_EMPTY, null);
+ null, CreateDisposition.CREATE_IF_NEEDED, WriteDisposition.WRITE_EMPTY, "");
}
@Test
@@ -894,7 +892,7 @@ public class BigQueryIOTest implements Serializable {
null,
CreateDisposition.CREATE_IF_NEEDED,
WriteDisposition.WRITE_EMPTY,
- null,
+ "",
false);
}
@@ -905,7 +903,7 @@ public class BigQueryIOTest implements Serializable {
checkWriteObject(
write, null, "somedataset", "sometable",
null, CreateDisposition.CREATE_IF_NEEDED, WriteDisposition.WRITE_EMPTY,
- null);
+ "");
}
@Test
@@ -917,7 +915,7 @@ public class BigQueryIOTest implements Serializable {
BigQueryIO.Write<TableRow> write = BigQueryIO.writeTableRows().to(table);
checkWriteObject(
write, "foo.com:project", "somedataset", "sometable",
- null, CreateDisposition.CREATE_IF_NEEDED, WriteDisposition.WRITE_EMPTY, null);
+ null, CreateDisposition.CREATE_IF_NEEDED, WriteDisposition.WRITE_EMPTY, "");
}
@Test
@@ -927,7 +925,7 @@ public class BigQueryIOTest implements Serializable {
BigQueryIO.<TableRow>write().to("foo.com:project:somedataset.sometable").withSchema(schema);
checkWriteObject(
write, "foo.com:project", "somedataset", "sometable",
- schema, CreateDisposition.CREATE_IF_NEEDED, WriteDisposition.WRITE_EMPTY, null);
+ schema, CreateDisposition.CREATE_IF_NEEDED, WriteDisposition.WRITE_EMPTY, "");
}
@Test
@@ -937,7 +935,7 @@ public class BigQueryIOTest implements Serializable {
.withCreateDisposition(CreateDisposition.CREATE_NEVER);
checkWriteObject(
write, "foo.com:project", "somedataset", "sometable",
- null, CreateDisposition.CREATE_NEVER, WriteDisposition.WRITE_EMPTY, null);
+ null, CreateDisposition.CREATE_NEVER, WriteDisposition.WRITE_EMPTY, "");
}
@Test
@@ -947,7 +945,7 @@ public class BigQueryIOTest implements Serializable {
.withCreateDisposition(CreateDisposition.CREATE_IF_NEEDED);
checkWriteObject(
write, "foo.com:project", "somedataset", "sometable",
- null, CreateDisposition.CREATE_IF_NEEDED, WriteDisposition.WRITE_EMPTY, null);
+ null, CreateDisposition.CREATE_IF_NEEDED, WriteDisposition.WRITE_EMPTY, "");
}
@Test
@@ -957,7 +955,7 @@ public class BigQueryIOTest implements Serializable {
.withWriteDisposition(WriteDisposition.WRITE_TRUNCATE);
checkWriteObject(
write, "foo.com:project", "somedataset", "sometable",
- null, CreateDisposition.CREATE_IF_NEEDED, WriteDisposition.WRITE_TRUNCATE, null);
+ null, CreateDisposition.CREATE_IF_NEEDED, WriteDisposition.WRITE_TRUNCATE, "");
}
@Test
@@ -967,7 +965,7 @@ public class BigQueryIOTest implements Serializable {
.withWriteDisposition(WriteDisposition.WRITE_APPEND);
checkWriteObject(
write, "foo.com:project", "somedataset", "sometable",
- null, CreateDisposition.CREATE_IF_NEEDED, WriteDisposition.WRITE_APPEND, null);
+ null, CreateDisposition.CREATE_IF_NEEDED, WriteDisposition.WRITE_APPEND, "");
}
@Test
@@ -977,7 +975,7 @@ public class BigQueryIOTest implements Serializable {
.withWriteDisposition(WriteDisposition.WRITE_EMPTY);
checkWriteObject(
write, "foo.com:project", "somedataset", "sometable",
- null, CreateDisposition.CREATE_IF_NEEDED, WriteDisposition.WRITE_EMPTY, null);
+ null, CreateDisposition.CREATE_IF_NEEDED, WriteDisposition.WRITE_EMPTY, "");
}
@Test
@@ -1359,7 +1357,6 @@ public class BigQueryIOTest implements Serializable {
SourceTestUtils.assertSplitAtFractionBehavior(
bqSource, 2, 0.3, ExpectedSplitOutcome.MUST_BE_CONSISTENT_IF_SUCCEEDS, options);
-
List<? extends BoundedSource<TableRow>> sources = bqSource.split(100, options);
assertEquals(2, sources.size());
BoundedSource<TableRow> actual = sources.get(0);
@@ -1626,9 +1623,11 @@ public class BigQueryIOTest implements Serializable {
TupleTag<KV<ShardedKey<TableDestination>, List<String>>> singlePartitionTag =
new TupleTag<KV<ShardedKey<TableDestination>, List<String>>>("singlePartitionTag") {};
+ PCollection<WriteBundlesToFiles.Result> filesPCollection =
+ p.apply(Create.of(files).withType(new TypeDescriptor<WriteBundlesToFiles.Result>() {}));
PCollectionView<Iterable<WriteBundlesToFiles.Result>> resultsView =
PCollectionViews.iterableView(
- p,
+ filesPCollection,
WindowingStrategy.globalDefault(),
WriteBundlesToFiles.ResultCoder.of());
@@ -1699,14 +1698,12 @@ public class BigQueryIOTest implements Serializable {
Path baseDir = Files.createTempDirectory(tempFolder, "testWriteTables");
- List<KV<ShardedKey<TableDestination>, Iterable<List<String>>>> partitions =
- Lists.newArrayList();
+ List<KV<ShardedKey<TableDestination>, List<String>>> partitions = Lists.newArrayList();
for (int i = 0; i < numTables; ++i) {
String tableName = String.format("project-id:dataset-id.table%05d", i);
TableDestination tableDestination = new TableDestination(tableName, tableName);
for (int j = 0; j < numPartitions; ++j) {
- String tempTableId = String.format(
- jobIdToken + "_0x%08x_%05d", tableDestination.hashCode(), j);
+ String tempTableId = BigQueryHelpers.createJobId(jobIdToken, tableDestination, j);
List<String> filesPerPartition = Lists.newArrayList();
for (int k = 0; k < numFilesPerPartition; ++k) {
String filename = Paths.get(baseDir.toString(),
@@ -1721,7 +1718,7 @@ public class BigQueryIOTest implements Serializable {
filesPerPartition.add(filename);
}
partitions.add(KV.of(ShardedKey.of(tableDestination, j),
- (Iterable<List<String>>) Collections.singleton(filesPerPartition)));
+ filesPerPartition));
List<String> expectedTables = expectedTempTables.get(tableDestination);
if (expectedTables == null) {
@@ -1735,11 +1732,6 @@ public class BigQueryIOTest implements Serializable {
}
}
- PCollection<String> expectedTempTablesPCollection = p.apply(Create.of(expectedTempTables));
- PCollectionView<Iterable<String>> tempTablesView = PCollectionViews.iterableView(
- expectedTempTablesPCollection,
- WindowingStrategy.globalDefault(),
- StringUtf8Coder.of());
PCollection<String> jobIdTokenCollection = p.apply("CreateJobId", Create.of("jobId"));
PCollectionView<String> jobIdTokenView =
jobIdTokenCollection.apply(View.<String>asSingleton());
@@ -1753,10 +1745,10 @@ public class BigQueryIOTest implements Serializable {
CreateDisposition.CREATE_IF_NEEDED,
null);
- DoFnTester<KV<ShardedKey<TableDestination>, Iterable<List<String>>>,
+ DoFnTester<KV<ShardedKey<TableDestination>, List<String>>,
KV<TableDestination, String>> tester = DoFnTester.of(writeTables);
tester.setSideInput(jobIdTokenView, GlobalWindow.INSTANCE, jobIdToken);
- for (KV<ShardedKey<TableDestination>, Iterable<List<String>>> partition : partitions) {
+ for (KV<ShardedKey<TableDestination>, List<String>> partition : partitions) {
tester.processElement(partition);
}
@@ -1848,11 +1840,27 @@ public class BigQueryIOTest implements Serializable {
}
}
+ PCollection<KV<TableDestination, String>> tempTablesPCollection =
+ p.apply(Create.of(tempTables)
+ .withCoder(KvCoder.of(TableDestinationCoder.of(),
+ IterableCoder.of(StringUtf8Coder.of()))))
+ .apply(ParDo.of(new DoFn<KV<TableDestination, Iterable<String>>,
+ KV<TableDestination, String>>() {
+ @ProcessElement
+ public void processElement(ProcessContext c) {
+ TableDestination tableDestination = c.element().getKey();
+ for (String tempTable : c.element().getValue()) {
+ c.output(KV.of(tableDestination, tempTable));
+ }
+ }
+ }));
+
PCollectionView<Map<TableDestination, Iterable<String>>> tempTablesView =
PCollectionViews.multimapView(
- p,
+ tempTablesPCollection,
WindowingStrategy.globalDefault(),
- KvCoder.of(TableDestinationCoder.of(), StringUtf8Coder.of()));
+ KvCoder.of(TableDestinationCoder.of(),
+ StringUtf8Coder.of()));
PCollection<String> jobIdTokenCollection = p.apply("CreateJobId", Create.of("jobId"));
PCollectionView<String> jobIdTokenView =
[10/50] [abbrv] beam git commit: Add PrepareWrite transform.
Posted by dh...@apache.org.
Add PrepareWrite transform.
Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/67a5f827
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/67a5f827
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/67a5f827
Branch: refs/heads/DSL_SQL
Commit: 67a5f82706e52fe025b63aa2e9652368f22c8344
Parents: c939a43
Author: Reuven Lax <re...@google.com>
Authored: Tue Mar 28 12:53:27 2017 -0700
Committer: Eugene Kirpichov <ki...@google.com>
Committed: Tue Apr 18 21:12:49 2017 -0700
----------------------------------------------------------------------
.../beam/sdk/io/gcp/bigquery/PrepareWrite.java | 58 ++++++++++++++++++++
.../sdk/io/gcp/bigquery/TableDestination.java | 7 +++
.../gcp/bigquery/TagWithUniqueIdsAndTable.java | 15 ++---
3 files changed, 69 insertions(+), 11 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/beam/blob/67a5f827/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/PrepareWrite.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/PrepareWrite.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/PrepareWrite.java
new file mode 100644
index 0000000..0c08e18
--- /dev/null
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/PrepareWrite.java
@@ -0,0 +1,58 @@
+package org.apache.beam.sdk.io.gcp.bigquery;
+
+import com.google.api.services.bigquery.model.TableReference;
+import com.google.api.services.bigquery.model.TableRow;
+import com.google.common.base.Strings;
+import java.io.IOException;
+import org.apache.beam.sdk.options.BigQueryOptions;
+import org.apache.beam.sdk.transforms.DoFn;
+import org.apache.beam.sdk.transforms.PTransform;
+import org.apache.beam.sdk.transforms.ParDo;
+import org.apache.beam.sdk.transforms.SerializableFunction;
+import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
+import org.apache.beam.sdk.values.KV;
+import org.apache.beam.sdk.values.PCollection;
+import org.apache.beam.sdk.values.ValueInSingleWindow;
+
+/**
+ * Prepares an input {@link PCollection<T>} for writing to BigQuery. Uses the table-reference
+ * function to determine which table each element is written to, and formats the element into a
+ * {@link TableRow} using the user-supplied format function.
+ */
+public class PrepareWrite<T> extends PTransform<PCollection<T>, PCollection<KV<String, TableRow>>> {
+ private static final String NAME = "PrepareWrite";
+ private SerializableFunction<ValueInSingleWindow<T>, TableReference> tableRefFunction;
+ private SerializableFunction<T, TableRow> formatFunction;
+
+ public PrepareWrite(SerializableFunction<ValueInSingleWindow<T>, TableReference> tableRefFunction,
+ SerializableFunction<T, TableRow> formatFunction) {
+ super(NAME);
+ this.tableRefFunction = tableRefFunction;
+ this.formatFunction = formatFunction;
+ }
+
+ @Override
+ public PCollection<KV<String, TableRow>> expand(PCollection<T> input) {
+ PCollection<KV<String, TableRow>> elementsByTable =
+ input.apply(ParDo.of(new DoFn<T, KV<String, TableRow>>() {
+ @ProcessElement
+ public void processElement(ProcessContext context, BoundedWindow window) throws IOException {
+ String tableSpec = tableSpecFromWindowedValue(
+ context.getPipelineOptions().as(BigQueryOptions.class),
+ ValueInSingleWindow.of(context.element(), context.timestamp(), window, context.pane()));
+ TableRow tableRow = formatFunction.apply(context.element());
+ context.output(KV.of(tableSpec, tableRow));
+ }
+ }));
+ return elementsByTable;
+ }
+
+ private String tableSpecFromWindowedValue(BigQueryOptions options,
+ ValueInSingleWindow<T> value) {
+ TableReference table = tableRefFunction.apply(value);
+ if (Strings.isNullOrEmpty(table.getProjectId())) {
+ table.setProjectId(options.getProject());
+ }
+ return BigQueryHelpers.toTableSpec(table);
+ }
+}
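
A hypothetical usage sketch of the new transform; the pipeline, element type, table names, and field names below are invented for illustration and are not part of the commit:

import com.google.api.services.bigquery.model.TableReference;
import com.google.api.services.bigquery.model.TableRow;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.io.gcp.bigquery.PrepareWrite;
import org.apache.beam.sdk.transforms.Create;
import org.apache.beam.sdk.transforms.SerializableFunction;
import org.apache.beam.sdk.values.KV;
import org.apache.beam.sdk.values.PCollection;
import org.apache.beam.sdk.values.ValueInSingleWindow;

public class PrepareWriteExample {
  public static void main(String[] args) {
    Pipeline p = Pipeline.create();
    PCollection<KV<String, TableRow>> tagged =
        p.apply(Create.of("alpha", "beta"))
         .apply(new PrepareWrite<String>(
             new SerializableFunction<ValueInSingleWindow<String>, TableReference>() {
               @Override
               public TableReference apply(ValueInSingleWindow<String> value) {
                 // Route each word to a per-letter table.
                 return new TableReference()
                     .setProjectId("my-project")
                     .setDatasetId("my_dataset")
                     .setTableId("words_" + value.getValue().substring(0, 1));
               }
             },
             new SerializableFunction<String, TableRow>() {
               @Override
               public TableRow apply(String word) {
                 return new TableRow().set("word", word);
               }
             }));
    p.run();
  }
}
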
http://git-wip-us.apache.org/repos/asf/beam/blob/67a5f827/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/TableDestination.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/TableDestination.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/TableDestination.java
new file mode 100644
index 0000000..3cbbf3b
--- /dev/null
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/TableDestination.java
@@ -0,0 +1,7 @@
+package org.apache.beam.sdk.io.gcp.bigquery;
+
+/**
+ * Encapsulates a BigQuery table destination.
+ */
+public class TableDestination {
+}
http://git-wip-us.apache.org/repos/asf/beam/blob/67a5f827/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/TagWithUniqueIdsAndTable.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/TagWithUniqueIdsAndTable.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/TagWithUniqueIdsAndTable.java
index 8d7d1e6..4e50f7c 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/TagWithUniqueIdsAndTable.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/TagWithUniqueIdsAndTable.java
@@ -73,9 +73,9 @@ class TagWithUniqueIdsAndTable<T>
public void processElement(ProcessContext context, BoundedWindow window) throws IOException {
String uniqueId = randomUUID + sequenceNo++;
ThreadLocalRandom randomGenerator = ThreadLocalRandom.current();
- String tableSpec = tableSpecFromWindowedValue(
- context.getPipelineOptions().as(BigQueryOptions.class),
- ValueInSingleWindow.of(context.element(), context.timestamp(), window, context.pane()));
+ String tableSpec = tableSpecFromWindowedValue(
+ context.getPipelineOptions().as(BigQueryOptions.class),
+ ValueInSingleWindow.of(context.element(), context.timestamp(), window, context.pane()));
// We output on keys 0-50 to ensure that there's enough batching for
// BigQuery.
context.output(KV.of(ShardedKey.of(tableSpec, randomGenerator.nextInt(0, 50)),
@@ -97,12 +97,5 @@ class TagWithUniqueIdsAndTable<T>
}
- private String tableSpecFromWindowedValue(BigQueryOptions options,
- ValueInSingleWindow<T> value) {
- TableReference table = write.getTableRefFunction().apply(value);
- if (Strings.isNullOrEmpty(table.getProjectId())) {
- table.setProjectId(options.getProject());
- }
- return BigQueryHelpers.toTableSpec(table);
- }
+
}
[34/50] [abbrv] beam git commit: [BEAM-1994] Remove Flink examples package
Posted by dh...@apache.org.
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/state/FlinkKeyGroupStateInternals.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/state/FlinkKeyGroupStateInternals.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/state/FlinkKeyGroupStateInternals.java
deleted file mode 100644
index 24b340e..0000000
--- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/state/FlinkKeyGroupStateInternals.java
+++ /dev/null
@@ -1,487 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.beam.runners.flink.translation.wrappers.streaming.state;
-
-import static org.apache.flink.util.Preconditions.checkArgument;
-
-import java.io.DataInputStream;
-import java.io.DataOutputStream;
-import java.nio.ByteBuffer;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import org.apache.beam.runners.core.StateInternals;
-import org.apache.beam.runners.core.StateNamespace;
-import org.apache.beam.runners.core.StateTag;
-import org.apache.beam.sdk.coders.Coder;
-import org.apache.beam.sdk.coders.Coder.Context;
-import org.apache.beam.sdk.coders.CoderException;
-import org.apache.beam.sdk.coders.ListCoder;
-import org.apache.beam.sdk.coders.StringUtf8Coder;
-import org.apache.beam.sdk.transforms.Combine;
-import org.apache.beam.sdk.transforms.CombineWithContext;
-import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
-import org.apache.beam.sdk.transforms.windowing.OutputTimeFn;
-import org.apache.beam.sdk.util.CoderUtils;
-import org.apache.beam.sdk.util.state.BagState;
-import org.apache.beam.sdk.util.state.CombiningState;
-import org.apache.beam.sdk.util.state.MapState;
-import org.apache.beam.sdk.util.state.ReadableState;
-import org.apache.beam.sdk.util.state.SetState;
-import org.apache.beam.sdk.util.state.State;
-import org.apache.beam.sdk.util.state.StateContext;
-import org.apache.beam.sdk.util.state.StateContexts;
-import org.apache.beam.sdk.util.state.ValueState;
-import org.apache.beam.sdk.util.state.WatermarkHoldState;
-import org.apache.flink.api.java.tuple.Tuple2;
-import org.apache.flink.runtime.state.KeyGroupsList;
-import org.apache.flink.runtime.state.KeyedStateBackend;
-import org.apache.flink.streaming.api.operators.HeapInternalTimerService;
-import org.apache.flink.util.InstantiationUtil;
-import org.apache.flink.util.Preconditions;
-
-/**
- * {@link StateInternals} that uses {@link KeyGroupCheckpointedOperator}
- * to checkpoint state.
- *
- * <p>Note:
- * Ignore index of key.
- * Just implement BagState.
- *
- * <p>Reference from {@link HeapInternalTimerService} to the local key-group range.
- */
-public class FlinkKeyGroupStateInternals<K> implements StateInternals<K> {
-
- private final Coder<K> keyCoder;
- private final KeyGroupsList localKeyGroupRange;
- private KeyedStateBackend keyedStateBackend;
- private final int localKeyGroupRangeStartIdx;
-
- // stateName -> namespace -> (valueCoder, value)
- private final Map<String, Tuple2<Coder<?>, Map<String, ?>>>[] stateTables;
-
- public FlinkKeyGroupStateInternals(
- Coder<K> keyCoder,
- KeyedStateBackend keyedStateBackend) {
- this.keyCoder = keyCoder;
- this.keyedStateBackend = keyedStateBackend;
- this.localKeyGroupRange = keyedStateBackend.getKeyGroupRange();
- // find the starting index of the local key-group range
- int startIdx = Integer.MAX_VALUE;
- for (Integer keyGroupIdx : localKeyGroupRange) {
- startIdx = Math.min(keyGroupIdx, startIdx);
- }
- this.localKeyGroupRangeStartIdx = startIdx;
- stateTables = (Map<String, Tuple2<Coder<?>, Map<String, ?>>>[])
- new Map[localKeyGroupRange.getNumberOfKeyGroups()];
- for (int i = 0; i < stateTables.length; i++) {
- stateTables[i] = new HashMap<>();
- }
- }
-
- @Override
- public K getKey() {
- ByteBuffer keyBytes = (ByteBuffer) keyedStateBackend.getCurrentKey();
- try {
- return CoderUtils.decodeFromByteArray(keyCoder, keyBytes.array());
- } catch (CoderException e) {
- throw new RuntimeException("Error decoding key.", e);
- }
- }
-
- @Override
- public <T extends State> T state(
- final StateNamespace namespace,
- StateTag<? super K, T> address) {
-
- return state(namespace, address, StateContexts.nullContext());
- }
-
- @Override
- public <T extends State> T state(
- final StateNamespace namespace,
- StateTag<? super K, T> address,
- final StateContext<?> context) {
-
- return address.bind(new StateTag.StateBinder<K>() {
-
- @Override
- public <T> ValueState<T> bindValue(
- StateTag<? super K, ValueState<T>> address,
- Coder<T> coder) {
- throw new UnsupportedOperationException(
- String.format("%s is not supported", ValueState.class.getSimpleName()));
- }
-
- @Override
- public <T> BagState<T> bindBag(
- StateTag<? super K, BagState<T>> address,
- Coder<T> elemCoder) {
-
- return new FlinkKeyGroupBagState<>(address, namespace, elemCoder);
- }
-
- @Override
- public <T> SetState<T> bindSet(
- StateTag<? super K, SetState<T>> address,
- Coder<T> elemCoder) {
- throw new UnsupportedOperationException(
- String.format("%s is not supported", SetState.class.getSimpleName()));
- }
-
- @Override
- public <KeyT, ValueT> MapState<KeyT, ValueT> bindMap(
- StateTag<? super K, MapState<KeyT, ValueT>> spec,
- Coder<KeyT> mapKeyCoder, Coder<ValueT> mapValueCoder) {
- throw new UnsupportedOperationException(
- String.format("%s is not supported", MapState.class.getSimpleName()));
- }
-
- @Override
- public <InputT, AccumT, OutputT>
- CombiningState<InputT, AccumT, OutputT>
- bindCombiningValue(
- StateTag<? super K, CombiningState<InputT, AccumT, OutputT>> address,
- Coder<AccumT> accumCoder,
- Combine.CombineFn<InputT, AccumT, OutputT> combineFn) {
- throw new UnsupportedOperationException("bindCombiningValue is not supported.");
- }
-
- @Override
- public <InputT, AccumT, OutputT>
- CombiningState<InputT, AccumT, OutputT> bindKeyedCombiningValue(
- StateTag<? super K, CombiningState<InputT, AccumT, OutputT>> address,
- Coder<AccumT> accumCoder,
- final Combine.KeyedCombineFn<? super K, InputT, AccumT, OutputT> combineFn) {
- throw new UnsupportedOperationException("bindKeyedCombiningValue is not supported.");
-
- }
-
- @Override
- public <InputT, AccumT, OutputT>
- CombiningState<InputT, AccumT, OutputT> bindKeyedCombiningValueWithContext(
- StateTag<? super K, CombiningState<InputT, AccumT, OutputT>> address,
- Coder<AccumT> accumCoder,
- CombineWithContext.KeyedCombineFnWithContext<
- ? super K, InputT, AccumT, OutputT> combineFn) {
- throw new UnsupportedOperationException(
- "bindKeyedCombiningValueWithContext is not supported.");
- }
-
- @Override
- public <W extends BoundedWindow> WatermarkHoldState<W> bindWatermark(
- StateTag<? super K, WatermarkHoldState<W>> address,
- OutputTimeFn<? super W> outputTimeFn) {
- throw new UnsupportedOperationException(
- String.format("%s is not supported", CombiningState.class.getSimpleName()));
- }
- });
- }
-
- /**
- * Reference from {@link Combine.CombineFn}.
- *
- * <p>Accumulators are stored in each KeyGroup, call addInput() when a element comes,
- * call extractOutput() to produce the desired value when need to read data.
- */
- interface KeyGroupCombiner<InputT, AccumT, OutputT> {
-
- /**
- * Returns a new, mutable accumulator value, representing the accumulation
- * of zero input values.
- */
- AccumT createAccumulator();
-
- /**
- * Adds the given input value to the given accumulator, returning the
- * new accumulator value.
- */
- AccumT addInput(AccumT accumulator, InputT input);
-
- /**
- * Returns the output value that is the result of all accumulators from KeyGroups
- * that are assigned to this operator.
- */
- OutputT extractOutput(Iterable<AccumT> accumulators);
- }
-
- private abstract class AbstractKeyGroupState<InputT, AccumT, OutputT> {
-
- private String stateName;
- private String namespace;
- private Coder<AccumT> coder;
- private KeyGroupCombiner<InputT, AccumT, OutputT> keyGroupCombiner;
-
- AbstractKeyGroupState(
- String stateName,
- String namespace,
- Coder<AccumT> coder,
- KeyGroupCombiner<InputT, AccumT, OutputT> keyGroupCombiner) {
- this.stateName = stateName;
- this.namespace = namespace;
- this.coder = coder;
- this.keyGroupCombiner = keyGroupCombiner;
- }
-
- /**
- * Choose keyGroup of input and addInput to accumulator.
- */
- void addInput(InputT input) {
- int keyGroupIdx = keyedStateBackend.getCurrentKeyGroupIndex();
- int localIdx = getIndexForKeyGroup(keyGroupIdx);
- Map<String, Tuple2<Coder<?>, Map<String, ?>>> stateTable = stateTables[localIdx];
- Tuple2<Coder<?>, Map<String, ?>> tuple2 = stateTable.get(stateName);
- if (tuple2 == null) {
- tuple2 = new Tuple2<>();
- tuple2.f0 = coder;
- tuple2.f1 = new HashMap<>();
- stateTable.put(stateName, tuple2);
- }
- Map<String, AccumT> map = (Map<String, AccumT>) tuple2.f1;
- AccumT accumulator = map.get(namespace);
- if (accumulator == null) {
- accumulator = keyGroupCombiner.createAccumulator();
- }
- accumulator = keyGroupCombiner.addInput(accumulator, input);
- map.put(namespace, accumulator);
- }
-
- /**
- * Get all accumulators and invoke extractOutput().
- */
- OutputT extractOutput() {
- List<AccumT> accumulators = new ArrayList<>(stateTables.length);
- for (Map<String, Tuple2<Coder<?>, Map<String, ?>>> stateTable : stateTables) {
- Tuple2<Coder<?>, Map<String, ?>> tuple2 = stateTable.get(stateName);
- if (tuple2 != null) {
- AccumT accumulator = (AccumT) tuple2.f1.get(namespace);
- if (accumulator != null) {
- accumulators.add(accumulator);
- }
- }
- }
- return keyGroupCombiner.extractOutput(accumulators);
- }
-
- /**
- * Find the first accumulator and return immediately.
- */
- boolean isEmptyInternal() {
- for (Map<String, Tuple2<Coder<?>, Map<String, ?>>> stateTable : stateTables) {
- Tuple2<Coder<?>, Map<String, ?>> tuple2 = stateTable.get(stateName);
- if (tuple2 != null) {
- AccumT accumulator = (AccumT) tuple2.f1.get(namespace);
- if (accumulator != null) {
- return false;
- }
- }
- }
- return true;
- }
-
- /**
- * Clear accumulators and clean empty map.
- */
- void clearInternal() {
- for (Map<String, Tuple2<Coder<?>, Map<String, ?>>> stateTable : stateTables) {
- Tuple2<Coder<?>, Map<String, ?>> tuple2 = stateTable.get(stateName);
- if (tuple2 != null) {
- tuple2.f1.remove(namespace);
- if (tuple2.f1.size() == 0) {
- stateTable.remove(stateName);
- }
- }
- }
- }
-
- }
-
- private int getIndexForKeyGroup(int keyGroupIdx) {
- checkArgument(localKeyGroupRange.contains(keyGroupIdx),
- "Key Group " + keyGroupIdx + " does not belong to the local range.");
- return keyGroupIdx - this.localKeyGroupRangeStartIdx;
- }
-
- private class KeyGroupBagCombiner<T> implements KeyGroupCombiner<T, List<T>, Iterable<T>> {
-
- @Override
- public List<T> createAccumulator() {
- return new ArrayList<>();
- }
-
- @Override
- public List<T> addInput(List<T> accumulator, T input) {
- accumulator.add(input);
- return accumulator;
- }
-
- @Override
- public Iterable<T> extractOutput(Iterable<List<T>> accumulators) {
- List<T> result = new ArrayList<>();
- // maybe can return an unmodifiable view.
- for (List<T> list : accumulators) {
- result.addAll(list);
- }
- return result;
- }
- }
-
- private class FlinkKeyGroupBagState<T> extends AbstractKeyGroupState<T, List<T>, Iterable<T>>
- implements BagState<T> {
-
- private final StateNamespace namespace;
- private final StateTag<? super K, BagState<T>> address;
-
- FlinkKeyGroupBagState(
- StateTag<? super K, BagState<T>> address,
- StateNamespace namespace,
- Coder<T> coder) {
- super(address.getId(), namespace.stringKey(), ListCoder.of(coder),
- new KeyGroupBagCombiner<T>());
- this.namespace = namespace;
- this.address = address;
- }
-
- @Override
- public void add(T input) {
- addInput(input);
- }
-
- @Override
- public BagState<T> readLater() {
- return this;
- }
-
- @Override
- public Iterable<T> read() {
- Iterable<T> result = extractOutput();
- return result != null ? result : Collections.<T>emptyList();
- }
-
- @Override
- public ReadableState<Boolean> isEmpty() {
- return new ReadableState<Boolean>() {
- @Override
- public Boolean read() {
- try {
- return isEmptyInternal();
- } catch (Exception e) {
- throw new RuntimeException("Error reading state.", e);
- }
-
- }
-
- @Override
- public ReadableState<Boolean> readLater() {
- return this;
- }
- };
- }
-
- @Override
- public void clear() {
- clearInternal();
- }
-
- @Override
- public boolean equals(Object o) {
- if (this == o) {
- return true;
- }
- if (o == null || getClass() != o.getClass()) {
- return false;
- }
-
- FlinkKeyGroupBagState<?> that = (FlinkKeyGroupBagState<?>) o;
-
- return namespace.equals(that.namespace) && address.equals(that.address);
-
- }
-
- @Override
- public int hashCode() {
- int result = namespace.hashCode();
- result = 31 * result + address.hashCode();
- return result;
- }
- }
-
- /**
- * Snapshots the state {@code (stateName -> (valueCoder && (namespace -> value)))} for a given
- * {@code keyGroupIdx}.
- *
- * @param keyGroupIdx the id of the key-group to be put in the snapshot.
- * @param out the stream to write to.
- */
- public void snapshotKeyGroupState(int keyGroupIdx, DataOutputStream out) throws Exception {
- int localIdx = getIndexForKeyGroup(keyGroupIdx);
- Map<String, Tuple2<Coder<?>, Map<String, ?>>> stateTable = stateTables[localIdx];
- Preconditions.checkState(stateTable.size() <= Short.MAX_VALUE,
- "Too many States: " + stateTable.size() + ". Currently at most "
- + Short.MAX_VALUE + " states are supported");
- out.writeShort(stateTable.size());
- for (Map.Entry<String, Tuple2<Coder<?>, Map<String, ?>>> entry : stateTable.entrySet()) {
- out.writeUTF(entry.getKey());
- Coder coder = entry.getValue().f0;
- InstantiationUtil.serializeObject(out, coder);
- Map<String, ?> map = entry.getValue().f1;
- out.writeInt(map.size());
- for (Map.Entry<String, ?> entry1 : map.entrySet()) {
- StringUtf8Coder.of().encode(entry1.getKey(), out, Context.NESTED);
- coder.encode(entry1.getValue(), out, Context.NESTED);
- }
- }
- }
-
- /**
- * Restore the state {@code (stateName -> (valueCoder && (namespace -> value)))}
- * for a given {@code keyGroupIdx}.
- *
- * @param keyGroupIdx the id of the key-group to be put in the snapshot.
- * @param in the stream to read from.
- * @param userCodeClassLoader the class loader that will be used to deserialize
- * the valueCoder.
- */
- public void restoreKeyGroupState(int keyGroupIdx, DataInputStream in,
- ClassLoader userCodeClassLoader) throws Exception {
- int localIdx = getIndexForKeyGroup(keyGroupIdx);
- Map<String, Tuple2<Coder<?>, Map<String, ?>>> stateTable = stateTables[localIdx];
- int numStates = in.readShort();
- for (int i = 0; i < numStates; ++i) {
- String stateName = in.readUTF();
- Coder coder = InstantiationUtil.deserializeObject(in, userCodeClassLoader);
- Tuple2<Coder<?>, Map<String, ?>> tuple2 = stateTable.get(stateName);
- if (tuple2 == null) {
- tuple2 = new Tuple2<>();
- tuple2.f0 = coder;
- tuple2.f1 = new HashMap<>();
- stateTable.put(stateName, tuple2);
- }
- Map<String, Object> map = (Map<String, Object>) tuple2.f1;
- int mapSize = in.readInt();
- for (int j = 0; j < mapSize; j++) {
- String namespace = StringUtf8Coder.of().decode(in, Context.NESTED);
- Object value = coder.decode(in, Context.NESTED);
- map.put(namespace, value);
- }
- }
- }
-
-}
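
The KeyGroupCombiner interface deleted above documents the same create/add/extract accumulator pattern as Combine.CombineFn: build a partial result per key group, then merge all partials when the value is read. A self-contained sketch of that pattern with a simple sum (illustrative only, not Beam or Flink API):

import java.util.Arrays;

public class SumCombinerDemo {
  static Long createAccumulator() {
    return 0L;
  }

  static Long addInput(Long accumulator, long input) {
    return accumulator + input;
  }

  static Long extractOutput(Iterable<Long> accumulators) {
    long total = 0L;
    for (Long acc : accumulators) {
      total += acc;  // merge the per-key-group partial sums
    }
    return total;
  }

  public static void main(String[] args) {
    Long group0 = addInput(addInput(createAccumulator(), 1), 2);  // key-group 0
    Long group1 = addInput(createAccumulator(), 4);               // key-group 1
    System.out.println(extractOutput(Arrays.asList(group0, group1)));  // 7
  }
}
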
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/state/FlinkSplitStateInternals.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/state/FlinkSplitStateInternals.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/state/FlinkSplitStateInternals.java
deleted file mode 100644
index 2bf0bf1..0000000
--- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/state/FlinkSplitStateInternals.java
+++ /dev/null
@@ -1,260 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.beam.runners.flink.translation.wrappers.streaming.state;
-
-import com.google.common.collect.Iterators;
-import java.util.Collections;
-import org.apache.beam.runners.core.StateInternals;
-import org.apache.beam.runners.core.StateNamespace;
-import org.apache.beam.runners.core.StateTag;
-import org.apache.beam.runners.flink.translation.types.CoderTypeInformation;
-import org.apache.beam.sdk.coders.Coder;
-import org.apache.beam.sdk.transforms.Combine;
-import org.apache.beam.sdk.transforms.CombineWithContext;
-import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
-import org.apache.beam.sdk.transforms.windowing.OutputTimeFn;
-import org.apache.beam.sdk.util.state.BagState;
-import org.apache.beam.sdk.util.state.CombiningState;
-import org.apache.beam.sdk.util.state.MapState;
-import org.apache.beam.sdk.util.state.ReadableState;
-import org.apache.beam.sdk.util.state.SetState;
-import org.apache.beam.sdk.util.state.State;
-import org.apache.beam.sdk.util.state.StateContext;
-import org.apache.beam.sdk.util.state.StateContexts;
-import org.apache.beam.sdk.util.state.ValueState;
-import org.apache.beam.sdk.util.state.WatermarkHoldState;
-import org.apache.flink.api.common.ExecutionConfig;
-import org.apache.flink.api.common.state.ListStateDescriptor;
-import org.apache.flink.runtime.state.OperatorStateBackend;
-
-/**
- * {@link StateInternals} that uses a Flink {@link OperatorStateBackend}
- * to manage the split-distribute state.
- *
- * <p>Elements in ListState will be redistributed in round robin fashion
- * to operators when restarting with a different parallelism.
- *
- * <p>Note:
- * Ignore index of key and namespace.
- * Just implement BagState.
- */
-public class FlinkSplitStateInternals<K> implements StateInternals<K> {
-
- private final OperatorStateBackend stateBackend;
-
- public FlinkSplitStateInternals(OperatorStateBackend stateBackend) {
- this.stateBackend = stateBackend;
- }
-
- @Override
- public K getKey() {
- return null;
- }
-
- @Override
- public <T extends State> T state(
- final StateNamespace namespace,
- StateTag<? super K, T> address) {
-
- return state(namespace, address, StateContexts.nullContext());
- }
-
- @Override
- public <T extends State> T state(
- final StateNamespace namespace,
- StateTag<? super K, T> address,
- final StateContext<?> context) {
-
- return address.bind(new StateTag.StateBinder<K>() {
-
- @Override
- public <T> ValueState<T> bindValue(
- StateTag<? super K, ValueState<T>> address,
- Coder<T> coder) {
- throw new UnsupportedOperationException(
- String.format("%s is not supported", ValueState.class.getSimpleName()));
- }
-
- @Override
- public <T> BagState<T> bindBag(
- StateTag<? super K, BagState<T>> address,
- Coder<T> elemCoder) {
-
- return new FlinkSplitBagState<>(stateBackend, address, namespace, elemCoder);
- }
-
- @Override
- public <T> SetState<T> bindSet(
- StateTag<? super K, SetState<T>> address,
- Coder<T> elemCoder) {
- throw new UnsupportedOperationException(
- String.format("%s is not supported", SetState.class.getSimpleName()));
- }
-
- @Override
- public <KeyT, ValueT> MapState<KeyT, ValueT> bindMap(
- StateTag<? super K, MapState<KeyT, ValueT>> spec,
- Coder<KeyT> mapKeyCoder, Coder<ValueT> mapValueCoder) {
- throw new UnsupportedOperationException(
- String.format("%s is not supported", MapState.class.getSimpleName()));
- }
-
- @Override
- public <InputT, AccumT, OutputT>
- CombiningState<InputT, AccumT, OutputT>
- bindCombiningValue(
- StateTag<? super K, CombiningState<InputT, AccumT, OutputT>> address,
- Coder<AccumT> accumCoder,
- Combine.CombineFn<InputT, AccumT, OutputT> combineFn) {
- throw new UnsupportedOperationException("bindCombiningValue is not supported.");
- }
-
- @Override
- public <InputT, AccumT, OutputT>
- CombiningState<InputT, AccumT, OutputT> bindKeyedCombiningValue(
- StateTag<? super K, CombiningState<InputT, AccumT, OutputT>> address,
- Coder<AccumT> accumCoder,
- final Combine.KeyedCombineFn<? super K, InputT, AccumT, OutputT> combineFn) {
- throw new UnsupportedOperationException("bindKeyedCombiningValue is not supported.");
-
- }
-
- @Override
- public <InputT, AccumT, OutputT>
- CombiningState<InputT, AccumT, OutputT> bindKeyedCombiningValueWithContext(
- StateTag<? super K, CombiningState<InputT, AccumT, OutputT>> address,
- Coder<AccumT> accumCoder,
- CombineWithContext.KeyedCombineFnWithContext<
- ? super K, InputT, AccumT, OutputT> combineFn) {
- throw new UnsupportedOperationException(
- "bindKeyedCombiningValueWithContext is not supported.");
- }
-
- @Override
- public <W extends BoundedWindow> WatermarkHoldState<W> bindWatermark(
- StateTag<? super K, WatermarkHoldState<W>> address,
- OutputTimeFn<? super W> outputTimeFn) {
- throw new UnsupportedOperationException(
- String.format("%s is not supported", CombiningState.class.getSimpleName()));
- }
- });
- }
-
- private static class FlinkSplitBagState<K, T> implements BagState<T> {
-
- private final ListStateDescriptor<T> descriptor;
- private OperatorStateBackend flinkStateBackend;
- private final StateNamespace namespace;
- private final StateTag<? super K, BagState<T>> address;
-
- FlinkSplitBagState(
- OperatorStateBackend flinkStateBackend,
- StateTag<? super K, BagState<T>> address,
- StateNamespace namespace,
- Coder<T> coder) {
- this.flinkStateBackend = flinkStateBackend;
- this.namespace = namespace;
- this.address = address;
-
- CoderTypeInformation<T> typeInfo =
- new CoderTypeInformation<>(coder);
-
- descriptor = new ListStateDescriptor<>(address.getId(),
- typeInfo.createSerializer(new ExecutionConfig()));
- }
-
- @Override
- public void add(T input) {
- try {
- flinkStateBackend.getOperatorState(descriptor).add(input);
- } catch (Exception e) {
- throw new RuntimeException("Error updating state.", e);
- }
- }
-
- @Override
- public BagState<T> readLater() {
- return this;
- }
-
- @Override
- public Iterable<T> read() {
- try {
- Iterable<T> result = flinkStateBackend.getOperatorState(descriptor).get();
- return result != null ? result : Collections.<T>emptyList();
- } catch (Exception e) {
- throw new RuntimeException("Error updating state.", e);
- }
- }
-
- @Override
- public ReadableState<Boolean> isEmpty() {
- return new ReadableState<Boolean>() {
- @Override
- public Boolean read() {
- try {
- Iterable<T> result = flinkStateBackend.getOperatorState(descriptor).get();
- // PartitionableListState.get() returns an empty collection when there is no element,
- // unlike keyed ListState, which returns null.
- return result == null || Iterators.size(result.iterator()) == 0;
- } catch (Exception e) {
- throw new RuntimeException("Error reading state.", e);
- }
-
- }
-
- @Override
- public ReadableState<Boolean> readLater() {
- return this;
- }
- };
- }
-
- @Override
- public void clear() {
- try {
- flinkStateBackend.getOperatorState(descriptor).clear();
- } catch (Exception e) {
- throw new RuntimeException("Error reading state.", e);
- }
- }
-
- @Override
- public boolean equals(Object o) {
- if (this == o) {
- return true;
- }
- if (o == null || getClass() != o.getClass()) {
- return false;
- }
-
- FlinkSplitBagState<?, ?> that = (FlinkSplitBagState<?, ?>) o;
-
- return namespace.equals(that.namespace) && address.equals(that.address);
-
- }
-
- @Override
- public int hashCode() {
- int result = namespace.hashCode();
- result = 31 * result + address.hashCode();
- return result;
- }
- }
-
-}
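For readers following this removal: the class deleted above adapted Flink's non-keyed operator ListState to Beam's BagState, so that bag contents are redistributed round-robin when the job is rescaled. The sketch below is an editorial illustration distilled from the deleted code, not part of the Beam codebase or of this commit; the class name, the descriptor name "my-bag", and the String element type are assumptions made only for the example.

    // Minimal sketch, assuming the OperatorStateBackend calls used by the
    // deleted class: a bag of elements backed by operator (non-keyed) ListState.
    // The descriptor name "my-bag" and the String element type are illustrative.
    import java.util.Collections;
    import org.apache.flink.api.common.state.ListStateDescriptor;
    import org.apache.flink.api.common.typeutils.base.StringSerializer;
    import org.apache.flink.runtime.state.OperatorStateBackend;

    class SplitBagSketch {
      private final OperatorStateBackend backend;
      private final ListStateDescriptor<String> descriptor =
          new ListStateDescriptor<>("my-bag", StringSerializer.INSTANCE);

      SplitBagSketch(OperatorStateBackend backend) {
        this.backend = backend;
      }

      void add(String value) throws Exception {
        // Elements land in operator list state, which Flink redistributes
        // round-robin across instances when restarting with a new parallelism.
        backend.getOperatorState(descriptor).add(value);
      }

      Iterable<String> read() throws Exception {
        Iterable<String> result = backend.getOperatorState(descriptor).get();
        return result != null ? result : Collections.<String>emptyList();
      }

      void clear() throws Exception {
        backend.getOperatorState(descriptor).clear();
      }
    }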
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/state/FlinkStateInternals.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/state/FlinkStateInternals.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/state/FlinkStateInternals.java
deleted file mode 100644
index 4f961e5..0000000
--- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/state/FlinkStateInternals.java
+++ /dev/null
@@ -1,1053 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.beam.runners.flink.translation.wrappers.streaming.state;
-
-import com.google.common.collect.Lists;
-import java.nio.ByteBuffer;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.Map;
-import org.apache.beam.runners.core.StateInternals;
-import org.apache.beam.runners.core.StateNamespace;
-import org.apache.beam.runners.core.StateTag;
-import org.apache.beam.runners.flink.translation.types.CoderTypeInformation;
-import org.apache.beam.sdk.coders.Coder;
-import org.apache.beam.sdk.coders.CoderException;
-import org.apache.beam.sdk.coders.InstantCoder;
-import org.apache.beam.sdk.transforms.Combine;
-import org.apache.beam.sdk.transforms.CombineWithContext;
-import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
-import org.apache.beam.sdk.transforms.windowing.OutputTimeFn;
-import org.apache.beam.sdk.util.CoderUtils;
-import org.apache.beam.sdk.util.CombineContextFactory;
-import org.apache.beam.sdk.util.state.BagState;
-import org.apache.beam.sdk.util.state.CombiningState;
-import org.apache.beam.sdk.util.state.MapState;
-import org.apache.beam.sdk.util.state.ReadableState;
-import org.apache.beam.sdk.util.state.SetState;
-import org.apache.beam.sdk.util.state.State;
-import org.apache.beam.sdk.util.state.StateContext;
-import org.apache.beam.sdk.util.state.StateContexts;
-import org.apache.beam.sdk.util.state.ValueState;
-import org.apache.beam.sdk.util.state.WatermarkHoldState;
-import org.apache.flink.api.common.state.ListStateDescriptor;
-import org.apache.flink.api.common.state.ValueStateDescriptor;
-import org.apache.flink.api.common.typeutils.base.StringSerializer;
-import org.apache.flink.runtime.state.KeyedStateBackend;
-import org.joda.time.Instant;
-
-/**
- * {@link StateInternals} that uses a Flink {@link KeyedStateBackend} to manage state.
- *
- * <p>Note: In the Flink streaming runner the key is always encoded
- * using a {@link Coder} and stored in a {@link ByteBuffer}.
- */
-public class FlinkStateInternals<K> implements StateInternals<K> {
-
- private final KeyedStateBackend<ByteBuffer> flinkStateBackend;
- private Coder<K> keyCoder;
-
- // on recovery, these will not be properly set because we don't
- // know which watermark hold states there are in the Flink State Backend
- private final Map<String, Instant> watermarkHolds = new HashMap<>();
-
- public FlinkStateInternals(KeyedStateBackend<ByteBuffer> flinkStateBackend, Coder<K> keyCoder) {
- this.flinkStateBackend = flinkStateBackend;
- this.keyCoder = keyCoder;
- }
-
- /**
- * Returns the minimum over all watermark holds.
- */
- public Instant watermarkHold() {
- long min = Long.MAX_VALUE;
- for (Instant hold: watermarkHolds.values()) {
- min = Math.min(min, hold.getMillis());
- }
- return new Instant(min);
- }
-
- @Override
- public K getKey() {
- ByteBuffer keyBytes = flinkStateBackend.getCurrentKey();
- try {
- return CoderUtils.decodeFromByteArray(keyCoder, keyBytes.array());
- } catch (CoderException e) {
- throw new RuntimeException("Error decoding key.", e);
- }
- }
-
- @Override
- public <T extends State> T state(
- final StateNamespace namespace,
- StateTag<? super K, T> address) {
-
- return state(namespace, address, StateContexts.nullContext());
- }
-
- @Override
- public <T extends State> T state(
- final StateNamespace namespace,
- StateTag<? super K, T> address,
- final StateContext<?> context) {
-
- return address.bind(new StateTag.StateBinder<K>() {
-
- @Override
- public <T> ValueState<T> bindValue(
- StateTag<? super K, ValueState<T>> address,
- Coder<T> coder) {
-
- return new FlinkValueState<>(flinkStateBackend, address, namespace, coder);
- }
-
- @Override
- public <T> BagState<T> bindBag(
- StateTag<? super K, BagState<T>> address,
- Coder<T> elemCoder) {
-
- return new FlinkBagState<>(flinkStateBackend, address, namespace, elemCoder);
- }
-
- @Override
- public <T> SetState<T> bindSet(
- StateTag<? super K, SetState<T>> address,
- Coder<T> elemCoder) {
- throw new UnsupportedOperationException(
- String.format("%s is not supported", SetState.class.getSimpleName()));
- }
-
- @Override
- public <KeyT, ValueT> MapState<KeyT, ValueT> bindMap(
- StateTag<? super K, MapState<KeyT, ValueT>> spec,
- Coder<KeyT> mapKeyCoder, Coder<ValueT> mapValueCoder) {
- throw new UnsupportedOperationException(
- String.format("%s is not supported", MapState.class.getSimpleName()));
- }
-
- @Override
- public <InputT, AccumT, OutputT>
- CombiningState<InputT, AccumT, OutputT>
- bindCombiningValue(
- StateTag<? super K, CombiningState<InputT, AccumT, OutputT>> address,
- Coder<AccumT> accumCoder,
- Combine.CombineFn<InputT, AccumT, OutputT> combineFn) {
-
- return new FlinkCombiningState<>(
- flinkStateBackend, address, combineFn, namespace, accumCoder);
- }
-
- @Override
- public <InputT, AccumT, OutputT>
- CombiningState<InputT, AccumT, OutputT> bindKeyedCombiningValue(
- StateTag<? super K, CombiningState<InputT, AccumT, OutputT>> address,
- Coder<AccumT> accumCoder,
- final Combine.KeyedCombineFn<? super K, InputT, AccumT, OutputT> combineFn) {
- return new FlinkKeyedCombiningState<>(
- flinkStateBackend,
- address,
- combineFn,
- namespace,
- accumCoder,
- FlinkStateInternals.this);
- }
-
- @Override
- public <InputT, AccumT, OutputT>
- CombiningState<InputT, AccumT, OutputT> bindKeyedCombiningValueWithContext(
- StateTag<? super K, CombiningState<InputT, AccumT, OutputT>> address,
- Coder<AccumT> accumCoder,
- CombineWithContext.KeyedCombineFnWithContext<
- ? super K, InputT, AccumT, OutputT> combineFn) {
- return new FlinkCombiningStateWithContext<>(
- flinkStateBackend,
- address,
- combineFn,
- namespace,
- accumCoder,
- FlinkStateInternals.this,
- CombineContextFactory.createFromStateContext(context));
- }
-
- @Override
- public <W extends BoundedWindow> WatermarkHoldState<W> bindWatermark(
- StateTag<? super K, WatermarkHoldState<W>> address,
- OutputTimeFn<? super W> outputTimeFn) {
-
- return new FlinkWatermarkHoldState<>(
- flinkStateBackend, FlinkStateInternals.this, address, namespace, outputTimeFn);
- }
- });
- }
-
- private static class FlinkValueState<K, T> implements ValueState<T> {
-
- private final StateNamespace namespace;
- private final StateTag<? super K, ValueState<T>> address;
- private final ValueStateDescriptor<T> flinkStateDescriptor;
- private final KeyedStateBackend<ByteBuffer> flinkStateBackend;
-
- FlinkValueState(
- KeyedStateBackend<ByteBuffer> flinkStateBackend,
- StateTag<? super K, ValueState<T>> address,
- StateNamespace namespace,
- Coder<T> coder) {
-
- this.namespace = namespace;
- this.address = address;
- this.flinkStateBackend = flinkStateBackend;
-
- CoderTypeInformation<T> typeInfo = new CoderTypeInformation<>(coder);
-
- flinkStateDescriptor = new ValueStateDescriptor<>(address.getId(), typeInfo, null);
- }
-
- @Override
- public void write(T input) {
- try {
- flinkStateBackend.getPartitionedState(
- namespace.stringKey(),
- StringSerializer.INSTANCE,
- flinkStateDescriptor).update(input);
- } catch (Exception e) {
- throw new RuntimeException("Error updating state.", e);
- }
- }
-
- @Override
- public ValueState<T> readLater() {
- return this;
- }
-
- @Override
- public T read() {
- try {
- return flinkStateBackend.getPartitionedState(
- namespace.stringKey(),
- StringSerializer.INSTANCE,
- flinkStateDescriptor).value();
- } catch (Exception e) {
- throw new RuntimeException("Error reading state.", e);
- }
- }
-
- @Override
- public void clear() {
- try {
- flinkStateBackend.getPartitionedState(
- namespace.stringKey(),
- StringSerializer.INSTANCE,
- flinkStateDescriptor).clear();
- } catch (Exception e) {
- throw new RuntimeException("Error clearing state.", e);
- }
- }
-
- @Override
- public boolean equals(Object o) {
- if (this == o) {
- return true;
- }
- if (o == null || getClass() != o.getClass()) {
- return false;
- }
-
- FlinkValueState<?, ?> that = (FlinkValueState<?, ?>) o;
-
- return namespace.equals(that.namespace) && address.equals(that.address);
-
- }
-
- @Override
- public int hashCode() {
- int result = namespace.hashCode();
- result = 31 * result + address.hashCode();
- return result;
- }
- }
-
- private static class FlinkBagState<K, T> implements BagState<T> {
-
- private final StateNamespace namespace;
- private final StateTag<? super K, BagState<T>> address;
- private final ListStateDescriptor<T> flinkStateDescriptor;
- private final KeyedStateBackend<ByteBuffer> flinkStateBackend;
-
- FlinkBagState(
- KeyedStateBackend<ByteBuffer> flinkStateBackend,
- StateTag<? super K, BagState<T>> address,
- StateNamespace namespace,
- Coder<T> coder) {
-
- this.namespace = namespace;
- this.address = address;
- this.flinkStateBackend = flinkStateBackend;
-
- CoderTypeInformation<T> typeInfo = new CoderTypeInformation<>(coder);
-
- flinkStateDescriptor = new ListStateDescriptor<>(address.getId(), typeInfo);
- }
-
- @Override
- public void add(T input) {
- try {
- flinkStateBackend.getPartitionedState(
- namespace.stringKey(),
- StringSerializer.INSTANCE,
- flinkStateDescriptor).add(input);
- } catch (Exception e) {
- throw new RuntimeException("Error adding to bag state.", e);
- }
- }
-
- @Override
- public BagState<T> readLater() {
- return this;
- }
-
- @Override
- public Iterable<T> read() {
- try {
- Iterable<T> result = flinkStateBackend.getPartitionedState(
- namespace.stringKey(),
- StringSerializer.INSTANCE,
- flinkStateDescriptor).get();
-
- return result != null ? result : Collections.<T>emptyList();
- } catch (Exception e) {
- throw new RuntimeException("Error reading state.", e);
- }
- }
-
- @Override
- public ReadableState<Boolean> isEmpty() {
- return new ReadableState<Boolean>() {
- @Override
- public Boolean read() {
- try {
- Iterable<T> result = flinkStateBackend.getPartitionedState(
- namespace.stringKey(),
- StringSerializer.INSTANCE,
- flinkStateDescriptor).get();
- return result == null;
- } catch (Exception e) {
- throw new RuntimeException("Error reading state.", e);
- }
-
- }
-
- @Override
- public ReadableState<Boolean> readLater() {
- return this;
- }
- };
- }
-
- @Override
- public void clear() {
- try {
- flinkStateBackend.getPartitionedState(
- namespace.stringKey(),
- StringSerializer.INSTANCE,
- flinkStateDescriptor).clear();
- } catch (Exception e) {
- throw new RuntimeException("Error clearing state.", e);
- }
- }
-
- @Override
- public boolean equals(Object o) {
- if (this == o) {
- return true;
- }
- if (o == null || getClass() != o.getClass()) {
- return false;
- }
-
- FlinkBagState<?, ?> that = (FlinkBagState<?, ?>) o;
-
- return namespace.equals(that.namespace) && address.equals(that.address);
-
- }
-
- @Override
- public int hashCode() {
- int result = namespace.hashCode();
- result = 31 * result + address.hashCode();
- return result;
- }
- }
-
- private static class FlinkCombiningState<K, InputT, AccumT, OutputT>
- implements CombiningState<InputT, AccumT, OutputT> {
-
- private final StateNamespace namespace;
- private final StateTag<? super K, CombiningState<InputT, AccumT, OutputT>> address;
- private final Combine.CombineFn<InputT, AccumT, OutputT> combineFn;
- private final ValueStateDescriptor<AccumT> flinkStateDescriptor;
- private final KeyedStateBackend<ByteBuffer> flinkStateBackend;
-
- FlinkCombiningState(
- KeyedStateBackend<ByteBuffer> flinkStateBackend,
- StateTag<? super K, CombiningState<InputT, AccumT, OutputT>> address,
- Combine.CombineFn<InputT, AccumT, OutputT> combineFn,
- StateNamespace namespace,
- Coder<AccumT> accumCoder) {
-
- this.namespace = namespace;
- this.address = address;
- this.combineFn = combineFn;
- this.flinkStateBackend = flinkStateBackend;
-
- CoderTypeInformation<AccumT> typeInfo = new CoderTypeInformation<>(accumCoder);
-
- flinkStateDescriptor = new ValueStateDescriptor<>(address.getId(), typeInfo, null);
- }
-
- @Override
- public CombiningState<InputT, AccumT, OutputT> readLater() {
- return this;
- }
-
- @Override
- public void add(InputT value) {
- try {
- org.apache.flink.api.common.state.ValueState<AccumT> state =
- flinkStateBackend.getPartitionedState(
- namespace.stringKey(),
- StringSerializer.INSTANCE,
- flinkStateDescriptor);
-
- AccumT current = state.value();
- if (current == null) {
- current = combineFn.createAccumulator();
- }
- current = combineFn.addInput(current, value);
- state.update(current);
- } catch (Exception e) {
- throw new RuntimeException("Error adding to state." , e);
- }
- }
-
- @Override
- public void addAccum(AccumT accum) {
- try {
- org.apache.flink.api.common.state.ValueState<AccumT> state =
- flinkStateBackend.getPartitionedState(
- namespace.stringKey(),
- StringSerializer.INSTANCE,
- flinkStateDescriptor);
-
- AccumT current = state.value();
- if (current == null) {
- state.update(accum);
- } else {
- current = combineFn.mergeAccumulators(Lists.newArrayList(current, accum));
- state.update(current);
- }
- } catch (Exception e) {
- throw new RuntimeException("Error adding to state.", e);
- }
- }
-
- @Override
- public AccumT getAccum() {
- try {
- return flinkStateBackend.getPartitionedState(
- namespace.stringKey(),
- StringSerializer.INSTANCE,
- flinkStateDescriptor).value();
- } catch (Exception e) {
- throw new RuntimeException("Error reading state.", e);
- }
- }
-
- @Override
- public AccumT mergeAccumulators(Iterable<AccumT> accumulators) {
- return combineFn.mergeAccumulators(accumulators);
- }
-
- @Override
- public OutputT read() {
- try {
- org.apache.flink.api.common.state.ValueState<AccumT> state =
- flinkStateBackend.getPartitionedState(
- namespace.stringKey(),
- StringSerializer.INSTANCE,
- flinkStateDescriptor);
-
- AccumT accum = state.value();
- if (accum != null) {
- return combineFn.extractOutput(accum);
- } else {
- return combineFn.extractOutput(combineFn.createAccumulator());
- }
- } catch (Exception e) {
- throw new RuntimeException("Error reading state.", e);
- }
- }
-
- @Override
- public ReadableState<Boolean> isEmpty() {
- return new ReadableState<Boolean>() {
- @Override
- public Boolean read() {
- try {
- return flinkStateBackend.getPartitionedState(
- namespace.stringKey(),
- StringSerializer.INSTANCE,
- flinkStateDescriptor).value() == null;
- } catch (Exception e) {
- throw new RuntimeException("Error reading state.", e);
- }
-
- }
-
- @Override
- public ReadableState<Boolean> readLater() {
- return this;
- }
- };
- }
-
- @Override
- public void clear() {
- try {
- flinkStateBackend.getPartitionedState(
- namespace.stringKey(),
- StringSerializer.INSTANCE,
- flinkStateDescriptor).clear();
- } catch (Exception e) {
- throw new RuntimeException("Error clearing state.", e);
- }
- }
-
- @Override
- public boolean equals(Object o) {
- if (this == o) {
- return true;
- }
- if (o == null || getClass() != o.getClass()) {
- return false;
- }
-
- FlinkCombiningState<?, ?, ?, ?> that =
- (FlinkCombiningState<?, ?, ?, ?>) o;
-
- return namespace.equals(that.namespace) && address.equals(that.address);
-
- }
-
- @Override
- public int hashCode() {
- int result = namespace.hashCode();
- result = 31 * result + address.hashCode();
- return result;
- }
- }
-
- private static class FlinkKeyedCombiningState<K, InputT, AccumT, OutputT>
- implements CombiningState<InputT, AccumT, OutputT> {
-
- private final StateNamespace namespace;
- private final StateTag<? super K, CombiningState<InputT, AccumT, OutputT>> address;
- private final Combine.KeyedCombineFn<? super K, InputT, AccumT, OutputT> combineFn;
- private final ValueStateDescriptor<AccumT> flinkStateDescriptor;
- private final KeyedStateBackend<ByteBuffer> flinkStateBackend;
- private final FlinkStateInternals<K> flinkStateInternals;
-
- FlinkKeyedCombiningState(
- KeyedStateBackend<ByteBuffer> flinkStateBackend,
- StateTag<? super K, CombiningState<InputT, AccumT, OutputT>> address,
- Combine.KeyedCombineFn<? super K, InputT, AccumT, OutputT> combineFn,
- StateNamespace namespace,
- Coder<AccumT> accumCoder,
- FlinkStateInternals<K> flinkStateInternals) {
-
- this.namespace = namespace;
- this.address = address;
- this.combineFn = combineFn;
- this.flinkStateBackend = flinkStateBackend;
- this.flinkStateInternals = flinkStateInternals;
-
- CoderTypeInformation<AccumT> typeInfo = new CoderTypeInformation<>(accumCoder);
-
- flinkStateDescriptor = new ValueStateDescriptor<>(address.getId(), typeInfo, null);
- }
-
- @Override
- public CombiningState<InputT, AccumT, OutputT> readLater() {
- return this;
- }
-
- @Override
- public void add(InputT value) {
- try {
- org.apache.flink.api.common.state.ValueState<AccumT> state =
- flinkStateBackend.getPartitionedState(
- namespace.stringKey(),
- StringSerializer.INSTANCE,
- flinkStateDescriptor);
-
- AccumT current = state.value();
- if (current == null) {
- current = combineFn.createAccumulator(flinkStateInternals.getKey());
- }
- current = combineFn.addInput(flinkStateInternals.getKey(), current, value);
- state.update(current);
- } catch (Exception e) {
- throw new RuntimeException("Error adding to state." , e);
- }
- }
-
- @Override
- public void addAccum(AccumT accum) {
- try {
- org.apache.flink.api.common.state.ValueState<AccumT> state =
- flinkStateBackend.getPartitionedState(
- namespace.stringKey(),
- StringSerializer.INSTANCE,
- flinkStateDescriptor);
-
- AccumT current = state.value();
- if (current == null) {
- state.update(accum);
- } else {
- current = combineFn.mergeAccumulators(
- flinkStateInternals.getKey(),
- Lists.newArrayList(current, accum));
- state.update(current);
- }
- } catch (Exception e) {
- throw new RuntimeException("Error adding to state.", e);
- }
- }
-
- @Override
- public AccumT getAccum() {
- try {
- return flinkStateBackend.getPartitionedState(
- namespace.stringKey(),
- StringSerializer.INSTANCE,
- flinkStateDescriptor).value();
- } catch (Exception e) {
- throw new RuntimeException("Error reading state.", e);
- }
- }
-
- @Override
- public AccumT mergeAccumulators(Iterable<AccumT> accumulators) {
- return combineFn.mergeAccumulators(flinkStateInternals.getKey(), accumulators);
- }
-
- @Override
- public OutputT read() {
- try {
- org.apache.flink.api.common.state.ValueState<AccumT> state =
- flinkStateBackend.getPartitionedState(
- namespace.stringKey(),
- StringSerializer.INSTANCE,
- flinkStateDescriptor);
-
- AccumT accum = state.value();
- if (accum != null) {
- return combineFn.extractOutput(flinkStateInternals.getKey(), accum);
- } else {
- return combineFn.extractOutput(
- flinkStateInternals.getKey(),
- combineFn.createAccumulator(flinkStateInternals.getKey()));
- }
- } catch (Exception e) {
- throw new RuntimeException("Error reading state.", e);
- }
- }
-
- @Override
- public ReadableState<Boolean> isEmpty() {
- return new ReadableState<Boolean>() {
- @Override
- public Boolean read() {
- try {
- return flinkStateBackend.getPartitionedState(
- namespace.stringKey(),
- StringSerializer.INSTANCE,
- flinkStateDescriptor).value() == null;
- } catch (Exception e) {
- throw new RuntimeException("Error reading state.", e);
- }
-
- }
-
- @Override
- public ReadableState<Boolean> readLater() {
- return this;
- }
- };
- }
-
- @Override
- public void clear() {
- try {
- flinkStateBackend.getPartitionedState(
- namespace.stringKey(),
- StringSerializer.INSTANCE,
- flinkStateDescriptor).clear();
- } catch (Exception e) {
- throw new RuntimeException("Error clearing state.", e);
- }
- }
-
- @Override
- public boolean equals(Object o) {
- if (this == o) {
- return true;
- }
- if (o == null || getClass() != o.getClass()) {
- return false;
- }
-
- FlinkKeyedCombiningState<?, ?, ?, ?> that =
- (FlinkKeyedCombiningState<?, ?, ?, ?>) o;
-
- return namespace.equals(that.namespace) && address.equals(that.address);
-
- }
-
- @Override
- public int hashCode() {
- int result = namespace.hashCode();
- result = 31 * result + address.hashCode();
- return result;
- }
- }
-
- private static class FlinkCombiningStateWithContext<K, InputT, AccumT, OutputT>
- implements CombiningState<InputT, AccumT, OutputT> {
-
- private final StateNamespace namespace;
- private final StateTag<? super K, CombiningState<InputT, AccumT, OutputT>> address;
- private final CombineWithContext.KeyedCombineFnWithContext<
- ? super K, InputT, AccumT, OutputT> combineFn;
- private final ValueStateDescriptor<AccumT> flinkStateDescriptor;
- private final KeyedStateBackend<ByteBuffer> flinkStateBackend;
- private final FlinkStateInternals<K> flinkStateInternals;
- private final CombineWithContext.Context context;
-
- FlinkCombiningStateWithContext(
- KeyedStateBackend<ByteBuffer> flinkStateBackend,
- StateTag<? super K, CombiningState<InputT, AccumT, OutputT>> address,
- CombineWithContext.KeyedCombineFnWithContext<
- ? super K, InputT, AccumT, OutputT> combineFn,
- StateNamespace namespace,
- Coder<AccumT> accumCoder,
- FlinkStateInternals<K> flinkStateInternals,
- CombineWithContext.Context context) {
-
- this.namespace = namespace;
- this.address = address;
- this.combineFn = combineFn;
- this.flinkStateBackend = flinkStateBackend;
- this.flinkStateInternals = flinkStateInternals;
- this.context = context;
-
- CoderTypeInformation<AccumT> typeInfo = new CoderTypeInformation<>(accumCoder);
-
- flinkStateDescriptor = new ValueStateDescriptor<>(address.getId(), typeInfo, null);
- }
-
- @Override
- public CombiningState<InputT, AccumT, OutputT> readLater() {
- return this;
- }
-
- @Override
- public void add(InputT value) {
- try {
- org.apache.flink.api.common.state.ValueState<AccumT> state =
- flinkStateBackend.getPartitionedState(
- namespace.stringKey(),
- StringSerializer.INSTANCE,
- flinkStateDescriptor);
-
- AccumT current = state.value();
- if (current == null) {
- current = combineFn.createAccumulator(flinkStateInternals.getKey(), context);
- }
- current = combineFn.addInput(flinkStateInternals.getKey(), current, value, context);
- state.update(current);
- } catch (Exception e) {
- throw new RuntimeException("Error adding to state." , e);
- }
- }
-
- @Override
- public void addAccum(AccumT accum) {
- try {
- org.apache.flink.api.common.state.ValueState<AccumT> state =
- flinkStateBackend.getPartitionedState(
- namespace.stringKey(),
- StringSerializer.INSTANCE,
- flinkStateDescriptor);
-
- AccumT current = state.value();
- if (current == null) {
- state.update(accum);
- } else {
- current = combineFn.mergeAccumulators(
- flinkStateInternals.getKey(),
- Lists.newArrayList(current, accum),
- context);
- state.update(current);
- }
- } catch (Exception e) {
- throw new RuntimeException("Error adding to state.", e);
- }
- }
-
- @Override
- public AccumT getAccum() {
- try {
- return flinkStateBackend.getPartitionedState(
- namespace.stringKey(),
- StringSerializer.INSTANCE,
- flinkStateDescriptor).value();
- } catch (Exception e) {
- throw new RuntimeException("Error reading state.", e);
- }
- }
-
- @Override
- public AccumT mergeAccumulators(Iterable<AccumT> accumulators) {
- return combineFn.mergeAccumulators(flinkStateInternals.getKey(), accumulators, context);
- }
-
- @Override
- public OutputT read() {
- try {
- org.apache.flink.api.common.state.ValueState<AccumT> state =
- flinkStateBackend.getPartitionedState(
- namespace.stringKey(),
- StringSerializer.INSTANCE,
- flinkStateDescriptor);
-
- AccumT accum = state.value();
- return combineFn.extractOutput(flinkStateInternals.getKey(), accum, context);
- } catch (Exception e) {
- throw new RuntimeException("Error reading state.", e);
- }
- }
-
- @Override
- public ReadableState<Boolean> isEmpty() {
- return new ReadableState<Boolean>() {
- @Override
- public Boolean read() {
- try {
- return flinkStateBackend.getPartitionedState(
- namespace.stringKey(),
- StringSerializer.INSTANCE,
- flinkStateDescriptor).value() == null;
- } catch (Exception e) {
- throw new RuntimeException("Error reading state.", e);
- }
-
- }
-
- @Override
- public ReadableState<Boolean> readLater() {
- return this;
- }
- };
- }
-
- @Override
- public void clear() {
- try {
- flinkStateBackend.getPartitionedState(
- namespace.stringKey(),
- StringSerializer.INSTANCE,
- flinkStateDescriptor).clear();
- } catch (Exception e) {
- throw new RuntimeException("Error clearing state.", e);
- }
- }
-
- @Override
- public boolean equals(Object o) {
- if (this == o) {
- return true;
- }
- if (o == null || getClass() != o.getClass()) {
- return false;
- }
-
- FlinkCombiningStateWithContext<?, ?, ?, ?> that =
- (FlinkCombiningStateWithContext<?, ?, ?, ?>) o;
-
- return namespace.equals(that.namespace) && address.equals(that.address);
-
- }
-
- @Override
- public int hashCode() {
- int result = namespace.hashCode();
- result = 31 * result + address.hashCode();
- return result;
- }
- }
-
- private static class FlinkWatermarkHoldState<K, W extends BoundedWindow>
- implements WatermarkHoldState<W> {
- private final StateTag<? super K, WatermarkHoldState<W>> address;
- private final OutputTimeFn<? super W> outputTimeFn;
- private final StateNamespace namespace;
- private final KeyedStateBackend<ByteBuffer> flinkStateBackend;
- private final FlinkStateInternals<K> flinkStateInternals;
- private final ValueStateDescriptor<Instant> flinkStateDescriptor;
-
- public FlinkWatermarkHoldState(
- KeyedStateBackend<ByteBuffer> flinkStateBackend,
- FlinkStateInternals<K> flinkStateInternals,
- StateTag<? super K, WatermarkHoldState<W>> address,
- StateNamespace namespace,
- OutputTimeFn<? super W> outputTimeFn) {
- this.address = address;
- this.outputTimeFn = outputTimeFn;
- this.namespace = namespace;
- this.flinkStateBackend = flinkStateBackend;
- this.flinkStateInternals = flinkStateInternals;
-
- CoderTypeInformation<Instant> typeInfo = new CoderTypeInformation<>(InstantCoder.of());
- flinkStateDescriptor = new ValueStateDescriptor<>(address.getId(), typeInfo, null);
- }
-
- @Override
- public OutputTimeFn<? super W> getOutputTimeFn() {
- return outputTimeFn;
- }
-
- @Override
- public WatermarkHoldState<W> readLater() {
- return this;
- }
-
- @Override
- public ReadableState<Boolean> isEmpty() {
- return new ReadableState<Boolean>() {
- @Override
- public Boolean read() {
- try {
- return flinkStateBackend.getPartitionedState(
- namespace.stringKey(),
- StringSerializer.INSTANCE,
- flinkStateDescriptor).value() == null;
- } catch (Exception e) {
- throw new RuntimeException("Error reading state.", e);
- }
- }
-
- @Override
- public ReadableState<Boolean> readLater() {
- return this;
- }
- };
-
- }
-
- @Override
- public void add(Instant value) {
- try {
- org.apache.flink.api.common.state.ValueState<Instant> state =
- flinkStateBackend.getPartitionedState(
- namespace.stringKey(),
- StringSerializer.INSTANCE,
- flinkStateDescriptor);
-
- Instant current = state.value();
- if (current == null) {
- state.update(value);
- flinkStateInternals.watermarkHolds.put(namespace.stringKey(), value);
- } else {
- Instant combined = outputTimeFn.combine(current, value);
- state.update(combined);
- flinkStateInternals.watermarkHolds.put(namespace.stringKey(), combined);
- }
- } catch (Exception e) {
- throw new RuntimeException("Error updating state.", e);
- }
- }
-
- @Override
- public Instant read() {
- try {
- org.apache.flink.api.common.state.ValueState<Instant> state =
- flinkStateBackend.getPartitionedState(
- namespace.stringKey(),
- StringSerializer.INSTANCE,
- flinkStateDescriptor);
- return state.value();
- } catch (Exception e) {
- throw new RuntimeException("Error reading state.", e);
- }
- }
-
- @Override
- public void clear() {
- flinkStateInternals.watermarkHolds.remove(namespace.stringKey());
- try {
- org.apache.flink.api.common.state.ValueState<Instant> state =
- flinkStateBackend.getPartitionedState(
- namespace.stringKey(),
- StringSerializer.INSTANCE,
- flinkStateDescriptor);
- state.clear();
- } catch (Exception e) {
- throw new RuntimeException("Error reading state.", e);
- }
- }
-
- @Override
- public boolean equals(Object o) {
- if (this == o) {
- return true;
- }
- if (o == null || getClass() != o.getClass()) {
- return false;
- }
-
- FlinkWatermarkHoldState<?, ?> that = (FlinkWatermarkHoldState<?, ?>) o;
-
- if (!address.equals(that.address)) {
- return false;
- }
- if (!outputTimeFn.equals(that.outputTimeFn)) {
- return false;
- }
- return namespace.equals(that.namespace);
-
- }
-
- @Override
- public int hashCode() {
- int result = address.hashCode();
- result = 31 * result + outputTimeFn.hashCode();
- result = 31 * result + namespace.hashCode();
- return result;
- }
- }
-}
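As context for the larger file removed above: FlinkStateInternals mapped each Beam state cell to a Flink keyed state, using the Beam StateNamespace's string key as the Flink namespace and one StateDescriptor per state tag, with the current key held encoded in a ByteBuffer. The sketch below is an editorial illustration of that addressing scheme, not part of this commit; the descriptor name "my-value" and the String value type are assumptions.

    // Minimal sketch, assuming the KeyedStateBackend calls used by the deleted
    // class: one Flink ValueState per (namespace string, descriptor) pair.
    // The descriptor name "my-value" and the String value type are illustrative.
    import java.nio.ByteBuffer;
    import org.apache.flink.api.common.state.ValueStateDescriptor;
    import org.apache.flink.api.common.typeinfo.BasicTypeInfo;
    import org.apache.flink.api.common.typeutils.base.StringSerializer;
    import org.apache.flink.runtime.state.KeyedStateBackend;

    class KeyedValueSketch {
      private final KeyedStateBackend<ByteBuffer> backend;
      private final ValueStateDescriptor<String> descriptor =
          new ValueStateDescriptor<>("my-value", BasicTypeInfo.STRING_TYPE_INFO, null);

      KeyedValueSketch(KeyedStateBackend<ByteBuffer> backend) {
        this.backend = backend;
      }

      void write(String beamNamespaceKey, String value) throws Exception {
        // The Beam namespace's string key doubles as the Flink state namespace.
        backend.getPartitionedState(beamNamespaceKey, StringSerializer.INSTANCE, descriptor)
            .update(value);
      }

      String read(String beamNamespaceKey) throws Exception {
        return backend
            .getPartitionedState(beamNamespaceKey, StringSerializer.INSTANCE, descriptor)
            .value();
      }
    }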
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/state/KeyGroupCheckpointedOperator.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/state/KeyGroupCheckpointedOperator.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/state/KeyGroupCheckpointedOperator.java
deleted file mode 100644
index b38a520..0000000
--- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/state/KeyGroupCheckpointedOperator.java
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.beam.runners.flink.translation.wrappers.streaming.state;
-
-import java.io.DataOutputStream;
-
-/**
- * This interface is used to checkpoint key-groups state.
- */
-public interface KeyGroupCheckpointedOperator extends KeyGroupRestoringOperator {
- /**
- * Snapshots the state for a given {@code keyGroupIndex}.
- *
- * <p>AbstractStreamOperator would call this hook in
- * AbstractStreamOperator.snapshotState() while iterating over the key groups.
- * @param keyGroupIndex the id of the key-group to be put in the snapshot.
- * @param out the stream to write to.
- */
- void snapshotKeyGroupState(int keyGroupIndex, DataOutputStream out) throws Exception;
-}
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/state/KeyGroupRestoringOperator.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/state/KeyGroupRestoringOperator.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/state/KeyGroupRestoringOperator.java
deleted file mode 100644
index 2bdfc6e..0000000
--- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/state/KeyGroupRestoringOperator.java
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.beam.runners.flink.translation.wrappers.streaming.state;
-
-import java.io.DataInputStream;
-
-/**
- * This interface is used to restore key-groups state.
- */
-public interface KeyGroupRestoringOperator {
- /**
- * Restore the state for a given {@code keyGroupIndex}.
- * @param keyGroupIndex the id of the key-group whose state is to be restored.
- * @param in the stream to read from.
- */
- void restoreKeyGroupState(int keyGroupIndex, DataInputStream in) throws Exception;
-}
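The two small interfaces removed above describe how an operator takes part in per-key-group checkpointing: snapshotKeyGroupState is called while the key groups are being snapshotted, and restoreKeyGroupState is called on recovery. The sketch below is an editorial illustration of a possible implementor, not taken from the Beam codebase; the per-key-group counter and its meaning are assumptions, and the class is assumed to live in the same package as the interfaces.

    // Minimal sketch of a hypothetical implementor: one int of state is written
    // per key group on snapshot and read back on restore. The counter map is
    // purely illustrative.
    import java.io.DataInputStream;
    import java.io.DataOutputStream;
    import java.util.HashMap;
    import java.util.Map;

    class KeyGroupCountingOperatorSketch implements KeyGroupCheckpointedOperator {

      private final Map<Integer, Integer> countsPerKeyGroup = new HashMap<>();

      @Override
      public void snapshotKeyGroupState(int keyGroupIndex, DataOutputStream out)
          throws Exception {
        Integer count = countsPerKeyGroup.get(keyGroupIndex);
        out.writeInt(count == null ? 0 : count);
      }

      @Override
      public void restoreKeyGroupState(int keyGroupIndex, DataInputStream in)
          throws Exception {
        countsPerKeyGroup.put(keyGroupIndex, in.readInt());
      }
    }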
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/state/package-info.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/state/package-info.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/state/package-info.java
deleted file mode 100644
index 0004e9e..0000000
--- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/state/package-info.java
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * Internal state implementation of the Beam runner for Apache Flink.
- */
-package org.apache.beam.runners.flink.translation.wrappers.streaming.state;
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/runner/src/main/resources/log4j.properties
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/resources/log4j.properties b/runners/flink/runner/src/main/resources/log4j.properties
deleted file mode 100644
index 4b6a708..0000000
--- a/runners/flink/runner/src/main/resources/log4j.properties
+++ /dev/null
@@ -1,23 +0,0 @@
-################################################################################
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-################################################################################
-
-log4j.rootLogger=OFF,console
-log4j.appender.console=org.apache.log4j.ConsoleAppender
-log4j.appender.console.target=System.err
-log4j.appender.console.layout=org.apache.log4j.PatternLayout
-log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{2}: %m%n
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/EncodedValueComparatorTest.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/EncodedValueComparatorTest.java b/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/EncodedValueComparatorTest.java
deleted file mode 100644
index 10d6d9d..0000000
--- a/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/EncodedValueComparatorTest.java
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.beam.runners.flink;
-
-import org.apache.beam.runners.flink.translation.types.EncodedValueComparator;
-import org.apache.beam.runners.flink.translation.types.EncodedValueTypeInformation;
-import org.apache.beam.sdk.coders.CoderException;
-import org.apache.beam.sdk.coders.StringUtf8Coder;
-import org.apache.beam.sdk.util.CoderUtils;
-import org.apache.flink.api.common.ExecutionConfig;
-import org.apache.flink.api.common.typeutils.ComparatorTestBase;
-import org.apache.flink.api.common.typeutils.TypeComparator;
-import org.apache.flink.api.common.typeutils.TypeSerializer;
-import org.junit.Assert;
-
-/**
- * Test for {@link EncodedValueComparator}.
- */
-public class EncodedValueComparatorTest extends ComparatorTestBase<byte[]> {
-
- @Override
- protected TypeComparator<byte[]> createComparator(boolean ascending) {
- return new EncodedValueTypeInformation().createComparator(ascending, new ExecutionConfig());
- }
-
- @Override
- protected TypeSerializer<byte[]> createSerializer() {
- return new EncodedValueTypeInformation().createSerializer(new ExecutionConfig());
- }
-
- @Override
- protected void deepEquals(String message, byte[] should, byte[] is) {
- Assert.assertArrayEquals(message, should, is);
- }
-
- @Override
- protected byte[][] getSortedTestData() {
- StringUtf8Coder coder = StringUtf8Coder.of();
-
- try {
- return new byte[][]{
- CoderUtils.encodeToByteArray(coder, ""),
- CoderUtils.encodeToByteArray(coder, "Lorem Ipsum Dolor Omit Longer"),
- CoderUtils.encodeToByteArray(coder, "aaaa"),
- CoderUtils.encodeToByteArray(coder, "abcd"),
- CoderUtils.encodeToByteArray(coder, "abce"),
- CoderUtils.encodeToByteArray(coder, "abdd"),
- CoderUtils.encodeToByteArray(coder, "accd"),
- CoderUtils.encodeToByteArray(coder, "bbcd")
- };
- } catch (CoderException e) {
- throw new RuntimeException("Could not encode values.", e);
- }
- }
-}
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/FlinkRunnerRegistrarTest.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/FlinkRunnerRegistrarTest.java b/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/FlinkRunnerRegistrarTest.java
deleted file mode 100644
index d9d174c..0000000
--- a/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/FlinkRunnerRegistrarTest.java
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.beam.runners.flink;
-
-import static org.junit.Assert.assertEquals;
-
-import org.apache.beam.sdk.options.PipelineOptions;
-import org.apache.beam.sdk.options.PipelineOptionsFactory;
-import org.junit.Test;
-
-/**
- * Tests the proper registration of the Flink runner.
- */
-public class FlinkRunnerRegistrarTest {
-
- @Test
- public void testFullName() {
- String[] args =
- new String[] {String.format("--runner=%s", FlinkRunner.class.getName())};
- PipelineOptions opts = PipelineOptionsFactory.fromArgs(args).create();
- assertEquals(opts.getRunner(), FlinkRunner.class);
- }
-
- @Test
- public void testClassName() {
- String[] args =
- new String[] {String.format("--runner=%s", FlinkRunner.class.getSimpleName())};
- PipelineOptions opts = PipelineOptionsFactory.fromArgs(args).create();
- assertEquals(opts.getRunner(), FlinkRunner.class);
- }
-
-}
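The test removed above checks that both the simple and the fully qualified class name of FlinkRunner resolve through --runner. The equivalent programmatic selection, shown below, is an editorial sketch rather than part of this commit; the option values are illustrative.

    // Minimal sketch: selecting the Flink runner in code instead of via args.
    import org.apache.beam.runners.flink.FlinkRunner;
    import org.apache.beam.sdk.options.PipelineOptions;
    import org.apache.beam.sdk.options.PipelineOptionsFactory;

    public class FlinkRunnerSelectionSketch {
      public static void main(String[] args) {
        PipelineOptions opts = PipelineOptionsFactory.create();
        // Equivalent to passing --runner=FlinkRunner on the command line.
        opts.setRunner(FlinkRunner.class);
      }
    }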
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/FlinkTestPipeline.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/FlinkTestPipeline.java b/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/FlinkTestPipeline.java
deleted file mode 100644
index d6240c4..0000000
--- a/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/FlinkTestPipeline.java
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.beam.runners.flink;
-
-import org.apache.beam.sdk.Pipeline;
-import org.apache.beam.sdk.PipelineResult;
-import org.apache.beam.sdk.options.PipelineOptions;
-import org.apache.beam.sdk.runners.PipelineRunner;
-
-/**
- * {@link org.apache.beam.sdk.Pipeline} for testing Beam pipelines on the
- * {@link FlinkRunner}.
- */
-public class FlinkTestPipeline extends Pipeline {
-
- /**
- * Creates and returns a new test pipeline for batch execution.
- *
- * <p>Use {@link org.apache.beam.sdk.testing.PAssert} to add tests, then call
- * {@link Pipeline#run} to execute the pipeline and check the tests.
- */
- public static FlinkTestPipeline createForBatch() {
- return create(false);
- }
-
- /**
- * Creates and returns a new test pipeline for streaming execution.
- *
- * <p>Use {@link org.apache.beam.sdk.testing.PAssert} to add tests, then call
- * {@link Pipeline#run} to execute the pipeline and check the tests.
- *
- * @return The Test Pipeline
- */
- public static FlinkTestPipeline createForStreaming() {
- return create(true);
- }
-
- /**
- * Creates and returns a new test pipeline for streaming or batch execution.
- *
- * <p>Use {@link org.apache.beam.sdk.testing.PAssert} to add tests, then call
- * {@link Pipeline#run} to execute the pipeline and check the tests.
- *
- * @param streaming <code>true</code> for streaming mode, <code>false</code> for batch.
- * @return The Test Pipeline.
- */
- private static FlinkTestPipeline create(boolean streaming) {
- TestFlinkRunner flinkRunner = TestFlinkRunner.create(streaming);
- return new FlinkTestPipeline(flinkRunner, flinkRunner.getPipelineOptions());
- }
-
- private FlinkTestPipeline(PipelineRunner<? extends PipelineResult> runner,
- PipelineOptions options) {
- super(runner, options);
- }
-}
-
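The helper removed above wires a TestFlinkRunner into a Pipeline so that PAssert checks are verified when the pipeline runs. A typical usage, shown below, is an editorial sketch and not part of this commit; the element values are illustrative.

    // Minimal usage sketch for the deleted FlinkTestPipeline helper.
    import org.apache.beam.runners.flink.FlinkTestPipeline;
    import org.apache.beam.sdk.testing.PAssert;
    import org.apache.beam.sdk.transforms.Create;
    import org.apache.beam.sdk.values.PCollection;

    public class FlinkTestPipelineUsageSketch {
      public static void main(String[] args) {
        FlinkTestPipeline p = FlinkTestPipeline.createForBatch();
        PCollection<String> words = p.apply(Create.of("hello", "world"));
        // PAssert registers assertions that are checked when the pipeline runs.
        PAssert.that(words).containsInAnyOrder("hello", "world");
        p.run();
      }
    }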
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/PipelineOptionsTest.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/PipelineOptionsTest.java b/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/PipelineOptionsTest.java
deleted file mode 100644
index 06187f6..0000000
--- a/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/PipelineOptionsTest.java
+++ /dev/null
@@ -1,184 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.beam.runners.flink;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNotNull;
-import static org.junit.Assert.assertNull;
-import static org.junit.Assert.assertTrue;
-
-import java.util.Collections;
-import java.util.HashMap;
-import org.apache.beam.runners.flink.translation.utils.SerializedPipelineOptions;
-import org.apache.beam.runners.flink.translation.wrappers.streaming.DoFnOperator;
-import org.apache.beam.sdk.coders.StringUtf8Coder;
-import org.apache.beam.sdk.options.Default;
-import org.apache.beam.sdk.options.Description;
-import org.apache.beam.sdk.options.PipelineOptions;
-import org.apache.beam.sdk.options.PipelineOptionsFactory;
-import org.apache.beam.sdk.transforms.DoFn;
-import org.apache.beam.sdk.transforms.windowing.GlobalWindow;
-import org.apache.beam.sdk.transforms.windowing.PaneInfo;
-import org.apache.beam.sdk.util.WindowedValue;
-import org.apache.beam.sdk.util.WindowingStrategy;
-import org.apache.beam.sdk.values.PCollectionView;
-import org.apache.beam.sdk.values.TupleTag;
-import org.apache.commons.lang3.SerializationUtils;
-import org.apache.flink.api.common.ExecutionConfig;
-import org.apache.flink.api.common.typeinfo.TypeHint;
-import org.apache.flink.api.common.typeinfo.TypeInformation;
-import org.apache.flink.runtime.state.memory.MemoryStateBackend;
-import org.apache.flink.streaming.runtime.streamrecord.StreamRecord;
-import org.apache.flink.streaming.util.OneInputStreamOperatorTestHarness;
-import org.joda.time.Instant;
-import org.junit.Assert;
-import org.junit.BeforeClass;
-import org.junit.Test;
-
-/**
- * Tests for serialization and deserialization of {@link PipelineOptions} in {@link DoFnOperator}.
- */
-public class PipelineOptionsTest {
-
- /**
- * Pipeline options.
- */
- public interface MyOptions extends FlinkPipelineOptions {
- @Description("Bla bla bla")
- @Default.String("Hello")
- String getTestOption();
- void setTestOption(String value);
- }
-
- private static MyOptions options;
- private static SerializedPipelineOptions serializedOptions;
-
- private static final String[] args = new String[]{"--testOption=nothing"};
-
- @BeforeClass
- public static void beforeTest() {
- options = PipelineOptionsFactory.fromArgs(args).as(MyOptions.class);
- serializedOptions = new SerializedPipelineOptions(options);
- }
-
- @Test
- public void testDeserialization() {
- MyOptions deserializedOptions = serializedOptions.getPipelineOptions().as(MyOptions.class);
- assertEquals("nothing", deserializedOptions.getTestOption());
- }
-
- @Test
- public void testIgnoredFieldSerialization() {
- FlinkPipelineOptions options = PipelineOptionsFactory.as(FlinkPipelineOptions.class);
- options.setStateBackend(new MemoryStateBackend());
-
- FlinkPipelineOptions deserialized =
- new SerializedPipelineOptions(options).getPipelineOptions().as(FlinkPipelineOptions.class);
-
- assertNull(deserialized.getStateBackend());
- }
-
- @Test
- public void testCaching() {
- PipelineOptions deserializedOptions =
- serializedOptions.getPipelineOptions().as(PipelineOptions.class);
-
- assertNotNull(deserializedOptions);
- assertTrue(deserializedOptions == serializedOptions.getPipelineOptions());
- assertTrue(deserializedOptions == serializedOptions.getPipelineOptions());
- assertTrue(deserializedOptions == serializedOptions.getPipelineOptions());
- }
-
- @Test(expected = Exception.class)
- public void testNonNull() {
- new SerializedPipelineOptions(null);
- }
-
- @Test(expected = Exception.class)
- public void parDoBaseClassPipelineOptionsNullTest() {
- DoFnOperator<String, String, String> doFnOperator = new DoFnOperator<>(
- new TestDoFn(),
- WindowedValue.getValueOnlyCoder(StringUtf8Coder.of()),
- new TupleTag<String>("main-output"),
- Collections.<TupleTag<?>>emptyList(),
- new DoFnOperator.DefaultOutputManagerFactory<String>(),
- WindowingStrategy.globalDefault(),
- new HashMap<Integer, PCollectionView<?>>(),
- Collections.<PCollectionView<?>>emptyList(),
- null,
- null);
-
- }
-
- /**
- * Tests that PipelineOptions are present after serialization.
- */
- @Test
- public void parDoBaseClassPipelineOptionsSerializationTest() throws Exception {
-
- DoFnOperator<String, String, String> doFnOperator = new DoFnOperator<>(
- new TestDoFn(),
- WindowedValue.getValueOnlyCoder(StringUtf8Coder.of()),
- new TupleTag<String>("main-output"),
- Collections.<TupleTag<?>>emptyList(),
- new DoFnOperator.DefaultOutputManagerFactory<String>(),
- WindowingStrategy.globalDefault(),
- new HashMap<Integer, PCollectionView<?>>(),
- Collections.<PCollectionView<?>>emptyList(),
- options,
- null);
-
- final byte[] serialized = SerializationUtils.serialize(doFnOperator);
-
- @SuppressWarnings("unchecked")
- DoFnOperator<Object, Object, Object> deserialized =
- (DoFnOperator<Object, Object, Object>) SerializationUtils.deserialize(serialized);
-
- TypeInformation<WindowedValue<Object>> typeInformation = TypeInformation.of(
- new TypeHint<WindowedValue<Object>>() {});
-
- OneInputStreamOperatorTestHarness<WindowedValue<Object>, Object> testHarness =
- new OneInputStreamOperatorTestHarness<>(deserialized,
- typeInformation.createSerializer(new ExecutionConfig()));
-
- testHarness.open();
-
- // execute once to access options
- testHarness.processElement(new StreamRecord<>(
- WindowedValue.of(
- new Object(),
- Instant.now(),
- GlobalWindow.INSTANCE,
- PaneInfo.NO_FIRING)));
-
- testHarness.close();
-
- }
-
-
- private static class TestDoFn extends DoFn<String, String> {
-
- @ProcessElement
- public void processElement(ProcessContext c) throws Exception {
- Assert.assertNotNull(c.getPipelineOptions());
- Assert.assertEquals(
- options.getTestOption(),
- c.getPipelineOptions().as(MyOptions.class).getTestOption());
- }
- }
-}
[22/50] [abbrv] beam git commit: Pin default commons-compress version
to beam-parent pom
Posted by dh...@apache.org.
Pin default commons-compress version to beam-parent pom
Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/28b692d5
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/28b692d5
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/28b692d5
Branch: refs/heads/DSL_SQL
Commit: 28b692d596638926964d3099045067d1d4e7de38
Parents: 470808c
Author: Ismaël Mejía <ie...@apache.org>
Authored: Wed Apr 19 12:05:41 2017 +0200
Committer: Ismaël Mejía <ie...@apache.org>
Committed: Wed Apr 19 12:05:41 2017 +0200
----------------------------------------------------------------------
pom.xml | 7 +++++++
runners/flink/runner/pom.xml | 1 -
runners/spark/pom.xml | 1 -
sdks/java/core/pom.xml | 1 -
4 files changed, 7 insertions(+), 3 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/beam/blob/28b692d5/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 306978d..09659db 100644
--- a/pom.xml
+++ b/pom.xml
@@ -102,6 +102,7 @@
<!-- If updating dependencies, please update any relevant javadoc offlineLinks -->
<apache.commons.lang.version>3.5</apache.commons.lang.version>
+ <apache.commons.compress.version>1.9</apache.commons.compress.version>
<apex.kryo.version>2.24.0</apex.kryo.version>
<avro.version>1.8.1</avro.version>
<bigquery.version>v2-rev295-1.22.0</bigquery.version>
@@ -514,6 +515,12 @@
<dependency>
<groupId>org.apache.commons</groupId>
+ <artifactId>commons-compress</artifactId>
+ <version>${apache.commons.compress.version}</version>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.commons</groupId>
<artifactId>commons-lang3</artifactId>
<version>${apache.commons.lang.version}</version>
</dependency>
http://git-wip-us.apache.org/repos/asf/beam/blob/28b692d5/runners/flink/runner/pom.xml
----------------------------------------------------------------------
diff --git a/runners/flink/runner/pom.xml b/runners/flink/runner/pom.xml
index 1e6452d..18343ef 100644
--- a/runners/flink/runner/pom.xml
+++ b/runners/flink/runner/pom.xml
@@ -238,7 +238,6 @@
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-compress</artifactId>
- <version>[1.9,)</version>
<scope>runtime</scope>
</dependency>
http://git-wip-us.apache.org/repos/asf/beam/blob/28b692d5/runners/spark/pom.xml
----------------------------------------------------------------------
diff --git a/runners/spark/pom.xml b/runners/spark/pom.xml
index 514cb43..7493485 100644
--- a/runners/spark/pom.xml
+++ b/runners/spark/pom.xml
@@ -196,7 +196,6 @@
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-compress</artifactId>
- <version>1.9</version>
<scope>provided</scope>
</dependency>
<dependency>
http://git-wip-us.apache.org/repos/asf/beam/blob/28b692d5/sdks/java/core/pom.xml
----------------------------------------------------------------------
diff --git a/sdks/java/core/pom.xml b/sdks/java/core/pom.xml
index 2b12481..dc80a2c 100644
--- a/sdks/java/core/pom.xml
+++ b/sdks/java/core/pom.xml
@@ -247,7 +247,6 @@
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-compress</artifactId>
- <version>1.9</version>
</dependency>
<dependency>
[50/50] [abbrv] beam git commit: This closes #2584
Posted by dh...@apache.org.
This closes #2584
Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/aa07a1d4
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/aa07a1d4
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/aa07a1d4
Branch: refs/heads/DSL_SQL
Commit: aa07a1d41b71c810c4968bd35e5ca69dc56b8c30
Parents: ca87603 19ae877
Author: Dan Halperin <dh...@google.com>
Authored: Wed Apr 19 12:14:05 2017 -0700
Committer: Dan Halperin <dh...@google.com>
Committed: Wed Apr 19 12:14:05 2017 -0700
----------------------------------------------------------------------
.jenkins/common_job_properties.groovy | 261 ----
.../job_beam_PerformanceTests_Dataflow.groovy | 43 -
.jenkins/job_beam_PerformanceTests_JDBC.groovy | 60 -
.jenkins/job_beam_PerformanceTests_Spark.groovy | 44 -
...job_beam_PostCommit_Java_MavenInstall.groovy | 42 -
..._PostCommit_Java_ValidatesRunner_Apex.groovy | 48 -
...tCommit_Java_ValidatesRunner_Dataflow.groovy | 45 -
...PostCommit_Java_ValidatesRunner_Flink.groovy | 43 -
...tCommit_Java_ValidatesRunner_Gearpump.groovy | 49 -
...PostCommit_Java_ValidatesRunner_Spark.groovy | 44 -
.../job_beam_PostCommit_Python_Verify.groovy | 55 -
.../job_beam_PreCommit_Java_MavenInstall.groovy | 42 -
.../job_beam_PreCommit_Website_Stage.groovy | 80 -
.jenkins/job_beam_PreCommit_Website_Test.groovy | 65 -
.../job_beam_Release_NightlySnapshot.groovy | 45 -
.jenkins/job_seed.groovy | 53 -
.../jenkins/common_job_properties.groovy | 261 ++++
.../job_beam_PerformanceTests_Dataflow.groovy | 43 +
.../job_beam_PerformanceTests_JDBC.groovy | 60 +
.../job_beam_PerformanceTests_Spark.groovy | 44 +
...job_beam_PostCommit_Java_MavenInstall.groovy | 42 +
..._PostCommit_Java_ValidatesRunner_Apex.groovy | 48 +
...tCommit_Java_ValidatesRunner_Dataflow.groovy | 45 +
...PostCommit_Java_ValidatesRunner_Flink.groovy | 43 +
...tCommit_Java_ValidatesRunner_Gearpump.groovy | 49 +
...PostCommit_Java_ValidatesRunner_Spark.groovy | 44 +
.../job_beam_PostCommit_Python_Verify.groovy | 55 +
.../job_beam_PreCommit_Java_MavenInstall.groovy | 42 +
.../job_beam_PreCommit_Website_Stage.groovy | 80 +
.../job_beam_PreCommit_Website_Test.groovy | 65 +
.../job_beam_Release_NightlySnapshot.groovy | 45 +
.test-infra/jenkins/job_seed.groovy | 53 +
.../cassandra-service-for-local-dev.yaml | 28 +
.../cassandra-svc-statefulset.yaml | 114 ++
.../LargeITCluster/cassandra-svc-temp.yaml | 74 +
.../cassandra/LargeITCluster/data-load.sh | 122 ++
.../cassandra/LargeITCluster/show_health.sh | 47 +
.../cassandra/LargeITCluster/start-up.sh | 22 +
.../cassandra/LargeITCluster/teardown.sh | 25 +
.../cassandra-service-for-local-dev.yaml | 30 +
.../SmallITCluster/cassandra-svc-rc.yaml | 74 +
.../cassandra/SmallITCluster/data-load.sh | 86 ++
.../cassandra/SmallITCluster/show_health.sh | 47 +
.../cassandra/SmallITCluster/start-up.sh | 23 +
.../cassandra/SmallITCluster/teardown.sh | 22 +
.../kubernetes/cassandra/data-load-setup.sh | 29 +
.../elasticsearch-service-for-local-dev.yaml | 33 +
.../es-services-deployments.yaml | 258 ++++
.../LargeProductionCluster/start-up.sh | 22 +
.../LargeProductionCluster/teardown.sh | 21 +
.../elasticsearch-service-for-local-dev.yaml | 34 +
.../SmallITCluster/elasticsearch-svc-rc.yaml | 96 ++
.../elasticsearch/SmallITCluster/start-up.sh | 23 +
.../elasticsearch/SmallITCluster/teardown.sh | 21 +
.../kubernetes/elasticsearch/data-load-setup.sh | 26 +
.../kubernetes/elasticsearch/data-load.sh | 33 +
.../kubernetes/elasticsearch/es_test_data.py | 299 ++++
.../kubernetes/elasticsearch/show-health.sh | 33 +
.../postgres/postgres-service-for-local-dev.yml | 28 +
.test-infra/kubernetes/postgres/postgres.yml | 56 +
.test-infra/travis/README.md | 23 +
.test-infra/travis/settings.xml | 33 +
.test-infra/travis/test_wordcount.sh | 125 ++
.travis.yml | 4 +-
.travis/README.md | 23 -
.travis/settings.xml | 33 -
.travis/test_wordcount.sh | 125 --
examples/java/pom.xml | 5 +
examples/java8/pom.xml | 39 +
pom.xml | 13 +
runners/apex/pom.xml | 1 +
.../apache/beam/runners/apex/ApexRunner.java | 32 +-
.../beam/runners/apex/ApexYarnLauncher.java | 2 -
.../apex/translation/ParDoTranslator.java | 6 +-
.../operators/ApexGroupByKeyOperator.java | 226 +--
.../operators/ApexParDoOperator.java | 51 +-
.../apex/translation/utils/NoOpStepContext.java | 2 +-
.../utils/SerializablePipelineOptions.java | 13 +-
.../apex/translation/utils/ValuesSource.java | 2 +-
.../apex/examples/UnboundedTextSource.java | 2 +-
.../translation/GroupByKeyTranslatorTest.java | 2 +-
.../apex/translation/ParDoTranslatorTest.java | 18 +-
.../translation/utils/CollectionSource.java | 2 +-
runners/core-construction-java/pom.xml | 2 +-
.../beam/runners/core/construction/Coders.java | 26 +-
.../DeduplicatedFlattenFactory.java | 63 +-
.../EmptyFlattenAsCreateFactory.java | 20 +-
.../runners/core/construction/PCollections.java | 97 ++
.../core/construction/PTransformMatchers.java | 2 -
.../construction/PTransformReplacements.java | 69 +
.../runners/core/construction/PTransforms.java | 107 ++
.../core/construction/PrimitiveCreate.java | 13 +-
.../core/construction/SdkComponents.java | 49 +-
.../SingleInputOutputOverrideFactory.java | 9 +-
.../runners/core/construction/Triggers.java | 336 +++++
.../UnboundedReadFromBoundedSource.java | 7 +-
.../core/construction/UnconsumedReads.java | 72 +
.../UnsupportedOverrideFactory.java | 14 +-
.../core/construction/WindowingStrategies.java | 1 -
.../runners/core/construction/CodersTest.java | 144 +-
.../DeduplicatedFlattenFactoryTest.java | 18 +-
.../EmptyFlattenAsCreateFactoryTest.java | 36 +-
.../core/construction/PCollectionsTest.java | 188 +++
.../PTransformReplacementsTest.java | 131 ++
.../core/construction/PTransformsTest.java | 189 +++
.../core/construction/SdkComponentsTest.java | 85 +-
.../SingleInputOutputOverrideFactoryTest.java | 31 +-
.../runners/core/construction/TriggersTest.java | 111 ++
.../core/construction/UnconsumedReadsTest.java | 105 ++
.../UnsupportedOverrideFactoryTest.java | 11 +-
runners/core-java/pom.xml | 5 +
.../beam/runners/core/BaseExecutionContext.java | 13 +-
.../apache/beam/runners/core/DoFnAdapters.java | 16 +-
.../apache/beam/runners/core/DoFnRunners.java | 40 +-
.../beam/runners/core/ExecutionContext.java | 13 +-
.../GroupAlsoByWindowViaOutputBufferDoFn.java | 2 +-
.../core/GroupAlsoByWindowViaWindowSetDoFn.java | 2 +-
.../GroupAlsoByWindowViaWindowSetNewDoFn.java | 8 +-
.../org/apache/beam/runners/core/OldDoFn.java | 41 +-
...eBoundedSplittableProcessElementInvoker.java | 8 +-
.../beam/runners/core/OutputWindowedValue.java | 10 +-
.../beam/runners/core/ProcessFnRunner.java | 127 ++
.../core/PushbackSideInputDoFnRunner.java | 106 +-
.../beam/runners/core/SimpleDoFnRunner.java | 54 +-
.../beam/runners/core/SimpleOldDoFnRunner.java | 63 +-
.../core/SimplePushbackSideInputDoFnRunner.java | 115 ++
.../beam/runners/core/SplittableParDo.java | 131 +-
.../beam/runners/core/WindowingInternals.java | 8 +-
.../core/WindowingInternalsAdapters.java | 8 +-
.../core/GroupAlsoByWindowsProperties.java | 10 +-
.../apache/beam/runners/core/NoOpOldDoFn.java | 4 +-
.../apache/beam/runners/core/OldDoFnTest.java | 4 +-
...ndedSplittableProcessElementInvokerTest.java | 6 +-
.../core/PushbackSideInputDoFnRunnerTest.java | 282 ----
.../beam/runners/core/ReduceFnTester.java | 10 +-
.../runners/core/SimpleOldDoFnRunnerTest.java | 4 +-
.../SimplePushbackSideInputDoFnRunnerTest.java | 282 ++++
.../beam/runners/core/SplittableParDoTest.java | 98 +-
runners/direct-java/pom.xml | 1 +
.../direct/BoundedReadEvaluatorFactory.java | 2 +-
...ectGBKIntoKeyedWorkItemsOverrideFactory.java | 16 +-
.../direct/DirectGroupByKeyOverrideFactory.java | 14 +-
...ecycleManagerRemovingTransformEvaluator.java | 6 +-
.../GroupAlsoByWindowEvaluatorFactory.java | 10 +-
.../beam/runners/direct/ModelEnforcement.java | 13 +-
.../beam/runners/direct/ParDoEvaluator.java | 129 +-
.../runners/direct/ParDoEvaluatorFactory.java | 23 +-
.../direct/ParDoMultiOverrideFactory.java | 24 +-
...littableProcessElementsEvaluatorFactory.java | 108 +-
.../direct/StatefulParDoEvaluatorFactory.java | 6 +-
.../direct/TestStreamEvaluatorFactory.java | 14 +-
.../direct/TransformEvaluatorFactory.java | 10 +-
.../direct/TransformEvaluatorRegistry.java | 4 +-
.../direct/UnboundedReadEvaluatorFactory.java | 2 +-
.../runners/direct/ViewOverrideFactory.java | 18 +-
.../direct/WriteWithShardingFactory.java | 16 +-
.../direct/BoundedReadEvaluatorFactoryTest.java | 4 +-
.../DirectGroupByKeyOverrideFactoryTest.java | 12 +-
.../beam/runners/direct/DirectRunnerTest.java | 4 +-
...leManagerRemovingTransformEvaluatorTest.java | 8 +-
.../beam/runners/direct/ParDoEvaluatorTest.java | 13 +-
.../direct/ParDoMultiOverrideFactoryTest.java | 45 -
.../direct/TestStreamEvaluatorFactoryTest.java | 12 -
.../UnboundedReadEvaluatorFactoryTest.java | 2 +-
.../runners/direct/ViewOverrideFactoryTest.java | 42 +-
.../direct/WriteWithShardingFactoryTest.java | 23 +-
runners/flink/examples/pom.xml | 126 --
.../beam/runners/flink/examples/TFIDF.java | 455 ------
.../beam/runners/flink/examples/WordCount.java | 129 --
.../runners/flink/examples/package-info.java | 22 -
.../flink/examples/streaming/AutoComplete.java | 400 -----
.../flink/examples/streaming/JoinExamples.java | 154 --
.../examples/streaming/KafkaIOExamples.java | 338 -----
.../KafkaWindowedWordCountExample.java | 164 --
.../examples/streaming/WindowedWordCount.java | 141 --
.../flink/examples/streaming/package-info.java | 22 -
runners/flink/pom.xml | 320 +++-
runners/flink/runner/pom.xml | 311 ----
.../flink/DefaultParallelismFactory.java | 39 -
.../flink/FlinkBatchPipelineTranslator.java | 139 --
.../flink/FlinkBatchTransformTranslators.java | 723 ---------
.../flink/FlinkBatchTranslationContext.java | 153 --
.../flink/FlinkDetachedRunnerResult.java | 76 -
.../FlinkPipelineExecutionEnvironment.java | 241 ---
.../runners/flink/FlinkPipelineOptions.java | 101 --
.../runners/flink/FlinkPipelineTranslator.java | 53 -
.../apache/beam/runners/flink/FlinkRunner.java | 232 ---
.../runners/flink/FlinkRunnerRegistrar.java | 62 -
.../beam/runners/flink/FlinkRunnerResult.java | 98 --
.../flink/FlinkStreamingPipelineTranslator.java | 272 ----
.../FlinkStreamingTransformTranslators.java | 1107 --------------
.../flink/FlinkStreamingTranslationContext.java | 130 --
.../flink/FlinkStreamingViewOverrides.java | 372 -----
.../flink/PipelineTranslationOptimizer.java | 72 -
.../beam/runners/flink/TestFlinkRunner.java | 84 --
.../beam/runners/flink/TranslationMode.java | 31 -
.../apache/beam/runners/flink/package-info.java | 22 -
.../functions/FlinkAggregatorFactory.java | 53 -
.../functions/FlinkAssignContext.java | 63 -
.../functions/FlinkAssignWindows.java | 49 -
.../functions/FlinkDoFnFunction.java | 161 --
.../FlinkMergingNonShuffleReduceFunction.java | 228 ---
.../FlinkMergingPartialReduceFunction.java | 201 ---
.../functions/FlinkMergingReduceFunction.java | 199 ---
.../FlinkMultiOutputPruningFunction.java | 50 -
.../functions/FlinkNoOpStepContext.java | 73 -
.../functions/FlinkPartialReduceFunction.java | 172 ---
.../functions/FlinkReduceFunction.java | 173 ---
.../functions/FlinkSideInputReader.java | 80 -
.../functions/FlinkStatefulDoFnFunction.java | 198 ---
.../functions/SideInputInitializer.java | 73 -
.../translation/functions/package-info.java | 22 -
.../runners/flink/translation/package-info.java | 22 -
.../translation/types/CoderTypeInformation.java | 120 --
.../translation/types/CoderTypeSerializer.java | 132 --
.../types/EncodedValueComparator.java | 195 ---
.../types/EncodedValueSerializer.java | 113 --
.../types/EncodedValueTypeInformation.java | 107 --
.../flink/translation/types/FlinkCoder.java | 63 -
.../types/InspectableByteArrayOutputStream.java | 34 -
.../flink/translation/types/KvKeySelector.java | 50 -
.../flink/translation/types/package-info.java | 22 -
.../utils/SerializedPipelineOptions.java | 65 -
.../flink/translation/utils/package-info.java | 22 -
.../wrappers/DataInputViewWrapper.java | 58 -
.../wrappers/DataOutputViewWrapper.java | 51 -
.../SerializableFnAggregatorWrapper.java | 98 --
.../translation/wrappers/SourceInputFormat.java | 149 --
.../translation/wrappers/SourceInputSplit.java | 52 -
.../translation/wrappers/package-info.java | 22 -
.../wrappers/streaming/DoFnOperator.java | 772 ----------
.../streaming/KvToByteBufferKeySelector.java | 56 -
.../streaming/SingletonKeyedWorkItem.java | 58 -
.../streaming/SingletonKeyedWorkItemCoder.java | 128 --
.../streaming/SplittableDoFnOperator.java | 150 --
.../wrappers/streaming/WindowDoFnOperator.java | 120 --
.../wrappers/streaming/WorkItemKeySelector.java | 56 -
.../streaming/io/BoundedSourceWrapper.java | 218 ---
.../streaming/io/UnboundedFlinkSink.java | 200 ---
.../streaming/io/UnboundedFlinkSource.java | 120 --
.../streaming/io/UnboundedSocketSource.java | 249 ---
.../streaming/io/UnboundedSourceWrapper.java | 476 ------
.../wrappers/streaming/io/package-info.java | 22 -
.../wrappers/streaming/package-info.java | 22 -
.../state/FlinkBroadcastStateInternals.java | 865 -----------
.../state/FlinkKeyGroupStateInternals.java | 487 ------
.../state/FlinkSplitStateInternals.java | 260 ----
.../streaming/state/FlinkStateInternals.java | 1053 -------------
.../state/KeyGroupCheckpointedOperator.java | 35 -
.../state/KeyGroupRestoringOperator.java | 32 -
.../wrappers/streaming/state/package-info.java | 22 -
.../runner/src/main/resources/log4j.properties | 23 -
.../flink/EncodedValueComparatorTest.java | 70 -
.../runners/flink/FlinkRunnerRegistrarTest.java | 48 -
.../beam/runners/flink/FlinkTestPipeline.java | 72 -
.../beam/runners/flink/PipelineOptionsTest.java | 184 ---
.../beam/runners/flink/ReadSourceITCase.java | 85 --
.../flink/ReadSourceStreamingITCase.java | 74 -
.../beam/runners/flink/WriteSinkITCase.java | 192 ---
.../flink/streaming/DoFnOperatorTest.java | 600 --------
.../FlinkBroadcastStateInternalsTest.java | 245 ---
.../FlinkKeyGroupStateInternalsTest.java | 262 ----
.../streaming/FlinkSplitStateInternalsTest.java | 101 --
.../streaming/FlinkStateInternalsTest.java | 395 -----
.../flink/streaming/GroupByNullKeyTest.java | 124 --
.../flink/streaming/TestCountingSource.java | 254 ----
.../streaming/TopWikipediaSessionsITCase.java | 133 --
.../streaming/UnboundedSourceWrapperTest.java | 464 ------
.../runners/flink/streaming/package-info.java | 22 -
.../src/test/resources/log4j-test.properties | 27 -
.../flink/DefaultParallelismFactory.java | 39 +
.../flink/FlinkBatchPipelineTranslator.java | 139 ++
.../flink/FlinkBatchTransformTranslators.java | 723 +++++++++
.../flink/FlinkBatchTranslationContext.java | 153 ++
.../flink/FlinkDetachedRunnerResult.java | 75 +
.../FlinkPipelineExecutionEnvironment.java | 241 +++
.../runners/flink/FlinkPipelineOptions.java | 101 ++
.../runners/flink/FlinkPipelineTranslator.java | 53 +
.../apache/beam/runners/flink/FlinkRunner.java | 232 +++
.../runners/flink/FlinkRunnerRegistrar.java | 62 +
.../beam/runners/flink/FlinkRunnerResult.java | 98 ++
.../flink/FlinkStreamingPipelineTranslator.java | 276 ++++
.../FlinkStreamingTransformTranslators.java | 1044 +++++++++++++
.../flink/FlinkStreamingTranslationContext.java | 130 ++
.../flink/FlinkStreamingViewOverrides.java | 372 +++++
.../flink/PipelineTranslationOptimizer.java | 72 +
.../beam/runners/flink/TestFlinkRunner.java | 84 ++
.../beam/runners/flink/TranslationMode.java | 31 +
.../apache/beam/runners/flink/package-info.java | 22 +
.../functions/FlinkAggregatorFactory.java | 53 +
.../functions/FlinkAssignContext.java | 63 +
.../functions/FlinkAssignWindows.java | 49 +
.../functions/FlinkDoFnFunction.java | 161 ++
.../FlinkMergingNonShuffleReduceFunction.java | 228 +++
.../FlinkMergingPartialReduceFunction.java | 201 +++
.../functions/FlinkMergingReduceFunction.java | 199 +++
.../FlinkMultiOutputPruningFunction.java | 50 +
.../functions/FlinkNoOpStepContext.java | 73 +
.../functions/FlinkPartialReduceFunction.java | 172 +++
.../functions/FlinkReduceFunction.java | 173 +++
.../functions/FlinkSideInputReader.java | 80 +
.../functions/FlinkStatefulDoFnFunction.java | 198 +++
.../functions/SideInputInitializer.java | 73 +
.../translation/functions/package-info.java | 22 +
.../runners/flink/translation/package-info.java | 22 +
.../translation/types/CoderTypeInformation.java | 120 ++
.../translation/types/CoderTypeSerializer.java | 132 ++
.../types/EncodedValueComparator.java | 195 +++
.../types/EncodedValueSerializer.java | 113 ++
.../types/EncodedValueTypeInformation.java | 98 ++
.../types/InspectableByteArrayOutputStream.java | 34 +
.../flink/translation/types/KvKeySelector.java | 50 +
.../flink/translation/types/package-info.java | 22 +
.../utils/SerializedPipelineOptions.java | 67 +
.../flink/translation/utils/package-info.java | 22 +
.../wrappers/DataInputViewWrapper.java | 58 +
.../wrappers/DataOutputViewWrapper.java | 51 +
.../SerializableFnAggregatorWrapper.java | 98 ++
.../translation/wrappers/SourceInputFormat.java | 150 ++
.../translation/wrappers/SourceInputSplit.java | 52 +
.../translation/wrappers/package-info.java | 22 +
.../wrappers/streaming/DoFnOperator.java | 774 ++++++++++
.../streaming/KvToByteBufferKeySelector.java | 56 +
.../streaming/SingletonKeyedWorkItem.java | 56 +
.../streaming/SingletonKeyedWorkItemCoder.java | 126 ++
.../streaming/SplittableDoFnOperator.java | 150 ++
.../wrappers/streaming/WindowDoFnOperator.java | 117 ++
.../wrappers/streaming/WorkItemKeySelector.java | 56 +
.../streaming/io/BoundedSourceWrapper.java | 218 +++
.../streaming/io/UnboundedSocketSource.java | 249 +++
.../streaming/io/UnboundedSourceWrapper.java | 476 ++++++
.../wrappers/streaming/io/package-info.java | 22 +
.../wrappers/streaming/package-info.java | 22 +
.../state/FlinkBroadcastStateInternals.java | 865 +++++++++++
.../state/FlinkKeyGroupStateInternals.java | 487 ++++++
.../state/FlinkSplitStateInternals.java | 260 ++++
.../streaming/state/FlinkStateInternals.java | 1053 +++++++++++++
.../state/KeyGroupCheckpointedOperator.java | 35 +
.../state/KeyGroupRestoringOperator.java | 32 +
.../wrappers/streaming/state/package-info.java | 22 +
.../flink/src/main/resources/log4j.properties | 23 +
.../flink/EncodedValueComparatorTest.java | 70 +
.../runners/flink/FlinkRunnerRegistrarTest.java | 48 +
.../beam/runners/flink/FlinkTestPipeline.java | 72 +
.../beam/runners/flink/PipelineOptionsTest.java | 184 +++
.../beam/runners/flink/ReadSourceITCase.java | 85 ++
.../flink/ReadSourceStreamingITCase.java | 74 +
.../beam/runners/flink/WriteSinkITCase.java | 192 +++
.../flink/streaming/DoFnOperatorTest.java | 600 ++++++++
.../FlinkBroadcastStateInternalsTest.java | 245 +++
.../FlinkKeyGroupStateInternalsTest.java | 262 ++++
.../streaming/FlinkSplitStateInternalsTest.java | 101 ++
.../streaming/FlinkStateInternalsTest.java | 395 +++++
.../flink/streaming/GroupByNullKeyTest.java | 124 ++
.../flink/streaming/TestCountingSource.java | 254 ++++
.../streaming/TopWikipediaSessionsITCase.java | 133 ++
.../streaming/UnboundedSourceWrapperTest.java | 464 ++++++
.../runners/flink/streaming/package-info.java | 22 +
.../src/test/resources/log4j-test.properties | 27 +
runners/google-cloud-dataflow-java/pom.xml | 62 +-
.../dataflow/BatchStatefulParDoOverrides.java | 44 +-
.../runners/dataflow/BatchViewOverrides.java | 23 +-
.../beam/runners/dataflow/DataflowRunner.java | 96 +-
.../dataflow/PrimitiveParDoSingleFactory.java | 15 +-
.../dataflow/ReshuffleOverrideFactory.java | 12 +-
.../dataflow/StreamingViewOverrides.java | 14 +-
.../dataflow/internal/CustomSources.java | 2 +-
.../dataflow/BatchViewOverridesTest.java | 4 +-
.../dataflow/DataflowPipelineJobTest.java | 2 +-
.../runners/dataflow/DataflowRunnerTest.java | 24 +
.../PrimitiveParDoSingleFactoryTest.java | 59 +-
runners/pom.xml | 40 -
runners/spark/pom.xml | 2 +-
.../beam/runners/spark/TestSparkRunner.java | 14 +-
.../beam/runners/spark/io/MicrobatchSource.java | 7 +-
.../beam/runners/spark/io/SourceDStream.java | 2 +-
.../apache/beam/runners/spark/io/SourceRDD.java | 4 +-
.../SparkGroupAlsoByWindowViaWindowSet.java | 13 +-
.../spark/translation/MultiDoFnFunction.java | 4 +-
...SparkGroupAlsoByWindowViaOutputBufferFn.java | 10 +-
.../spark/translation/SparkProcessContext.java | 2 +-
.../spark/translation/SparkRuntimeContext.java | 2 +
.../streaming/StreamingTransformTranslator.java | 2 +-
.../spark/util/SparkSideInputReader.java | 3 +-
sdks/common/fn-api/pom.xml | 5 -
.../fn-api/src/main/proto/beam_fn_api.proto | 174 +--
.../src/main/proto/beam_runner_api.proto | 4 +-
.../src/main/resources/beam/findbugs-filter.xml | 7 -
sdks/java/core/pom.xml | 49 +-
.../main/java/org/apache/beam/sdk/Pipeline.java | 58 +-
.../java/org/apache/beam/sdk/io/AvroIO.java | 13 +-
.../sdk/io/BoundedReadFromUnboundedSource.java | 4 +-
.../org/apache/beam/sdk/io/BoundedSource.java | 13 +-
.../apache/beam/sdk/io/CompressedSource.java | 4 +-
.../org/apache/beam/sdk/io/CountingSource.java | 2 +-
.../org/apache/beam/sdk/io/FileBasedSource.java | 9 +-
.../apache/beam/sdk/io/OffsetBasedSource.java | 4 +-
.../main/java/org/apache/beam/sdk/io/Sink.java | 2 +-
.../java/org/apache/beam/sdk/io/TFRecordIO.java | 639 +++-----
.../org/apache/beam/sdk/io/UnboundedSource.java | 15 +-
.../main/java/org/apache/beam/sdk/io/XmlIO.java | 477 ++++++
.../java/org/apache/beam/sdk/io/XmlSink.java | 226 +--
.../java/org/apache/beam/sdk/io/XmlSource.java | 191 +--
.../beam/sdk/options/BigQueryOptions.java | 32 -
.../options/CloudResourceManagerOptions.java | 40 -
.../DefaultPipelineOptionsRegistrar.java | 5 -
.../org/apache/beam/sdk/options/GcpOptions.java | 227 ---
.../org/apache/beam/sdk/options/GcsOptions.java | 158 --
.../beam/sdk/options/GoogleApiDebugOptions.java | 87 --
.../beam/sdk/options/PipelineOptions.java | 16 +-
.../apache/beam/sdk/options/PubsubOptions.java | 36 -
.../sdk/runners/PTransformOverrideFactory.java | 33 +-
.../apache/beam/sdk/runners/PipelineRunner.java | 7 +-
.../beam/sdk/testing/BigqueryMatcher.java | 256 ----
.../beam/sdk/testing/SourceTestUtils.java | 6 +-
.../apache/beam/sdk/testing/TestPipeline.java | 5 +-
...esSplittableParDoWithWindowedSideInputs.java | 26 +
.../beam/sdk/transforms/AppliedPTransform.java | 5 +
.../org/apache/beam/sdk/transforms/Combine.java | 8 +-
.../org/apache/beam/sdk/transforms/Create.java | 7 +-
.../org/apache/beam/sdk/transforms/DoFn.java | 14 +-
.../apache/beam/sdk/transforms/DoFnTester.java | 41 +-
.../beam/sdk/transforms/FlatMapElements.java | 4 +-
.../apache/beam/sdk/transforms/MapElements.java | 4 +-
.../org/apache/beam/sdk/transforms/ParDo.java | 86 +-
.../apache/beam/sdk/transforms/Partition.java | 2 +-
.../sdk/transforms/display/DisplayData.java | 6 +
.../beam/sdk/transforms/windowing/Triggers.java | 322 ----
.../beam/sdk/transforms/windowing/Window.java | 1 +
.../beam/sdk/transforms/windowing/WindowFn.java | 13 -
.../beam/sdk/util/AppEngineEnvironment.java | 62 -
...AttemptAndTimeBoundedExponentialBackOff.java | 170 ---
.../util/AttemptBoundedExponentialBackOff.java | 86 --
.../apache/beam/sdk/util/CredentialFactory.java | 29 -
.../org/apache/beam/sdk/util/DefaultBucket.java | 105 --
.../beam/sdk/util/GcpCredentialFactory.java | 67 -
.../apache/beam/sdk/util/GcpProjectUtil.java | 106 --
.../beam/sdk/util/GcsIOChannelFactory.java | 111 --
.../sdk/util/GcsIOChannelFactoryRegistrar.java | 38 -
.../apache/beam/sdk/util/GcsPathValidator.java | 95 --
.../java/org/apache/beam/sdk/util/GcsUtil.java | 798 ----------
.../apache/beam/sdk/util/IOChannelUtils.java | 22 +-
.../util/IntervalBoundedExponentialBackOff.java | 89 --
.../beam/sdk/util/NoopCredentialFactory.java | 68 -
.../sdk/util/NullCredentialInitializer.java | 62 -
.../org/apache/beam/sdk/util/Transport.java | 178 ---
.../beam/sdk/values/PCollectionTuple.java | 3 +-
.../org/apache/beam/sdk/values/TupleTag.java | 26 +-
.../apache/beam/sdk/values/TupleTagList.java | 2 +-
.../apache/beam/sdk/values/TypeDescriptors.java | 25 +-
.../org/apache/beam/sdk/values/TypedPValue.java | 4 +-
.../org/apache/beam/SdkCoreApiSurfaceTest.java | 2 -
.../java/org/apache/beam/sdk/PipelineTest.java | 92 +-
.../org/apache/beam/sdk/io/AvroSourceTest.java | 10 +-
.../apache/beam/sdk/io/CountingSourceTest.java | 6 +-
.../apache/beam/sdk/io/FileBasedSourceTest.java | 8 +-
.../beam/sdk/io/OffsetBasedSourceTest.java | 8 +-
.../java/org/apache/beam/sdk/io/ReadTest.java | 4 +-
.../org/apache/beam/sdk/io/TFRecordIOTest.java | 20 +-
.../java/org/apache/beam/sdk/io/TextIOTest.java | 113 +-
.../org/apache/beam/sdk/io/XmlSinkTest.java | 89 +-
.../org/apache/beam/sdk/io/XmlSourceTest.java | 250 +--
.../apache/beam/sdk/metrics/MetricsTest.java | 2 +-
.../apache/beam/sdk/options/GcpOptionsTest.java | 171 ---
.../sdk/options/GoogleApiDebugOptionsTest.java | 145 --
.../sdk/options/PipelineOptionsFactoryTest.java | 4 +-
.../beam/sdk/runners/PipelineRunnerTest.java | 46 +-
.../runners/dataflow/TestCountingSource.java | 2 +-
.../beam/sdk/testing/BigqueryMatcherTest.java | 176 ---
.../beam/sdk/testing/SourceTestUtilsTest.java | 2 +-
.../beam/sdk/testing/StaticWindowsTest.java | 10 +-
.../beam/sdk/testing/TestPipelineTest.java | 6 +-
.../apache/beam/sdk/transforms/CreateTest.java | 20 +-
.../apache/beam/sdk/transforms/ParDoTest.java | 293 ++--
.../beam/sdk/transforms/SplittableDoFnTest.java | 107 +-
.../sdk/transforms/display/DisplayDataTest.java | 17 +
.../sdk/transforms/windowing/TriggersTest.java | 100 --
...mptAndTimeBoundedExponentialBackOffTest.java | 213 ---
.../AttemptBoundedExponentialBackOffTest.java | 85 --
.../apache/beam/sdk/util/DefaultBucketTest.java | 112 --
.../beam/sdk/util/GcpProjectUtilTest.java | 76 -
.../util/GcsIOChannelFactoryRegistrarTest.java | 44 -
.../beam/sdk/util/GcsIOChannelFactoryTest.java | 43 -
.../beam/sdk/util/GcsPathValidatorTest.java | 87 --
.../org/apache/beam/sdk/util/GcsUtilTest.java | 798 ----------
.../IntervalBoundedExponentialBackOffTest.java | 100 --
.../util/RetryHttpRequestInitializerTest.java | 290 ----
.../beam/sdk/values/PCollectionTupleTest.java | 8 +-
.../apache/beam/sdk/values/TypedPValueTest.java | 46 +-
sdks/java/extensions/gcp-core/pom.xml | 217 +++
.../beam/sdk/options/BigQueryOptions.java | 32 +
.../options/CloudResourceManagerOptions.java | 40 +
.../org/apache/beam/sdk/options/GcpOptions.java | 227 +++
.../options/GcpPipelineOptionsRegistrar.java | 39 +
.../org/apache/beam/sdk/options/GcsOptions.java | 154 ++
.../beam/sdk/options/GoogleApiDebugOptions.java | 87 ++
.../apache/beam/sdk/options/PubsubOptions.java | 36 +
.../apache/beam/sdk/options/package-info.java | 22 +
.../beam/sdk/testing/BigqueryMatcher.java | 256 ++++
.../apache/beam/sdk/testing/package-info.java | 21 +
.../apache/beam/sdk/util/CredentialFactory.java | 29 +
.../org/apache/beam/sdk/util/DefaultBucket.java | 105 ++
.../beam/sdk/util/GcpCredentialFactory.java | 67 +
.../apache/beam/sdk/util/GcpProjectUtil.java | 106 ++
.../beam/sdk/util/GcsIOChannelFactory.java | 111 ++
.../sdk/util/GcsIOChannelFactoryRegistrar.java | 38 +
.../apache/beam/sdk/util/GcsPathValidator.java | 95 ++
.../java/org/apache/beam/sdk/util/GcsUtil.java | 798 ++++++++++
.../beam/sdk/util/NoopCredentialFactory.java | 68 +
.../sdk/util/NullCredentialInitializer.java | 62 +
.../org/apache/beam/sdk/util/Transport.java | 178 +++
.../org/apache/beam/sdk/util/package-info.java | 20 +
.../org/apache/beam/GcpCoreApiSurfaceTest.java | 62 +
.../apache/beam/sdk/options/GcpOptionsTest.java | 171 +++
.../sdk/options/GoogleApiDebugOptionsTest.java | 145 ++
.../beam/sdk/testing/BigqueryMatcherTest.java | 176 +++
.../apache/beam/sdk/util/DefaultBucketTest.java | 112 ++
.../beam/sdk/util/GcpProjectUtilTest.java | 76 +
.../util/GcsIOChannelFactoryRegistrarTest.java | 44 +
.../beam/sdk/util/GcsIOChannelFactoryTest.java | 43 +
.../beam/sdk/util/GcsPathValidatorTest.java | 87 ++
.../org/apache/beam/sdk/util/GcsUtilTest.java | 798 ++++++++++
.../util/RetryHttpRequestInitializerTest.java | 290 ++++
sdks/java/extensions/pom.xml | 1 +
sdks/java/harness/pom.xml | 5 +
.../beam/fn/harness/fake/FakeStepContext.java | 2 +-
.../control/ProcessBundleHandlerTest.java | 30 +-
.../sdk/io/elasticsearch/ElasticsearchIO.java | 2 +-
.../sdk/io/elasticsearch/ElasticsearchIOIT.java | 2 +-
.../io/elasticsearch/ElasticsearchIOTest.java | 4 +-
sdks/java/io/google-cloud-platform/pom.xml | 20 +-
.../sdk/io/gcp/bigquery/BatchLoadBigQuery.java | 180 ---
.../beam/sdk/io/gcp/bigquery/BatchLoads.java | 225 +++
.../sdk/io/gcp/bigquery/BigQueryHelpers.java | 13 +
.../beam/sdk/io/gcp/bigquery/BigQueryIO.java | 172 +--
.../sdk/io/gcp/bigquery/BigQuerySourceBase.java | 33 +-
.../io/gcp/bigquery/BigQueryTableSource.java | 34 +-
.../beam/sdk/io/gcp/bigquery/CreateTables.java | 127 ++
.../io/gcp/bigquery/GenerateShardedTable.java | 47 +
.../beam/sdk/io/gcp/bigquery/PrepareWrite.java | 81 +
.../beam/sdk/io/gcp/bigquery/ShardedKey.java | 25 +-
.../sdk/io/gcp/bigquery/StreamWithDeDup.java | 90 --
.../sdk/io/gcp/bigquery/StreamingInserts.java | 79 +
.../sdk/io/gcp/bigquery/StreamingWriteFn.java | 81 +-
.../io/gcp/bigquery/StreamingWriteTables.java | 86 ++
.../sdk/io/gcp/bigquery/TableDestination.java | 76 +
.../io/gcp/bigquery/TableDestinationCoder.java | 60 +
.../sdk/io/gcp/bigquery/TableRowWriter.java | 19 +-
.../sdk/io/gcp/bigquery/TagWithUniqueIds.java | 62 +
.../gcp/bigquery/TagWithUniqueIdsAndTable.java | 135 --
.../sdk/io/gcp/bigquery/TransformingSource.java | 4 +-
.../beam/sdk/io/gcp/bigquery/WriteBundles.java | 82 -
.../io/gcp/bigquery/WriteBundlesToFiles.java | 157 ++
.../sdk/io/gcp/bigquery/WritePartition.java | 163 +-
.../beam/sdk/io/gcp/bigquery/WriteRename.java | 71 +-
.../beam/sdk/io/gcp/bigquery/WriteTables.java | 58 +-
.../beam/sdk/io/gcp/bigtable/BigtableIO.java | 6 +-
.../io/gcp/pubsub/PubsubUnboundedSource.java | 6 +-
.../sdk/io/gcp/bigquery/BigQueryIOTest.java | 1419 +++++++-----------
.../io/gcp/bigquery/FakeBigQueryServices.java | 166 ++
.../sdk/io/gcp/bigquery/FakeDatasetService.java | 208 +++
.../sdk/io/gcp/bigquery/FakeJobService.java | 404 +++++
.../sdk/io/gcp/bigquery/TableContainer.java | 61 +
.../sdk/io/gcp/bigtable/BigtableIOTest.java | 8 +-
.../gcp/pubsub/PubsubUnboundedSourceTest.java | 4 +-
.../hadoop/inputformat/HadoopInputFormatIO.java | 4 +-
.../inputformat/HadoopInputFormatIOTest.java | 19 +-
.../integration/tests/HIFIOCassandraIT.java | 6 +-
.../integration/tests/HIFIOElasticIT.java | 4 +-
.../SmallITCluster/cassandra-svc-rc.yaml | 88 --
.../cassandra/SmallITCluster/start-up.sh | 21 -
.../cassandra/SmallITCluster/teardown.sh | 21 -
.../kubernetes/cassandra/data-load-setup.sh | 29 -
.../resources/kubernetes/cassandra/data-load.sh | 67 -
.../LargeProductionCluster/es-services.yaml | 277 ----
.../LargeProductionCluster/start-up.sh | 21 -
.../LargeProductionCluster/teardown.sh | 20 -
.../SmallITCluster/elasticsearch-svc-rc.yaml | 84 --
.../elasticsearch/SmallITCluster/start-up.sh | 22 -
.../elasticsearch/SmallITCluster/teardown.sh | 20 -
.../kubernetes/elasticsearch/data-load-setup.sh | 26 -
.../kubernetes/elasticsearch/data-load.sh | 33 -
.../kubernetes/elasticsearch/es_test_data.py | 299 ----
.../kubernetes/elasticsearch/show-health.sh | 25 -
sdks/java/io/hadoop/pom.xml | 15 +-
.../org/apache/beam/sdk/io/hbase/HBaseIO.java | 7 +-
.../apache/beam/sdk/io/hbase/HBaseIOTest.java | 2 +-
.../apache/beam/sdk/io/hdfs/HDFSFileSource.java | 6 +-
.../beam/sdk/io/hdfs/HDFSFileSourceTest.java | 4 +-
.../org/apache/beam/sdk/io/jdbc/JdbcIO.java | 42 +-
.../org/apache/beam/sdk/io/jdbc/JdbcIOTest.java | 10 +-
.../postgres-service-for-local-dev.yml | 28 -
.../src/test/resources/kubernetes/postgres.yml | 56 -
.../jdbc/src/test/resources/kubernetes/setup.sh | 19 -
.../src/test/resources/kubernetes/teardown.sh | 19 -
.../java/org/apache/beam/sdk/io/jms/JmsIO.java | 2 +-
.../org/apache/beam/sdk/io/jms/JmsIOTest.java | 4 +-
.../org/apache/beam/sdk/io/kafka/KafkaIO.java | 12 +-
.../apache/beam/sdk/io/kafka/KafkaIOTest.java | 8 +-
.../beam/sdk/io/kinesis/KinesisSource.java | 2 +-
.../beam/sdk/io/mongodb/MongoDbGridFSIO.java | 11 +-
.../apache/beam/sdk/io/mongodb/MongoDbIO.java | 2 +-
.../sdk/io/mongodb/MongoDBGridFSIOTest.java | 2 +-
.../org/apache/beam/sdk/io/mqtt/MqttIO.java | 2 +-
sdks/python/.pylintrc | 6 +-
sdks/python/apache_beam/coders/coder_impl.py | 50 +-
sdks/python/apache_beam/coders/coders.py | 6 +-
sdks/python/apache_beam/error.py | 4 +
.../examples/complete/game/user_score.py | 8 +-
.../examples/complete/top_wikipedia_sessions.py | 8 -
.../examples/cookbook/datastore_wordcount.py | 15 +-
.../examples/cookbook/group_with_coder.py | 6 +-
.../examples/cookbook/multiple_output_pardo.py | 47 +-
.../examples/snippets/snippets_test.py | 33 +-
sdks/python/apache_beam/examples/wordcount.py | 32 +-
.../apache_beam/examples/wordcount_debugging.py | 4 -
sdks/python/apache_beam/internal/gcp/auth.py | 80 +-
.../apache_beam/internal/gcp/auth_test.py | 44 -
.../apache_beam/internal/gcp/json_value.py | 6 +
sdks/python/apache_beam/internal/pickler.py | 8 +-
sdks/python/apache_beam/io/avroio_test.py | 14 +-
sdks/python/apache_beam/io/concat_source.py | 74 +-
.../python/apache_beam/io/concat_source_test.py | 12 +-
sdks/python/apache_beam/io/filebasedsource.py | 53 +-
.../apache_beam/io/filebasedsource_test.py | 25 +-
sdks/python/apache_beam/io/fileio.py | 158 +-
sdks/python/apache_beam/io/fileio_test.py | 45 +-
sdks/python/apache_beam/io/filesystem.py | 1 +
sdks/python/apache_beam/io/filesystems_util.py | 10 +-
sdks/python/apache_beam/io/gcp/bigquery.py | 32 +-
.../io/gcp/datastore/v1/datastoreio.py | 24 +-
.../io/gcp/datastore/v1/datastoreio_test.py | 4 +-
.../apache_beam/io/gcp/datastore/v1/helper.py | 16 +-
.../io/gcp/datastore/v1/query_splitter.py | 2 +-
sdks/python/apache_beam/io/gcp/gcsfilesystem.py | 7 +-
.../io/gcp/tests/bigquery_matcher.py | 3 +-
sdks/python/apache_beam/io/iobase.py | 7 +-
sdks/python/apache_beam/io/localfilesystem.py | 3 +-
sdks/python/apache_beam/io/range_trackers.py | 19 +-
sdks/python/apache_beam/io/source_test_utils.py | 79 +-
.../apache_beam/io/source_test_utils_test.py | 20 +-
sdks/python/apache_beam/io/textio.py | 15 +-
sdks/python/apache_beam/io/textio_test.py | 18 +-
sdks/python/apache_beam/metrics/cells.py | 28 +-
sdks/python/apache_beam/metrics/execution.py | 3 +-
sdks/python/apache_beam/metrics/metric.py | 9 +-
sdks/python/apache_beam/pvalue.py | 26 +-
sdks/python/apache_beam/runners/common.pxd | 2 +-
sdks/python/apache_beam/runners/common.py | 15 +-
.../runners/dataflow/dataflow_metrics_test.py | 3 +-
.../runners/dataflow/dataflow_runner.py | 29 +-
.../runners/dataflow/internal/apiclient.py | 9 +-
.../runners/dataflow/internal/dependency.py | 6 +-
.../runners/dataflow/test_dataflow_runner.py | 4 -
.../runners/direct/bundle_factory.py | 14 +-
sdks/python/apache_beam/runners/direct/clock.py | 9 +-
.../consumer_tracking_pipeline_visitor_test.py | 2 +-
.../apache_beam/runners/direct/direct_runner.py | 6 +
.../runners/direct/evaluation_context.py | 21 +-
.../apache_beam/runners/direct/executor.py | 21 +-
.../runners/direct/transform_evaluator.py | 19 +-
.../runners/direct/transform_result.py | 45 +-
.../runners/direct/watermark_manager.py | 4 +-
sdks/python/apache_beam/runners/runner.py | 17 +-
.../apache_beam/tests/pipeline_verifiers.py | 7 +-
sdks/python/apache_beam/transforms/combiners.py | 62 +-
.../apache_beam/transforms/combiners_test.py | 4 +-
sdks/python/apache_beam/transforms/core.py | 112 +-
.../apache_beam/transforms/create_test.py | 18 +-
sdks/python/apache_beam/transforms/display.py | 4 +-
.../apache_beam/transforms/display_test.py | 36 +
.../python/apache_beam/transforms/ptransform.py | 38 +-
.../apache_beam/transforms/ptransform_test.py | 19 +-
.../python/apache_beam/transforms/sideinputs.py | 11 +-
sdks/python/apache_beam/transforms/trigger.py | 26 +-
.../apache_beam/transforms/trigger_test.py | 6 +-
sdks/python/apache_beam/typehints/decorators.py | 26 +-
.../apache_beam/typehints/trivial_inference.py | 26 +-
.../typehints/trivial_inference_test.py | 3 +-
sdks/python/apache_beam/typehints/typecheck.py | 11 +-
sdks/python/apache_beam/typehints/typehints.py | 66 +-
.../apache_beam/typehints/typehints_test.py | 7 +-
.../apache_beam/utils/annotations_test.py | 2 +-
sdks/python/apache_beam/utils/path.py | 3 +-
.../apache_beam/utils/pipeline_options.py | 95 +-
.../apache_beam/utils/pipeline_options_test.py | 52 +-
sdks/python/apache_beam/utils/proto_utils.py | 15 +-
sdks/python/apache_beam/utils/retry.py | 14 +-
sdks/python/apache_beam/utils/timestamp.py | 6 +-
sdks/python/apache_beam/utils/value_provider.py | 103 ++
.../apache_beam/utils/value_provider_test.py | 145 ++
sdks/python/apache_beam/utils/windowed_value.py | 17 +-
sdks/python/run_pylint.sh | 2 +-
sdks/python/tox.ini | 18 +-
694 files changed, 32745 insertions(+), 32281 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/beam/blob/aa07a1d4/pom.xml
----------------------------------------------------------------------
[07/50] [abbrv] beam git commit: Explodes windows before GBKIKWI
Posted by dh...@apache.org.
Explodes windows before GBKIKWI
Also:
* Adds a test for windowed side inputs that requires this behavior.
* Adds a test category for SDF with windowed side inputs.
Runners should gradually implement support for this category; for now,
only the direct runner does.
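The mechanism relied on here: declaring a BoundedWindow parameter in a @ProcessElement method
marks the DoFn as window-observing, so the runner must invoke it once per window, and each output
then belongs to exactly one window. A minimal sketch of such an identity DoFn, mirroring the
ExplodeWindowsFn added in the diff below (the class name here is only illustrative):

    import org.apache.beam.sdk.transforms.DoFn;
    import org.apache.beam.sdk.transforms.windowing.BoundedWindow;

    /** Identity DoFn; observing the window forces one invocation per window. */
    class ExplodeWindowsSketch<T> extends DoFn<T, T> {
      @ProcessElement
      public void process(ProcessContext c, BoundedWindow window) {
        // The window parameter is not used; its presence alone tells the runner this
        // DoFn observes windows, so multi-window values are exploded and the output
        // is emitted into the single window of each invocation.
        c.output(c.element());
      }
    }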
Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/6ac3ac50
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/6ac3ac50
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/6ac3ac50
Branch: refs/heads/DSL_SQL
Commit: 6ac3ac50fec2eb02927c0a07ca928967cfef5652
Parents: b93de58
Author: Eugene Kirpichov <ki...@google.com>
Authored: Mon Apr 17 11:28:24 2017 -0700
Committer: Eugene Kirpichov <ki...@google.com>
Committed: Tue Apr 18 18:02:07 2017 -0700
----------------------------------------------------------------------
.../beam/runners/core/SplittableParDo.java | 75 +++++++++---------
.../beam/runners/core/SplittableParDoTest.java | 82 +++++++-------------
runners/flink/runner/pom.xml | 3 +-
...esSplittableParDoWithWindowedSideInputs.java | 26 +++++++
.../beam/sdk/transforms/SplittableDoFnTest.java | 41 ++++++++++
5 files changed, 137 insertions(+), 90 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/beam/blob/6ac3ac50/runners/core-java/src/main/java/org/apache/beam/runners/core/SplittableParDo.java
----------------------------------------------------------------------
diff --git a/runners/core-java/src/main/java/org/apache/beam/runners/core/SplittableParDo.java b/runners/core-java/src/main/java/org/apache/beam/runners/core/SplittableParDo.java
index 44db1f7..31d89ee 100644
--- a/runners/core-java/src/main/java/org/apache/beam/runners/core/SplittableParDo.java
+++ b/runners/core-java/src/main/java/org/apache/beam/runners/core/SplittableParDo.java
@@ -19,10 +19,8 @@ package org.apache.beam.runners.core;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkNotNull;
-import static com.google.common.base.Preconditions.checkState;
import com.google.common.annotations.VisibleForTesting;
-import com.google.common.collect.ImmutableList;
import com.google.common.collect.Iterables;
import java.util.List;
import java.util.UUID;
@@ -138,6 +136,12 @@ public class SplittableParDo<InputT, OutputT, RestrictionT>
.setCoder(splitCoder)
.apply("Split restriction", ParDo.of(new SplitRestrictionFn<InputT, RestrictionT>(fn)))
.setCoder(splitCoder)
+ // ProcessFn requires all input elements to be in a single window and have a single
+ // element per work item. This must precede the unique keying so each key has a single
+ // associated element.
+ .apply(
+ "Explode windows",
+ ParDo.of(new ExplodeWindowsFn<ElementAndRestriction<InputT, RestrictionT>>()))
.apply(
"Assign unique key",
WithKeys.of(new RandomUniqueKeyFn<ElementAndRestriction<InputT, RestrictionT>>()))
@@ -158,6 +162,18 @@ public class SplittableParDo<InputT, OutputT, RestrictionT>
}
/**
+ * A {@link DoFn} that forces each of its outputs to be in a single window, by indicating to the
+ * runner that it observes the window of its input element, so the runner is forced to apply it to
+ * each input in a single window and thus its output is also in a single window.
+ */
+ private static class ExplodeWindowsFn<InputT> extends DoFn<InputT, InputT> {
+ @ProcessElement
+ public void process(ProcessContext c, BoundedWindow window) {
+ c.output(c.element());
+ }
+ }
+
+ /**
* Runner-specific primitive {@link GroupByKey GroupByKey-like} {@link PTransform} that produces
* {@link KeyedWorkItem KeyedWorkItems} so that downstream transforms can access state and timers.
*
@@ -317,6 +333,13 @@ public class SplittableParDo<InputT, OutputT, RestrictionT>
* The heart of splittable {@link DoFn} execution: processes a single (element, restriction) pair
* by creating a tracker for the restriction and checkpointing/resuming processing later if
* necessary.
+ *
+ * <p>Takes {@link KeyedWorkItem} and assumes that the KeyedWorkItem contains a single element
+ * (or a single timer set by {@link ProcessFn} itself) in a single window. This is necessary
+ * because {@link ProcessFn} sets timers, and timers are namespaced to a single window and it
+ * should be the window of the input element.
+ *
+ * <p>See also: https://issues.apache.org/jira/browse/BEAM-1983
*/
@VisibleForTesting
public static class ProcessFn<
@@ -441,7 +464,18 @@ public class SplittableParDo<InputT, OutputT, RestrictionT>
// Subsequent calls are timer firings and the element has to be retrieved from the state.
TimerInternals.TimerData timer = Iterables.getOnlyElement(c.element().timersIterable(), null);
boolean isSeedCall = (timer == null);
- StateNamespace stateNamespace = isSeedCall ? StateNamespaces.global() : timer.getNamespace();
+ StateNamespace stateNamespace;
+ if (isSeedCall) {
+ WindowedValue<ElementAndRestriction<InputT, RestrictionT>> windowedValue =
+ Iterables.getOnlyElement(c.element().elementsIterable());
+ BoundedWindow window = Iterables.getOnlyElement(windowedValue.getWindows());
+ stateNamespace =
+ StateNamespaces.window(
+ (Coder<BoundedWindow>) inputWindowingStrategy.getWindowFn().windowCoder(), window);
+ } else {
+ stateNamespace = timer.getNamespace();
+ }
+
ValueState<WindowedValue<InputT>> elementState =
stateInternals.state(stateNamespace, elementTag);
ValueState<RestrictionT> restrictionState =
@@ -451,15 +485,8 @@ public class SplittableParDo<InputT, OutputT, RestrictionT>
ElementAndRestriction<WindowedValue<InputT>, RestrictionT> elementAndRestriction;
if (isSeedCall) {
- // The element and restriction are available in c.element().
- // elementsIterable() will, by construction of SplittableParDo, contain the same value
- // potentially in several different windows. We implode this into a single WindowedValue
- // in order to simplify the rest of the code and avoid iterating over elementsIterable()
- // explicitly. The windows of this WindowedValue will be propagated to windows of the
- // output. This is correct because a splittable DoFn is not allowed to inspect the window
- // of its element.
WindowedValue<ElementAndRestriction<InputT, RestrictionT>> windowedValue =
- implodeWindows(c.element().elementsIterable());
+ Iterables.getOnlyElement(c.element().elementsIterable());
WindowedValue<InputT> element = windowedValue.withValue(windowedValue.getValue().element());
elementState.write(element);
elementAndRestriction =
@@ -498,32 +525,6 @@ public class SplittableParDo<InputT, OutputT, RestrictionT>
stateNamespace, timerInternals.currentProcessingTime(), TimeDomain.PROCESSING_TIME));
}
- /**
- * Does the opposite of {@link WindowedValue#explodeWindows()} - creates a single {@link
- * WindowedValue} from a collection of {@link WindowedValue}'s that is known to contain copies
- * of the same value with the same timestamp, but different window sets.
- *
- * <p>This is only legal to do because we know that {@link RandomUniqueKeyFn} created unique
- * keys for every {@link ElementAndRestriction}, so if there's multiple {@link WindowedValue}'s
- * for the same key, that means only that the windows of that {@link ElementAndRestriction} are
- * being delivered separately rather than all at once. It is also legal to do because splittable
- * {@link DoFn} is not allowed to access the window of its element, so we can propagate the full
- * set of windows of its input to its output.
- */
- private static <InputT, RestrictionT>
- WindowedValue<ElementAndRestriction<InputT, RestrictionT>> implodeWindows(
- Iterable<WindowedValue<ElementAndRestriction<InputT, RestrictionT>>> values) {
- WindowedValue<ElementAndRestriction<InputT, RestrictionT>> first =
- Iterables.getFirst(values, null);
- checkState(first != null, "Got a KeyedWorkItem with no elements and no timers");
- ImmutableList.Builder<BoundedWindow> windows = ImmutableList.builder();
- for (WindowedValue<ElementAndRestriction<InputT, RestrictionT>> value : values) {
- windows.addAll(value.getWindows());
- }
- return WindowedValue.of(
- first.getValue(), first.getTimestamp(), windows.build(), first.getPane());
- }
-
private DoFn<InputT, OutputT>.Context wrapContext(final Context baseContext) {
return fn.new Context() {
@Override
http://git-wip-us.apache.org/repos/asf/beam/blob/6ac3ac50/runners/core-java/src/test/java/org/apache/beam/runners/core/SplittableParDoTest.java
----------------------------------------------------------------------
diff --git a/runners/core-java/src/test/java/org/apache/beam/runners/core/SplittableParDoTest.java b/runners/core-java/src/test/java/org/apache/beam/runners/core/SplittableParDoTest.java
index 5629635..1a44453 100644
--- a/runners/core-java/src/test/java/org/apache/beam/runners/core/SplittableParDoTest.java
+++ b/runners/core-java/src/test/java/org/apache/beam/runners/core/SplittableParDoTest.java
@@ -30,6 +30,7 @@ import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
+import java.util.Collections;
import java.util.List;
import java.util.NoSuchElementException;
import java.util.concurrent.Executors;
@@ -194,11 +195,6 @@ public class SplittableParDoTest {
// ------------------------------- Tests for ProcessFn ---------------------------------
- enum WindowExplosion {
- EXPLODE_WINDOWS,
- DO_NOT_EXPLODE_WINDOWS
- }
-
/**
* A helper for testing {@link SplittableParDo.ProcessFn} on 1 element (but possibly over multiple
* {@link DoFn.ProcessElement} calls).
@@ -293,24 +289,13 @@ public class SplittableParDoTest {
ElementAndRestriction.of(element, restriction),
currentProcessingTime,
GlobalWindow.INSTANCE,
- PaneInfo.ON_TIME_AND_ONLY_FIRING),
- WindowExplosion.DO_NOT_EXPLODE_WINDOWS);
+ PaneInfo.ON_TIME_AND_ONLY_FIRING));
}
- void startElement(
- WindowedValue<ElementAndRestriction<InputT, RestrictionT>> windowedValue,
- WindowExplosion explosion)
+ void startElement(WindowedValue<ElementAndRestriction<InputT, RestrictionT>> windowedValue)
throws Exception {
- switch (explosion) {
- case EXPLODE_WINDOWS:
- tester.processElement(
- KeyedWorkItems.elementsWorkItem("key", windowedValue.explodeWindows()));
- break;
- case DO_NOT_EXPLODE_WINDOWS:
- tester.processElement(
- KeyedWorkItems.elementsWorkItem("key", Arrays.asList(windowedValue)));
- break;
- }
+ tester.processElement(
+ KeyedWorkItems.elementsWorkItem("key", Collections.singletonList(windowedValue)));
}
/**
@@ -394,46 +379,39 @@ public class SplittableParDoTest {
}
@Test
- public void testTrivialProcessFnPropagatesOutputsWindowsAndTimestamp() throws Exception {
- // Tests that ProcessFn correctly propagates windows and timestamp of the element
+ public void testTrivialProcessFnPropagatesOutputWindowAndTimestamp() throws Exception {
+ // Tests that ProcessFn correctly propagates the window and timestamp of the element
// inside the KeyedWorkItem.
// The underlying DoFn is actually monolithic, so this doesn't test splitting.
DoFn<Integer, String> fn = new ToStringFn();
Instant base = Instant.now();
- IntervalWindow w1 =
+ IntervalWindow w =
new IntervalWindow(
base.minus(Duration.standardMinutes(1)), base.plus(Duration.standardMinutes(1)));
- IntervalWindow w2 =
- new IntervalWindow(
- base.minus(Duration.standardMinutes(2)), base.plus(Duration.standardMinutes(2)));
- IntervalWindow w3 =
- new IntervalWindow(
- base.minus(Duration.standardMinutes(3)), base.plus(Duration.standardMinutes(3)));
-
- for (WindowExplosion explosion : WindowExplosion.values()) {
- ProcessFnTester<Integer, String, SomeRestriction, SomeRestrictionTracker> tester =
- new ProcessFnTester<>(
- base, fn, BigEndianIntegerCoder.of(), SerializableCoder.of(SomeRestriction.class),
- MAX_OUTPUTS_PER_BUNDLE, MAX_BUNDLE_DURATION);
- tester.startElement(
- WindowedValue.of(
- ElementAndRestriction.of(42, new SomeRestriction()),
- base,
- Arrays.asList(w1, w2, w3),
- PaneInfo.ON_TIME_AND_ONLY_FIRING),
- explosion);
-
- for (IntervalWindow w : new IntervalWindow[] {w1, w2, w3}) {
- assertEquals(
- Arrays.asList(
- TimestampedValue.of("42a", base),
- TimestampedValue.of("42b", base),
- TimestampedValue.of("42c", base)),
- tester.peekOutputElementsInWindow(w));
- }
- }
+
+ ProcessFnTester<Integer, String, SomeRestriction, SomeRestrictionTracker> tester =
+ new ProcessFnTester<>(
+ base,
+ fn,
+ BigEndianIntegerCoder.of(),
+ SerializableCoder.of(SomeRestriction.class),
+ MAX_OUTPUTS_PER_BUNDLE,
+ MAX_BUNDLE_DURATION);
+ tester.startElement(
+ WindowedValue.of(
+ ElementAndRestriction.of(42, new SomeRestriction()),
+ base,
+ Collections.singletonList(w),
+ PaneInfo.ON_TIME_AND_ONLY_FIRING));
+
+ assertEquals(
+ Arrays.asList(
+ TimestampedValue.of("42a", base),
+ TimestampedValue.of("42b", base),
+ TimestampedValue.of("42c", base)),
+ tester.peekOutputElementsInWindow(w));
}
private static class WatermarkUpdateFn extends DoFn<Instant, String> {
http://git-wip-us.apache.org/repos/asf/beam/blob/6ac3ac50/runners/flink/runner/pom.xml
----------------------------------------------------------------------
diff --git a/runners/flink/runner/pom.xml b/runners/flink/runner/pom.xml
index 95880f4..1e6452d 100644
--- a/runners/flink/runner/pom.xml
+++ b/runners/flink/runner/pom.xml
@@ -91,7 +91,8 @@
org.apache.beam.sdk.testing.UsesMapState,
org.apache.beam.sdk.testing.UsesAttemptedMetrics,
org.apache.beam.sdk.testing.UsesCommittedMetrics,
- org.apache.beam.sdk.testing.UsesTestStream
+ org.apache.beam.sdk.testing.UsesTestStream,
+ org.apache.beam.sdk.testing.UsesSplittableParDoWithWindowedSideInputs
</excludedGroups>
<parallel>none</parallel>
<failIfNoTests>true</failIfNoTests>
http://git-wip-us.apache.org/repos/asf/beam/blob/6ac3ac50/sdks/java/core/src/main/java/org/apache/beam/sdk/testing/UsesSplittableParDoWithWindowedSideInputs.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/testing/UsesSplittableParDoWithWindowedSideInputs.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/testing/UsesSplittableParDoWithWindowedSideInputs.java
new file mode 100644
index 0000000..2b1d673
--- /dev/null
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/testing/UsesSplittableParDoWithWindowedSideInputs.java
@@ -0,0 +1,26 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.sdk.testing;
+
+import org.apache.beam.sdk.transforms.ParDo;
+
+/**
+ * Category tag for validation tests which utilize splittable {@link ParDo} and use
+ * windowed side inputs.
+ */
+public interface UsesSplittableParDoWithWindowedSideInputs {}
http://git-wip-us.apache.org/repos/asf/beam/blob/6ac3ac50/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/SplittableDoFnTest.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/SplittableDoFnTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/SplittableDoFnTest.java
index 30329f4..a0f1fd3 100644
--- a/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/SplittableDoFnTest.java
+++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/SplittableDoFnTest.java
@@ -33,6 +33,7 @@ import org.apache.beam.sdk.testing.PAssert;
import org.apache.beam.sdk.testing.TestPipeline;
import org.apache.beam.sdk.testing.TestStream;
import org.apache.beam.sdk.testing.UsesSplittableParDo;
+import org.apache.beam.sdk.testing.UsesSplittableParDoWithWindowedSideInputs;
import org.apache.beam.sdk.testing.UsesTestStream;
import org.apache.beam.sdk.testing.ValidatesRunner;
import org.apache.beam.sdk.transforms.DoFn.BoundedPerElement;
@@ -252,6 +253,46 @@ public class SplittableDoFnTest implements Serializable {
p.run();
}
+ @Test
+ @Category({
+ ValidatesRunner.class,
+ UsesSplittableParDo.class,
+ UsesSplittableParDoWithWindowedSideInputs.class
+ })
+ public void testWindowedSideInput() throws Exception {
+ PCollection<Integer> mainInput =
+ p.apply("main",
+ Create.timestamped(
+ TimestampedValue.of(0, new Instant(0)),
+ TimestampedValue.of(1, new Instant(1)),
+ TimestampedValue.of(2, new Instant(2)),
+ TimestampedValue.of(3, new Instant(3)),
+ TimestampedValue.of(4, new Instant(4)),
+ TimestampedValue.of(5, new Instant(5)),
+ TimestampedValue.of(6, new Instant(6)),
+ TimestampedValue.of(7, new Instant(7))))
+ .apply("window 2", Window.<Integer>into(FixedWindows.of(Duration.millis(2))));
+
+ PCollectionView<String> sideInput =
+ p.apply("side",
+ Create.timestamped(
+ TimestampedValue.of("a", new Instant(0)),
+ TimestampedValue.of("b", new Instant(4))))
+ .apply("window 4", Window.<String>into(FixedWindows.of(Duration.millis(4))))
+ .apply("singleton", View.<String>asSingleton());
+
+ PCollection<String> res =
+ mainInput.apply(ParDo.of(new SDFWithSideInput(sideInput)).withSideInputs(sideInput));
+
+ PAssert.that(res).containsInAnyOrder("a:0", "a:1", "a:2", "a:3", "b:4", "b:5", "b:6", "b:7");
+
+ p.run();
+
+ // TODO: also add test coverage when the SDF checkpoints - the resumed call should also
+ // properly access side inputs.
+ // TODO: also add test coverage for when some of the windows of the side input are not ready.
+ }
+
private static class SDFWithAdditionalOutput extends DoFn<Integer, String> {
private final TupleTag<String> additionalOutput;
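The test above applies SDFWithSideInput, which is defined elsewhere in SplittableDoFnTest and does not appear in this excerpt. A rough sketch of the shape such a splittable DoFn takes (the restriction handling below is an assumption for illustration, not a copy of the real class; it assumes Guava's checkState and an OffsetRange/OffsetRangeTracker restriction type from the SDK):

private static class SDFWithSideInput extends DoFn<Integer, String> {
  private final PCollectionView<String> sideInput;

  private SDFWithSideInput(PCollectionView<String> sideInput) {
    this.sideInput = sideInput;
  }

  @ProcessElement
  public void process(ProcessContext c, OffsetRangeTracker tracker) {
    // Claim the single offset in the trivial restriction, then read the side input for the
    // element's window and emit "<side>:<element>", matching the assertions in the test.
    checkState(tracker.tryClaim(tracker.currentRestriction().getFrom()));
    String side = c.sideInput(sideInput);
    c.output(side + ":" + c.element());
  }

  @GetInitialRestriction
  public OffsetRange getInitialRestriction(Integer element) {
    return new OffsetRange(0, 1);
  }
}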
[12/50] [abbrv] beam git commit: Refactor batch load job path,
and add support for data-dependent tables.
Posted by dh...@apache.org.
Refactor batch load job path, and add support for data-dependent tables.
Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/8581caf3
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/8581caf3
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/8581caf3
Branch: refs/heads/DSL_SQL
Commit: 8581caf388ad688a0e79cfa154262d1e701dee10
Parents: 58ed5c7
Author: Reuven Lax <re...@google.com>
Authored: Wed Mar 29 07:34:10 2017 -0700
Committer: Eugene Kirpichov <ki...@google.com>
Committed: Tue Apr 18 21:12:50 2017 -0700
----------------------------------------------------------------------
.../sdk/io/gcp/bigquery/BatchLoadBigQuery.java | 180 ----------------
.../beam/sdk/io/gcp/bigquery/BatchLoads.java | 203 +++++++++++++++++++
.../beam/sdk/io/gcp/bigquery/BigQueryIO.java | 3 +-
.../sdk/io/gcp/bigquery/TableDestination.java | 17 +-
.../sdk/io/gcp/bigquery/TableRowWriter.java | 12 +-
.../beam/sdk/io/gcp/bigquery/WriteBundles.java | 82 --------
.../io/gcp/bigquery/WriteBundlesToFiles.java | 102 ++++++++++
.../sdk/io/gcp/bigquery/WritePartition.java | 95 ++++++---
.../beam/sdk/io/gcp/bigquery/WriteRename.java | 63 +++---
.../beam/sdk/io/gcp/bigquery/WriteTables.java | 47 ++---
.../sdk/io/gcp/bigquery/BigQueryIOTest.java | 27 +--
11 files changed, 469 insertions(+), 362 deletions(-)
----------------------------------------------------------------------
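At the core of the refactor, the bounded load path now consumes rows already paired with their destination (PCollection<KV<TableDestination, TableRow>>), which is what allows the destination table to depend on the data. An illustrative sketch of producing such a keyed collection, where rows stands for a PCollection<TableRow> (the routing rule and the two-argument TableDestination constructor are assumptions, not part of this commit):

PCollection<KV<TableDestination, TableRow>> keyedRows =
    rows.apply("KeyByDestination",
        MapElements.via(new SimpleFunction<TableRow, KV<TableDestination, TableRow>>() {
          @Override
          public KV<TableDestination, TableRow> apply(TableRow row) {
            // Hypothetical rule: route each row to a table derived from one of its fields.
            String tableSpec = "my-project:dataset.events_" + row.get("event_type");
            return KV.of(new TableDestination(tableSpec, null), row);
          }
        }));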
http://git-wip-us.apache.org/repos/asf/beam/blob/8581caf3/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BatchLoadBigQuery.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BatchLoadBigQuery.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BatchLoadBigQuery.java
deleted file mode 100644
index 160b231..0000000
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BatchLoadBigQuery.java
+++ /dev/null
@@ -1,180 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.beam.sdk.io.gcp.bigquery;
-
-import com.google.api.services.bigquery.model.TableReference;
-import com.google.api.services.bigquery.model.TableRow;
-import java.io.IOException;
-import java.util.List;
-import org.apache.beam.sdk.Pipeline;
-import org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.TableRefToJson;
-import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.CreateDisposition;
-import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.WriteDisposition;
-import org.apache.beam.sdk.options.BigQueryOptions;
-import org.apache.beam.sdk.options.ValueProvider;
-import org.apache.beam.sdk.options.ValueProvider.NestedValueProvider;
-import org.apache.beam.sdk.transforms.Create;
-import org.apache.beam.sdk.transforms.GroupByKey;
-import org.apache.beam.sdk.transforms.MapElements;
-import org.apache.beam.sdk.transforms.PTransform;
-import org.apache.beam.sdk.transforms.ParDo;
-import org.apache.beam.sdk.transforms.SimpleFunction;
-import org.apache.beam.sdk.transforms.View;
-import org.apache.beam.sdk.transforms.windowing.DefaultTrigger;
-import org.apache.beam.sdk.transforms.windowing.GlobalWindows;
-import org.apache.beam.sdk.transforms.windowing.Window;
-import org.apache.beam.sdk.util.IOChannelFactory;
-import org.apache.beam.sdk.util.IOChannelUtils;
-import org.apache.beam.sdk.values.KV;
-import org.apache.beam.sdk.values.PCollection;
-import org.apache.beam.sdk.values.PCollectionTuple;
-import org.apache.beam.sdk.values.PCollectionView;
-import org.apache.beam.sdk.values.TupleTag;
-import org.apache.beam.sdk.values.TupleTagList;
-import org.apache.beam.sdk.values.TypeDescriptor;
-
-/**
- * PTransform that uses BigQuery batch-load jobs to write a PCollection to BigQuery.
- */
-class BatchLoadBigQuery<T> extends PTransform<PCollection<T>, WriteResult> {
- BigQueryIO.Write<T> write;
-
- BatchLoadBigQuery(BigQueryIO.Write<T> write) {
- this.write = write;
- }
-
- @Override
- public WriteResult expand(PCollection<T> input) {
- Pipeline p = input.getPipeline();
- BigQueryOptions options = p.getOptions().as(BigQueryOptions.class);
- ValueProvider<TableReference> table = write.getTableWithDefaultProject(options);
-
- final String stepUuid = BigQueryHelpers.randomUUIDString();
-
- String tempLocation = options.getTempLocation();
- String tempFilePrefix;
- try {
- IOChannelFactory factory = IOChannelUtils.getFactory(tempLocation);
- tempFilePrefix = factory.resolve(
- factory.resolve(tempLocation, "BigQueryWriteTemp"),
- stepUuid);
- } catch (IOException e) {
- throw new RuntimeException(
- String.format("Failed to resolve BigQuery temp location in %s", tempLocation),
- e);
- }
-
- // Create a singleton job ID token at execution time.
- PCollection<String> singleton = p.apply("Create", Create.of(tempFilePrefix));
- PCollectionView<String> jobIdTokenView = p
- .apply("TriggerIdCreation", Create.of("ignored"))
- .apply("CreateJobId", MapElements.via(
- new SimpleFunction<String, String>() {
- @Override
- public String apply(String input) {
- return stepUuid;
- }
- }))
- .apply(View.<String>asSingleton());
-
- PCollection<T> typedInputInGlobalWindow =
- input.apply(
- Window.<T>into(new GlobalWindows())
- .triggering(DefaultTrigger.of())
- .discardingFiredPanes());
- // Avoid applying the formatFunction if it is the identity formatter.
- PCollection<TableRow> inputInGlobalWindow;
- if (write.getFormatFunction() == BigQueryIO.IDENTITY_FORMATTER) {
- inputInGlobalWindow = (PCollection<TableRow>) typedInputInGlobalWindow;
- } else {
- inputInGlobalWindow =
- typedInputInGlobalWindow.apply(
- MapElements.into(new TypeDescriptor<TableRow>() {}).via(write.getFormatFunction()));
- }
-
- // PCollection of filename, file byte size.
- PCollection<KV<String, Long>> results = inputInGlobalWindow
- .apply("WriteBundles",
- ParDo.of(new WriteBundles(tempFilePrefix)));
-
- TupleTag<KV<Long, List<String>>> multiPartitionsTag =
- new TupleTag<KV<Long, List<String>>>("multiPartitionsTag") {};
- TupleTag<KV<Long, List<String>>> singlePartitionTag =
- new TupleTag<KV<Long, List<String>>>("singlePartitionTag") {};
-
- // Turn the list of files and record counts in a PCollectionView that can be used as a
- // side input.
- PCollectionView<Iterable<KV<String, Long>>> resultsView = results
- .apply("ResultsView", View.<KV<String, Long>>asIterable());
- PCollectionTuple partitions = singleton.apply(ParDo
- .of(new WritePartition(
- resultsView,
- multiPartitionsTag,
- singlePartitionTag))
- .withSideInputs(resultsView)
- .withOutputTags(multiPartitionsTag, TupleTagList.of(singlePartitionTag)));
-
- // If WriteBundles produced more than MAX_NUM_FILES files or MAX_SIZE_BYTES bytes, then
- // the import needs to be split into multiple partitions, and those partitions will be
- // specified in multiPartitionsTag.
- PCollection<String> tempTables = partitions.get(multiPartitionsTag)
- .apply("MultiPartitionsGroupByKey", GroupByKey.<Long, List<String>>create())
- .apply("MultiPartitionsWriteTables", ParDo.of(new WriteTables(
- false,
- write.getBigQueryServices(),
- jobIdTokenView,
- tempFilePrefix,
- NestedValueProvider.of(table, new TableRefToJson()),
- write.getJsonSchema(),
- WriteDisposition.WRITE_EMPTY,
- CreateDisposition.CREATE_IF_NEEDED,
- write.getTableDescription()))
- .withSideInputs(jobIdTokenView));
-
- PCollectionView<Iterable<String>> tempTablesView = tempTables
- .apply("TempTablesView", View.<String>asIterable());
- singleton.apply(ParDo
- .of(new WriteRename(
- write.getBigQueryServices(),
- jobIdTokenView,
- NestedValueProvider.of(table, new TableRefToJson()),
- write.getWriteDisposition(),
- write.getCreateDisposition(),
- tempTablesView,
- write.getTableDescription()))
- .withSideInputs(tempTablesView, jobIdTokenView));
-
- // Write single partition to final table
- partitions.get(singlePartitionTag)
- .apply("SinglePartitionGroupByKey", GroupByKey.<Long, List<String>>create())
- .apply("SinglePartitionWriteTables", ParDo.of(new WriteTables(
- true,
- write.getBigQueryServices(),
- jobIdTokenView,
- tempFilePrefix,
- NestedValueProvider.of(table, new TableRefToJson()),
- write.getJsonSchema(),
- write.getWriteDisposition(),
- write.getCreateDisposition(),
- write.getTableDescription()))
- .withSideInputs(jobIdTokenView));
-
- return WriteResult.in(input.getPipeline());
- }
-}
http://git-wip-us.apache.org/repos/asf/beam/blob/8581caf3/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BatchLoads.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BatchLoads.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BatchLoads.java
new file mode 100644
index 0000000..8594211
--- /dev/null
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BatchLoads.java
@@ -0,0 +1,203 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.beam.sdk.io.gcp.bigquery;
+
+import com.google.api.services.bigquery.model.TableReference;
+import com.google.api.services.bigquery.model.TableRow;
+import com.google.api.services.bigquery.model.TableSchema;
+import java.io.IOException;
+import java.util.List;
+import java.util.Map;
+import javax.annotation.Nullable;
+import org.apache.beam.sdk.Pipeline;
+import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.CreateDisposition;
+import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.WriteDisposition;
+import org.apache.beam.sdk.options.BigQueryOptions;
+import org.apache.beam.sdk.options.ValueProvider;
+import org.apache.beam.sdk.transforms.Create;
+import org.apache.beam.sdk.transforms.GroupByKey;
+import org.apache.beam.sdk.transforms.MapElements;
+import org.apache.beam.sdk.transforms.PTransform;
+import org.apache.beam.sdk.transforms.ParDo;
+import org.apache.beam.sdk.transforms.SerializableFunction;
+import org.apache.beam.sdk.transforms.SimpleFunction;
+import org.apache.beam.sdk.transforms.View;
+import org.apache.beam.sdk.transforms.windowing.DefaultTrigger;
+import org.apache.beam.sdk.transforms.windowing.GlobalWindows;
+import org.apache.beam.sdk.transforms.windowing.Window;
+import org.apache.beam.sdk.util.IOChannelFactory;
+import org.apache.beam.sdk.util.IOChannelUtils;
+import org.apache.beam.sdk.values.KV;
+import org.apache.beam.sdk.values.PCollection;
+import org.apache.beam.sdk.values.PCollectionTuple;
+import org.apache.beam.sdk.values.PCollectionView;
+import org.apache.beam.sdk.values.TupleTag;
+import org.apache.beam.sdk.values.TupleTagList;
+
+
+/**
+ * PTransform that uses BigQuery batch-load jobs to write a PCollection to BigQuery.
+ */
+class BatchLoads<T> extends
+ PTransform<PCollection<KV<TableDestination, TableRow>>, WriteResult> {
+ BigQueryIO.Write<T> write;
+
+ private static class ConstantSchemaFunction implements
+ SerializableFunction<TableDestination, TableSchema> {
+ @Nullable
+ private final String jsonSchema;
+
+ ConstantSchemaFunction(TableSchema schema) {
+ this.jsonSchema = BigQueryHelpers.toJsonString(schema);
+ }
+
+ @Override
+ @Nullable
+ public TableSchema apply(TableDestination table) {
+ return BigQueryHelpers.fromJsonString(jsonSchema, TableSchema.class);
+ }
+ }
+
+ BatchLoads(BigQueryIO.Write<T> write) {
+ this.write = write;
+ }
+
+ @Override
+ public WriteResult expand(PCollection<KV<TableDestination, TableRow>> input) {
+ Pipeline p = input.getPipeline();
+ BigQueryOptions options = p.getOptions().as(BigQueryOptions.class);
+ ValueProvider<TableReference> table = write.getTableWithDefaultProject(options);
+
+ final String stepUuid = BigQueryHelpers.randomUUIDString();
+
+ String tempLocation = options.getTempLocation();
+ String tempFilePrefix;
+ try {
+ IOChannelFactory factory = IOChannelUtils.getFactory(tempLocation);
+ tempFilePrefix = factory.resolve(
+ factory.resolve(tempLocation, "BigQueryWriteTemp"),
+ stepUuid);
+ } catch (IOException e) {
+ throw new RuntimeException(
+ String.format("Failed to resolve BigQuery temp location in %s", tempLocation),
+ e);
+ }
+
+ // Create a singleton job ID token at execution time. This will be used as the base for all
+ // load jobs issued from this instance of the transform.
+ PCollection<String> singleton = p.apply("Create", Create.of(tempFilePrefix));
+ PCollectionView<String> jobIdTokenView = p
+ .apply("TriggerIdCreation", Create.of("ignored"))
+ .apply("CreateJobId", MapElements.via(
+ new SimpleFunction<String, String>() {
+ @Override
+ public String apply(String input) {
+ return stepUuid;
+ }
+ }))
+ .apply(View.<String>asSingleton());
+
+ PCollection<KV<TableDestination, TableRow>> inputInGlobalWindow =
+ input.apply(
+ Window.<KV<TableDestination, TableRow>>into(new GlobalWindows())
+ .triggering(DefaultTrigger.of())
+ .discardingFiredPanes());
+
+ // PCollection of filename, file byte size, and table destination.
+ PCollection<WriteBundlesToFiles.Result> results = inputInGlobalWindow
+ .apply("WriteBundlesToFiles",
+ ParDo.of(new WriteBundlesToFiles(tempFilePrefix)));
+
+ TupleTag<KV<KV<TableDestination, Integer>, List<String>>> multiPartitionsTag =
+ new TupleTag<KV<KV<TableDestination, Integer>, List<String>>>("multiPartitionsTag") {};
+ TupleTag<KV<KV<TableDestination, Integer>, List<String>>> singlePartitionTag =
+ new TupleTag<KV<KV<TableDestination, Integer>, List<String>>>("singlePartitionTag") {};
+
+ // Turn the list of files and record counts into a PCollectionView that can be used as a
+ // side input.
+ PCollectionView<Iterable<WriteBundlesToFiles.Result>> resultsView = results
+ .apply("ResultsView", View.<WriteBundlesToFiles.Result>asIterable());
+ // This transform will look at the set of files written for each table, and if any table has
+ // too many files or bytes, will partition that table's files into multiple partitions for
+ // loading.
+ PCollectionTuple partitions = singleton.apply(ParDo
+ .of(new WritePartition(
+ write.getTable(),
+ write.getTableDescription(),
+ resultsView,
+ multiPartitionsTag,
+ singlePartitionTag))
+ .withSideInputs(resultsView)
+ .withOutputTags(multiPartitionsTag, TupleTagList.of(singlePartitionTag)));
+
+ // Since BigQueryIO.java does not yet have support for per-table schemas, inject a constant
+ // schema function here. If no schema is specified, this function will return null.
+ SerializableFunction<TableDestination, TableSchema> schemaFunction =
+ new ConstantSchemaFunction(write.getSchema());
+
+ // If WriteBundlesToFiles produced more than MAX_NUM_FILES files or MAX_SIZE_BYTES bytes, then
+ // the import needs to be split into multiple partitions, and those partitions will be
+ // specified in multiPartitionsTag.
+ PCollection<KV<TableDestination, String>> tempTables = partitions.get(multiPartitionsTag)
+ // What's this GroupByKey for? Is it so that we get deterministic temp tables? If so, maybe
+ // Reshuffle is better here.
+ .apply("MultiPartitionsGroupByKey",
+ GroupByKey.<KV<TableDestination, Integer>, List<String>>create())
+ .apply("MultiPartitionsWriteTables", ParDo.of(new WriteTables(
+ false,
+ write.getBigQueryServices(),
+ jobIdTokenView,
+ tempFilePrefix,
+ WriteDisposition.WRITE_EMPTY,
+ CreateDisposition.CREATE_IF_NEEDED,
+ schemaFunction))
+ .withSideInputs(jobIdTokenView));
+
+ // This view maps each final table destination to the set of temporary partitioned tables
+ // the PCollection was loaded into.
+ PCollectionView<Map<TableDestination, Iterable<String>>> tempTablesView = tempTables
+ .apply("TempTablesView", View.<TableDestination, String>asMultimap());
+
+ singleton.apply(ParDo
+ .of(new WriteRename(
+ write.getBigQueryServices(),
+ jobIdTokenView,
+ write.getWriteDisposition(),
+ write.getCreateDisposition(),
+ tempTablesView,
+ write.getTableDescription()))
+ .withSideInputs(tempTablesView, jobIdTokenView));
+
+ // Write single partition to final table
+ partitions.get(singlePartitionTag)
+ .apply("SinglePartitionGroupByKey",
+ GroupByKey.<KV<TableDestination, Integer>, List<String>>create())
+ .apply("SinglePartitionWriteTables", ParDo.of(new WriteTables(
+ true,
+ write.getBigQueryServices(),
+ jobIdTokenView,
+ tempFilePrefix,
+ write.getWriteDisposition(),
+ write.getCreateDisposition(),
+ schemaFunction))
+ .withSideInputs(jobIdTokenView));
+
+ return WriteResult.in(input.getPipeline());
+ }
+}
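ConstantSchemaFunction above is a stopgap until BigQueryIO supports per-table schemas; the schemaFunction hook is what WriteTables applies to each destination it loads. A hypothetical per-table variant, purely to show the intended shape (the routing condition and schema are made up and not part of this commit):

SerializableFunction<TableDestination, TableSchema> perTableSchemas =
    new SerializableFunction<TableDestination, TableSchema>() {
      @Override
      public TableSchema apply(TableDestination destination) {
        // Hypothetical: give tables whose spec ends in "_raw" a single STRING payload column,
        // and return null (keep the existing table's schema) for everything else.
        if (destination.getTableSpec().endsWith("_raw")) {
          return new TableSchema().setFields(ImmutableList.of(
              new TableFieldSchema().setName("payload").setType("STRING")));
        }
        return null;
      }
    };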
http://git-wip-us.apache.org/repos/asf/beam/blob/8581caf3/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.java
index af19b83..f1baaf7 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.java
@@ -984,7 +984,8 @@ public class BigQueryIO {
if (input.isBounded() == IsBounded.UNBOUNDED) {
return rowsWithDestination.apply(new StreamingInserts(this));
} else {
- return input.apply(new BatchLoadBigQuery<T>(this));
+
+ return rowsWithDestination.apply(new BatchLoads<T>(this));
}
}
http://git-wip-us.apache.org/repos/asf/beam/blob/8581caf3/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/TableDestination.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/TableDestination.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/TableDestination.java
index 631afeb..1c2b256 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/TableDestination.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/TableDestination.java
@@ -20,6 +20,8 @@ package org.apache.beam.sdk.io.gcp.bigquery;
import com.google.api.services.bigquery.model.TableReference;
+import java.util.Objects;
+
/**
* Encapsulates a BigQuery table destination.
*/
@@ -42,7 +44,6 @@ public class TableDestination {
return tableSpec;
}
-
public TableReference getTableReference() {
return BigQueryHelpers.parseTableSpec(tableSpec);
}
@@ -50,4 +51,18 @@ public class TableDestination {
public String getTableDescription() {
return tableDescription;
}
+
+ @Override
+ public boolean equals(Object o) {
+ if (!(o instanceof TableDestination)) {
+ return false;
+ }
+ TableDestination other = (TableDestination) o;
+ return Objects.equals(tableSpec, other.tableSpec) && Objects.equals(tableDescription, other.tableDescription);
+ }
+
+ @Override
+ public int hashCode() {
+ return Objects.hash(tableSpec, tableDescription);
+ }
}
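The new equals()/hashCode() matter because TableDestination is now used as a map key: WriteBundlesToFiles keeps one writer per destination in a HashMap, and the temp-table view built in BatchLoads is a multimap keyed by destination, so two independently constructed but logically identical destinations must compare equal. A small illustration (assuming a tableSpec/description constructor like the one used elsewhere in this module):

Map<TableDestination, TableRowWriter> writers = new HashMap<>();
TableDestination first = new TableDestination("my-project:dataset.table", "test table");
TableDestination second = new TableDestination("my-project:dataset.table", "test table");
writers.put(first, new TableRowWriter("/tmp/prefix"));
// Holds only because equality is defined over the field values rather than object identity.
boolean found = writers.containsKey(second);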
http://git-wip-us.apache.org/repos/asf/beam/blob/8581caf3/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/TableRowWriter.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/TableRowWriter.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/TableRowWriter.java
index 014c498..a1f6153 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/TableRowWriter.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/TableRowWriter.java
@@ -48,6 +48,14 @@ class TableRowWriter {
protected String mimeType = MimeTypes.TEXT;
private CountingOutputStream out;
+ public class Result {
+ String filename;
+ long byteSize;
+ public Result(String filename, long byteSize) {
+ this.filename = filename;
+ this.byteSize = byteSize;
+ }
+ }
TableRowWriter(String basename) {
this.tempFilePrefix = basename;
}
@@ -77,8 +85,8 @@ class TableRowWriter {
out.write(NEWLINE);
}
- public final KV<String, Long> close() throws IOException {
+ public final Result close() throws IOException {
channel.close();
- return KV.of(fileName, out.getCount());
+ return new Result(fileName, out.getCount());
}
}
http://git-wip-us.apache.org/repos/asf/beam/blob/8581caf3/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/WriteBundles.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/WriteBundles.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/WriteBundles.java
deleted file mode 100644
index 6219226..0000000
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/WriteBundles.java
+++ /dev/null
@@ -1,82 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.beam.sdk.io.gcp.bigquery;
-
-import com.google.api.services.bigquery.model.TableRow;
-import java.util.UUID;
-
-import org.apache.beam.sdk.transforms.DoFn;
-import org.apache.beam.sdk.transforms.display.DisplayData;
-import org.apache.beam.sdk.values.KV;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-/**
- * Writes each bundle of {@link TableRow} elements out to a separate file using
- * {@link TableRowWriter}.
- */
-class WriteBundles extends DoFn<TableRow, KV<String, Long>> {
- private static final Logger LOG = LoggerFactory.getLogger(WriteBundles.class);
-
- private transient TableRowWriter writer = null;
- private final String tempFilePrefix;
-
- WriteBundles(String tempFilePrefix) {
- this.tempFilePrefix = tempFilePrefix;
- }
-
- @ProcessElement
- public void processElement(ProcessContext c) throws Exception {
- if (writer == null) {
- writer = new TableRowWriter(tempFilePrefix);
- writer.open(UUID.randomUUID().toString());
- LOG.debug("Done opening writer {}", writer);
- }
- try {
- writer.write(c.element());
- } catch (Exception e) {
- // Discard write result and close the write.
- try {
- writer.close();
- // The writer does not need to be reset, as this DoFn cannot be reused.
- } catch (Exception closeException) {
- // Do not mask the exception that caused the write to fail.
- e.addSuppressed(closeException);
- }
- throw e;
- }
- }
-
- @FinishBundle
- public void finishBundle(Context c) throws Exception {
- if (writer != null) {
- c.output(writer.close());
- writer = null;
- }
- }
-
- @Override
- public void populateDisplayData(DisplayData.Builder builder) {
- super.populateDisplayData(builder);
-
- builder
- .addIfNotNull(DisplayData.item("tempFilePrefix", tempFilePrefix)
- .withLabel("Temporary File Prefix"));
- }
-}
http://git-wip-us.apache.org/repos/asf/beam/blob/8581caf3/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/WriteBundlesToFiles.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/WriteBundlesToFiles.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/WriteBundlesToFiles.java
new file mode 100644
index 0000000..4e6167b
--- /dev/null
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/WriteBundlesToFiles.java
@@ -0,0 +1,102 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.beam.sdk.io.gcp.bigquery;
+
+import com.google.api.services.bigquery.model.TableRow;
+
+import java.util.Map;
+import java.util.UUID;
+
+import com.google.common.collect.Maps;
+import org.apache.beam.sdk.transforms.DoFn;
+import org.apache.beam.sdk.transforms.display.DisplayData;
+import org.apache.beam.sdk.values.KV;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Writes each bundle of {@link TableRow} elements out to a separate file using
+ * {@link TableRowWriter}.
+ */
+class WriteBundlesToFiles extends DoFn<KV<TableDestination, TableRow>, WriteBundlesToFiles.Result> {
+ private static final Logger LOG = LoggerFactory.getLogger(WriteBundlesToFiles.class);
+
+ // Map from tablespec to a writer for that table.
+ private transient Map<TableDestination, TableRowWriter> writers;
+ private final String tempFilePrefix;
+
+ public static class Result {
+ public String filename;
+ public Long fileByteSize;
+ public TableDestination tableDestination;
+
+ public Result(String filename, Long fileByteSize, TableDestination tableDestination) {
+ this.filename = filename;
+ this.fileByteSize = fileByteSize;
+ this.tableDestination = tableDestination;
+ }
+ }
+ WriteBundlesToFiles(String tempFilePrefix) {
+ this.tempFilePrefix = tempFilePrefix;
+ this.writers = Maps.newHashMap();
+ }
+
+ @ProcessElement
+ public void processElement(ProcessContext c) throws Exception {
+ // Map.get() returns null for an absent key, so no Java 8 getOrDefault is needed here.
+ TableRowWriter writer = writers.get(c.element().getKey());
+ if (writer == null) {
+ writer = new TableRowWriter(tempFilePrefix);
+ writer.open(UUID.randomUUID().toString());
+ writers.put(c.element().getKey(), writer);
+ LOG.debug("Done opening writer {}", writer);
+ }
+ try {
+ writer.write(c.element().getValue());
+ } catch (Exception e) {
+ // Discard write result and close the write.
+ try {
+ writer.close();
+ // The writer does not need to be reset, as this DoFn cannot be reused.
+ } catch (Exception closeException) {
+ // Do not mask the exception that caused the write to fail.
+ e.addSuppressed(closeException);
+ }
+ throw e;
+ }
+ }
+
+ @FinishBundle
+ public void finishBundle(Context c) throws Exception {
+ for (Map.Entry<TableDestination, TableRowWriter> entry : writers.entrySet()) {
+ TableRowWriter.Result result = entry.getValue().close();
+ c.output(new Result(result.filename, result.byteSize, entry.getKey()));
+ }
+ writers.clear();
+ }
+
+ @Override
+ public void populateDisplayData(DisplayData.Builder builder) {
+ super.populateDisplayData(builder);
+
+ builder
+ .addIfNotNull(DisplayData.item("tempFilePrefix", tempFilePrefix)
+ .withLabel("Temporary File Prefix"));
+ }
+}
http://git-wip-us.apache.org/repos/asf/beam/blob/8581caf3/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/WritePartition.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/WritePartition.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/WritePartition.java
index 1b6492e..8e1b16d 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/WritePartition.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/WritePartition.java
@@ -18,27 +18,40 @@
package org.apache.beam.sdk.io.gcp.bigquery;
+import com.google.api.services.bigquery.model.TableReference;
import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
import java.util.List;
+import java.util.Map;
import java.util.UUID;
+
import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write;
+import org.apache.beam.sdk.io.gcp.bigquery.WriteBundlesToFiles.Result;
+import org.apache.beam.sdk.options.ValueProvider;
import org.apache.beam.sdk.transforms.DoFn;
import org.apache.beam.sdk.values.KV;
import org.apache.beam.sdk.values.PCollectionView;
import org.apache.beam.sdk.values.TupleTag;
/**
- * Partitions temporary files based on number of files and file sizes.
+ * Partitions temporary files based on number of files and file sizes. The output is keyed by a
+ * pair of table destination and partition number; the value is the list of files in that partition.
*/
-class WritePartition extends DoFn<String, KV<Long, List<String>>> {
- private final PCollectionView<Iterable<KV<String, Long>>> resultsView;
- private TupleTag<KV<Long, List<String>>> multiPartitionsTag;
- private TupleTag<KV<Long, List<String>>> singlePartitionTag;
+class WritePartition extends DoFn<String, KV<KV<TableDestination, Integer>, List<String>>> {
+ private final ValueProvider<TableReference> singletonOutputTable;
+ private final String singletonOutputTableDescription;
+ private final PCollectionView<Iterable<WriteBundlesToFiles.Result>> resultsView;
+ private TupleTag<KV<KV<TableDestination, Integer>, List<String>>> multiPartitionsTag;
+ private TupleTag<KV<KV<TableDestination, Integer>, List<String>>> singlePartitionTag;
public WritePartition(
- PCollectionView<Iterable<KV<String, Long>>> resultsView,
- TupleTag<KV<Long, List<String>>> multiPartitionsTag,
- TupleTag<KV<Long, List<String>>> singlePartitionTag) {
+ ValueProvider<TableReference> singletonOutputTable,
+ String singletonOutputTableDescription,
+ PCollectionView<Iterable<WriteBundlesToFiles.Result>> resultsView,
+ TupleTag<KV<KV<TableDestination, Integer>, List<String>>> multiPartitionsTag,
+ TupleTag<KV<KV<TableDestination, Integer>, List<String>>> singlePartitionTag) {
+ this.singletonOutputTable = singletonOutputTable;
+ this.singletonOutputTableDescription = singletonOutputTableDescription;
this.resultsView = resultsView;
this.multiPartitionsTag = multiPartitionsTag;
this.singlePartitionTag = singlePartitionTag;
@@ -46,34 +59,62 @@ class WritePartition extends DoFn<String, KV<Long, List<String>>> {
@ProcessElement
public void processElement(ProcessContext c) throws Exception {
- List<KV<String, Long>> results = Lists.newArrayList(c.sideInput(resultsView));
- if (results.isEmpty()) {
- TableRowWriter writer = new TableRowWriter(c.element());
- writer.open(UUID.randomUUID().toString());
- results.add(writer.close());
+ List<WriteBundlesToFiles.Result> results = Lists.newArrayList(c.sideInput(resultsView));
+
+ // If there are no elements to write _and_ the user specified a constant output table, then
+ // generate an empty table of that name.
+ if (results.isEmpty() && singletonOutputTable != null) {
+ TableReference singletonTable = singletonOutputTable.get();
+ if (singletonTable != null) {
+ TableRowWriter writer = new TableRowWriter(c.element());
+ writer.open(UUID.randomUUID().toString());
+ TableRowWriter.Result writerResult = writer.close();
+ results.add(new Result(writerResult.filename, writerResult.byteSize,
+ new TableDestination(singletonTable, singletonOutputTableDescription)));
+ }
}
+
long partitionId = 0;
- int currNumFiles = 0;
- long currSizeBytes = 0;
- List<String> currResults = Lists.newArrayList();
+ Map<TableDestination, Integer> currNumFilesMap = Maps.newHashMap();
+ Map<TableDestination, Long> currSizeBytesMap = Maps.newHashMap();
+ Map<TableDestination, List<List<String>>> currResultsMap = Maps.newHashMap();
for (int i = 0; i < results.size(); ++i) {
- KV<String, Long> fileResult = results.get(i);
+ WriteBundlesToFiles.Result fileResult = results.get(i);
+ TableDestination tableDestination = fileResult.tableDestination;
+ // Map.get() returns null for an absent key.
+ List<List<String>> partitions = currResultsMap.get(tableDestination);
+ if (partitions == null) {
+ partitions = Lists.newArrayList();
+ partitions.add(Lists.<String>newArrayList());
+ currResultsMap.put(tableDestination, partitions);
+ }
+ int currNumFiles = currNumFilesMap.getOrDefault(tableDestination, 0);
+ long currSizeBytes = currSizeBytesMap.getOrDefault(tableDestination, 0L);
if (currNumFiles + 1 > Write.MAX_NUM_FILES
- || currSizeBytes + fileResult.getValue() > Write.MAX_SIZE_BYTES) {
- c.output(multiPartitionsTag, KV.of(++partitionId, currResults));
- currResults = Lists.newArrayList();
+ || currSizeBytes + fileResult.fileByteSize > Write.MAX_SIZE_BYTES) {
+ // Add a new partition for this table.
+ partitions.add(Lists.<String>newArrayList());
+ // c.sideOutput(multiPartitionsTag, KV.of(++partitionId, currResults));
currNumFiles = 0;
currSizeBytes = 0;
+ currNumFilesMap.remove(tableDestination);
+ currSizeBytesMap.remove(tableDestination);
}
- ++currNumFiles;
- currSizeBytes += fileResult.getValue();
- currResults.add(fileResult.getKey());
+ currNumFilesMap.put(tableDestination, currNumFiles + 1);
+ currSizeBytesMap.put(tableDestination, currSizeBytes + fileResult.fileByteSize);
+ // Always add to the most recent partition for this table.
+ partitions.get(partitions.size() - 1).add(fileResult.filename);
}
- if (partitionId == 0) {
- c.output(singlePartitionTag, KV.of(++partitionId, currResults));
- } else {
- c.output(multiPartitionsTag, KV.of(++partitionId, currResults));
+
+ for (Map.Entry<TableDestination, List<List<String>>> entry : currResultsMap.entrySet()) {
+ TableDestination tableDestination = entry.getKey();
+ List<List<String>> partitions = entry.getValue();
+ TupleTag<KV<KV<TableDestination, Integer>, List<String>>> outputTag =
+ (partitions.size() == 1) ? singlePartitionTag : multiPartitionsTag;
+ for (int i = 0; i < partitions.size(); ++i) {
+ c.output(outputTag, KV.of(KV.of(tableDestination, i + 1), partitions.get(i)));
+ }
}
}
}
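Stripped of the per-table bookkeeping, the partitioning above is a greedy packing: files are appended to the current partition until adding one more would exceed Write.MAX_NUM_FILES or Write.MAX_SIZE_BYTES, at which point a new partition is started. A self-contained sketch of just that rule (illustrative only, not code from this commit):

import java.util.ArrayList;
import java.util.List;
import org.apache.beam.sdk.values.KV;

static List<List<String>> greedilyPartition(
    List<KV<String, Long>> filesWithSizes, int maxFilesPerPartition, long maxBytesPerPartition) {
  List<List<String>> partitions = new ArrayList<>();
  List<String> current = new ArrayList<>();
  long currentBytes = 0;
  for (KV<String, Long> file : filesWithSizes) {
    // Start a new partition if this file would push the current one over either limit.
    if (!current.isEmpty()
        && (current.size() + 1 > maxFilesPerPartition
            || currentBytes + file.getValue() > maxBytesPerPartition)) {
      partitions.add(current);
      current = new ArrayList<>();
      currentBytes = 0;
    }
    current.add(file.getKey());
    currentBytes += file.getValue();
  }
  if (!current.isEmpty()) {
    partitions.add(current);
  }
  return partitions;
}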
http://git-wip-us.apache.org/repos/asf/beam/blob/8581caf3/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/WriteRename.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/WriteRename.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/WriteRename.java
index 8cb9439..fbfb290 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/WriteRename.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/WriteRename.java
@@ -18,6 +18,7 @@
package org.apache.beam.sdk.io.gcp.bigquery;
+import com.google.common.collect.Maps;
import com.google.api.services.bigquery.model.Job;
import com.google.api.services.bigquery.model.JobConfigurationTableCopy;
import com.google.api.services.bigquery.model.JobReference;
@@ -25,6 +26,7 @@ import com.google.api.services.bigquery.model.TableReference;
import com.google.common.collect.Lists;
import java.io.IOException;
import java.util.List;
+import java.util.Map;
import javax.annotation.Nullable;
import org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.Status;
@@ -49,24 +51,21 @@ class WriteRename extends DoFn<String, Void> {
private final BigQueryServices bqServices;
private final PCollectionView<String> jobIdToken;
- private final ValueProvider<String> jsonTableRef;
private final WriteDisposition writeDisposition;
private final CreateDisposition createDisposition;
- private final PCollectionView<Iterable<String>> tempTablesView;
+ private final PCollectionView<Map<TableDestination, Iterable<String>>> tempTablesView;
@Nullable
private final String tableDescription;
public WriteRename(
BigQueryServices bqServices,
PCollectionView<String> jobIdToken,
- ValueProvider<String> jsonTableRef,
WriteDisposition writeDisposition,
CreateDisposition createDisposition,
- PCollectionView<Iterable<String>> tempTablesView,
+ PCollectionView<Map<TableDestination, Iterable<String>>> tempTablesView,
@Nullable String tableDescription) {
this.bqServices = bqServices;
this.jobIdToken = jobIdToken;
- this.jsonTableRef = jsonTableRef;
this.writeDisposition = writeDisposition;
this.createDisposition = createDisposition;
this.tempTablesView = tempTablesView;
@@ -75,30 +74,40 @@ class WriteRename extends DoFn<String, Void> {
@ProcessElement
public void processElement(ProcessContext c) throws Exception {
- List<String> tempTablesJson = Lists.newArrayList(c.sideInput(tempTablesView));
+ Map<TableDestination, Iterable<String>> tempTablesMap =
+ Maps.newHashMap(c.sideInput(tempTablesView));
- // Do not copy if no temp tables are provided
- if (tempTablesJson.size() == 0) {
- return;
- }
+ // Process each destination table.
+ for (Map.Entry<TableDestination, Iterable<String>> entry : tempTablesMap.entrySet()) {
+ TableDestination finalTableDestination = entry.getKey();
+ List<String> tempTablesJson = Lists.newArrayList(entry.getValue());
+ // Skip this destination if no temp tables are provided.
+ if (tempTablesJson.size() == 0) {
+ continue;
+ }
+
+ List<TableReference> tempTables = Lists.newArrayList();
+ for (String table : tempTablesJson) {
+ tempTables.add(BigQueryHelpers.fromJsonString(table, TableReference.class));
+ }
+
+ // Make sure each destination table gets a unique job id.
+ String jobIdPrefix = String.format(
+ c.sideInput(jobIdToken) + "0x%08x", finalTableDestination.hashCode());
+ copy(
+ bqServices.getJobService(c.getPipelineOptions().as(BigQueryOptions.class)),
+ bqServices.getDatasetService(c.getPipelineOptions().as(BigQueryOptions.class)),
+ jobIdPrefix,
+ finalTableDestination.getTableReference(),
+ tempTables,
+ writeDisposition,
+ createDisposition,
+ tableDescription);
- List<TableReference> tempTables = Lists.newArrayList();
- for (String table : tempTablesJson) {
- tempTables.add(BigQueryHelpers.fromJsonString(table, TableReference.class));
+ DatasetService tableService =
+ bqServices.getDatasetService(c.getPipelineOptions().as(BigQueryOptions.class));
+ removeTemporaryTables(tableService, tempTables);
}
- copy(
- bqServices.getJobService(c.getPipelineOptions().as(BigQueryOptions.class)),
- bqServices.getDatasetService(c.getPipelineOptions().as(BigQueryOptions.class)),
- c.sideInput(jobIdToken),
- BigQueryHelpers.fromJsonString(jsonTableRef.get(), TableReference.class),
- tempTables,
- writeDisposition,
- createDisposition,
- tableDescription);
-
- DatasetService tableService =
- bqServices.getDatasetService(c.getPipelineOptions().as(BigQueryOptions.class));
- removeTemporaryTables(tableService, tempTables);
}
private void copy(
@@ -170,8 +179,6 @@ class WriteRename extends DoFn<String, Void> {
super.populateDisplayData(builder);
builder
- .addIfNotNull(DisplayData.item("jsonTableRef", jsonTableRef)
- .withLabel("Table Reference"))
.add(DisplayData.item("writeDisposition", writeDisposition.toString())
.withLabel("Write Disposition"))
.add(DisplayData.item("createDisposition", createDisposition.toString())
http://git-wip-us.apache.org/repos/asf/beam/blob/8581caf3/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/WriteTables.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/WriteTables.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/WriteTables.java
index 29680ad..5051c95 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/WriteTables.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/WriteTables.java
@@ -41,6 +41,7 @@ import org.apache.beam.sdk.options.BigQueryOptions;
import org.apache.beam.sdk.options.PipelineOptions;
import org.apache.beam.sdk.options.ValueProvider;
import org.apache.beam.sdk.transforms.DoFn;
+import org.apache.beam.sdk.transforms.SerializableFunction;
import org.apache.beam.sdk.transforms.display.DisplayData;
import org.apache.beam.sdk.util.FileIOChannelFactory;
import org.apache.beam.sdk.util.GcsIOChannelFactory;
@@ -57,48 +58,45 @@ import org.slf4j.LoggerFactory;
/**
* Writes partitions to BigQuery tables.
*/
-class WriteTables extends DoFn<KV<Long, Iterable<List<String>>>, String> {
+class WriteTables extends DoFn<KV<KV<TableDestination, Integer>, Iterable<List<String>>>,
+ KV<TableDestination, String>> {
private static final Logger LOG = LoggerFactory.getLogger(WriteTables.class);
private final boolean singlePartition;
private final BigQueryServices bqServices;
private final PCollectionView<String> jobIdToken;
private final String tempFilePrefix;
- private final ValueProvider<String> jsonTableRef;
- private final ValueProvider<String> jsonSchema;
private final WriteDisposition writeDisposition;
private final CreateDisposition createDisposition;
- @Nullable
- private final String tableDescription;
+ private final SerializableFunction<TableDestination, TableSchema> schemaFunction;
public WriteTables(
boolean singlePartition,
BigQueryServices bqServices,
PCollectionView<String> jobIdToken,
String tempFilePrefix,
- ValueProvider<String> jsonTableRef,
- ValueProvider<String> jsonSchema,
WriteDisposition writeDisposition,
CreateDisposition createDisposition,
- @Nullable String tableDescription) {
+ SerializableFunction<TableDestination, TableSchema> schemaFunction) {
this.singlePartition = singlePartition;
this.bqServices = bqServices;
this.jobIdToken = jobIdToken;
this.tempFilePrefix = tempFilePrefix;
- this.jsonTableRef = jsonTableRef;
- this.jsonSchema = jsonSchema;
this.writeDisposition = writeDisposition;
this.createDisposition = createDisposition;
- this.tableDescription = tableDescription;
+ this.schemaFunction = schemaFunction;
}
@ProcessElement
public void processElement(ProcessContext c) throws Exception {
- List<String> partition = Lists.newArrayList(c.element().getValue()).get(0);
+ TableDestination tableDestination = c.element().getKey().getKey();
+ Integer partition = c.element().getKey().getValue();
+ List<String> partitionFiles = Lists.newArrayList(c.element().getValue()).get(0);
+ // Job ID must be different for each partition of each table.
String jobIdPrefix = String.format(
- c.sideInput(jobIdToken) + "_%05d", c.element().getKey());
- TableReference ref = BigQueryHelpers.fromJsonString(jsonTableRef.get(),
- TableReference.class);
+ c.sideInput(jobIdToken) + "0x%08x_%05d", tableDestination.hashCode(), partition);
+
+ TableReference ref = tableDestination.getTableReference();
if (!singlePartition) {
ref.setTableId(jobIdPrefix);
}
@@ -108,15 +106,14 @@ class WriteTables extends DoFn<KV<Long, Iterable<List<String>>>, String> {
bqServices.getDatasetService(c.getPipelineOptions().as(BigQueryOptions.class)),
jobIdPrefix,
ref,
- BigQueryHelpers.fromJsonString(
- jsonSchema == null ? null : jsonSchema.get(), TableSchema.class),
- partition,
+ schemaFunction.apply(tableDestination),
+ partitionFiles,
writeDisposition,
createDisposition,
- tableDescription);
- c.output(BigQueryHelpers.toJsonString(ref));
+ tableDestination.getTableDescription());
+ c.output(KV.of(tableDestination, BigQueryHelpers.toJsonString(ref)));
- removeTemporaryFiles(c.getPipelineOptions(), tempFilePrefix, partition);
+ removeTemporaryFiles(c.getPipelineOptions(), tempFilePrefix, partitionFiles);
}
private void load(
@@ -202,12 +199,6 @@ class WriteTables extends DoFn<KV<Long, Iterable<List<String>>>, String> {
builder
.addIfNotNull(DisplayData.item("tempFilePrefix", tempFilePrefix)
- .withLabel("Temporary File Prefix"))
- .addIfNotNull(DisplayData.item("jsonTableRef", jsonTableRef)
- .withLabel("Table Reference"))
- .addIfNotNull(DisplayData.item("jsonSchema", jsonSchema)
- .withLabel("Table Schema"))
- .addIfNotNull(DisplayData.item("tableDescription", tableDescription)
- .withLabel("Table Description"));
+ .withLabel("Temporary File Prefix"));
}
}
http://git-wip-us.apache.org/repos/asf/beam/blob/8581caf3/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOTest.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOTest.java b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOTest.java
index d953edd..af39483 100644
--- a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOTest.java
+++ b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOTest.java
@@ -2078,26 +2078,27 @@ public class BigQueryIOTest implements Serializable {
files.add(KV.of(fileName, fileSize));
}
- TupleTag<KV<Long, List<String>>> multiPartitionsTag =
- new TupleTag<KV<Long, List<String>>>("multiPartitionsTag") {};
- TupleTag<KV<Long, List<String>>> singlePartitionTag =
- new TupleTag<KV<Long, List<String>>>("singlePartitionTag") {};
-
- PCollection<KV<String, Long>> filesPCollection =
- p.apply(Create.of(files).withType(new TypeDescriptor<KV<String, Long>>() {}));
- PCollectionView<Iterable<KV<String, Long>>> filesView = PCollectionViews.iterableView(
- filesPCollection,
+ TupleTag<KV<KV<TableDestination, Integer>, List<String>>> multiPartitionsTag =
+ new TupleTag<KV<KV<TableDestination, Integer>, List<String>>>("multiPartitionsTag") {};
+ TupleTag<KV<KV<TableDestination, Integer>, List<String>>> singlePartitionTag =
+ new TupleTag<KV<KV<TableDestination, Integer>, List<String>>>("singlePartitionTag") {};
+
+ PCollectionView<Iterable<WriteBundlesToFiles.Result>> resultsView =
+ PCollectionViews.iterableView(
+ p,
WindowingStrategy.globalDefault(),
KvCoder.of(StringUtf8Coder.of(), VarLongCoder.of()));
WritePartition writePartition =
- new WritePartition(filesView, multiPartitionsTag, singlePartitionTag);
+ new WritePartition(null, null, resultsView,
+ multiPartitionsTag, singlePartitionTag);
- DoFnTester<String, KV<Long, List<String>>> tester = DoFnTester.of(writePartition);
- tester.setSideInput(filesView, GlobalWindow.INSTANCE, files);
+ DoFnTester<String, KV<KV<TableDestination, Integer>, List<String>>> tester =
+ DoFnTester.of(writePartition);
+ tester.setSideInput(resultsView, GlobalWindow.INSTANCE, files);
tester.processElement(testFolder.newFolder("BigQueryIOTest").getAbsolutePath());
- List<KV<Long, List<String>>> partitions;
+ List<KV<KV<TableDestination, Integer>, List<String>>> partitions;
if (expectedNumPartitions > 1) {
partitions = tester.takeOutputElements(multiPartitionsTag);
} else {
[46/50] [abbrv] beam git commit: Cache result of
BigQuerySourceBase.split
Posted by dh...@apache.org.
Cache result of BigQuerySourceBase.split
Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/1533e2b9
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/1533e2b9
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/1533e2b9
Branch: refs/heads/DSL_SQL
Commit: 1533e2b9bc49971929277b804587d93d8d2cae4c
Parents: 29e054a
Author: Eugene Kirpichov <ki...@google.com>
Authored: Wed Apr 19 10:09:42 2017 -0700
Committer: Eugene Kirpichov <ki...@google.com>
Committed: Wed Apr 19 11:39:21 2017 -0700
----------------------------------------------------------------------
.../sdk/io/gcp/bigquery/BigQuerySourceBase.java | 31 +++++++++++++-------
.../sdk/io/gcp/bigquery/BigQueryIOTest.java | 18 +++++-------
.../sdk/io/gcp/bigquery/FakeJobService.java | 9 ++++++
3 files changed, 37 insertions(+), 21 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/beam/blob/1533e2b9/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQuerySourceBase.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQuerySourceBase.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQuerySourceBase.java
index 1b90dc3..4142da9 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQuerySourceBase.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQuerySourceBase.java
@@ -69,6 +69,8 @@ abstract class BigQuerySourceBase extends BoundedSource<TableRow> {
protected final BigQueryServices bqServices;
protected final ValueProvider<String> executingProject;
+ private List<BoundedSource<TableRow>> cachedSplitResult;
+
BigQuerySourceBase(
ValueProvider<String> jobIdToken,
String extractDestinationDir,
@@ -83,17 +85,24 @@ abstract class BigQuerySourceBase extends BoundedSource<TableRow> {
@Override
public List<BoundedSource<TableRow>> split(
long desiredBundleSizeBytes, PipelineOptions options) throws Exception {
- BigQueryOptions bqOptions = options.as(BigQueryOptions.class);
- TableReference tableToExtract = getTableToExtract(bqOptions);
- JobService jobService = bqServices.getJobService(bqOptions);
- String extractJobId = BigQueryIO.getExtractJobId(jobIdToken);
- List<String> tempFiles = executeExtract(extractJobId, tableToExtract, jobService);
-
- TableSchema tableSchema = bqServices.getDatasetService(bqOptions)
- .getTable(tableToExtract).getSchema();
-
- cleanupTempResource(bqOptions);
- return createSources(tempFiles, tableSchema);
+ // split() can be called multiple times, e.g. Dataflow runner may call it multiple times
+ // with different desiredBundleSizeBytes in case the split() call produces too many sources.
+ // We ignore desiredBundleSizeBytes anyway, however in any case, we should not initiate
+ // another BigQuery extract job for the repeated split() calls.
+ if (cachedSplitResult == null) {
+ BigQueryOptions bqOptions = options.as(BigQueryOptions.class);
+ TableReference tableToExtract = getTableToExtract(bqOptions);
+ JobService jobService = bqServices.getJobService(bqOptions);
+ String extractJobId = BigQueryIO.getExtractJobId(jobIdToken);
+ List<String> tempFiles = executeExtract(extractJobId, tableToExtract, jobService);
+
+ TableSchema tableSchema = bqServices.getDatasetService(bqOptions)
+ .getTable(tableToExtract).getSchema();
+
+ cleanupTempResource(bqOptions);
+ cachedSplitResult = checkNotNull(createSources(tempFiles, tableSchema));
+ }
+ return cachedSplitResult;
}
protected abstract TableReference getTableToExtract(BigQueryOptions bqOptions) throws Exception;
http://git-wip-us.apache.org/repos/asf/beam/blob/1533e2b9/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOTest.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOTest.java b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOTest.java
index d0004e4..62c5b5f 100644
--- a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOTest.java
+++ b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOTest.java
@@ -28,7 +28,6 @@ import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertThat;
import static org.junit.Assert.assertTrue;
-
import com.google.api.client.util.Data;
import com.google.api.services.bigquery.model.Job;
import com.google.api.services.bigquery.model.JobStatistics;
@@ -1230,17 +1229,10 @@ public class BigQueryIOTest implements Serializable {
@Test
public void testBigQueryTableSourceInitSplit() throws Exception {
- Job extractJob = new Job();
- JobStatistics jobStats = new JobStatistics();
- JobStatistics4 extractStats = new JobStatistics4();
- extractStats.setDestinationUriFileCounts(ImmutableList.of(1L));
- jobStats.setExtract(extractStats);
- extractJob.setStatus(new JobStatus())
- .setStatistics(jobStats);
-
FakeDatasetService fakeDatasetService = new FakeDatasetService();
+ FakeJobService fakeJobService = new FakeJobService();
FakeBigQueryServices fakeBqServices = new FakeBigQueryServices()
- .withJobService(new FakeJobService())
+ .withJobService(fakeJobService)
.withDatasetService(fakeDatasetService);
List<TableRow> expected = ImmutableList.of(
@@ -1280,8 +1272,14 @@ public class BigQueryIOTest implements Serializable {
List<? extends BoundedSource<TableRow>> sources = bqSource.split(100, options);
assertEquals(2, sources.size());
+ // Simulate a repeated call to split(), like a Dataflow worker will sometimes do.
+ sources = bqSource.split(200, options);
+ assertEquals(2, sources.size());
BoundedSource<TableRow> actual = sources.get(0);
assertThat(actual, CoreMatchers.instanceOf(TransformingSource.class));
+
+ // A repeated call to split() should not have caused a duplicate extract job.
+ assertEquals(1, fakeJobService.getNumExtractJobCalls());
}
@Test
http://git-wip-us.apache.org/repos/asf/beam/blob/1533e2b9/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/FakeJobService.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/FakeJobService.java b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/FakeJobService.java
index a2454fb..cffd873 100644
--- a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/FakeJobService.java
+++ b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/FakeJobService.java
@@ -95,6 +95,7 @@ class FakeJobService implements JobService, Serializable {
private static final com.google.common.collect.Table<String, String, JobInfo> allJobs =
HashBasedTable.create();
+ private static int numExtractJobCalls = 0;
private static final com.google.common.collect.Table<String, String, List<String>>
filesForLoadJobs = HashBasedTable.create();
@@ -136,6 +137,8 @@ class FakeJobService implements JobService, Serializable {
checkArgument(extractConfig.getDestinationFormat().equals("AVRO"),
"Only extract to AVRO is supported");
synchronized (allJobs) {
+ ++numExtractJobCalls;
+
Job job = new Job();
job.setJobReference(jobRef);
job.setConfiguration(new JobConfiguration().setExtract(extractConfig));
@@ -145,6 +148,12 @@ class FakeJobService implements JobService, Serializable {
}
}
+ public int getNumExtractJobCalls() {
+ synchronized (allJobs) {
+ return numExtractJobCalls;
+ }
+ }
+
@Override
public void startQueryJob(JobReference jobRef, JobConfigurationQuery query)
throws IOException, InterruptedException {
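The fake above keeps its call counter in static state, guarded by the same lock as the shared job table, so the count is visible across (de)serialized copies of the fake that workers may use. A stripped-down sketch of that pattern follows; CountingFakeService is a hypothetical name, not the actual FakeJobService.

import java.io.Serializable;

// Sketch: a serializable test fake whose call counter lives in static state,
// so copies produced by serialization still observe one shared count.
class CountingFakeService implements Serializable {
  private static final Object lock = new Object();
  private static int numStartJobCalls = 0;

  void startJob() {
    synchronized (lock) {
      ++numStartJobCalls;
      // ... record the job in shared static state ...
    }
  }

  int getNumStartJobCalls() {
    synchronized (lock) {
      return numStartJobCalls;
    }
  }
}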
[25/50] [abbrv] beam git commit: [BEAM-1994] Remove Flink examples package
Posted by dh...@apache.org.
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/src/test/java/org/apache/beam/runners/flink/streaming/DoFnOperatorTest.java
----------------------------------------------------------------------
diff --git a/runners/flink/src/test/java/org/apache/beam/runners/flink/streaming/DoFnOperatorTest.java b/runners/flink/src/test/java/org/apache/beam/runners/flink/streaming/DoFnOperatorTest.java
new file mode 100644
index 0000000..4c826d1
--- /dev/null
+++ b/runners/flink/src/test/java/org/apache/beam/runners/flink/streaming/DoFnOperatorTest.java
@@ -0,0 +1,600 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.runners.flink.streaming;
+
+import static org.hamcrest.Matchers.emptyIterable;
+import static org.hamcrest.collection.IsIterableContainingInOrder.contains;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertThat;
+
+import com.google.common.base.Function;
+import com.google.common.base.Predicate;
+import com.google.common.collect.FluentIterable;
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableMap;
+import java.util.Collections;
+import java.util.HashMap;
+import javax.annotation.Nullable;
+import org.apache.beam.runners.core.StatefulDoFnRunner;
+import org.apache.beam.runners.flink.FlinkPipelineOptions;
+import org.apache.beam.runners.flink.translation.types.CoderTypeInformation;
+import org.apache.beam.runners.flink.translation.wrappers.streaming.DoFnOperator;
+import org.apache.beam.sdk.coders.Coder;
+import org.apache.beam.sdk.coders.KvCoder;
+import org.apache.beam.sdk.coders.StringUtf8Coder;
+import org.apache.beam.sdk.coders.VarIntCoder;
+import org.apache.beam.sdk.options.PipelineOptionsFactory;
+import org.apache.beam.sdk.testing.PCollectionViewTesting;
+import org.apache.beam.sdk.transforms.DoFn;
+import org.apache.beam.sdk.transforms.join.RawUnionValue;
+import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
+import org.apache.beam.sdk.transforms.windowing.FixedWindows;
+import org.apache.beam.sdk.transforms.windowing.IntervalWindow;
+import org.apache.beam.sdk.transforms.windowing.PaneInfo;
+import org.apache.beam.sdk.util.TimeDomain;
+import org.apache.beam.sdk.util.Timer;
+import org.apache.beam.sdk.util.TimerSpec;
+import org.apache.beam.sdk.util.TimerSpecs;
+import org.apache.beam.sdk.util.WindowedValue;
+import org.apache.beam.sdk.util.WindowingStrategy;
+import org.apache.beam.sdk.util.state.StateSpec;
+import org.apache.beam.sdk.util.state.StateSpecs;
+import org.apache.beam.sdk.util.state.ValueState;
+import org.apache.beam.sdk.values.KV;
+import org.apache.beam.sdk.values.PCollectionView;
+import org.apache.beam.sdk.values.TupleTag;
+import org.apache.flink.api.common.typeinfo.BasicTypeInfo;
+import org.apache.flink.api.java.functions.KeySelector;
+import org.apache.flink.streaming.runtime.streamrecord.StreamRecord;
+import org.apache.flink.streaming.util.KeyedOneInputStreamOperatorTestHarness;
+import org.apache.flink.streaming.util.KeyedTwoInputStreamOperatorTestHarness;
+import org.apache.flink.streaming.util.OneInputStreamOperatorTestHarness;
+import org.apache.flink.streaming.util.TwoInputStreamOperatorTestHarness;
+import org.joda.time.Duration;
+import org.joda.time.Instant;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+/**
+ * Tests for {@link DoFnOperator}.
+ */
+@RunWith(JUnit4.class)
+public class DoFnOperatorTest {
+
+ // views and windows for testing side inputs
+ private static final long WINDOW_MSECS_1 = 100;
+ private static final long WINDOW_MSECS_2 = 500;
+
+ private WindowingStrategy<Object, IntervalWindow> windowingStrategy1 =
+ WindowingStrategy.of(FixedWindows.of(new Duration(WINDOW_MSECS_1)));
+
+ private PCollectionView<Iterable<String>> view1 =
+ PCollectionViewTesting.testingView(
+ new TupleTag<Iterable<WindowedValue<String>>>() {},
+ new PCollectionViewTesting.IdentityViewFn<String>(),
+ StringUtf8Coder.of(),
+ windowingStrategy1);
+
+ private WindowingStrategy<Object, IntervalWindow> windowingStrategy2 =
+ WindowingStrategy.of(FixedWindows.of(new Duration(WINDOW_MSECS_2)));
+
+ private PCollectionView<Iterable<String>> view2 =
+ PCollectionViewTesting.testingView(
+ new TupleTag<Iterable<WindowedValue<String>>>() {},
+ new PCollectionViewTesting.IdentityViewFn<String>(),
+ StringUtf8Coder.of(),
+ windowingStrategy2);
+
+ @Test
+ @SuppressWarnings("unchecked")
+ public void testSingleOutput() throws Exception {
+
+ WindowedValue.ValueOnlyWindowedValueCoder<String> windowedValueCoder =
+ WindowedValue.getValueOnlyCoder(StringUtf8Coder.of());
+
+ TupleTag<String> outputTag = new TupleTag<>("main-output");
+
+ DoFnOperator<String, String, String> doFnOperator = new DoFnOperator<>(
+ new IdentityDoFn<String>(),
+ windowedValueCoder,
+ outputTag,
+ Collections.<TupleTag<?>>emptyList(),
+ new DoFnOperator.DefaultOutputManagerFactory(),
+ WindowingStrategy.globalDefault(),
+ new HashMap<Integer, PCollectionView<?>>(), /* side-input mapping */
+ Collections.<PCollectionView<?>>emptyList(), /* side inputs */
+ PipelineOptionsFactory.as(FlinkPipelineOptions.class),
+ null);
+
+ OneInputStreamOperatorTestHarness<WindowedValue<String>, String> testHarness =
+ new OneInputStreamOperatorTestHarness<>(doFnOperator);
+
+ testHarness.open();
+
+ testHarness.processElement(new StreamRecord<>(WindowedValue.valueInGlobalWindow("Hello")));
+
+ assertThat(
+ this.<String>stripStreamRecordFromWindowedValue(testHarness.getOutput()),
+ contains(WindowedValue.valueInGlobalWindow("Hello")));
+
+ testHarness.close();
+ }
+
+ @Test
+ @SuppressWarnings("unchecked")
+ public void testMultiOutputOutput() throws Exception {
+
+ WindowedValue.ValueOnlyWindowedValueCoder<String> windowedValueCoder =
+ WindowedValue.getValueOnlyCoder(StringUtf8Coder.of());
+
+ TupleTag<String> mainOutput = new TupleTag<>("main-output");
+ TupleTag<String> additionalOutput1 = new TupleTag<>("output-1");
+ TupleTag<String> additionalOutput2 = new TupleTag<>("output-2");
+ ImmutableMap<TupleTag<?>, Integer> outputMapping = ImmutableMap.<TupleTag<?>, Integer>builder()
+ .put(mainOutput, 1)
+ .put(additionalOutput1, 2)
+ .put(additionalOutput2, 3)
+ .build();
+
+ DoFnOperator<String, String, RawUnionValue> doFnOperator = new DoFnOperator<>(
+ new MultiOutputDoFn(additionalOutput1, additionalOutput2),
+ windowedValueCoder,
+ mainOutput,
+ ImmutableList.<TupleTag<?>>of(additionalOutput1, additionalOutput2),
+ new DoFnOperator.MultiOutputOutputManagerFactory(outputMapping),
+ WindowingStrategy.globalDefault(),
+ new HashMap<Integer, PCollectionView<?>>(), /* side-input mapping */
+ Collections.<PCollectionView<?>>emptyList(), /* side inputs */
+ PipelineOptionsFactory.as(FlinkPipelineOptions.class),
+ null);
+
+ OneInputStreamOperatorTestHarness<WindowedValue<String>, RawUnionValue> testHarness =
+ new OneInputStreamOperatorTestHarness<>(doFnOperator);
+
+ testHarness.open();
+
+ testHarness.processElement(new StreamRecord<>(WindowedValue.valueInGlobalWindow("one")));
+ testHarness.processElement(new StreamRecord<>(WindowedValue.valueInGlobalWindow("two")));
+ testHarness.processElement(new StreamRecord<>(WindowedValue.valueInGlobalWindow("hello")));
+
+ assertThat(
+ this.stripStreamRecordFromRawUnion(testHarness.getOutput()),
+ contains(
+ new RawUnionValue(2, WindowedValue.valueInGlobalWindow("extra: one")),
+ new RawUnionValue(3, WindowedValue.valueInGlobalWindow("extra: two")),
+ new RawUnionValue(1, WindowedValue.valueInGlobalWindow("got: hello")),
+ new RawUnionValue(2, WindowedValue.valueInGlobalWindow("got: hello")),
+ new RawUnionValue(3, WindowedValue.valueInGlobalWindow("got: hello"))));
+
+ testHarness.close();
+ }
+
+ @Test
+ public void testLateDroppingForStatefulFn() throws Exception {
+
+ WindowingStrategy<Object, IntervalWindow> windowingStrategy =
+ WindowingStrategy.of(FixedWindows.of(new Duration(10)));
+
+ DoFn<Integer, String> fn = new DoFn<Integer, String>() {
+
+ @StateId("state")
+ private final StateSpec<Object, ValueState<String>> stateSpec =
+ StateSpecs.value(StringUtf8Coder.of());
+
+ @ProcessElement
+ public void processElement(ProcessContext context) {
+ context.output(context.element().toString());
+ }
+ };
+
+ WindowedValue.FullWindowedValueCoder<Integer> windowedValueCoder =
+ WindowedValue.getFullCoder(
+ VarIntCoder.of(),
+ windowingStrategy.getWindowFn().windowCoder());
+
+ TupleTag<String> outputTag = new TupleTag<>("main-output");
+
+ DoFnOperator<Integer, String, WindowedValue<String>> doFnOperator = new DoFnOperator<>(
+ fn,
+ windowedValueCoder,
+ outputTag,
+ Collections.<TupleTag<?>>emptyList(),
+ new DoFnOperator.DefaultOutputManagerFactory<WindowedValue<String>>(),
+ windowingStrategy,
+ new HashMap<Integer, PCollectionView<?>>(), /* side-input mapping */
+ Collections.<PCollectionView<?>>emptyList(), /* side inputs */
+ PipelineOptionsFactory.as(FlinkPipelineOptions.class),
+ VarIntCoder.of() /* key coder */);
+
+ OneInputStreamOperatorTestHarness<WindowedValue<Integer>, WindowedValue<String>> testHarness =
+ new KeyedOneInputStreamOperatorTestHarness<>(
+ doFnOperator,
+ new KeySelector<WindowedValue<Integer>, Integer>() {
+ @Override
+ public Integer getKey(WindowedValue<Integer> integerWindowedValue) throws Exception {
+ return integerWindowedValue.getValue();
+ }
+ },
+ new CoderTypeInformation<>(VarIntCoder.of()));
+
+ testHarness.open();
+
+ testHarness.processWatermark(0);
+
+ IntervalWindow window1 = new IntervalWindow(new Instant(0), Duration.millis(10));
+
+ // this should not be late
+ testHarness.processElement(
+ new StreamRecord<>(WindowedValue.of(13, new Instant(0), window1, PaneInfo.NO_FIRING)));
+
+ assertThat(
+ this.<String>stripStreamRecordFromWindowedValue(testHarness.getOutput()),
+ contains(WindowedValue.of("13", new Instant(0), window1, PaneInfo.NO_FIRING)));
+
+ testHarness.getOutput().clear();
+
+ testHarness.processWatermark(9);
+
+ // this should still not be considered late
+ testHarness.processElement(
+ new StreamRecord<>(WindowedValue.of(17, new Instant(0), window1, PaneInfo.NO_FIRING)));
+
+ assertThat(
+ this.<String>stripStreamRecordFromWindowedValue(testHarness.getOutput()),
+ contains(WindowedValue.of("17", new Instant(0), window1, PaneInfo.NO_FIRING)));
+
+ testHarness.getOutput().clear();
+
+ testHarness.processWatermark(10);
+
+ // this should now be considered late
+ testHarness.processElement(
+ new StreamRecord<>(WindowedValue.of(17, new Instant(0), window1, PaneInfo.NO_FIRING)));
+
+ assertThat(
+ this.<String>stripStreamRecordFromWindowedValue(testHarness.getOutput()),
+ emptyIterable());
+
+ testHarness.close();
+ }
+
+ @Test
+ public void testStateGCForStatefulFn() throws Exception {
+
+ WindowingStrategy<Object, IntervalWindow> windowingStrategy =
+ WindowingStrategy.of(FixedWindows.of(new Duration(10))).withAllowedLateness(Duration.ZERO);
+
+ final String timerId = "boo";
+ final String stateId = "dazzle";
+
+ final int offset = 5000;
+ final int timerOutput = 4093;
+
+ DoFn<KV<String, Integer>, KV<String, Integer>> fn =
+ new DoFn<KV<String, Integer>, KV<String, Integer>>() {
+
+ @TimerId(timerId)
+ private final TimerSpec spec = TimerSpecs.timer(TimeDomain.EVENT_TIME);
+
+ @StateId(stateId)
+ private final StateSpec<Object, ValueState<String>> stateSpec =
+ StateSpecs.value(StringUtf8Coder.of());
+
+ @ProcessElement
+ public void processElement(
+ ProcessContext context,
+ @TimerId(timerId) Timer timer,
+ @StateId(stateId) ValueState<String> state,
+ BoundedWindow window) {
+ timer.set(window.maxTimestamp());
+ state.write(context.element().getKey());
+ context.output(
+ KV.of(context.element().getKey(), context.element().getValue() + offset));
+ }
+
+ @OnTimer(timerId)
+ public void onTimer(OnTimerContext context, @StateId(stateId) ValueState<String> state) {
+ context.output(KV.of(state.read(), timerOutput));
+ }
+ };
+
+ WindowedValue.FullWindowedValueCoder<KV<String, Integer>> windowedValueCoder =
+ WindowedValue.getFullCoder(
+ KvCoder.of(StringUtf8Coder.of(), VarIntCoder.of()),
+ windowingStrategy.getWindowFn().windowCoder());
+
+ TupleTag<KV<String, Integer>> outputTag = new TupleTag<>("main-output");
+
+ DoFnOperator<
+ KV<String, Integer>, KV<String, Integer>, WindowedValue<KV<String, Integer>>> doFnOperator =
+ new DoFnOperator<>(
+ fn,
+ windowedValueCoder,
+ outputTag,
+ Collections.<TupleTag<?>>emptyList(),
+ new DoFnOperator.DefaultOutputManagerFactory<WindowedValue<KV<String, Integer>>>(),
+ windowingStrategy,
+ new HashMap<Integer, PCollectionView<?>>(), /* side-input mapping */
+ Collections.<PCollectionView<?>>emptyList(), /* side inputs */
+ PipelineOptionsFactory.as(FlinkPipelineOptions.class),
+ StringUtf8Coder.of() /* key coder */);
+
+ KeyedOneInputStreamOperatorTestHarness<
+ String,
+ WindowedValue<KV<String, Integer>>,
+ WindowedValue<KV<String, Integer>>> testHarness =
+ new KeyedOneInputStreamOperatorTestHarness<>(
+ doFnOperator,
+ new KeySelector<WindowedValue<KV<String, Integer>>, String>() {
+ @Override
+ public String getKey(
+ WindowedValue<KV<String, Integer>> kvWindowedValue) throws Exception {
+ return kvWindowedValue.getValue().getKey();
+ }
+ },
+ new CoderTypeInformation<>(StringUtf8Coder.of()));
+
+ testHarness.open();
+
+ testHarness.processWatermark(0);
+
+ assertEquals(0, testHarness.numKeyedStateEntries());
+
+ IntervalWindow window1 = new IntervalWindow(new Instant(0), Duration.millis(10));
+
+ testHarness.processElement(
+ new StreamRecord<>(
+ WindowedValue.of(KV.of("key1", 5), new Instant(1), window1, PaneInfo.NO_FIRING)));
+
+ testHarness.processElement(
+ new StreamRecord<>(
+ WindowedValue.of(KV.of("key2", 7), new Instant(3), window1, PaneInfo.NO_FIRING)));
+
+ assertThat(
+ this.<KV<String, Integer>>stripStreamRecordFromWindowedValue(testHarness.getOutput()),
+ contains(
+ WindowedValue.of(
+ KV.of("key1", 5 + offset), new Instant(1), window1, PaneInfo.NO_FIRING),
+ WindowedValue.of(
+ KV.of("key2", 7 + offset), new Instant(3), window1, PaneInfo.NO_FIRING)));
+
+ assertEquals(2, testHarness.numKeyedStateEntries());
+
+ testHarness.getOutput().clear();
+
+ // this should trigger both the window.maxTimestamp() timer and the GC timer
+ // this tests that the GC timer fires after the user timer
+ testHarness.processWatermark(
+ window1.maxTimestamp()
+ .plus(windowingStrategy.getAllowedLateness())
+ .plus(StatefulDoFnRunner.TimeInternalsCleanupTimer.GC_DELAY_MS)
+ .getMillis());
+
+ assertThat(
+ this.<KV<String, Integer>>stripStreamRecordFromWindowedValue(testHarness.getOutput()),
+ contains(
+ WindowedValue.of(
+ KV.of("key1", timerOutput), new Instant(9), window1, PaneInfo.NO_FIRING),
+ WindowedValue.of(
+ KV.of("key2", timerOutput), new Instant(9), window1, PaneInfo.NO_FIRING)));
+
+ // ensure the state was garbage collected
+ assertEquals(0, testHarness.numKeyedStateEntries());
+
+ testHarness.close();
+ }
+
+ public void testSideInputs(boolean keyed) throws Exception {
+
+ WindowedValue.ValueOnlyWindowedValueCoder<String> windowedValueCoder =
+ WindowedValue.getValueOnlyCoder(StringUtf8Coder.of());
+
+ TupleTag<String> outputTag = new TupleTag<>("main-output");
+
+ ImmutableMap<Integer, PCollectionView<?>> sideInputMapping =
+ ImmutableMap.<Integer, PCollectionView<?>>builder()
+ .put(1, view1)
+ .put(2, view2)
+ .build();
+
+ Coder<String> keyCoder = null;
+ if (keyed) {
+ keyCoder = StringUtf8Coder.of();
+ }
+
+ DoFnOperator<String, String, String> doFnOperator = new DoFnOperator<>(
+ new IdentityDoFn<String>(),
+ windowedValueCoder,
+ outputTag,
+ Collections.<TupleTag<?>>emptyList(),
+ new DoFnOperator.DefaultOutputManagerFactory<String>(),
+ WindowingStrategy.globalDefault(),
+ sideInputMapping, /* side-input mapping */
+ ImmutableList.<PCollectionView<?>>of(view1, view2), /* side inputs */
+ PipelineOptionsFactory.as(FlinkPipelineOptions.class),
+ keyCoder);
+
+ TwoInputStreamOperatorTestHarness<WindowedValue<String>, RawUnionValue, String> testHarness =
+ new TwoInputStreamOperatorTestHarness<>(doFnOperator);
+
+ if (keyed) {
+ // we use a dummy key for the second input since it is considered to be broadcast
+ testHarness = new KeyedTwoInputStreamOperatorTestHarness<>(
+ doFnOperator,
+ new StringKeySelector(),
+ new DummyKeySelector(),
+ BasicTypeInfo.STRING_TYPE_INFO);
+ }
+
+ testHarness.open();
+
+ IntervalWindow firstWindow = new IntervalWindow(new Instant(0), new Instant(100));
+ IntervalWindow secondWindow = new IntervalWindow(new Instant(0), new Instant(500));
+
+ // test that incoming side-input events are kept
+ testHarness.processElement2(
+ new StreamRecord<>(
+ new RawUnionValue(
+ 1,
+ valuesInWindow(ImmutableList.of("hello", "ciao"), new Instant(0), firstWindow))));
+ testHarness.processElement2(
+ new StreamRecord<>(
+ new RawUnionValue(
+ 2,
+ valuesInWindow(ImmutableList.of("foo", "bar"), new Instant(0), secondWindow))));
+
+ // push in some regular elements
+ WindowedValue<String> helloElement = valueInWindow("Hello", new Instant(0), firstWindow);
+ WindowedValue<String> worldElement = valueInWindow("World", new Instant(1000), firstWindow);
+ testHarness.processElement1(new StreamRecord<>(helloElement));
+ testHarness.processElement1(new StreamRecord<>(worldElement));
+
+ // test that pushed-back events are kept
+ testHarness.processElement2(
+ new StreamRecord<>(
+ new RawUnionValue(
+ 1,
+ valuesInWindow(ImmutableList.of("hello", "ciao"),
+ new Instant(1000), firstWindow))));
+ testHarness.processElement2(
+ new StreamRecord<>(
+ new RawUnionValue(
+ 2,
+ valuesInWindow(ImmutableList.of("foo", "bar"), new Instant(1000), secondWindow))));
+
+ assertThat(
+ this.<String>stripStreamRecordFromWindowedValue(testHarness.getOutput()),
+ contains(helloElement, worldElement));
+
+ testHarness.close();
+
+ }
+
+ /**
+ * {@link TwoInputStreamOperatorTestHarness} supports OperatorStateBackend
+ * but does not support KeyedStateBackend, so we only test side inputs of a normal ParDo here.
+ */
+ @Test
+ @SuppressWarnings("unchecked")
+ public void testNormalParDoSideInputs() throws Exception {
+ testSideInputs(false);
+ }
+
+ @Test
+ public void testKeyedSideInputs() throws Exception {
+ testSideInputs(true);
+ }
+
+ private <T> Iterable<WindowedValue<T>> stripStreamRecordFromWindowedValue(
+ Iterable<Object> input) {
+
+ return FluentIterable.from(input).filter(new Predicate<Object>() {
+ @Override
+ public boolean apply(@Nullable Object o) {
+ return o instanceof StreamRecord && ((StreamRecord) o).getValue() instanceof WindowedValue;
+ }
+ }).transform(new Function<Object, WindowedValue<T>>() {
+ @Nullable
+ @Override
+ @SuppressWarnings({"unchecked", "rawtypes"})
+ public WindowedValue<T> apply(@Nullable Object o) {
+ if (o instanceof StreamRecord && ((StreamRecord) o).getValue() instanceof WindowedValue) {
+ return (WindowedValue) ((StreamRecord) o).getValue();
+ }
+ throw new RuntimeException("unreachable");
+ }
+ });
+ }
+
+ private Iterable<RawUnionValue> stripStreamRecordFromRawUnion(Iterable<Object> input) {
+ return FluentIterable.from(input).filter(new Predicate<Object>() {
+ @Override
+ public boolean apply(@Nullable Object o) {
+ return o instanceof StreamRecord && ((StreamRecord) o).getValue() instanceof RawUnionValue;
+ }
+ }).transform(new Function<Object, RawUnionValue>() {
+ @Nullable
+ @Override
+ @SuppressWarnings({"unchecked", "rawtypes"})
+ public RawUnionValue apply(@Nullable Object o) {
+ if (o instanceof StreamRecord && ((StreamRecord) o).getValue() instanceof RawUnionValue) {
+ return (RawUnionValue) ((StreamRecord) o).getValue();
+ }
+ throw new RuntimeException("unreachable");
+ }
+ });
+ }
+
+ private static class MultiOutputDoFn extends DoFn<String, String> {
+ private TupleTag<String> additionalOutput1;
+ private TupleTag<String> additionalOutput2;
+
+ public MultiOutputDoFn(TupleTag<String> additionalOutput1, TupleTag<String> additionalOutput2) {
+ this.additionalOutput1 = additionalOutput1;
+ this.additionalOutput2 = additionalOutput2;
+ }
+
+ @ProcessElement
+ public void processElement(ProcessContext c) throws Exception {
+ if (c.element().equals("one")) {
+ c.output(additionalOutput1, "extra: one");
+ } else if (c.element().equals("two")) {
+ c.output(additionalOutput2, "extra: two");
+ } else {
+ c.output("got: " + c.element());
+ c.output(additionalOutput1, "got: " + c.element());
+ c.output(additionalOutput2, "got: " + c.element());
+ }
+ }
+ }
+
+ private static class IdentityDoFn<T> extends DoFn<T, T> {
+ @ProcessElement
+ public void processElement(ProcessContext c) throws Exception {
+ c.output(c.element());
+ }
+ }
+
+ @SuppressWarnings({"unchecked", "rawtypes"})
+ private WindowedValue<Iterable<?>> valuesInWindow(
+ Iterable<?> values, Instant timestamp, BoundedWindow window) {
+ return (WindowedValue) WindowedValue.of(values, timestamp, window, PaneInfo.NO_FIRING);
+ }
+
+ @SuppressWarnings({"unchecked", "rawtypes"})
+ private <T> WindowedValue<T> valueInWindow(
+ T value, Instant timestamp, BoundedWindow window) {
+ return WindowedValue.of(value, timestamp, window, PaneInfo.NO_FIRING);
+ }
+
+
+ private static class DummyKeySelector implements KeySelector<RawUnionValue, String> {
+ @Override
+ public String getKey(RawUnionValue stringWindowedValue) throws Exception {
+ return "dummy_key";
+ }
+ }
+
+ private static class StringKeySelector implements KeySelector<WindowedValue<String>, String> {
+ @Override
+ public String getKey(WindowedValue<String> stringWindowedValue) throws Exception {
+ return stringWindowedValue.getValue();
+ }
+ }
+}
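In testStateGCForStatefulFn above, the watermark passed to processWatermark(...) is the earliest point at which state for window1 may be garbage-collected: the window's maximum timestamp, plus the allowed lateness, plus the runner's cleanup delay. A small worked sketch of that computation, using the same windowing strategy as the test; the GC_DELAY_MS constant is the one referenced above, and its concrete value is not assumed here.

import org.apache.beam.runners.core.StatefulDoFnRunner;
import org.apache.beam.sdk.transforms.windowing.FixedWindows;
import org.apache.beam.sdk.transforms.windowing.IntervalWindow;
import org.apache.beam.sdk.util.WindowingStrategy;
import org.joda.time.Duration;
import org.joda.time.Instant;

// Sketch: compute the watermark at which stateful-DoFn state for a window
// becomes garbage-collectible, mirroring the processWatermark(...) call above.
class GcWatermarkSketch {
  static long gcWatermarkMillis(IntervalWindow window, WindowingStrategy<?, ?> strategy) {
    return window.maxTimestamp()                                          // end of window minus 1ms
        .plus(strategy.getAllowedLateness())                              // ZERO in the test
        .plus(StatefulDoFnRunner.TimeInternalsCleanupTimer.GC_DELAY_MS)   // runner cleanup delay
        .getMillis();
  }

  public static void main(String[] args) {
    WindowingStrategy<Object, IntervalWindow> strategy =
        WindowingStrategy.of(FixedWindows.of(new Duration(10))).withAllowedLateness(Duration.ZERO);
    IntervalWindow window = new IntervalWindow(new Instant(0), Duration.millis(10));
    System.out.println(gcWatermarkMillis(window, strategy));
  }
}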
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/src/test/java/org/apache/beam/runners/flink/streaming/FlinkBroadcastStateInternalsTest.java
----------------------------------------------------------------------
diff --git a/runners/flink/src/test/java/org/apache/beam/runners/flink/streaming/FlinkBroadcastStateInternalsTest.java b/runners/flink/src/test/java/org/apache/beam/runners/flink/streaming/FlinkBroadcastStateInternalsTest.java
new file mode 100644
index 0000000..7e7d1e1
--- /dev/null
+++ b/runners/flink/src/test/java/org/apache/beam/runners/flink/streaming/FlinkBroadcastStateInternalsTest.java
@@ -0,0 +1,245 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.runners.flink.streaming;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotEquals;
+import static org.junit.Assert.assertThat;
+
+import java.util.Arrays;
+import org.apache.beam.runners.core.StateMerging;
+import org.apache.beam.runners.core.StateNamespace;
+import org.apache.beam.runners.core.StateNamespaceForTest;
+import org.apache.beam.runners.core.StateTag;
+import org.apache.beam.runners.core.StateTags;
+import org.apache.beam.runners.flink.translation.wrappers.streaming.state.FlinkBroadcastStateInternals;
+import org.apache.beam.sdk.coders.StringUtf8Coder;
+import org.apache.beam.sdk.coders.VarIntCoder;
+import org.apache.beam.sdk.transforms.Sum;
+import org.apache.beam.sdk.util.state.BagState;
+import org.apache.beam.sdk.util.state.CombiningState;
+import org.apache.beam.sdk.util.state.GroupingState;
+import org.apache.beam.sdk.util.state.ReadableState;
+import org.apache.beam.sdk.util.state.ValueState;
+import org.apache.flink.runtime.operators.testutils.DummyEnvironment;
+import org.apache.flink.runtime.state.OperatorStateBackend;
+import org.apache.flink.runtime.state.memory.MemoryStateBackend;
+import org.hamcrest.Matchers;
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+/**
+ * Tests for {@link FlinkBroadcastStateInternals}. This is based on the tests for
+ * {@code InMemoryStateInternals}.
+ */
+@RunWith(JUnit4.class)
+public class FlinkBroadcastStateInternalsTest {
+ private static final StateNamespace NAMESPACE_1 = new StateNamespaceForTest("ns1");
+ private static final StateNamespace NAMESPACE_2 = new StateNamespaceForTest("ns2");
+ private static final StateNamespace NAMESPACE_3 = new StateNamespaceForTest("ns3");
+
+ private static final StateTag<Object, ValueState<String>> STRING_VALUE_ADDR =
+ StateTags.value("stringValue", StringUtf8Coder.of());
+ private static final StateTag<Object, CombiningState<Integer, int[], Integer>>
+ SUM_INTEGER_ADDR = StateTags.combiningValueFromInputInternal(
+ "sumInteger", VarIntCoder.of(), Sum.ofIntegers());
+ private static final StateTag<Object, BagState<String>> STRING_BAG_ADDR =
+ StateTags.bag("stringBag", StringUtf8Coder.of());
+
+ FlinkBroadcastStateInternals<String> underTest;
+
+ @Before
+ public void initStateInternals() {
+ MemoryStateBackend backend = new MemoryStateBackend();
+ try {
+ OperatorStateBackend operatorStateBackend =
+ backend.createOperatorStateBackend(new DummyEnvironment("test", 1, 0), "");
+ underTest = new FlinkBroadcastStateInternals<>(1, operatorStateBackend);
+
+ } catch (Exception e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ @Test
+ public void testValue() throws Exception {
+ ValueState<String> value = underTest.state(NAMESPACE_1, STRING_VALUE_ADDR);
+
+ assertEquals(underTest.state(NAMESPACE_1, STRING_VALUE_ADDR), value);
+ assertNotEquals(
+ underTest.state(NAMESPACE_2, STRING_VALUE_ADDR),
+ value);
+
+ assertThat(value.read(), Matchers.nullValue());
+ value.write("hello");
+ assertThat(value.read(), Matchers.equalTo("hello"));
+ value.write("world");
+ assertThat(value.read(), Matchers.equalTo("world"));
+
+ value.clear();
+ assertThat(value.read(), Matchers.nullValue());
+ assertEquals(underTest.state(NAMESPACE_1, STRING_VALUE_ADDR), value);
+
+ }
+
+ @Test
+ public void testBag() throws Exception {
+ BagState<String> value = underTest.state(NAMESPACE_1, STRING_BAG_ADDR);
+
+ assertEquals(value, underTest.state(NAMESPACE_1, STRING_BAG_ADDR));
+ assertFalse(value.equals(underTest.state(NAMESPACE_2, STRING_BAG_ADDR)));
+
+ assertThat(value.read(), Matchers.emptyIterable());
+ value.add("hello");
+ assertThat(value.read(), Matchers.containsInAnyOrder("hello"));
+
+ value.add("world");
+ assertThat(value.read(), Matchers.containsInAnyOrder("hello", "world"));
+
+ value.clear();
+ assertThat(value.read(), Matchers.emptyIterable());
+ assertEquals(underTest.state(NAMESPACE_1, STRING_BAG_ADDR), value);
+
+ }
+
+ @Test
+ public void testBagIsEmpty() throws Exception {
+ BagState<String> value = underTest.state(NAMESPACE_1, STRING_BAG_ADDR);
+
+ assertThat(value.isEmpty().read(), Matchers.is(true));
+ ReadableState<Boolean> readFuture = value.isEmpty();
+ value.add("hello");
+ assertThat(readFuture.read(), Matchers.is(false));
+
+ value.clear();
+ assertThat(readFuture.read(), Matchers.is(true));
+ }
+
+ @Test
+ public void testMergeBagIntoSource() throws Exception {
+ BagState<String> bag1 = underTest.state(NAMESPACE_1, STRING_BAG_ADDR);
+ BagState<String> bag2 = underTest.state(NAMESPACE_2, STRING_BAG_ADDR);
+
+ bag1.add("Hello");
+ bag2.add("World");
+ bag1.add("!");
+
+ StateMerging.mergeBags(Arrays.asList(bag1, bag2), bag1);
+
+ // Reading the merged bag returns the contents of both bags
+ assertThat(bag1.read(), Matchers.containsInAnyOrder("Hello", "World", "!"));
+ assertThat(bag2.read(), Matchers.emptyIterable());
+ }
+
+ @Test
+ public void testMergeBagIntoNewNamespace() throws Exception {
+ BagState<String> bag1 = underTest.state(NAMESPACE_1, STRING_BAG_ADDR);
+ BagState<String> bag2 = underTest.state(NAMESPACE_2, STRING_BAG_ADDR);
+ BagState<String> bag3 = underTest.state(NAMESPACE_3, STRING_BAG_ADDR);
+
+ bag1.add("Hello");
+ bag2.add("World");
+ bag1.add("!");
+
+ StateMerging.mergeBags(Arrays.asList(bag1, bag2, bag3), bag3);
+
+ // Reading the merged bag returns the contents of both bags
+ assertThat(bag3.read(), Matchers.containsInAnyOrder("Hello", "World", "!"));
+ assertThat(bag1.read(), Matchers.emptyIterable());
+ assertThat(bag2.read(), Matchers.emptyIterable());
+ }
+
+ @Test
+ public void testCombiningValue() throws Exception {
+ GroupingState<Integer, Integer> value = underTest.state(NAMESPACE_1, SUM_INTEGER_ADDR);
+
+ // State instances are cached, but depend on the namespace.
+ assertEquals(value, underTest.state(NAMESPACE_1, SUM_INTEGER_ADDR));
+ assertFalse(value.equals(underTest.state(NAMESPACE_2, SUM_INTEGER_ADDR)));
+
+ assertThat(value.read(), Matchers.equalTo(0));
+ value.add(2);
+ assertThat(value.read(), Matchers.equalTo(2));
+
+ value.add(3);
+ assertThat(value.read(), Matchers.equalTo(5));
+
+ value.clear();
+ assertThat(value.read(), Matchers.equalTo(0));
+ assertEquals(underTest.state(NAMESPACE_1, SUM_INTEGER_ADDR), value);
+ }
+
+ @Test
+ public void testCombiningIsEmpty() throws Exception {
+ GroupingState<Integer, Integer> value = underTest.state(NAMESPACE_1, SUM_INTEGER_ADDR);
+
+ assertThat(value.isEmpty().read(), Matchers.is(true));
+ ReadableState<Boolean> readFuture = value.isEmpty();
+ value.add(5);
+ assertThat(readFuture.read(), Matchers.is(false));
+
+ value.clear();
+ assertThat(readFuture.read(), Matchers.is(true));
+ }
+
+ @Test
+ public void testMergeCombiningValueIntoSource() throws Exception {
+ CombiningState<Integer, int[], Integer> value1 =
+ underTest.state(NAMESPACE_1, SUM_INTEGER_ADDR);
+ CombiningState<Integer, int[], Integer> value2 =
+ underTest.state(NAMESPACE_2, SUM_INTEGER_ADDR);
+
+ value1.add(5);
+ value2.add(10);
+ value1.add(6);
+
+ assertThat(value1.read(), Matchers.equalTo(11));
+ assertThat(value2.read(), Matchers.equalTo(10));
+
+ // Merging clears the old values and updates the result value.
+ StateMerging.mergeCombiningValues(Arrays.asList(value1, value2), value1);
+
+ assertThat(value1.read(), Matchers.equalTo(21));
+ assertThat(value2.read(), Matchers.equalTo(0));
+ }
+
+ @Test
+ public void testMergeCombiningValueIntoNewNamespace() throws Exception {
+ CombiningState<Integer, int[], Integer> value1 =
+ underTest.state(NAMESPACE_1, SUM_INTEGER_ADDR);
+ CombiningState<Integer, int[], Integer> value2 =
+ underTest.state(NAMESPACE_2, SUM_INTEGER_ADDR);
+ CombiningState<Integer, int[], Integer> value3 =
+ underTest.state(NAMESPACE_3, SUM_INTEGER_ADDR);
+
+ value1.add(5);
+ value2.add(10);
+ value1.add(6);
+
+ StateMerging.mergeCombiningValues(Arrays.asList(value1, value2), value3);
+
+ // Merging clears the old values and updates the result value.
+ assertThat(value1.read(), Matchers.equalTo(0));
+ assertThat(value2.read(), Matchers.equalTo(0));
+ assertThat(value3.read(), Matchers.equalTo(21));
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/src/test/java/org/apache/beam/runners/flink/streaming/FlinkKeyGroupStateInternalsTest.java
----------------------------------------------------------------------
diff --git a/runners/flink/src/test/java/org/apache/beam/runners/flink/streaming/FlinkKeyGroupStateInternalsTest.java b/runners/flink/src/test/java/org/apache/beam/runners/flink/streaming/FlinkKeyGroupStateInternalsTest.java
new file mode 100644
index 0000000..5433d07
--- /dev/null
+++ b/runners/flink/src/test/java/org/apache/beam/runners/flink/streaming/FlinkKeyGroupStateInternalsTest.java
@@ -0,0 +1,262 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.runners.flink.streaming;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertThat;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
+import java.nio.ByteBuffer;
+import java.util.Arrays;
+import org.apache.beam.runners.core.StateMerging;
+import org.apache.beam.runners.core.StateNamespace;
+import org.apache.beam.runners.core.StateNamespaceForTest;
+import org.apache.beam.runners.core.StateTag;
+import org.apache.beam.runners.core.StateTags;
+import org.apache.beam.runners.flink.translation.wrappers.streaming.state.FlinkKeyGroupStateInternals;
+import org.apache.beam.sdk.coders.StringUtf8Coder;
+import org.apache.beam.sdk.util.CoderUtils;
+import org.apache.beam.sdk.util.state.BagState;
+import org.apache.beam.sdk.util.state.ReadableState;
+import org.apache.flink.api.common.ExecutionConfig;
+import org.apache.flink.api.common.JobID;
+import org.apache.flink.api.java.typeutils.GenericTypeInfo;
+import org.apache.flink.runtime.jobgraph.JobVertexID;
+import org.apache.flink.runtime.operators.testutils.DummyEnvironment;
+import org.apache.flink.runtime.query.KvStateRegistry;
+import org.apache.flink.runtime.state.AbstractKeyedStateBackend;
+import org.apache.flink.runtime.state.KeyGroupRange;
+import org.apache.flink.runtime.state.KeyedStateBackend;
+import org.apache.flink.runtime.state.memory.MemoryStateBackend;
+import org.apache.flink.streaming.api.operators.KeyContext;
+import org.hamcrest.Matchers;
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+/**
+ * Tests for {@link FlinkKeyGroupStateInternals}. This is based on the tests for
+ * {@code InMemoryStateInternals}.
+ */
+@RunWith(JUnit4.class)
+public class FlinkKeyGroupStateInternalsTest {
+ private static final StateNamespace NAMESPACE_1 = new StateNamespaceForTest("ns1");
+ private static final StateNamespace NAMESPACE_2 = new StateNamespaceForTest("ns2");
+ private static final StateNamespace NAMESPACE_3 = new StateNamespaceForTest("ns3");
+
+ private static final StateTag<Object, BagState<String>> STRING_BAG_ADDR =
+ StateTags.bag("stringBag", StringUtf8Coder.of());
+
+ FlinkKeyGroupStateInternals<String> underTest;
+ private KeyedStateBackend keyedStateBackend;
+
+ @Before
+ public void initStateInternals() {
+ try {
+ keyedStateBackend = getKeyedStateBackend(2, new KeyGroupRange(0, 1));
+ underTest = new FlinkKeyGroupStateInternals<>(StringUtf8Coder.of(), keyedStateBackend);
+ } catch (Exception e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ private KeyedStateBackend getKeyedStateBackend(int numberOfKeyGroups,
+ KeyGroupRange keyGroupRange) {
+ MemoryStateBackend backend = new MemoryStateBackend();
+ try {
+ AbstractKeyedStateBackend<ByteBuffer> keyedStateBackend = backend.createKeyedStateBackend(
+ new DummyEnvironment("test", 1, 0),
+ new JobID(),
+ "test_op",
+ new GenericTypeInfo<>(ByteBuffer.class).createSerializer(new ExecutionConfig()),
+ numberOfKeyGroups,
+ keyGroupRange,
+ new KvStateRegistry().createTaskRegistry(new JobID(), new JobVertexID()));
+ keyedStateBackend.setCurrentKey(ByteBuffer.wrap(
+ CoderUtils.encodeToByteArray(StringUtf8Coder.of(), "1")));
+ return keyedStateBackend;
+ } catch (Exception e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ @Test
+ public void testBag() throws Exception {
+ BagState<String> value = underTest.state(NAMESPACE_1, STRING_BAG_ADDR);
+
+ assertEquals(value, underTest.state(NAMESPACE_1, STRING_BAG_ADDR));
+ assertFalse(value.equals(underTest.state(NAMESPACE_2, STRING_BAG_ADDR)));
+
+ assertThat(value.read(), Matchers.emptyIterable());
+ value.add("hello");
+ assertThat(value.read(), Matchers.containsInAnyOrder("hello"));
+
+ value.add("world");
+ assertThat(value.read(), Matchers.containsInAnyOrder("hello", "world"));
+
+ value.clear();
+ assertThat(value.read(), Matchers.emptyIterable());
+ assertEquals(underTest.state(NAMESPACE_1, STRING_BAG_ADDR), value);
+
+ }
+
+ @Test
+ public void testBagIsEmpty() throws Exception {
+ BagState<String> value = underTest.state(NAMESPACE_1, STRING_BAG_ADDR);
+
+ assertThat(value.isEmpty().read(), Matchers.is(true));
+ ReadableState<Boolean> readFuture = value.isEmpty();
+ value.add("hello");
+ assertThat(readFuture.read(), Matchers.is(false));
+
+ value.clear();
+ assertThat(readFuture.read(), Matchers.is(true));
+ }
+
+ @Test
+ public void testMergeBagIntoSource() throws Exception {
+ BagState<String> bag1 = underTest.state(NAMESPACE_1, STRING_BAG_ADDR);
+ BagState<String> bag2 = underTest.state(NAMESPACE_2, STRING_BAG_ADDR);
+
+ bag1.add("Hello");
+ bag2.add("World");
+ bag1.add("!");
+
+ StateMerging.mergeBags(Arrays.asList(bag1, bag2), bag1);
+
+ // Reading the merged bag returns the contents of both bags
+ assertThat(bag1.read(), Matchers.containsInAnyOrder("Hello", "World", "!"));
+ assertThat(bag2.read(), Matchers.emptyIterable());
+ }
+
+ @Test
+ public void testMergeBagIntoNewNamespace() throws Exception {
+ BagState<String> bag1 = underTest.state(NAMESPACE_1, STRING_BAG_ADDR);
+ BagState<String> bag2 = underTest.state(NAMESPACE_2, STRING_BAG_ADDR);
+ BagState<String> bag3 = underTest.state(NAMESPACE_3, STRING_BAG_ADDR);
+
+ bag1.add("Hello");
+ bag2.add("World");
+ bag1.add("!");
+
+ StateMerging.mergeBags(Arrays.asList(bag1, bag2, bag3), bag3);
+
+ // Reading the merged bag returns the contents of both bags
+ assertThat(bag3.read(), Matchers.containsInAnyOrder("Hello", "World", "!"));
+ assertThat(bag1.read(), Matchers.emptyIterable());
+ assertThat(bag2.read(), Matchers.emptyIterable());
+ }
+
+ @Test
+ public void testKeyGroupAndCheckpoint() throws Exception {
+ // assign to keyGroup 0
+ ByteBuffer key0 = ByteBuffer.wrap(
+ CoderUtils.encodeToByteArray(StringUtf8Coder.of(), "11111111"));
+ // assign to keyGroup 1
+ ByteBuffer key1 = ByteBuffer.wrap(
+ CoderUtils.encodeToByteArray(StringUtf8Coder.of(), "22222222"));
+ FlinkKeyGroupStateInternals<String> allState;
+ {
+ KeyedStateBackend keyedStateBackend = getKeyedStateBackend(2, new KeyGroupRange(0, 1));
+ allState = new FlinkKeyGroupStateInternals<>(
+ StringUtf8Coder.of(), keyedStateBackend);
+ BagState<String> valueForNamespace0 = allState.state(NAMESPACE_1, STRING_BAG_ADDR);
+ BagState<String> valueForNamespace1 = allState.state(NAMESPACE_2, STRING_BAG_ADDR);
+ keyedStateBackend.setCurrentKey(key0);
+ valueForNamespace0.add("0");
+ valueForNamespace1.add("2");
+ keyedStateBackend.setCurrentKey(key1);
+ valueForNamespace0.add("1");
+ valueForNamespace1.add("3");
+ assertThat(valueForNamespace0.read(), Matchers.containsInAnyOrder("0", "1"));
+ assertThat(valueForNamespace1.read(), Matchers.containsInAnyOrder("2", "3"));
+ }
+
+ ClassLoader classLoader = FlinkKeyGroupStateInternalsTest.class.getClassLoader();
+
+ // 1. scale up
+ ByteArrayOutputStream out0 = new ByteArrayOutputStream();
+ allState.snapshotKeyGroupState(0, new DataOutputStream(out0));
+ DataInputStream in0 = new DataInputStream(
+ new ByteArrayInputStream(out0.toByteArray()));
+ {
+ KeyedStateBackend keyedStateBackend = getKeyedStateBackend(2, new KeyGroupRange(0, 0));
+ FlinkKeyGroupStateInternals<String> state0 =
+ new FlinkKeyGroupStateInternals<>(
+ StringUtf8Coder.of(), keyedStateBackend);
+ state0.restoreKeyGroupState(0, in0, classLoader);
+ BagState<String> valueForNamespace0 = state0.state(NAMESPACE_1, STRING_BAG_ADDR);
+ BagState<String> valueForNamespace1 = state0.state(NAMESPACE_2, STRING_BAG_ADDR);
+ assertThat(valueForNamespace0.read(), Matchers.containsInAnyOrder("0"));
+ assertThat(valueForNamespace1.read(), Matchers.containsInAnyOrder("2"));
+ }
+
+ ByteArrayOutputStream out1 = new ByteArrayOutputStream();
+ allState.snapshotKeyGroupState(1, new DataOutputStream(out1));
+ DataInputStream in1 = new DataInputStream(
+ new ByteArrayInputStream(out1.toByteArray()));
+ {
+ KeyedStateBackend keyedStateBackend = getKeyedStateBackend(2, new KeyGroupRange(1, 1));
+ FlinkKeyGroupStateInternals<String> state1 =
+ new FlinkKeyGroupStateInternals<>(
+ StringUtf8Coder.of(), keyedStateBackend);
+ state1.restoreKeyGroupState(1, in1, classLoader);
+ BagState<String> valueForNamespace0 = state1.state(NAMESPACE_1, STRING_BAG_ADDR);
+ BagState<String> valueForNamespace1 = state1.state(NAMESPACE_2, STRING_BAG_ADDR);
+ assertThat(valueForNamespace0.read(), Matchers.containsInAnyOrder("1"));
+ assertThat(valueForNamespace1.read(), Matchers.containsInAnyOrder("3"));
+ }
+
+ // 2. scale down
+ {
+ KeyedStateBackend keyedStateBackend = getKeyedStateBackend(2, new KeyGroupRange(0, 1));
+ FlinkKeyGroupStateInternals<String> newAllState = new FlinkKeyGroupStateInternals<>(
+ StringUtf8Coder.of(), keyedStateBackend);
+ in0.reset();
+ in1.reset();
+ newAllState.restoreKeyGroupState(0, in0, classLoader);
+ newAllState.restoreKeyGroupState(1, in1, classLoader);
+ BagState<String> valueForNamespace0 = newAllState.state(NAMESPACE_1, STRING_BAG_ADDR);
+ BagState<String> valueForNamespace1 = newAllState.state(NAMESPACE_2, STRING_BAG_ADDR);
+ assertThat(valueForNamespace0.read(), Matchers.containsInAnyOrder("0", "1"));
+ assertThat(valueForNamespace1.read(), Matchers.containsInAnyOrder("2", "3"));
+ }
+
+ }
+
+ private static class TestKeyContext implements KeyContext {
+
+ private Object key;
+
+ @Override
+ public void setCurrentKey(Object key) {
+ this.key = key;
+ }
+
+ @Override
+ public Object getCurrentKey() {
+ return key;
+ }
+ }
+
+}
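For the key-group assertions in testKeyGroupAndCheckpoint above, the test keys are chosen so that their encoded forms hash into key groups 0 and 1 when there are two key groups. A small sketch of how one can check that assignment with Flink's KeyGroupRangeAssignment; this is illustrative only and not part of the test.

import java.nio.ByteBuffer;
import org.apache.beam.sdk.coders.StringUtf8Coder;
import org.apache.beam.sdk.util.CoderUtils;
import org.apache.flink.runtime.state.KeyGroupRangeAssignment;

// Sketch: print the key group each encoded test key is assigned to, which is
// how keys such as "11111111" and "22222222" can be picked to land in key
// groups 0 and 1 respectively when maxParallelism is 2.
class KeyGroupCheck {
  public static void main(String[] args) throws Exception {
    int maxParallelism = 2;
    for (String key : new String[] {"11111111", "22222222"}) {
      ByteBuffer encoded =
          ByteBuffer.wrap(CoderUtils.encodeToByteArray(StringUtf8Coder.of(), key));
      int keyGroup = KeyGroupRangeAssignment.assignToKeyGroup(encoded, maxParallelism);
      System.out.println(key + " -> key group " + keyGroup);
    }
  }
}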
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/src/test/java/org/apache/beam/runners/flink/streaming/FlinkSplitStateInternalsTest.java
----------------------------------------------------------------------
diff --git a/runners/flink/src/test/java/org/apache/beam/runners/flink/streaming/FlinkSplitStateInternalsTest.java b/runners/flink/src/test/java/org/apache/beam/runners/flink/streaming/FlinkSplitStateInternalsTest.java
new file mode 100644
index 0000000..08ae0c4
--- /dev/null
+++ b/runners/flink/src/test/java/org/apache/beam/runners/flink/streaming/FlinkSplitStateInternalsTest.java
@@ -0,0 +1,101 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.runners.flink.streaming;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertThat;
+
+import org.apache.beam.runners.core.StateNamespace;
+import org.apache.beam.runners.core.StateNamespaceForTest;
+import org.apache.beam.runners.core.StateTag;
+import org.apache.beam.runners.core.StateTags;
+import org.apache.beam.runners.flink.translation.wrappers.streaming.state.FlinkSplitStateInternals;
+import org.apache.beam.sdk.coders.StringUtf8Coder;
+import org.apache.beam.sdk.util.state.BagState;
+import org.apache.beam.sdk.util.state.ReadableState;
+import org.apache.flink.runtime.operators.testutils.DummyEnvironment;
+import org.apache.flink.runtime.state.OperatorStateBackend;
+import org.apache.flink.runtime.state.memory.MemoryStateBackend;
+import org.hamcrest.Matchers;
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+/**
+ * Tests for {@link FlinkSplitStateInternals}. This is based on the tests for
+ * {@code InMemoryStateInternals}.
+ */
+@RunWith(JUnit4.class)
+public class FlinkSplitStateInternalsTest {
+ private static final StateNamespace NAMESPACE_1 = new StateNamespaceForTest("ns1");
+ private static final StateNamespace NAMESPACE_2 = new StateNamespaceForTest("ns2");
+
+ private static final StateTag<Object, BagState<String>> STRING_BAG_ADDR =
+ StateTags.bag("stringBag", StringUtf8Coder.of());
+
+ FlinkSplitStateInternals<String> underTest;
+
+ @Before
+ public void initStateInternals() {
+ MemoryStateBackend backend = new MemoryStateBackend();
+ try {
+ OperatorStateBackend operatorStateBackend =
+ backend.createOperatorStateBackend(new DummyEnvironment("test", 1, 0), "");
+ underTest = new FlinkSplitStateInternals<>(operatorStateBackend);
+
+ } catch (Exception e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ @Test
+ public void testBag() throws Exception {
+ BagState<String> value = underTest.state(NAMESPACE_1, STRING_BAG_ADDR);
+
+ assertEquals(value, underTest.state(NAMESPACE_1, STRING_BAG_ADDR));
+ assertFalse(value.equals(underTest.state(NAMESPACE_2, STRING_BAG_ADDR)));
+
+ assertThat(value.read(), Matchers.emptyIterable());
+ value.add("hello");
+ assertThat(value.read(), Matchers.containsInAnyOrder("hello"));
+
+ value.add("world");
+ assertThat(value.read(), Matchers.containsInAnyOrder("hello", "world"));
+
+ value.clear();
+ assertThat(value.read(), Matchers.emptyIterable());
+ assertEquals(underTest.state(NAMESPACE_1, STRING_BAG_ADDR), value);
+
+ }
+
+ @Test
+ public void testBagIsEmpty() throws Exception {
+ BagState<String> value = underTest.state(NAMESPACE_1, STRING_BAG_ADDR);
+
+ assertThat(value.isEmpty().read(), Matchers.is(true));
+ ReadableState<Boolean> readFuture = value.isEmpty();
+ value.add("hello");
+ assertThat(readFuture.read(), Matchers.is(false));
+
+ value.clear();
+ assertThat(readFuture.read(), Matchers.is(true));
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/src/test/java/org/apache/beam/runners/flink/streaming/FlinkStateInternalsTest.java
----------------------------------------------------------------------
diff --git a/runners/flink/src/test/java/org/apache/beam/runners/flink/streaming/FlinkStateInternalsTest.java b/runners/flink/src/test/java/org/apache/beam/runners/flink/streaming/FlinkStateInternalsTest.java
new file mode 100644
index 0000000..d140271
--- /dev/null
+++ b/runners/flink/src/test/java/org/apache/beam/runners/flink/streaming/FlinkStateInternalsTest.java
@@ -0,0 +1,395 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.runners.flink.streaming;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotEquals;
+import static org.junit.Assert.assertThat;
+
+import java.nio.ByteBuffer;
+import java.util.Arrays;
+import org.apache.beam.runners.core.StateMerging;
+import org.apache.beam.runners.core.StateNamespace;
+import org.apache.beam.runners.core.StateNamespaceForTest;
+import org.apache.beam.runners.core.StateTag;
+import org.apache.beam.runners.core.StateTags;
+import org.apache.beam.runners.flink.translation.wrappers.streaming.state.FlinkStateInternals;
+import org.apache.beam.sdk.coders.StringUtf8Coder;
+import org.apache.beam.sdk.coders.VarIntCoder;
+import org.apache.beam.sdk.transforms.Sum;
+import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
+import org.apache.beam.sdk.transforms.windowing.IntervalWindow;
+import org.apache.beam.sdk.transforms.windowing.OutputTimeFns;
+import org.apache.beam.sdk.util.CoderUtils;
+import org.apache.beam.sdk.util.state.BagState;
+import org.apache.beam.sdk.util.state.CombiningState;
+import org.apache.beam.sdk.util.state.GroupingState;
+import org.apache.beam.sdk.util.state.ReadableState;
+import org.apache.beam.sdk.util.state.ValueState;
+import org.apache.beam.sdk.util.state.WatermarkHoldState;
+import org.apache.flink.api.common.ExecutionConfig;
+import org.apache.flink.api.common.JobID;
+import org.apache.flink.api.java.typeutils.GenericTypeInfo;
+import org.apache.flink.runtime.jobgraph.JobVertexID;
+import org.apache.flink.runtime.operators.testutils.DummyEnvironment;
+import org.apache.flink.runtime.query.KvStateRegistry;
+import org.apache.flink.runtime.state.AbstractKeyedStateBackend;
+import org.apache.flink.runtime.state.KeyGroupRange;
+import org.apache.flink.runtime.state.memory.MemoryStateBackend;
+import org.hamcrest.Matchers;
+import org.joda.time.Instant;
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+/**
+ * Tests for {@link FlinkStateInternals}. This is based on the tests for
+ * {@code InMemoryStateInternals}.
+ */
+@RunWith(JUnit4.class)
+public class FlinkStateInternalsTest {
+ private static final BoundedWindow WINDOW_1 = new IntervalWindow(new Instant(0), new Instant(10));
+ private static final StateNamespace NAMESPACE_1 = new StateNamespaceForTest("ns1");
+ private static final StateNamespace NAMESPACE_2 = new StateNamespaceForTest("ns2");
+ private static final StateNamespace NAMESPACE_3 = new StateNamespaceForTest("ns3");
+
+ private static final StateTag<Object, ValueState<String>> STRING_VALUE_ADDR =
+ StateTags.value("stringValue", StringUtf8Coder.of());
+ private static final StateTag<Object, CombiningState<Integer, int[], Integer>>
+ SUM_INTEGER_ADDR = StateTags.combiningValueFromInputInternal(
+ "sumInteger", VarIntCoder.of(), Sum.ofIntegers());
+ private static final StateTag<Object, BagState<String>> STRING_BAG_ADDR =
+ StateTags.bag("stringBag", StringUtf8Coder.of());
+ private static final StateTag<Object, WatermarkHoldState<BoundedWindow>>
+ WATERMARK_EARLIEST_ADDR =
+ StateTags.watermarkStateInternal("watermark", OutputTimeFns.outputAtEarliestInputTimestamp());
+ private static final StateTag<Object, WatermarkHoldState<BoundedWindow>>
+ WATERMARK_LATEST_ADDR =
+ StateTags.watermarkStateInternal("watermark", OutputTimeFns.outputAtLatestInputTimestamp());
+ private static final StateTag<Object, WatermarkHoldState<BoundedWindow>> WATERMARK_EOW_ADDR =
+ StateTags.watermarkStateInternal("watermark", OutputTimeFns.outputAtEndOfWindow());
+
+ FlinkStateInternals<String> underTest;
+
+ @Before
+ public void initStateInternals() {
+ MemoryStateBackend backend = new MemoryStateBackend();
+ try {
+ AbstractKeyedStateBackend<ByteBuffer> keyedStateBackend = backend.createKeyedStateBackend(
+ new DummyEnvironment("test", 1, 0),
+ new JobID(),
+ "test_op",
+ new GenericTypeInfo<>(ByteBuffer.class).createSerializer(new ExecutionConfig()),
+ 1,
+ new KeyGroupRange(0, 0),
+ new KvStateRegistry().createTaskRegistry(new JobID(), new JobVertexID()));
+ underTest = new FlinkStateInternals<>(keyedStateBackend, StringUtf8Coder.of());
+
+ keyedStateBackend.setCurrentKey(
+ ByteBuffer.wrap(CoderUtils.encodeToByteArray(StringUtf8Coder.of(), "Hello")));
+ } catch (Exception e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ @Test
+ public void testValue() throws Exception {
+ ValueState<String> value = underTest.state(NAMESPACE_1, STRING_VALUE_ADDR);
+
+ assertEquals(underTest.state(NAMESPACE_1, STRING_VALUE_ADDR), value);
+ assertNotEquals(
+ underTest.state(NAMESPACE_2, STRING_VALUE_ADDR),
+ value);
+
+ assertThat(value.read(), Matchers.nullValue());
+ value.write("hello");
+ assertThat(value.read(), Matchers.equalTo("hello"));
+ value.write("world");
+ assertThat(value.read(), Matchers.equalTo("world"));
+
+ value.clear();
+ assertThat(value.read(), Matchers.nullValue());
+ assertEquals(underTest.state(NAMESPACE_1, STRING_VALUE_ADDR), value);
+
+ }
+
+ @Test
+ public void testBag() throws Exception {
+ BagState<String> value = underTest.state(NAMESPACE_1, STRING_BAG_ADDR);
+
+ assertEquals(value, underTest.state(NAMESPACE_1, STRING_BAG_ADDR));
+ assertFalse(value.equals(underTest.state(NAMESPACE_2, STRING_BAG_ADDR)));
+
+ assertThat(value.read(), Matchers.emptyIterable());
+ value.add("hello");
+ assertThat(value.read(), Matchers.containsInAnyOrder("hello"));
+
+ value.add("world");
+ assertThat(value.read(), Matchers.containsInAnyOrder("hello", "world"));
+
+ value.clear();
+ assertThat(value.read(), Matchers.emptyIterable());
+ assertEquals(underTest.state(NAMESPACE_1, STRING_BAG_ADDR), value);
+
+ }
+
+ @Test
+ public void testBagIsEmpty() throws Exception {
+ BagState<String> value = underTest.state(NAMESPACE_1, STRING_BAG_ADDR);
+
+ assertThat(value.isEmpty().read(), Matchers.is(true));
+ ReadableState<Boolean> readFuture = value.isEmpty();
+ value.add("hello");
+ assertThat(readFuture.read(), Matchers.is(false));
+
+ value.clear();
+ assertThat(readFuture.read(), Matchers.is(true));
+ }
+
+ @Test
+ public void testMergeBagIntoSource() throws Exception {
+ BagState<String> bag1 = underTest.state(NAMESPACE_1, STRING_BAG_ADDR);
+ BagState<String> bag2 = underTest.state(NAMESPACE_2, STRING_BAG_ADDR);
+
+ bag1.add("Hello");
+ bag2.add("World");
+ bag1.add("!");
+
+ StateMerging.mergeBags(Arrays.asList(bag1, bag2), bag1);
+
+ // Reading the merged bag returns the contents of both source bags.
+ assertThat(bag1.read(), Matchers.containsInAnyOrder("Hello", "World", "!"));
+ assertThat(bag2.read(), Matchers.emptyIterable());
+ }
+
+ @Test
+ public void testMergeBagIntoNewNamespace() throws Exception {
+ BagState<String> bag1 = underTest.state(NAMESPACE_1, STRING_BAG_ADDR);
+ BagState<String> bag2 = underTest.state(NAMESPACE_2, STRING_BAG_ADDR);
+ BagState<String> bag3 = underTest.state(NAMESPACE_3, STRING_BAG_ADDR);
+
+ bag1.add("Hello");
+ bag2.add("World");
+ bag1.add("!");
+
+ StateMerging.mergeBags(Arrays.asList(bag1, bag2, bag3), bag3);
+
+ // Reading the merged bag returns the combined contents of all source bags.
+ assertThat(bag3.read(), Matchers.containsInAnyOrder("Hello", "World", "!"));
+ assertThat(bag1.read(), Matchers.emptyIterable());
+ assertThat(bag2.read(), Matchers.emptyIterable());
+ }
+
+ @Test
+ public void testCombiningValue() throws Exception {
+ GroupingState<Integer, Integer> value = underTest.state(NAMESPACE_1, SUM_INTEGER_ADDR);
+
+ // State instances are cached, but depend on the namespace.
+ assertEquals(value, underTest.state(NAMESPACE_1, SUM_INTEGER_ADDR));
+ assertFalse(value.equals(underTest.state(NAMESPACE_2, SUM_INTEGER_ADDR)));
+
+ assertThat(value.read(), Matchers.equalTo(0));
+ value.add(2);
+ assertThat(value.read(), Matchers.equalTo(2));
+
+ value.add(3);
+ assertThat(value.read(), Matchers.equalTo(5));
+
+ value.clear();
+ assertThat(value.read(), Matchers.equalTo(0));
+ assertEquals(underTest.state(NAMESPACE_1, SUM_INTEGER_ADDR), value);
+ }
+
+ @Test
+ public void testCombiningIsEmpty() throws Exception {
+ GroupingState<Integer, Integer> value = underTest.state(NAMESPACE_1, SUM_INTEGER_ADDR);
+
+ assertThat(value.isEmpty().read(), Matchers.is(true));
+ ReadableState<Boolean> readFuture = value.isEmpty();
+ value.add(5);
+ assertThat(readFuture.read(), Matchers.is(false));
+
+ value.clear();
+ assertThat(readFuture.read(), Matchers.is(true));
+ }
+
+ @Test
+ public void testMergeCombiningValueIntoSource() throws Exception {
+ CombiningState<Integer, int[], Integer> value1 =
+ underTest.state(NAMESPACE_1, SUM_INTEGER_ADDR);
+ CombiningState<Integer, int[], Integer> value2 =
+ underTest.state(NAMESPACE_2, SUM_INTEGER_ADDR);
+
+ value1.add(5);
+ value2.add(10);
+ value1.add(6);
+
+ assertThat(value1.read(), Matchers.equalTo(11));
+ assertThat(value2.read(), Matchers.equalTo(10));
+
+ // Merging clears the old values and updates the result value.
+ StateMerging.mergeCombiningValues(Arrays.asList(value1, value2), value1);
+
+ assertThat(value1.read(), Matchers.equalTo(21));
+ assertThat(value2.read(), Matchers.equalTo(0));
+ }
+
+ @Test
+ public void testMergeCombiningValueIntoNewNamespace() throws Exception {
+ CombiningState<Integer, int[], Integer> value1 =
+ underTest.state(NAMESPACE_1, SUM_INTEGER_ADDR);
+ CombiningState<Integer, int[], Integer> value2 =
+ underTest.state(NAMESPACE_2, SUM_INTEGER_ADDR);
+ CombiningState<Integer, int[], Integer> value3 =
+ underTest.state(NAMESPACE_3, SUM_INTEGER_ADDR);
+
+ value1.add(5);
+ value2.add(10);
+ value1.add(6);
+
+ StateMerging.mergeCombiningValues(Arrays.asList(value1, value2), value3);
+
+ // Merging clears the old values and updates the result value.
+ assertThat(value1.read(), Matchers.equalTo(0));
+ assertThat(value2.read(), Matchers.equalTo(0));
+ assertThat(value3.read(), Matchers.equalTo(21));
+ }
+
+ @Test
+ public void testWatermarkEarliestState() throws Exception {
+ WatermarkHoldState<BoundedWindow> value =
+ underTest.state(NAMESPACE_1, WATERMARK_EARLIEST_ADDR);
+
+ // State instances are cached, but depend on the namespace.
+ assertEquals(value, underTest.state(NAMESPACE_1, WATERMARK_EARLIEST_ADDR));
+ assertFalse(value.equals(underTest.state(NAMESPACE_2, WATERMARK_EARLIEST_ADDR)));
+
+ assertThat(value.read(), Matchers.nullValue());
+ value.add(new Instant(2000));
+ assertThat(value.read(), Matchers.equalTo(new Instant(2000)));
+
+ value.add(new Instant(3000));
+ assertThat(value.read(), Matchers.equalTo(new Instant(2000)));
+
+ value.add(new Instant(1000));
+ assertThat(value.read(), Matchers.equalTo(new Instant(1000)));
+
+ value.clear();
+ assertThat(value.read(), Matchers.equalTo(null));
+ assertEquals(underTest.state(NAMESPACE_1, WATERMARK_EARLIEST_ADDR), value);
+ }
+
+ @Test
+ public void testWatermarkLatestState() throws Exception {
+ WatermarkHoldState<BoundedWindow> value =
+ underTest.state(NAMESPACE_1, WATERMARK_LATEST_ADDR);
+
+ // State instances are cached, but depend on the namespace.
+ assertEquals(value, underTest.state(NAMESPACE_1, WATERMARK_LATEST_ADDR));
+ assertFalse(value.equals(underTest.state(NAMESPACE_2, WATERMARK_LATEST_ADDR)));
+
+ assertThat(value.read(), Matchers.nullValue());
+ value.add(new Instant(2000));
+ assertThat(value.read(), Matchers.equalTo(new Instant(2000)));
+
+ value.add(new Instant(3000));
+ assertThat(value.read(), Matchers.equalTo(new Instant(3000)));
+
+ value.add(new Instant(1000));
+ assertThat(value.read(), Matchers.equalTo(new Instant(3000)));
+
+ value.clear();
+ assertThat(value.read(), Matchers.equalTo(null));
+ assertEquals(underTest.state(NAMESPACE_1, WATERMARK_LATEST_ADDR), value);
+ }
+
+ @Test
+ public void testWatermarkEndOfWindowState() throws Exception {
+ WatermarkHoldState<BoundedWindow> value = underTest.state(NAMESPACE_1, WATERMARK_EOW_ADDR);
+
+ // State instances are cached, but depend on the namespace.
+ assertEquals(value, underTest.state(NAMESPACE_1, WATERMARK_EOW_ADDR));
+ assertFalse(value.equals(underTest.state(NAMESPACE_2, WATERMARK_EOW_ADDR)));
+
+ assertThat(value.read(), Matchers.nullValue());
+ value.add(new Instant(2000));
+ assertThat(value.read(), Matchers.equalTo(new Instant(2000)));
+
+ value.clear();
+ assertThat(value.read(), Matchers.equalTo(null));
+ assertEquals(underTest.state(NAMESPACE_1, WATERMARK_EOW_ADDR), value);
+ }
+
+ @Test
+ public void testWatermarkStateIsEmpty() throws Exception {
+ WatermarkHoldState<BoundedWindow> value =
+ underTest.state(NAMESPACE_1, WATERMARK_EARLIEST_ADDR);
+
+ assertThat(value.isEmpty().read(), Matchers.is(true));
+ ReadableState<Boolean> readFuture = value.isEmpty();
+ value.add(new Instant(1000));
+ assertThat(readFuture.read(), Matchers.is(false));
+
+ value.clear();
+ assertThat(readFuture.read(), Matchers.is(true));
+ }
+
+ @Test
+ public void testMergeEarliestWatermarkIntoSource() throws Exception {
+ WatermarkHoldState<BoundedWindow> value1 =
+ underTest.state(NAMESPACE_1, WATERMARK_EARLIEST_ADDR);
+ WatermarkHoldState<BoundedWindow> value2 =
+ underTest.state(NAMESPACE_2, WATERMARK_EARLIEST_ADDR);
+
+ value1.add(new Instant(3000));
+ value2.add(new Instant(5000));
+ value1.add(new Instant(4000));
+ value2.add(new Instant(2000));
+
+ // Merging clears the old values and updates the merged value.
+ StateMerging.mergeWatermarks(Arrays.asList(value1, value2), value1, WINDOW_1);
+
+ assertThat(value1.read(), Matchers.equalTo(new Instant(2000)));
+ assertThat(value2.read(), Matchers.equalTo(null));
+ }
+
+ @Test
+ public void testMergeLatestWatermarkIntoSource() throws Exception {
+ WatermarkHoldState<BoundedWindow> value1 =
+ underTest.state(NAMESPACE_1, WATERMARK_LATEST_ADDR);
+ WatermarkHoldState<BoundedWindow> value2 =
+ underTest.state(NAMESPACE_2, WATERMARK_LATEST_ADDR);
+ WatermarkHoldState<BoundedWindow> value3 =
+ underTest.state(NAMESPACE_3, WATERMARK_LATEST_ADDR);
+
+ value1.add(new Instant(3000));
+ value2.add(new Instant(5000));
+ value1.add(new Instant(4000));
+ value2.add(new Instant(2000));
+
+ // Merging clears the old values and updates the result value.
+ StateMerging.mergeWatermarks(Arrays.asList(value1, value2), value3, WINDOW_1);
+
+ // The merged hold resolves to the latest timestamp; the source holds are cleared.
+ assertThat(value3.read(), Matchers.equalTo(new Instant(5000)));
+ assertThat(value1.read(), Matchers.equalTo(null));
+ assertThat(value2.read(), Matchers.equalTo(null));
+ }
+}
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/src/test/java/org/apache/beam/runners/flink/streaming/GroupByNullKeyTest.java
----------------------------------------------------------------------
diff --git a/runners/flink/src/test/java/org/apache/beam/runners/flink/streaming/GroupByNullKeyTest.java b/runners/flink/src/test/java/org/apache/beam/runners/flink/streaming/GroupByNullKeyTest.java
new file mode 100644
index 0000000..663b910
--- /dev/null
+++ b/runners/flink/src/test/java/org/apache/beam/runners/flink/streaming/GroupByNullKeyTest.java
@@ -0,0 +1,124 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.runners.flink.streaming;
+
+import com.google.common.base.Joiner;
+import java.io.Serializable;
+import java.util.Arrays;
+import org.apache.beam.runners.flink.FlinkTestPipeline;
+import org.apache.beam.sdk.Pipeline;
+import org.apache.beam.sdk.io.TextIO;
+import org.apache.beam.sdk.transforms.Create;
+import org.apache.beam.sdk.transforms.DoFn;
+import org.apache.beam.sdk.transforms.GroupByKey;
+import org.apache.beam.sdk.transforms.ParDo;
+import org.apache.beam.sdk.transforms.windowing.AfterWatermark;
+import org.apache.beam.sdk.transforms.windowing.FixedWindows;
+import org.apache.beam.sdk.transforms.windowing.Window;
+import org.apache.beam.sdk.values.KV;
+import org.apache.beam.sdk.values.PCollection;
+import org.apache.flink.streaming.util.StreamingProgramTestBase;
+import org.joda.time.Duration;
+import org.joda.time.Instant;
+
+/**
+ * Test for GroupByNullKey.
+ */
+public class GroupByNullKeyTest extends StreamingProgramTestBase implements Serializable {
+
+ protected String resultPath;
+
+ static final String[] EXPECTED_RESULT = new String[] {
+ "k: null v: user1 user1 user1 user2 user2 user2 user2 user3"
+ };
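+ // All input timestamps below (0 ms to 25000 ms) fall inside a single one-hour fixed
+ // window, so the pipeline emits exactly one group, keyed by null, containing all
+ // eight user names.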
+
+ public GroupByNullKeyTest() {
+ }
+
+ @Override
+ protected void preSubmit() throws Exception {
+ resultPath = getTempDirPath("result");
+ }
+
+ @Override
+ protected void postSubmit() throws Exception {
+ compareResultsByLinesInMemory(Joiner.on('\n').join(EXPECTED_RESULT), resultPath);
+ }
+
+ /**
+ * DoFn extracting user and timestamp.
+ */
+ private static class ExtractUserAndTimestamp extends DoFn<KV<Integer, String>, String> {
+ @ProcessElement
+ public void processElement(ProcessContext c) {
+ KV<Integer, String> record = c.element();
+ int timestamp = record.getKey();
+ String userName = record.getValue();
+ if (userName != null) {
+ // Sets the implicit timestamp field to be used in windowing.
+ c.outputWithTimestamp(userName, new Instant(timestamp));
+ }
+ }
+ }
+
+ @Override
+ protected void testProgram() throws Exception {
+
+ Pipeline p = FlinkTestPipeline.createForStreaming();
+
+ PCollection<String> output =
+ p.apply(Create.of(Arrays.asList(
+ KV.<Integer, String>of(0, "user1"),
+ KV.<Integer, String>of(1, "user1"),
+ KV.<Integer, String>of(2, "user1"),
+ KV.<Integer, String>of(10, "user2"),
+ KV.<Integer, String>of(1, "user2"),
+ KV.<Integer, String>of(15000, "user2"),
+ KV.<Integer, String>of(12000, "user2"),
+ KV.<Integer, String>of(25000, "user3"))))
+ .apply(ParDo.of(new ExtractUserAndTimestamp()))
+ .apply(Window.<String>into(FixedWindows.of(Duration.standardHours(1)))
+ .triggering(AfterWatermark.pastEndOfWindow())
+ .withAllowedLateness(Duration.ZERO)
+ .discardingFiredPanes())
+
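+ // Re-key every element to a single null key so that GroupByKey collects the whole
+ // window's contents into one group (as described in the comment above EXPECTED_RESULT).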
+ .apply(ParDo.of(new DoFn<String, KV<Void, String>>() {
+ @ProcessElement
+ public void processElement(ProcessContext c) throws Exception {
+ String elem = c.element();
+ c.output(KV.<Void, String>of(null, elem));
+ }
+ }))
+ .apply(GroupByKey.<Void, String>create())
+ .apply(ParDo.of(new DoFn<KV<Void, Iterable<String>>, String>() {
+ @ProcessElement
+ public void processElement(ProcessContext c) throws Exception {
+ KV<Void, Iterable<String>> elem = c.element();
+ StringBuilder str = new StringBuilder();
+ str.append("k: " + elem.getKey() + " v:");
+ for (String v : elem.getValue()) {
+ str.append(" " + v);
+ }
+ c.output(str.toString());
+ }
+ }));
+ output.apply(TextIO.Write.to(resultPath));
+ p.run();
+ }
+}
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/src/test/java/org/apache/beam/runners/flink/streaming/TestCountingSource.java
----------------------------------------------------------------------
diff --git a/runners/flink/src/test/java/org/apache/beam/runners/flink/streaming/TestCountingSource.java b/runners/flink/src/test/java/org/apache/beam/runners/flink/streaming/TestCountingSource.java
new file mode 100644
index 0000000..3a08088
--- /dev/null
+++ b/runners/flink/src/test/java/org/apache/beam/runners/flink/streaming/TestCountingSource.java
@@ -0,0 +1,254 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.runners.flink.streaming;
+
+import static org.apache.beam.sdk.util.CoderUtils.encodeToByteArray;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.concurrent.ThreadLocalRandom;
+import javax.annotation.Nullable;
+import org.apache.beam.sdk.coders.Coder;
+import org.apache.beam.sdk.coders.DelegateCoder;
+import org.apache.beam.sdk.coders.KvCoder;
+import org.apache.beam.sdk.coders.VarIntCoder;
+import org.apache.beam.sdk.io.UnboundedSource;
+import org.apache.beam.sdk.options.PipelineOptions;
+import org.apache.beam.sdk.values.KV;
+import org.joda.time.Instant;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * An unbounded source for testing the unbounded sources framework code.
+ *
+ * <p>Each split of this source produces records of the form KV(split_id, i),
+ * where i counts up from 0. Each record has a timestamp of i, and the watermark
+ * accurately tracks these timestamps. The reader will occasionally return false
+ * from {@code advance}, in order to simulate a source where not all the data is
+ * available immediately.
+ */
+public class TestCountingSource
+ extends UnboundedSource<KV<Integer, Integer>, TestCountingSource.CounterMark> {
+ private static final Logger LOG = LoggerFactory.getLogger(TestCountingSource.class);
+
+ private static List<Integer> finalizeTracker;
+ private final int numMessagesPerShard;
+ private final int shardNumber;
+ private final boolean dedup;
+ private final boolean throwOnFirstSnapshot;
+ private final boolean allowSplitting;
+
+ /**
+ * We only allow an exception to be thrown from getCheckpointMark
+ * at most once. This must be static since the entire TestCountingSource
+ * instance may be re-serialized when the pipeline recovers and retries.
+ */
+ private static boolean thrown = false;
+
+ public static void setFinalizeTracker(List<Integer> finalizeTracker) {
+ TestCountingSource.finalizeTracker = finalizeTracker;
+ }
+
+ public TestCountingSource(int numMessagesPerShard) {
+ this(numMessagesPerShard, 0, false, false, true);
+ }
+
+ public TestCountingSource withDedup() {
+ return new TestCountingSource(
+ numMessagesPerShard, shardNumber, true, throwOnFirstSnapshot, true);
+ }
+
+ private TestCountingSource withShardNumber(int shardNumber) {
+ return new TestCountingSource(
+ numMessagesPerShard, shardNumber, dedup, throwOnFirstSnapshot, true);
+ }
+
+ public TestCountingSource withThrowOnFirstSnapshot(boolean throwOnFirstSnapshot) {
+ return new TestCountingSource(
+ numMessagesPerShard, shardNumber, dedup, throwOnFirstSnapshot, true);
+ }
+
+ public TestCountingSource withoutSplitting() {
+ return new TestCountingSource(
+ numMessagesPerShard, shardNumber, dedup, throwOnFirstSnapshot, false);
+ }
+
+ private TestCountingSource(int numMessagesPerShard, int shardNumber, boolean dedup,
+ boolean throwOnFirstSnapshot, boolean allowSplitting) {
+ this.numMessagesPerShard = numMessagesPerShard;
+ this.shardNumber = shardNumber;
+ this.dedup = dedup;
+ this.throwOnFirstSnapshot = throwOnFirstSnapshot;
+ this.allowSplitting = allowSplitting;
+ }
+
+ public int getShardNumber() {
+ return shardNumber;
+ }
+
+ @Override
+ public List<TestCountingSource> split(
+ int desiredNumSplits, PipelineOptions options) {
+ List<TestCountingSource> splits = new ArrayList<>();
+ int numSplits = allowSplitting ? desiredNumSplits : 1;
+ for (int i = 0; i < numSplits; i++) {
+ splits.add(withShardNumber(i));
+ }
+ return splits;
+ }
+
+ class CounterMark implements UnboundedSource.CheckpointMark {
+ int current;
+
+ public CounterMark(int current) {
+ this.current = current;
+ }
+
+ @Override
+ public void finalizeCheckpoint() {
+ if (finalizeTracker != null) {
+ finalizeTracker.add(current);
+ }
+ }
+ }
+
+ @Override
+ public Coder<CounterMark> getCheckpointMarkCoder() {
+ return DelegateCoder.of(
+ VarIntCoder.of(),
+ new DelegateCoder.CodingFunction<CounterMark, Integer>() {
+ @Override
+ public Integer apply(CounterMark input) {
+ return input.current;
+ }
+ },
+ new DelegateCoder.CodingFunction<Integer, CounterMark>() {
+ @Override
+ public CounterMark apply(Integer input) {
+ return new CounterMark(input);
+ }
+ });
+ }
+
+ @Override
+ public boolean requiresDeduping() {
+ return dedup;
+ }
+
+ /**
+ * Public only so that the checkpoint can be conveyed from {@link #getCheckpointMark()} to
+ * {@link TestCountingSource#createReader(PipelineOptions, CounterMark)} without cast.
+ */
+ public class CountingSourceReader extends UnboundedReader<KV<Integer, Integer>> {
+ private int current;
+
+ public CountingSourceReader(int startingPoint) {
+ this.current = startingPoint;
+ }
+
+ @Override
+ public boolean start() {
+ return advance();
+ }
+
+ @Override
+ public boolean advance() {
+ if (current >= numMessagesPerShard - 1) {
+ return false;
+ }
+ // If testing dedup, occasionally re-emit the current value so the reader produces a duplicate record.
+ if (current >= 0 && dedup && ThreadLocalRandom.current().nextInt(5) == 0) {
+ return true;
+ }
+ current++;
+ return true;
+ }
+
+ @Override
+ public KV<Integer, Integer> getCurrent() {
+ return KV.of(shardNumber, current);
+ }
+
+ @Override
+ public Instant getCurrentTimestamp() {
+ return new Instant(current);
+ }
+
+ @Override
+ public byte[] getCurrentRecordId() {
+ try {
+ return encodeToByteArray(KvCoder.of(VarIntCoder.of(), VarIntCoder.of()), getCurrent());
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ @Override
+ public void close() {}
+
+ @Override
+ public TestCountingSource getCurrentSource() {
+ return TestCountingSource.this;
+ }
+
+ @Override
+ public Instant getWatermark() {
+ // The watermark is a promise about future elements, and the timestamps of elements are
+ // strictly increasing for this source.
+ return new Instant(current + 1);
+ }
+
+ @Override
+ public CounterMark getCheckpointMark() {
+ if (throwOnFirstSnapshot && !thrown) {
+ thrown = true;
+ LOG.error("Throwing exception while checkpointing counter");
+ throw new RuntimeException("failed during checkpoint");
+ }
+ // The checkpoint can assume all records read, including the current, have
+ // been committed.
+ return new CounterMark(current);
+ }
+
+ @Override
+ public long getSplitBacklogBytes() {
+ return 7L;
+ }
+ }
+
+ @Override
+ public CountingSourceReader createReader(
+ PipelineOptions options, @Nullable CounterMark checkpointMark) {
+ if (checkpointMark == null) {
+ LOG.debug("creating reader");
+ } else {
+ LOG.debug("restoring reader from checkpoint with current = {}", checkpointMark.current);
+ }
+ return new CountingSourceReader(checkpointMark != null ? checkpointMark.current : -1);
+ }
+
+ @Override
+ public void validate() {}
+
+ @Override
+ public Coder<KV<Integer, Integer>> getDefaultOutputCoder() {
+ return KvCoder.of(VarIntCoder.of(), VarIntCoder.of());
+ }
+}
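
A minimal sketch of driving this source directly through its reader API (the shard
message count of 3 and the local variable names are arbitrary; it assumes the snippet
lives in the same package as the class above, with PipelineOptionsFactory imported in
addition to the imports already present):

    TestCountingSource source = new TestCountingSource(3).withoutSplitting();
    TestCountingSource.CountingSourceReader reader =
        source.createReader(PipelineOptionsFactory.create(), null /* no checkpoint */);
    for (boolean more = reader.start(); more; more = reader.advance()) {
      // Records are KV(shardNumber, i) with timestamp i; with a single shard the key is 0.
      KV<Integer, Integer> record = reader.getCurrent();
      Instant timestamp = reader.getCurrentTimestamp();
    }
    // The watermark runs one ahead of the last emitted value, and the checkpoint mark
    // records that value so a reader restored from it resumes with the next one.
    TestCountingSource.CounterMark mark = reader.getCheckpointMark();
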
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/src/test/java/org/apache/beam/runners/flink/streaming/TopWikipediaSessionsITCase.java
----------------------------------------------------------------------
diff --git a/runners/flink/src/test/java/org/apache/beam/runners/flink/streaming/TopWikipediaSessionsITCase.java b/runners/flink/src/test/java/org/apache/beam/runners/flink/streaming/TopWikipediaSessionsITCase.java
new file mode 100644
index 0000000..9e6bba8
--- /dev/null
+++ b/runners/flink/src/test/java/org/apache/beam/runners/flink/streaming/TopWikipediaSessionsITCase.java
@@ -0,0 +1,133 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.runners.flink.streaming;
+
+import com.google.api.services.bigquery.model.TableRow;
+import com.google.common.base.Joiner;
+import java.io.Serializable;
+import java.util.Arrays;
+import org.apache.beam.runners.flink.FlinkTestPipeline;
+import org.apache.beam.sdk.Pipeline;
+import org.apache.beam.sdk.io.TextIO;
+import org.apache.beam.sdk.transforms.Count;
+import org.apache.beam.sdk.transforms.Create;
+import org.apache.beam.sdk.transforms.DoFn;
+import org.apache.beam.sdk.transforms.ParDo;
+import org.apache.beam.sdk.transforms.windowing.Sessions;
+import org.apache.beam.sdk.transforms.windowing.Window;
+import org.apache.beam.sdk.values.KV;
+import org.apache.beam.sdk.values.PCollection;
+import org.apache.flink.streaming.util.StreamingProgramTestBase;
+import org.joda.time.Duration;
+import org.joda.time.Instant;
+
+
+/**
+ * Session window test.
+ */
+public class TopWikipediaSessionsITCase extends StreamingProgramTestBase implements Serializable {
+ protected String resultPath;
+
+ public TopWikipediaSessionsITCase() {
+ }
+
+ static final String[] EXPECTED_RESULT = new String[] {
+ "user: user1 value:3",
+ "user: user1 value:1",
+ "user: user2 value:4",
+ "user: user2 value:6",
+ "user: user3 value:7",
+ "user: user3 value:2"
+ };
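+ // With the one-minute session gap used below (timestamps are in seconds before being
+ // scaled to millis): user1 edits at now..now+2 form one session (3) and now+230 a
+ // second (1); user2 edits at now..now+8 form one session (6) and now+200..now+241 a
+ // second (4); user3 edits at now and now+10 form one session (2) and now+235..now+245
+ // a second (7).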
+
+ @Override
+ protected void preSubmit() throws Exception {
+ resultPath = getTempDirPath("result");
+ }
+
+ @Override
+ protected void postSubmit() throws Exception {
+ compareResultsByLinesInMemory(Joiner.on('\n').join(EXPECTED_RESULT), resultPath);
+ }
+
+ @Override
+ protected void testProgram() throws Exception {
+
+ Pipeline p = FlinkTestPipeline.createForStreaming();
+
+ Long now = (System.currentTimeMillis() + 10000) / 1000;
+
+ PCollection<KV<String, Long>> output =
+ p.apply(Create.of(Arrays.asList(
+ new TableRow().set("timestamp", now).set("contributor_username", "user1"),
+ new TableRow().set("timestamp", now + 10).set("contributor_username", "user3"),
+ new TableRow().set("timestamp", now).set("contributor_username", "user2"),
+ new TableRow().set("timestamp", now).set("contributor_username", "user1"),
+ new TableRow().set("timestamp", now + 2).set("contributor_username", "user1"),
+ new TableRow().set("timestamp", now).set("contributor_username", "user2"),
+ new TableRow().set("timestamp", now + 1).set("contributor_username", "user2"),
+ new TableRow().set("timestamp", now + 5).set("contributor_username", "user2"),
+ new TableRow().set("timestamp", now + 7).set("contributor_username", "user2"),
+ new TableRow().set("timestamp", now + 8).set("contributor_username", "user2"),
+ new TableRow().set("timestamp", now + 200).set("contributor_username", "user2"),
+ new TableRow().set("timestamp", now + 230).set("contributor_username", "user1"),
+ new TableRow().set("timestamp", now + 230).set("contributor_username", "user2"),
+ new TableRow().set("timestamp", now + 240).set("contributor_username", "user2"),
+ new TableRow().set("timestamp", now + 245).set("contributor_username", "user3"),
+ new TableRow().set("timestamp", now + 235).set("contributor_username", "user3"),
+ new TableRow().set("timestamp", now + 236).set("contributor_username", "user3"),
+ new TableRow().set("timestamp", now + 237).set("contributor_username", "user3"),
+ new TableRow().set("timestamp", now + 238).set("contributor_username", "user3"),
+ new TableRow().set("timestamp", now + 239).set("contributor_username", "user3"),
+ new TableRow().set("timestamp", now + 240).set("contributor_username", "user3"),
+ new TableRow().set("timestamp", now + 241).set("contributor_username", "user2"),
+ new TableRow().set("timestamp", now).set("contributor_username", "user3"))))
+
+ .apply(ParDo.of(new DoFn<TableRow, String>() {
+ @ProcessElement
+ public void processElement(ProcessContext c) throws Exception {
+ TableRow row = c.element();
+ long timestamp = (Integer) row.get("timestamp");
+ String userName = (String) row.get("contributor_username");
+ if (userName != null) {
+ // Sets the timestamp field to be used in windowing.
+ c.outputWithTimestamp(userName, new Instant(timestamp * 1000L));
+ }
+ }
+ }))
+
+ .apply(Window.<String>into(Sessions.withGapDuration(Duration.standardMinutes(1))))
+
+ .apply(Count.<String>perElement());
+
+ PCollection<String> format = output.apply(ParDo.of(new DoFn<KV<String, Long>, String>() {
+ @ProcessElement
+ public void processElement(ProcessContext c) throws Exception {
+ KV<String, Long> el = c.element();
+ String out = "user: " + el.getKey() + " value:" + el.getValue();
+ c.output(out);
+ }
+ }));
+
+ format.apply(TextIO.Write.to(resultPath));
+
+ p.run();
+ }
+}
[37/50] [abbrv] beam git commit: [BEAM-1994] Remove Flink examples
package
Posted by dh...@apache.org.
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/TestFlinkRunner.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/TestFlinkRunner.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/TestFlinkRunner.java
deleted file mode 100644
index 8f50105..0000000
--- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/TestFlinkRunner.java
+++ /dev/null
@@ -1,84 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.beam.runners.flink;
-
-import org.apache.beam.sdk.Pipeline;
-import org.apache.beam.sdk.Pipeline.PipelineExecutionException;
-import org.apache.beam.sdk.PipelineResult;
-import org.apache.beam.sdk.options.PipelineOptions;
-import org.apache.beam.sdk.options.PipelineOptionsFactory;
-import org.apache.beam.sdk.options.PipelineOptionsValidator;
-import org.apache.beam.sdk.runners.PipelineRunner;
-import org.apache.beam.sdk.util.UserCodeException;
-
-/**
- * Test Flink runner.
- */
-public class TestFlinkRunner extends PipelineRunner<PipelineResult> {
-
- private FlinkRunner delegate;
-
- private TestFlinkRunner(FlinkPipelineOptions options) {
- // We use [auto] for testing since this will make it pick up the Testing ExecutionEnvironment
- options.setFlinkMaster("[auto]");
- this.delegate = FlinkRunner.fromOptions(options);
- }
-
- public static TestFlinkRunner fromOptions(PipelineOptions options) {
- FlinkPipelineOptions flinkOptions =
- PipelineOptionsValidator.validate(FlinkPipelineOptions.class, options);
- return new TestFlinkRunner(flinkOptions);
- }
-
- public static TestFlinkRunner create(boolean streaming) {
- FlinkPipelineOptions flinkOptions = PipelineOptionsFactory.as(FlinkPipelineOptions.class);
- flinkOptions.setRunner(TestFlinkRunner.class);
- flinkOptions.setStreaming(streaming);
- return TestFlinkRunner.fromOptions(flinkOptions);
- }
-
- @Override
- public PipelineResult run(Pipeline pipeline) {
- try {
- return delegate.run(pipeline);
- } catch (Throwable t) {
- // Special case hack to pull out assertion errors from PAssert; instead there should
- // probably be a better story along the lines of UserCodeException.
- UserCodeException innermostUserCodeException = null;
- Throwable current = t;
- for (; current.getCause() != null; current = current.getCause()) {
- if (current instanceof UserCodeException) {
- innermostUserCodeException = ((UserCodeException) current);
- }
- }
- if (innermostUserCodeException != null) {
- current = innermostUserCodeException.getCause();
- }
- if (current instanceof AssertionError) {
- throw (AssertionError) current;
- }
- throw new PipelineExecutionException(current);
- }
- }
-
- public PipelineOptions getPipelineOptions() {
- return delegate.getPipelineOptions();
- }
-}
-
-
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/TranslationMode.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/TranslationMode.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/TranslationMode.java
deleted file mode 100644
index ad54750..0000000
--- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/TranslationMode.java
+++ /dev/null
@@ -1,31 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.beam.runners.flink;
-
-/**
- * The translation mode of the Beam Pipeline.
- */
-enum TranslationMode {
-
- /** Uses the batch mode of Flink. */
- BATCH,
-
- /** Uses the streaming mode of Flink. */
- STREAMING
-
-}
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/package-info.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/package-info.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/package-info.java
deleted file mode 100644
index 57f1e59..0000000
--- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/package-info.java
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * Internal implementation of the Beam runner for Apache Flink.
- */
-package org.apache.beam.runners.flink;
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkAggregatorFactory.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkAggregatorFactory.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkAggregatorFactory.java
deleted file mode 100644
index fb2493b..0000000
--- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkAggregatorFactory.java
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.beam.runners.flink.translation.functions;
-
-import org.apache.beam.runners.core.AggregatorFactory;
-import org.apache.beam.runners.core.ExecutionContext;
-import org.apache.beam.runners.flink.translation.wrappers.SerializableFnAggregatorWrapper;
-import org.apache.beam.sdk.transforms.Aggregator;
-import org.apache.beam.sdk.transforms.Combine;
-import org.apache.flink.api.common.functions.RuntimeContext;
-
-/**
- * A {@link AggregatorFactory} for the Flink Batch Runner.
- */
-public class FlinkAggregatorFactory implements AggregatorFactory{
-
- private final RuntimeContext runtimeContext;
-
- public FlinkAggregatorFactory(RuntimeContext runtimeContext) {
- this.runtimeContext = runtimeContext;
- }
-
- @Override
- public <InputT, AccumT, OutputT> Aggregator<InputT, OutputT> createAggregatorForDoFn(
- Class<?> fnClass, ExecutionContext.StepContext stepContext, String aggregatorName,
- Combine.CombineFn<InputT, AccumT, OutputT> combine) {
- @SuppressWarnings("unchecked")
- SerializableFnAggregatorWrapper<InputT, OutputT> result =
- (SerializableFnAggregatorWrapper<InputT, OutputT>)
- runtimeContext.getAccumulator(aggregatorName);
-
- if (result == null) {
- result = new SerializableFnAggregatorWrapper<>(combine);
- runtimeContext.addAccumulator(aggregatorName, result);
- }
- return result;
- }
-}
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkAssignContext.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkAssignContext.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkAssignContext.java
deleted file mode 100644
index 447b1e5..0000000
--- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkAssignContext.java
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.beam.runners.flink.translation.functions;
-
-import static com.google.common.base.Preconditions.checkArgument;
-
-import com.google.common.collect.Iterables;
-import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
-import org.apache.beam.sdk.transforms.windowing.WindowFn;
-import org.apache.beam.sdk.util.WindowedValue;
-import org.joda.time.Instant;
-
-/**
- * {@link org.apache.beam.sdk.transforms.windowing.WindowFn.AssignContext} for
- * Flink functions.
- */
-class FlinkAssignContext<InputT, W extends BoundedWindow>
- extends WindowFn<InputT, W>.AssignContext {
- private final WindowedValue<InputT> value;
-
- FlinkAssignContext(WindowFn<InputT, W> fn, WindowedValue<InputT> value) {
- fn.super();
- checkArgument(
- Iterables.size(value.getWindows()) == 1,
- String.format(
- "%s passed to window assignment must be in a single window, but it was in %s: %s",
- WindowedValue.class.getSimpleName(),
- Iterables.size(value.getWindows()),
- value.getWindows()));
- this.value = value;
- }
-
- @Override
- public InputT element() {
- return value.getValue();
- }
-
- @Override
- public Instant timestamp() {
- return value.getTimestamp();
- }
-
- @Override
- public BoundedWindow window() {
- return Iterables.getOnlyElement(value.getWindows());
- }
-
-}
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkAssignWindows.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkAssignWindows.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkAssignWindows.java
deleted file mode 100644
index c3a5095..0000000
--- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkAssignWindows.java
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.beam.runners.flink.translation.functions;
-
-import java.util.Collection;
-import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
-import org.apache.beam.sdk.transforms.windowing.WindowFn;
-import org.apache.beam.sdk.util.WindowedValue;
-import org.apache.flink.api.common.functions.FlatMapFunction;
-import org.apache.flink.util.Collector;
-
-/**
- * Flink {@link FlatMapFunction} for implementing
- * {@link org.apache.beam.sdk.transforms.windowing.Window.Assign}.
- */
-public class FlinkAssignWindows<T, W extends BoundedWindow>
- implements FlatMapFunction<WindowedValue<T>, WindowedValue<T>> {
-
- private final WindowFn<T, W> windowFn;
-
- public FlinkAssignWindows(WindowFn<T, W> windowFn) {
- this.windowFn = windowFn;
- }
-
- @Override
- public void flatMap(
- WindowedValue<T> input, Collector<WindowedValue<T>> collector) throws Exception {
- Collection<W> windows = windowFn.assignWindows(new FlinkAssignContext<>(windowFn, input));
- for (W window: windows) {
- collector.collect(
- WindowedValue.of(input.getValue(), input.getTimestamp(), window, input.getPane()));
- }
- }
-}
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkDoFnFunction.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkDoFnFunction.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkDoFnFunction.java
deleted file mode 100644
index 51582af..0000000
--- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkDoFnFunction.java
+++ /dev/null
@@ -1,161 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.beam.runners.flink.translation.functions;
-
-import java.util.Collections;
-import java.util.Map;
-import org.apache.beam.runners.core.DoFnRunner;
-import org.apache.beam.runners.core.DoFnRunners;
-import org.apache.beam.runners.flink.translation.utils.SerializedPipelineOptions;
-import org.apache.beam.sdk.options.PipelineOptions;
-import org.apache.beam.sdk.transforms.DoFn;
-import org.apache.beam.sdk.transforms.join.RawUnionValue;
-import org.apache.beam.sdk.transforms.reflect.DoFnInvoker;
-import org.apache.beam.sdk.transforms.reflect.DoFnInvokers;
-import org.apache.beam.sdk.util.WindowedValue;
-import org.apache.beam.sdk.util.WindowingStrategy;
-import org.apache.beam.sdk.values.PCollectionView;
-import org.apache.beam.sdk.values.TupleTag;
-import org.apache.flink.api.common.functions.RichMapPartitionFunction;
-import org.apache.flink.api.common.functions.RuntimeContext;
-import org.apache.flink.configuration.Configuration;
-import org.apache.flink.util.Collector;
-
-/**
- * Encapsulates a {@link DoFn}
- * inside a Flink {@link org.apache.flink.api.common.functions.RichMapPartitionFunction}.
- *
- * <p>We get a mapping from {@link org.apache.beam.sdk.values.TupleTag} to output index
- * and must tag all outputs with the output number. Afterwards a filter will filter out
- * those elements that are not to be in a specific output.
- */
-public class FlinkDoFnFunction<InputT, OutputT>
- extends RichMapPartitionFunction<WindowedValue<InputT>, WindowedValue<OutputT>> {
-
- private final SerializedPipelineOptions serializedOptions;
-
- private final DoFn<InputT, OutputT> doFn;
- private final Map<PCollectionView<?>, WindowingStrategy<?, ?>> sideInputs;
-
- private final WindowingStrategy<?, ?> windowingStrategy;
-
- private final Map<TupleTag<?>, Integer> outputMap;
- private final TupleTag<OutputT> mainOutputTag;
-
- private transient DoFnInvoker<InputT, OutputT> doFnInvoker;
-
- public FlinkDoFnFunction(
- DoFn<InputT, OutputT> doFn,
- WindowingStrategy<?, ?> windowingStrategy,
- Map<PCollectionView<?>, WindowingStrategy<?, ?>> sideInputs,
- PipelineOptions options,
- Map<TupleTag<?>, Integer> outputMap,
- TupleTag<OutputT> mainOutputTag) {
-
- this.doFn = doFn;
- this.sideInputs = sideInputs;
- this.serializedOptions = new SerializedPipelineOptions(options);
- this.windowingStrategy = windowingStrategy;
- this.outputMap = outputMap;
- this.mainOutputTag = mainOutputTag;
-
- }
-
- @Override
- public void mapPartition(
- Iterable<WindowedValue<InputT>> values,
- Collector<WindowedValue<OutputT>> out) throws Exception {
-
- RuntimeContext runtimeContext = getRuntimeContext();
-
- DoFnRunners.OutputManager outputManager;
- if (outputMap == null) {
- outputManager = new FlinkDoFnFunction.DoFnOutputManager(out);
- } else {
- // it has some additional outputs
- outputManager =
- new FlinkDoFnFunction.MultiDoFnOutputManager((Collector) out, outputMap);
- }
-
- DoFnRunner<InputT, OutputT> doFnRunner = DoFnRunners.simpleRunner(
- serializedOptions.getPipelineOptions(), doFn,
- new FlinkSideInputReader(sideInputs, runtimeContext),
- outputManager,
- mainOutputTag,
- // see SimpleDoFnRunner, just use it to limit number of additional outputs
- Collections.<TupleTag<?>>emptyList(),
- new FlinkNoOpStepContext(),
- new FlinkAggregatorFactory(runtimeContext),
- windowingStrategy);
-
- doFnRunner.startBundle();
-
- for (WindowedValue<InputT> value : values) {
- doFnRunner.processElement(value);
- }
-
- doFnRunner.finishBundle();
- }
-
- @Override
- public void open(Configuration parameters) throws Exception {
- doFnInvoker = DoFnInvokers.invokerFor(doFn);
- doFnInvoker.invokeSetup();
- }
-
- @Override
- public void close() throws Exception {
- doFnInvoker.invokeTeardown();
- }
-
- static class DoFnOutputManager
- implements DoFnRunners.OutputManager {
-
- private Collector collector;
-
- DoFnOutputManager(Collector collector) {
- this.collector = collector;
- }
-
- @Override
- @SuppressWarnings("unchecked")
- public <T> void output(TupleTag<T> tag, WindowedValue<T> output) {
- collector.collect(output);
- }
- }
-
- static class MultiDoFnOutputManager
- implements DoFnRunners.OutputManager {
-
- private Collector<WindowedValue<RawUnionValue>> collector;
- private Map<TupleTag<?>, Integer> outputMap;
-
- MultiDoFnOutputManager(Collector<WindowedValue<RawUnionValue>> collector,
- Map<TupleTag<?>, Integer> outputMap) {
- this.collector = collector;
- this.outputMap = outputMap;
- }
-
- @Override
- public <T> void output(TupleTag<T> tag, WindowedValue<T> output) {
- collector.collect(WindowedValue.of(new RawUnionValue(outputMap.get(tag), output.getValue()),
- output.getTimestamp(), output.getWindows(), output.getPane()));
- }
- }
-
-}
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkMergingNonShuffleReduceFunction.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkMergingNonShuffleReduceFunction.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkMergingNonShuffleReduceFunction.java
deleted file mode 100644
index 26fd0b4..0000000
--- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkMergingNonShuffleReduceFunction.java
+++ /dev/null
@@ -1,228 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.beam.runners.flink.translation.functions;
-
-import com.google.common.collect.Iterables;
-import com.google.common.collect.Lists;
-import java.util.Collections;
-import java.util.Comparator;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-import org.apache.beam.runners.core.PerKeyCombineFnRunner;
-import org.apache.beam.runners.core.PerKeyCombineFnRunners;
-import org.apache.beam.runners.flink.translation.utils.SerializedPipelineOptions;
-import org.apache.beam.sdk.options.PipelineOptions;
-import org.apache.beam.sdk.transforms.CombineFnBase;
-import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
-import org.apache.beam.sdk.transforms.windowing.IntervalWindow;
-import org.apache.beam.sdk.transforms.windowing.OutputTimeFn;
-import org.apache.beam.sdk.transforms.windowing.PaneInfo;
-import org.apache.beam.sdk.util.WindowedValue;
-import org.apache.beam.sdk.util.WindowingStrategy;
-import org.apache.beam.sdk.values.KV;
-import org.apache.beam.sdk.values.PCollectionView;
-import org.apache.flink.api.common.functions.RichGroupReduceFunction;
-import org.apache.flink.util.Collector;
-import org.joda.time.Instant;
-
-/**
- * Special version of {@link FlinkReduceFunction} that supports merging windows. This
- * assumes that the windows are {@link IntervalWindow IntervalWindows} and exhibits the
- * same behaviour as {@code MergeOverlappingIntervalWindows}.
- *
- * <p>This is different from the pair of function for the non-merging windows case
- * in that we cannot do combining before the shuffle because elements would not
- * yet be in their correct windows for side-input access.
- */
-public class FlinkMergingNonShuffleReduceFunction<
- K, InputT, AccumT, OutputT, W extends IntervalWindow>
- extends RichGroupReduceFunction<WindowedValue<KV<K, InputT>>, WindowedValue<KV<K, OutputT>>> {
-
- private final CombineFnBase.PerKeyCombineFn<K, InputT, AccumT, OutputT> combineFn;
-
- private final WindowingStrategy<?, W> windowingStrategy;
-
- private final Map<PCollectionView<?>, WindowingStrategy<?, ?>> sideInputs;
-
- private final SerializedPipelineOptions serializedOptions;
-
- public FlinkMergingNonShuffleReduceFunction(
- CombineFnBase.PerKeyCombineFn<K, InputT, AccumT, OutputT> keyedCombineFn,
- WindowingStrategy<?, W> windowingStrategy,
- Map<PCollectionView<?>, WindowingStrategy<?, ?>> sideInputs,
- PipelineOptions pipelineOptions) {
-
- this.combineFn = keyedCombineFn;
-
- this.windowingStrategy = windowingStrategy;
- this.sideInputs = sideInputs;
-
- this.serializedOptions = new SerializedPipelineOptions(pipelineOptions);
-
- }
-
- @Override
- public void reduce(
- Iterable<WindowedValue<KV<K, InputT>>> elements,
- Collector<WindowedValue<KV<K, OutputT>>> out) throws Exception {
-
- PipelineOptions options = serializedOptions.getPipelineOptions();
-
- FlinkSideInputReader sideInputReader =
- new FlinkSideInputReader(sideInputs, getRuntimeContext());
-
- PerKeyCombineFnRunner<K, InputT, AccumT, OutputT> combineFnRunner =
- PerKeyCombineFnRunners.create(combineFn);
-
- @SuppressWarnings("unchecked")
- OutputTimeFn<? super BoundedWindow> outputTimeFn =
- (OutputTimeFn<? super BoundedWindow>) windowingStrategy.getOutputTimeFn();
-
- // get all elements so that we can sort them, has to fit into
- // memory
- // this seems very unprudent, but correct, for now
- List<WindowedValue<KV<K, InputT>>> sortedInput = Lists.newArrayList();
- for (WindowedValue<KV<K, InputT>> inputValue : elements) {
- for (WindowedValue<KV<K, InputT>> exploded : inputValue.explodeWindows()) {
- sortedInput.add(exploded);
- }
- }
- Collections.sort(sortedInput, new Comparator<WindowedValue<KV<K, InputT>>>() {
- @Override
- public int compare(
- WindowedValue<KV<K, InputT>> o1,
- WindowedValue<KV<K, InputT>> o2) {
- return Iterables.getOnlyElement(o1.getWindows()).maxTimestamp()
- .compareTo(Iterables.getOnlyElement(o2.getWindows()).maxTimestamp());
- }
- });
-
- // merge windows, we have to do it in an extra pre-processing step and
- // can't do it as we go since the window of early elements would not
- // be correct when calling the CombineFn
- mergeWindow(sortedInput);
-
- // iterate over the elements that are sorted by window timestamp
- final Iterator<WindowedValue<KV<K, InputT>>> iterator = sortedInput.iterator();
-
- // create accumulator using the first elements key
- WindowedValue<KV<K, InputT>> currentValue = iterator.next();
- K key = currentValue.getValue().getKey();
- IntervalWindow currentWindow =
- (IntervalWindow) Iterables.getOnlyElement(currentValue.getWindows());
- InputT firstValue = currentValue.getValue().getValue();
- AccumT accumulator =
- combineFnRunner.createAccumulator(key, options, sideInputReader, currentValue.getWindows());
- accumulator = combineFnRunner.addInput(key, accumulator, firstValue,
- options, sideInputReader, currentValue.getWindows());
-
- // we use this to keep track of the timestamps assigned by the OutputTimeFn
- Instant windowTimestamp =
- outputTimeFn.assignOutputTime(currentValue.getTimestamp(), currentWindow);
-
- while (iterator.hasNext()) {
- WindowedValue<KV<K, InputT>> nextValue = iterator.next();
- IntervalWindow nextWindow =
- (IntervalWindow) Iterables.getOnlyElement(nextValue.getWindows());
-
- if (currentWindow.equals(nextWindow)) {
- // continue accumulating and merge windows
-
- InputT value = nextValue.getValue().getValue();
- accumulator = combineFnRunner.addInput(key, accumulator, value,
- options, sideInputReader, currentValue.getWindows());
-
- windowTimestamp = outputTimeFn.combine(
- windowTimestamp,
- outputTimeFn.assignOutputTime(nextValue.getTimestamp(), currentWindow));
-
- } else {
- // emit the value that we currently have
- out.collect(
- WindowedValue.of(
- KV.of(key, combineFnRunner.extractOutput(key, accumulator,
- options, sideInputReader, currentValue.getWindows())),
- windowTimestamp,
- currentWindow,
- PaneInfo.NO_FIRING));
-
- currentWindow = nextWindow;
- currentValue = nextValue;
- InputT value = nextValue.getValue().getValue();
- accumulator = combineFnRunner.createAccumulator(key,
- options, sideInputReader, currentValue.getWindows());
- accumulator = combineFnRunner.addInput(key, accumulator, value,
- options, sideInputReader, currentValue.getWindows());
- windowTimestamp = outputTimeFn.assignOutputTime(nextValue.getTimestamp(), currentWindow);
- }
-
- }
-
- // emit the final accumulator
- out.collect(
- WindowedValue.of(
- KV.of(key, combineFnRunner.extractOutput(key, accumulator,
- options, sideInputReader, currentValue.getWindows())),
- windowTimestamp,
- currentWindow,
- PaneInfo.NO_FIRING));
- }
-
- /**
- * Merge windows. This assumes that the list of elements is sorted by window-end timestamp.
- * This replaces windows in the input list.
- */
- private void mergeWindow(List<WindowedValue<KV<K, InputT>>> elements) {
- int currentStart = 0;
- IntervalWindow currentWindow =
- (IntervalWindow) Iterables.getOnlyElement(elements.get(0).getWindows());
-
- for (int i = 1; i < elements.size(); i++) {
- WindowedValue<KV<K, InputT>> nextValue = elements.get(i);
- IntervalWindow nextWindow =
- (IntervalWindow) Iterables.getOnlyElement(nextValue.getWindows());
- if (currentWindow.intersects(nextWindow)) {
- // we continue
- currentWindow = currentWindow.span(nextWindow);
- } else {
- // retrofit the merged window to all windows up to "currentStart"
- for (int j = i - 1; j >= currentStart; j--) {
- WindowedValue<KV<K, InputT>> value = elements.get(j);
- elements.set(
- j,
- WindowedValue.of(
- value.getValue(), value.getTimestamp(), currentWindow, value.getPane()));
- }
- currentStart = i;
- currentWindow = nextWindow;
- }
- }
- if (currentStart < elements.size() - 1) {
- // we have to retrofit the last batch
- for (int j = elements.size() - 1; j >= currentStart; j--) {
- WindowedValue<KV<K, InputT>> value = elements.get(j);
- elements.set(
- j,
- WindowedValue.of(
- value.getValue(), value.getTimestamp(), currentWindow, value.getPane()));
- }
- }
- }
-
-}
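The mergeWindow() helper above collapses runs of overlapping interval windows in a list that is sorted by window-end timestamp, then stamps the merged window back onto every element of the run. A minimal standalone sketch of that idea follows, assuming a plain Interval placeholder (not Beam's IntervalWindow) and always retrofitting the final run:

import java.util.ArrayList;
import java.util.List;

public class MergeOverlappingSketch {
  static final class Interval {
    final long start, end; // end-exclusive
    Interval(long start, long end) { this.start = start; this.end = end; }
    boolean intersects(Interval other) { return start < other.end && other.start < end; }
    Interval span(Interval other) {
      return new Interval(Math.min(start, other.start), Math.max(end, other.end));
    }
    @Override public String toString() { return "[" + start + ", " + end + ")"; }
  }

  /** Assumes {@code windows} is sorted by end timestamp; rewrites entries in place. */
  static void mergeInPlace(List<Interval> windows) {
    int currentStart = 0;
    Interval current = windows.get(0);
    for (int i = 1; i < windows.size(); i++) {
      Interval next = windows.get(i);
      if (current.intersects(next)) {
        current = current.span(next);            // keep growing the merged window
      } else {
        for (int j = currentStart; j < i; j++) { // retrofit merged window onto the run
          windows.set(j, current);
        }
        currentStart = i;
        current = next;
      }
    }
    for (int j = currentStart; j < windows.size(); j++) { // retrofit the last run
      windows.set(j, current);
    }
  }

  public static void main(String[] args) {
    List<Interval> windows = new ArrayList<>();
    windows.add(new Interval(0, 10));
    windows.add(new Interval(5, 15));   // overlaps the first -> merged to [0, 15)
    windows.add(new Interval(20, 30));  // disjoint -> starts a new run
    mergeInPlace(windows);
    System.out.println(windows);        // [[0, 15), [0, 15), [20, 30)]
  }
}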
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkMergingPartialReduceFunction.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkMergingPartialReduceFunction.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkMergingPartialReduceFunction.java
deleted file mode 100644
index c68f155..0000000
--- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkMergingPartialReduceFunction.java
+++ /dev/null
@@ -1,201 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.beam.runners.flink.translation.functions;
-
-import com.google.common.collect.Iterables;
-import com.google.common.collect.Lists;
-import java.util.Collections;
-import java.util.Comparator;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-import org.apache.beam.runners.core.PerKeyCombineFnRunner;
-import org.apache.beam.runners.core.PerKeyCombineFnRunners;
-import org.apache.beam.sdk.options.PipelineOptions;
-import org.apache.beam.sdk.transforms.CombineFnBase;
-import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
-import org.apache.beam.sdk.transforms.windowing.IntervalWindow;
-import org.apache.beam.sdk.transforms.windowing.OutputTimeFn;
-import org.apache.beam.sdk.transforms.windowing.PaneInfo;
-import org.apache.beam.sdk.util.WindowedValue;
-import org.apache.beam.sdk.util.WindowingStrategy;
-import org.apache.beam.sdk.values.KV;
-import org.apache.beam.sdk.values.PCollectionView;
-import org.apache.flink.util.Collector;
-import org.joda.time.Instant;
-
-/**
- * Special version of {@link FlinkPartialReduceFunction} that supports merging windows. This
- * assumes that the windows are {@link IntervalWindow IntervalWindows} and exhibits the
- * same behaviour as {@code MergeOverlappingIntervalWindows}.
- */
-public class FlinkMergingPartialReduceFunction<K, InputT, AccumT, W extends IntervalWindow>
- extends FlinkPartialReduceFunction<K, InputT, AccumT, W> {
-
- public FlinkMergingPartialReduceFunction(
- CombineFnBase.PerKeyCombineFn<K, InputT, AccumT, ?> combineFn,
- WindowingStrategy<?, W> windowingStrategy,
- Map<PCollectionView<?>, WindowingStrategy<?, ?>> sideInputs,
- PipelineOptions pipelineOptions) {
- super(combineFn, windowingStrategy, sideInputs, pipelineOptions);
- }
-
- @Override
- public void combine(
- Iterable<WindowedValue<KV<K, InputT>>> elements,
- Collector<WindowedValue<KV<K, AccumT>>> out) throws Exception {
-
- PipelineOptions options = serializedOptions.getPipelineOptions();
-
- FlinkSideInputReader sideInputReader =
- new FlinkSideInputReader(sideInputs, getRuntimeContext());
-
- PerKeyCombineFnRunner<K, InputT, AccumT, ?> combineFnRunner =
- PerKeyCombineFnRunners.create(combineFn);
-
- @SuppressWarnings("unchecked")
- OutputTimeFn<? super BoundedWindow> outputTimeFn =
- (OutputTimeFn<? super BoundedWindow>) windowingStrategy.getOutputTimeFn();
-
- // get all elements so that we can sort them, has to fit into
- // memory
- // this seems very imprudent, but correct, for now
- List<WindowedValue<KV<K, InputT>>> sortedInput = Lists.newArrayList();
- for (WindowedValue<KV<K, InputT>> inputValue : elements) {
- for (WindowedValue<KV<K, InputT>> exploded : inputValue.explodeWindows()) {
- sortedInput.add(exploded);
- }
- }
- Collections.sort(sortedInput, new Comparator<WindowedValue<KV<K, InputT>>>() {
- @Override
- public int compare(
- WindowedValue<KV<K, InputT>> o1,
- WindowedValue<KV<K, InputT>> o2) {
- return Iterables.getOnlyElement(o1.getWindows()).maxTimestamp()
- .compareTo(Iterables.getOnlyElement(o2.getWindows()).maxTimestamp());
- }
- });
-
- // merge windows, we have to do it in an extra pre-processing step and
- // can't do it as we go since the window of early elements would not
- // be correct when calling the CombineFn
- mergeWindow(sortedInput);
-
- // iterate over the elements that are sorted by window timestamp
- final Iterator<WindowedValue<KV<K, InputT>>> iterator = sortedInput.iterator();
-
- // create accumulator using the first elements key
- WindowedValue<KV<K, InputT>> currentValue = iterator.next();
- K key = currentValue.getValue().getKey();
- IntervalWindow currentWindow =
- (IntervalWindow) Iterables.getOnlyElement(currentValue.getWindows());
- InputT firstValue = currentValue.getValue().getValue();
- AccumT accumulator = combineFnRunner.createAccumulator(key,
- options, sideInputReader, currentValue.getWindows());
- accumulator = combineFnRunner.addInput(key, accumulator, firstValue,
- options, sideInputReader, currentValue.getWindows());
-
- // we use this to keep track of the timestamps assigned by the OutputTimeFn
- Instant windowTimestamp =
- outputTimeFn.assignOutputTime(currentValue.getTimestamp(), currentWindow);
-
- while (iterator.hasNext()) {
- WindowedValue<KV<K, InputT>> nextValue = iterator.next();
- IntervalWindow nextWindow = (IntervalWindow) Iterables.getOnlyElement(nextValue.getWindows());
-
- if (currentWindow.equals(nextWindow)) {
- // continue accumulating and merge windows
-
- InputT value = nextValue.getValue().getValue();
- accumulator = combineFnRunner.addInput(key, accumulator, value,
- options, sideInputReader, currentValue.getWindows());
-
- windowTimestamp = outputTimeFn.combine(
- windowTimestamp,
- outputTimeFn.assignOutputTime(nextValue.getTimestamp(), currentWindow));
-
- } else {
- // emit the value that we currently have
- out.collect(
- WindowedValue.of(
- KV.of(key, accumulator),
- windowTimestamp,
- currentWindow,
- PaneInfo.NO_FIRING));
-
- currentWindow = nextWindow;
- currentValue = nextValue;
- InputT value = nextValue.getValue().getValue();
- accumulator = combineFnRunner.createAccumulator(key,
- options, sideInputReader, currentValue.getWindows());
- accumulator = combineFnRunner.addInput(key, accumulator, value,
- options, sideInputReader, currentValue.getWindows());
- windowTimestamp = outputTimeFn.assignOutputTime(nextValue.getTimestamp(), currentWindow);
- }
- }
-
- // emit the final accumulator
- out.collect(
- WindowedValue.of(
- KV.of(key, accumulator),
- windowTimestamp,
- currentWindow,
- PaneInfo.NO_FIRING));
- }
-
- /**
- * Merge windows. This assumes that the list of elements is sorted by window-end timestamp.
- * This replaces windows in the input list.
- */
- private void mergeWindow(List<WindowedValue<KV<K, InputT>>> elements) {
- int currentStart = 0;
- IntervalWindow currentWindow =
- (IntervalWindow) Iterables.getOnlyElement(elements.get(0).getWindows());
-
- for (int i = 1; i < elements.size(); i++) {
- WindowedValue<KV<K, InputT>> nextValue = elements.get(i);
- IntervalWindow nextWindow =
- (IntervalWindow) Iterables.getOnlyElement(nextValue.getWindows());
- if (currentWindow.intersects(nextWindow)) {
- // we continue
- currentWindow = currentWindow.span(nextWindow);
- } else {
- // retrofit the merged window to all windows up to "currentStart"
- for (int j = i - 1; j >= currentStart; j--) {
- WindowedValue<KV<K, InputT>> value = elements.get(j);
- elements.set(
- j,
- WindowedValue.of(
- value.getValue(), value.getTimestamp(), currentWindow, value.getPane()));
- }
- currentStart = i;
- currentWindow = nextWindow;
- }
- }
- if (currentStart < elements.size() - 1) {
- // we have to retrofit the last batch
- for (int j = elements.size() - 1; j >= currentStart; j--) {
- WindowedValue<KV<K, InputT>> value = elements.get(j);
- elements.set(
- j,
- WindowedValue.of(
- value.getValue(), value.getTimestamp(), currentWindow, value.getPane()));
- }
- }
- }
-}
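The combine(...) loop in FlinkMergingPartialReduceFunction relies on the input being sorted so that values of the same window are adjacent: it keeps one accumulator per run of equal windows and emits whenever the window changes. A minimal sketch of that emit-on-window-change pattern, with string window labels and an integer sum standing in for IntervalWindow and the user's CombineFn:

import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

public class PerWindowCombineSketch {
  /** Assumes parallel lists, non-empty, with equal window labels adjacent. */
  static Map<String, Integer> combineSorted(List<String> windows, List<Integer> values) {
    Map<String, Integer> out = new LinkedHashMap<>();
    String currentWindow = windows.get(0);
    int accumulator = values.get(0);
    for (int i = 1; i < windows.size(); i++) {
      if (windows.get(i).equals(currentWindow)) {
        accumulator += values.get(i);          // same window: keep accumulating
      } else {
        out.put(currentWindow, accumulator);   // window changed: emit and reset
        currentWindow = windows.get(i);
        accumulator = values.get(i);
      }
    }
    out.put(currentWindow, accumulator);       // emit the final accumulator
    return out;
  }

  public static void main(String[] args) {
    List<String> windows = List.of("w1", "w1", "w2", "w2", "w2");
    List<Integer> values = List.of(1, 2, 3, 4, 5);
    System.out.println(combineSorted(windows, values)); // {w1=3, w2=12}
  }
}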
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkMergingReduceFunction.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkMergingReduceFunction.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkMergingReduceFunction.java
deleted file mode 100644
index 84b3adc..0000000
--- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkMergingReduceFunction.java
+++ /dev/null
@@ -1,199 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.beam.runners.flink.translation.functions;
-
-import com.google.common.collect.ImmutableList;
-import com.google.common.collect.Iterables;
-import com.google.common.collect.Lists;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.Comparator;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-import org.apache.beam.runners.core.PerKeyCombineFnRunner;
-import org.apache.beam.runners.core.PerKeyCombineFnRunners;
-import org.apache.beam.sdk.options.PipelineOptions;
-import org.apache.beam.sdk.transforms.CombineFnBase;
-import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
-import org.apache.beam.sdk.transforms.windowing.IntervalWindow;
-import org.apache.beam.sdk.transforms.windowing.OutputTimeFn;
-import org.apache.beam.sdk.transforms.windowing.PaneInfo;
-import org.apache.beam.sdk.util.WindowedValue;
-import org.apache.beam.sdk.util.WindowingStrategy;
-import org.apache.beam.sdk.values.KV;
-import org.apache.beam.sdk.values.PCollectionView;
-import org.apache.flink.util.Collector;
-import org.joda.time.Instant;
-
-/**
- * Special version of {@link FlinkReduceFunction} that supports merging windows. This
- * assumes that the windows are {@link IntervalWindow IntervalWindows} and exhibits the
- * same behaviour as {@code MergeOverlappingIntervalWindows}.
- */
-public class FlinkMergingReduceFunction<K, AccumT, OutputT, W extends IntervalWindow>
- extends FlinkReduceFunction<K, AccumT, OutputT, W> {
-
- public FlinkMergingReduceFunction(
- CombineFnBase.PerKeyCombineFn<K, ?, AccumT, OutputT> keyedCombineFn,
- WindowingStrategy<?, W> windowingStrategy,
- Map<PCollectionView<?>, WindowingStrategy<?, ?>> sideInputs,
- PipelineOptions pipelineOptions) {
- super(keyedCombineFn, windowingStrategy, sideInputs, pipelineOptions);
- }
-
- @Override
- public void reduce(
- Iterable<WindowedValue<KV<K, AccumT>>> elements,
- Collector<WindowedValue<KV<K, OutputT>>> out) throws Exception {
-
- PipelineOptions options = serializedOptions.getPipelineOptions();
-
- FlinkSideInputReader sideInputReader =
- new FlinkSideInputReader(sideInputs, getRuntimeContext());
-
- PerKeyCombineFnRunner<K, ?, AccumT, OutputT> combineFnRunner =
- PerKeyCombineFnRunners.create(combineFn);
-
- @SuppressWarnings("unchecked")
- OutputTimeFn<? super BoundedWindow> outputTimeFn =
- (OutputTimeFn<? super BoundedWindow>) windowingStrategy.getOutputTimeFn();
-
- // get all elements so that we can sort them, has to fit into
- // memory
- // this seems very imprudent, but correct, for now
- ArrayList<WindowedValue<KV<K, AccumT>>> sortedInput = Lists.newArrayList();
- for (WindowedValue<KV<K, AccumT>> inputValue : elements) {
- for (WindowedValue<KV<K, AccumT>> exploded : inputValue.explodeWindows()) {
- sortedInput.add(exploded);
- }
- }
- Collections.sort(sortedInput, new Comparator<WindowedValue<KV<K, AccumT>>>() {
- @Override
- public int compare(
- WindowedValue<KV<K, AccumT>> o1,
- WindowedValue<KV<K, AccumT>> o2) {
- return Iterables.getOnlyElement(o1.getWindows()).maxTimestamp()
- .compareTo(Iterables.getOnlyElement(o2.getWindows()).maxTimestamp());
- }
- });
-
- // merge windows, we have to do it in an extra pre-processing step and
- // can't do it as we go since the window of early elements would not
- // be correct when calling the CombineFn
- mergeWindow(sortedInput);
-
- // iterate over the elements that are sorted by window timestamp
- final Iterator<WindowedValue<KV<K, AccumT>>> iterator = sortedInput.iterator();
-
- // get the first accumulator
- WindowedValue<KV<K, AccumT>> currentValue = iterator.next();
- K key = currentValue.getValue().getKey();
- IntervalWindow currentWindow =
- (IntervalWindow) Iterables.getOnlyElement(currentValue.getWindows());
- AccumT accumulator = currentValue.getValue().getValue();
-
- // we use this to keep track of the timestamps assigned by the OutputTimeFn,
- // in FlinkPartialReduceFunction we already merge the timestamps assigned
- // to individual elements, here we just merge them
- List<Instant> windowTimestamps = new ArrayList<>();
- windowTimestamps.add(currentValue.getTimestamp());
-
- while (iterator.hasNext()) {
- WindowedValue<KV<K, AccumT>> nextValue = iterator.next();
- IntervalWindow nextWindow =
- (IntervalWindow) Iterables.getOnlyElement(nextValue.getWindows());
-
- if (nextWindow.equals(currentWindow)) {
- // continue accumulating and merge windows
-
- accumulator = combineFnRunner.mergeAccumulators(
- key, ImmutableList.of(accumulator, nextValue.getValue().getValue()),
- options, sideInputReader, currentValue.getWindows());
-
- windowTimestamps.add(nextValue.getTimestamp());
- } else {
- out.collect(
- WindowedValue.of(
- KV.of(key, combineFnRunner.extractOutput(key, accumulator,
- options, sideInputReader, currentValue.getWindows())),
- outputTimeFn.merge(currentWindow, windowTimestamps),
- currentWindow,
- PaneInfo.NO_FIRING));
-
- windowTimestamps.clear();
-
- currentWindow = nextWindow;
- currentValue = nextValue;
- accumulator = nextValue.getValue().getValue();
- windowTimestamps.add(nextValue.getTimestamp());
- }
- }
-
- // emit the final accumulator
- out.collect(
- WindowedValue.of(
- KV.of(key, combineFnRunner.extractOutput(key, accumulator,
- options, sideInputReader, currentValue.getWindows())),
- outputTimeFn.merge(currentWindow, windowTimestamps),
- currentWindow,
- PaneInfo.NO_FIRING));
- }
-
- /**
- * Merge windows. This assumes that the list of elements is sorted by window-end timestamp.
- * This replaces windows in the input list.
- */
- private void mergeWindow(List<WindowedValue<KV<K, AccumT>>> elements) {
- int currentStart = 0;
- IntervalWindow currentWindow =
- (IntervalWindow) Iterables.getOnlyElement(elements.get(0).getWindows());
-
- for (int i = 1; i < elements.size(); i++) {
- WindowedValue<KV<K, AccumT>> nextValue = elements.get(i);
- IntervalWindow nextWindow =
- (IntervalWindow) Iterables.getOnlyElement(nextValue.getWindows());
- if (currentWindow.intersects(nextWindow)) {
- // we continue
- currentWindow = currentWindow.span(nextWindow);
- } else {
- // retrofit the merged window to all windows up to "currentStart"
- for (int j = i - 1; j >= currentStart; j--) {
- WindowedValue<KV<K, AccumT>> value = elements.get(j);
- elements.set(
- j,
- WindowedValue.of(
- value.getValue(), value.getTimestamp(), currentWindow, value.getPane()));
- }
- currentStart = i;
- currentWindow = nextWindow;
- }
- }
- if (currentStart < elements.size() - 1) {
- // we have to retrofit the last batch
- for (int j = elements.size() - 1; j >= currentStart; j--) {
- WindowedValue<KV<K, AccumT>> value = elements.get(j);
- elements.set(
- j,
- WindowedValue.of(
- value.getValue(), value.getTimestamp(), currentWindow, value.getPane()));
- }
- }
- }
-
-}
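FlinkMergingReduceFunction receives pre-combined accumulators, so per window it only needs mergeAccumulators followed by extractOutput. A hedged sketch of that final stage, with a (sum, count) mean accumulator standing in for the user's CombineFn and the PerKeyCombineFnRunner calls:

import java.util.List;

public class FinalReduceSketch {
  static final class SumCount {
    final long sum, count;
    SumCount(long sum, long count) { this.sum = sum; this.count = count; }
  }

  /** Merge pre-combined accumulators for one key and one window. */
  static SumCount mergeAccumulators(List<SumCount> partials) {
    long sum = 0, count = 0;
    for (SumCount p : partials) { sum += p.sum; count += p.count; }
    return new SumCount(sum, count);
  }

  /** Extract the final output (here: the mean) from the merged accumulator. */
  static double extractOutput(SumCount acc) {
    return acc.count == 0 ? 0.0 : (double) acc.sum / acc.count;
  }

  public static void main(String[] args) {
    // Two partial accumulators produced before the shuffle, merged afterwards.
    List<SumCount> partials = List.of(new SumCount(10, 2), new SumCount(5, 3));
    System.out.println(extractOutput(mergeAccumulators(partials))); // 3.0
  }
}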
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkMultiOutputPruningFunction.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkMultiOutputPruningFunction.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkMultiOutputPruningFunction.java
deleted file mode 100644
index 9071cc5..0000000
--- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkMultiOutputPruningFunction.java
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.beam.runners.flink.translation.functions;
-
-import org.apache.beam.sdk.transforms.join.RawUnionValue;
-import org.apache.beam.sdk.util.WindowedValue;
-import org.apache.flink.api.common.functions.FlatMapFunction;
-import org.apache.flink.util.Collector;
-
-/**
- * A {@link FlatMapFunction} function that filters out those elements that don't belong in this
- * output. We need this to implement MultiOutput ParDo functions in combination with
- * {@link FlinkDoFnFunction}.
- */
-public class FlinkMultiOutputPruningFunction<T>
- implements FlatMapFunction<WindowedValue<RawUnionValue>, WindowedValue<T>> {
-
- private final int ourOutputTag;
-
- public FlinkMultiOutputPruningFunction(int ourOutputTag) {
- this.ourOutputTag = ourOutputTag;
- }
-
- @Override
- @SuppressWarnings("unchecked")
- public void flatMap(
- WindowedValue<RawUnionValue> windowedValue,
- Collector<WindowedValue<T>> collector) throws Exception {
- int unionTag = windowedValue.getValue().getUnionTag();
- if (unionTag == ourOutputTag) {
- collector.collect(
- (WindowedValue<T>) windowedValue.withValue(windowedValue.getValue().getValue()));
- }
- }
-}
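FlinkMultiOutputPruningFunction keeps only the values whose union tag matches the output it serves. A small sketch of that tag-based filtering, using a TaggedValue placeholder instead of RawUnionValue:

import java.util.List;
import java.util.function.Consumer;

public class OutputPruningSketch {
  static final class TaggedValue {
    final int unionTag; final Object value;
    TaggedValue(int unionTag, Object value) { this.unionTag = unionTag; this.value = value; }
  }

  /** Forward only the values whose tag matches {@code ourOutputTag}. */
  static void prune(List<TaggedValue> input, int ourOutputTag, Consumer<Object> collector) {
    for (TaggedValue v : input) {
      if (v.unionTag == ourOutputTag) {
        collector.accept(v.value);
      }
    }
  }

  public static void main(String[] args) {
    List<TaggedValue> mixed = List.of(
        new TaggedValue(0, "main"), new TaggedValue(1, "side"), new TaggedValue(0, "main2"));
    prune(mixed, 0, System.out::println); // prints: main, main2
  }
}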
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkNoOpStepContext.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkNoOpStepContext.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkNoOpStepContext.java
deleted file mode 100644
index 847a00a..0000000
--- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkNoOpStepContext.java
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.beam.runners.flink.translation.functions;
-
-import java.io.IOException;
-import org.apache.beam.runners.core.ExecutionContext.StepContext;
-import org.apache.beam.runners.core.StateInternals;
-import org.apache.beam.runners.core.TimerInternals;
-import org.apache.beam.sdk.coders.Coder;
-import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
-import org.apache.beam.sdk.util.WindowedValue;
-import org.apache.beam.sdk.values.TupleTag;
-
-/**
- * A {@link StepContext} for Flink Batch Runner execution.
- */
-public class FlinkNoOpStepContext implements StepContext {
-
- @Override
- public String getStepName() {
- return null;
- }
-
- @Override
- public String getTransformName() {
- return null;
- }
-
- @Override
- public void noteOutput(WindowedValue<?> output) {
-
- }
-
- @Override
- public void noteOutput(TupleTag<?> tag, WindowedValue<?> output) {
-
- }
-
- @Override
- public <T, W extends BoundedWindow> void writePCollectionViewData(
- TupleTag<?> tag,
- Iterable<WindowedValue<T>> data,
- Coder<Iterable<WindowedValue<T>>> dataCoder,
- W window,
- Coder<W> windowCoder) throws IOException {
- }
-
- @Override
- public StateInternals<?> stateInternals() {
- return null;
- }
-
- @Override
- public TimerInternals timerInternals() {
- return null;
- }
-}
-
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkPartialReduceFunction.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkPartialReduceFunction.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkPartialReduceFunction.java
deleted file mode 100644
index 1d1ff9f..0000000
--- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkPartialReduceFunction.java
+++ /dev/null
@@ -1,172 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.beam.runners.flink.translation.functions;
-
-import com.google.common.collect.Iterables;
-import com.google.common.collect.Lists;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.Comparator;
-import java.util.Iterator;
-import java.util.Map;
-import org.apache.beam.runners.core.PerKeyCombineFnRunner;
-import org.apache.beam.runners.core.PerKeyCombineFnRunners;
-import org.apache.beam.runners.flink.translation.utils.SerializedPipelineOptions;
-import org.apache.beam.sdk.options.PipelineOptions;
-import org.apache.beam.sdk.transforms.CombineFnBase;
-import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
-import org.apache.beam.sdk.transforms.windowing.OutputTimeFn;
-import org.apache.beam.sdk.transforms.windowing.PaneInfo;
-import org.apache.beam.sdk.util.WindowedValue;
-import org.apache.beam.sdk.util.WindowingStrategy;
-import org.apache.beam.sdk.values.KV;
-import org.apache.beam.sdk.values.PCollectionView;
-import org.apache.flink.api.common.functions.RichGroupCombineFunction;
-import org.apache.flink.util.Collector;
-import org.joda.time.Instant;
-
-/**
- * This is the first step for executing a {@link org.apache.beam.sdk.transforms.Combine.PerKey}
- * on Flink. The second part is {@link FlinkReduceFunction}. This function performs a local
- * combine step before shuffling while the latter does the final combination after a shuffle.
- *
- * <p>The input to {@link #combine(Iterable, Collector)} are elements of the same key but
- * for different windows. We have to ensure that we only combine elements of matching
- * windows.
- */
-public class FlinkPartialReduceFunction<K, InputT, AccumT, W extends BoundedWindow>
- extends RichGroupCombineFunction<WindowedValue<KV<K, InputT>>, WindowedValue<KV<K, AccumT>>> {
-
- protected final CombineFnBase.PerKeyCombineFn<K, InputT, AccumT, ?> combineFn;
-
- protected final WindowingStrategy<?, W> windowingStrategy;
-
- protected final SerializedPipelineOptions serializedOptions;
-
- protected final Map<PCollectionView<?>, WindowingStrategy<?, ?>> sideInputs;
-
- public FlinkPartialReduceFunction(
- CombineFnBase.PerKeyCombineFn<K, InputT, AccumT, ?> combineFn,
- WindowingStrategy<?, W> windowingStrategy,
- Map<PCollectionView<?>, WindowingStrategy<?, ?>> sideInputs,
- PipelineOptions pipelineOptions) {
-
- this.combineFn = combineFn;
- this.windowingStrategy = windowingStrategy;
- this.sideInputs = sideInputs;
- this.serializedOptions = new SerializedPipelineOptions(pipelineOptions);
-
- }
-
- @Override
- public void combine(
- Iterable<WindowedValue<KV<K, InputT>>> elements,
- Collector<WindowedValue<KV<K, AccumT>>> out) throws Exception {
-
- PipelineOptions options = serializedOptions.getPipelineOptions();
-
- FlinkSideInputReader sideInputReader =
- new FlinkSideInputReader(sideInputs, getRuntimeContext());
-
- PerKeyCombineFnRunner<K, InputT, AccumT, ?> combineFnRunner =
- PerKeyCombineFnRunners.create(combineFn);
-
- @SuppressWarnings("unchecked")
- OutputTimeFn<? super BoundedWindow> outputTimeFn =
- (OutputTimeFn<? super BoundedWindow>) windowingStrategy.getOutputTimeFn();
-
- // get all elements so that we can sort them, has to fit into
- // memory
- // this seems very imprudent, but correct, for now
- ArrayList<WindowedValue<KV<K, InputT>>> sortedInput = Lists.newArrayList();
- for (WindowedValue<KV<K, InputT>> inputValue : elements) {
- for (WindowedValue<KV<K, InputT>> exploded : inputValue.explodeWindows()) {
- sortedInput.add(exploded);
- }
- }
- Collections.sort(sortedInput, new Comparator<WindowedValue<KV<K, InputT>>>() {
- @Override
- public int compare(
- WindowedValue<KV<K, InputT>> o1,
- WindowedValue<KV<K, InputT>> o2) {
- return Iterables.getOnlyElement(o1.getWindows()).maxTimestamp()
- .compareTo(Iterables.getOnlyElement(o2.getWindows()).maxTimestamp());
- }
- });
-
- // iterate over the elements that are sorted by window timestamp
- //
- final Iterator<WindowedValue<KV<K, InputT>>> iterator = sortedInput.iterator();
-
- // create accumulator using the first elements key
- WindowedValue<KV<K, InputT>> currentValue = iterator.next();
- K key = currentValue.getValue().getKey();
- BoundedWindow currentWindow = Iterables.getFirst(currentValue.getWindows(), null);
- InputT firstValue = currentValue.getValue().getValue();
- AccumT accumulator = combineFnRunner.createAccumulator(key,
- options, sideInputReader, currentValue.getWindows());
- accumulator = combineFnRunner.addInput(key, accumulator, firstValue,
- options, sideInputReader, currentValue.getWindows());
-
- // we use this to keep track of the timestamps assigned by the OutputTimeFn
- Instant windowTimestamp =
- outputTimeFn.assignOutputTime(currentValue.getTimestamp(), currentWindow);
-
- while (iterator.hasNext()) {
- WindowedValue<KV<K, InputT>> nextValue = iterator.next();
- BoundedWindow nextWindow = Iterables.getOnlyElement(nextValue.getWindows());
-
- if (nextWindow.equals(currentWindow)) {
- // continue accumulating
- InputT value = nextValue.getValue().getValue();
- accumulator = combineFnRunner.addInput(key, accumulator, value,
- options, sideInputReader, currentValue.getWindows());
-
- windowTimestamp = outputTimeFn.combine(
- windowTimestamp,
- outputTimeFn.assignOutputTime(nextValue.getTimestamp(), currentWindow));
-
- } else {
- // emit the value that we currently have
- out.collect(
- WindowedValue.of(
- KV.of(key, accumulator),
- windowTimestamp,
- currentWindow,
- PaneInfo.NO_FIRING));
-
- currentWindow = nextWindow;
- currentValue = nextValue;
- InputT value = nextValue.getValue().getValue();
- accumulator = combineFnRunner.createAccumulator(key,
- options, sideInputReader, currentValue.getWindows());
- accumulator = combineFnRunner.addInput(key, accumulator, value,
- options, sideInputReader, currentValue.getWindows());
- windowTimestamp = outputTimeFn.assignOutputTime(nextValue.getTimestamp(), currentWindow);
- }
- }
-
- // emit the final accumulator
- out.collect(
- WindowedValue.of(
- KV.of(key, accumulator),
- windowTimestamp,
- currentWindow,
- PaneInfo.NO_FIRING));
- }
-}
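The FlinkPartialReduceFunction javadoc describes the split of Combine.PerKey into a local pre-combine before the shuffle and a final combine after it (FlinkReduceFunction). A minimal sketch of that two-stage shape, with integer sums standing in for the CombineFn accumulators:

import java.util.ArrayList;
import java.util.List;

public class TwoStageCombineSketch {
  /** Stage 1: combine the elements of one local shard into a single partial sum. */
  static long partialCombine(List<Long> shard) {
    long acc = 0;
    for (long v : shard) { acc += v; }
    return acc;
  }

  /** Stage 2: merge the shuffled partial sums into the final result. */
  static long mergePartials(List<Long> partials) {
    long acc = 0;
    for (long p : partials) { acc += p; }
    return acc;
  }

  public static void main(String[] args) {
    List<List<Long>> shards = List.of(List.of(1L, 2L, 3L), List.of(4L, 5L));
    List<Long> partials = new ArrayList<>();
    for (List<Long> shard : shards) {
      partials.add(partialCombine(shard)); // runs before the shuffle, once per shard
    }
    System.out.println(mergePartials(partials)); // 15
  }
}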
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkReduceFunction.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkReduceFunction.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkReduceFunction.java
deleted file mode 100644
index 3e4f742..0000000
--- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkReduceFunction.java
+++ /dev/null
@@ -1,173 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.beam.runners.flink.translation.functions;
-
-import com.google.common.collect.ImmutableList;
-import com.google.common.collect.Iterables;
-import com.google.common.collect.Lists;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.Comparator;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-import org.apache.beam.runners.core.PerKeyCombineFnRunner;
-import org.apache.beam.runners.core.PerKeyCombineFnRunners;
-import org.apache.beam.runners.flink.translation.utils.SerializedPipelineOptions;
-import org.apache.beam.sdk.options.PipelineOptions;
-import org.apache.beam.sdk.transforms.CombineFnBase;
-import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
-import org.apache.beam.sdk.transforms.windowing.OutputTimeFn;
-import org.apache.beam.sdk.transforms.windowing.PaneInfo;
-import org.apache.beam.sdk.util.WindowedValue;
-import org.apache.beam.sdk.util.WindowingStrategy;
-import org.apache.beam.sdk.values.KV;
-import org.apache.beam.sdk.values.PCollectionView;
-import org.apache.flink.api.common.functions.RichGroupReduceFunction;
-import org.apache.flink.util.Collector;
-import org.joda.time.Instant;
-
-/**
- * This is the second part for executing a {@link org.apache.beam.sdk.transforms.Combine.PerKey}
- * on Flink; the first part is {@link FlinkPartialReduceFunction}. This function performs the final
- * combination of the pre-combined values after a shuffle.
- *
- * <p>The input to {@link #reduce(Iterable, Collector)} are elements of the same key but
- * for different windows. We have to ensure that we only combine elements of matching
- * windows.
- */
-public class FlinkReduceFunction<K, AccumT, OutputT, W extends BoundedWindow>
- extends RichGroupReduceFunction<WindowedValue<KV<K, AccumT>>, WindowedValue<KV<K, OutputT>>> {
-
- protected final CombineFnBase.PerKeyCombineFn<K, ?, AccumT, OutputT> combineFn;
-
- protected final WindowingStrategy<?, W> windowingStrategy;
-
- protected final Map<PCollectionView<?>, WindowingStrategy<?, ?>> sideInputs;
-
- protected final SerializedPipelineOptions serializedOptions;
-
- public FlinkReduceFunction(
- CombineFnBase.PerKeyCombineFn<K, ?, AccumT, OutputT> keyedCombineFn,
- WindowingStrategy<?, W> windowingStrategy,
- Map<PCollectionView<?>, WindowingStrategy<?, ?>> sideInputs,
- PipelineOptions pipelineOptions) {
-
- this.combineFn = keyedCombineFn;
-
- this.windowingStrategy = windowingStrategy;
- this.sideInputs = sideInputs;
-
- this.serializedOptions = new SerializedPipelineOptions(pipelineOptions);
-
- }
-
- @Override
- public void reduce(
- Iterable<WindowedValue<KV<K, AccumT>>> elements,
- Collector<WindowedValue<KV<K, OutputT>>> out) throws Exception {
-
- PipelineOptions options = serializedOptions.getPipelineOptions();
-
- FlinkSideInputReader sideInputReader =
- new FlinkSideInputReader(sideInputs, getRuntimeContext());
-
- PerKeyCombineFnRunner<K, ?, AccumT, OutputT> combineFnRunner =
- PerKeyCombineFnRunners.create(combineFn);
-
- @SuppressWarnings("unchecked")
- OutputTimeFn<? super BoundedWindow> outputTimeFn =
- (OutputTimeFn<? super BoundedWindow>) windowingStrategy.getOutputTimeFn();
-
-
- // get all elements so that we can sort them, has to fit into
- // memory
- // this seems very imprudent, but correct, for now
- ArrayList<WindowedValue<KV<K, AccumT>>> sortedInput = Lists.newArrayList();
- for (WindowedValue<KV<K, AccumT>> inputValue: elements) {
- for (WindowedValue<KV<K, AccumT>> exploded: inputValue.explodeWindows()) {
- sortedInput.add(exploded);
- }
- }
- Collections.sort(sortedInput, new Comparator<WindowedValue<KV<K, AccumT>>>() {
- @Override
- public int compare(
- WindowedValue<KV<K, AccumT>> o1,
- WindowedValue<KV<K, AccumT>> o2) {
- return Iterables.getOnlyElement(o1.getWindows()).maxTimestamp()
- .compareTo(Iterables.getOnlyElement(o2.getWindows()).maxTimestamp());
- }
- });
-
- // iterate over the elements that are sorted by window timestamp
- //
- final Iterator<WindowedValue<KV<K, AccumT>>> iterator = sortedInput.iterator();
-
- // get the first accumulator
- WindowedValue<KV<K, AccumT>> currentValue = iterator.next();
- K key = currentValue.getValue().getKey();
- BoundedWindow currentWindow = Iterables.getFirst(currentValue.getWindows(), null);
- AccumT accumulator = currentValue.getValue().getValue();
-
- // we use this to keep track of the timestamps assigned by the OutputTimeFn,
- // in FlinkPartialReduceFunction we already merge the timestamps assigned
- // to individual elements, here we just merge them
- List<Instant> windowTimestamps = new ArrayList<>();
- windowTimestamps.add(currentValue.getTimestamp());
-
- while (iterator.hasNext()) {
- WindowedValue<KV<K, AccumT>> nextValue = iterator.next();
- BoundedWindow nextWindow = Iterables.getOnlyElement(nextValue.getWindows());
-
- if (nextWindow.equals(currentWindow)) {
- // continue accumulating
- accumulator = combineFnRunner.mergeAccumulators(
- key, ImmutableList.of(accumulator, nextValue.getValue().getValue()),
- options, sideInputReader, currentValue.getWindows());
-
- windowTimestamps.add(nextValue.getTimestamp());
- } else {
- // emit the value that we currently have
- out.collect(
- WindowedValue.of(
- KV.of(key, combineFnRunner.extractOutput(key, accumulator,
- options, sideInputReader, currentValue.getWindows())),
- outputTimeFn.merge(currentWindow, windowTimestamps),
- currentWindow,
- PaneInfo.NO_FIRING));
-
- windowTimestamps.clear();
-
- currentWindow = nextWindow;
- currentValue = nextValue;
- accumulator = nextValue.getValue().getValue();
- windowTimestamps.add(nextValue.getTimestamp());
- }
-
- }
-
- // emit the final accumulator
- out.collect(
- WindowedValue.of(
- KV.of(key, combineFnRunner.extractOutput(key, accumulator,
- options, sideInputReader, currentValue.getWindows())),
- outputTimeFn.merge(currentWindow, windowTimestamps),
- currentWindow,
- PaneInfo.NO_FIRING));
- }
-}
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkSideInputReader.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkSideInputReader.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkSideInputReader.java
deleted file mode 100644
index c317182..0000000
--- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkSideInputReader.java
+++ /dev/null
@@ -1,80 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.beam.runners.flink.translation.functions;
-
-import static com.google.common.base.Preconditions.checkNotNull;
-
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.Map;
-import javax.annotation.Nullable;
-import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
-import org.apache.beam.sdk.util.SideInputReader;
-import org.apache.beam.sdk.util.WindowedValue;
-import org.apache.beam.sdk.util.WindowingStrategy;
-import org.apache.beam.sdk.values.PCollectionView;
-import org.apache.beam.sdk.values.TupleTag;
-import org.apache.flink.api.common.functions.RuntimeContext;
-
-/**
- * A {@link SideInputReader} for the Flink Batch Runner.
- */
-public class FlinkSideInputReader implements SideInputReader {
-
- private final Map<TupleTag<?>, WindowingStrategy<?, ?>> sideInputs;
-
- private RuntimeContext runtimeContext;
-
- public FlinkSideInputReader(Map<PCollectionView<?>, WindowingStrategy<?, ?>> indexByView,
- RuntimeContext runtimeContext) {
- sideInputs = new HashMap<>();
- for (Map.Entry<PCollectionView<?>, WindowingStrategy<?, ?>> entry : indexByView.entrySet()) {
- sideInputs.put(entry.getKey().getTagInternal(), entry.getValue());
- }
- this.runtimeContext = runtimeContext;
- }
-
- @Nullable
- @Override
- public <T> T get(PCollectionView<T> view, BoundedWindow window) {
- checkNotNull(view, "View passed to sideInput cannot be null");
- TupleTag<Iterable<WindowedValue<?>>> tag = view.getTagInternal();
- checkNotNull(
- sideInputs.get(tag),
- "Side input for " + view + " not available.");
-
- Map<BoundedWindow, T> sideInputs =
- runtimeContext.getBroadcastVariableWithInitializer(
- tag.getId(), new SideInputInitializer<>(view));
- T result = sideInputs.get(window);
- if (result == null) {
- result = view.getViewFn().apply(Collections.<WindowedValue<?>>emptyList());
- }
- return result;
- }
-
- @Override
- public <T> boolean contains(PCollectionView<T> view) {
- return sideInputs.containsKey(view.getTagInternal());
- }
-
- @Override
- public boolean isEmpty() {
- return sideInputs.isEmpty();
- }
-}
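FlinkSideInputReader resolves a side input per window and falls back to the view of an empty input when a window never received data. A sketch of that lookup-with-default pattern, with strings standing in for BoundedWindow and the side input value type:

import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class SideInputLookupSketch {
  private final Map<String, List<String>> contentsByWindow = new HashMap<>();

  void put(String window, List<String> contents) { contentsByWindow.put(window, contents); }

  /** Return the side input for {@code window}, or the "empty view" if nothing arrived. */
  List<String> get(String window) {
    List<String> contents = contentsByWindow.get(window);
    return contents != null ? contents : List.of(); // empty-input default
  }

  public static void main(String[] args) {
    SideInputLookupSketch reader = new SideInputLookupSketch();
    reader.put("w1", List.of("a", "b"));
    System.out.println(reader.get("w1")); // [a, b]
    System.out.println(reader.get("w2")); // [] -- window never received data
  }
}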
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkStatefulDoFnFunction.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkStatefulDoFnFunction.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkStatefulDoFnFunction.java
deleted file mode 100644
index c8193d2..0000000
--- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkStatefulDoFnFunction.java
+++ /dev/null
@@ -1,198 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.beam.runners.flink.translation.functions;
-
-import static org.apache.flink.util.Preconditions.checkArgument;
-
-import java.util.Collections;
-import java.util.Iterator;
-import java.util.Map;
-import org.apache.beam.runners.core.DoFnRunner;
-import org.apache.beam.runners.core.DoFnRunners;
-import org.apache.beam.runners.core.InMemoryStateInternals;
-import org.apache.beam.runners.core.InMemoryTimerInternals;
-import org.apache.beam.runners.core.StateInternals;
-import org.apache.beam.runners.core.StateNamespace;
-import org.apache.beam.runners.core.StateNamespaces;
-import org.apache.beam.runners.core.TimerInternals;
-import org.apache.beam.runners.flink.translation.utils.SerializedPipelineOptions;
-import org.apache.beam.sdk.options.PipelineOptions;
-import org.apache.beam.sdk.transforms.DoFn;
-import org.apache.beam.sdk.transforms.ParDo;
-import org.apache.beam.sdk.transforms.reflect.DoFnInvoker;
-import org.apache.beam.sdk.transforms.reflect.DoFnInvokers;
-import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
-import org.apache.beam.sdk.util.WindowedValue;
-import org.apache.beam.sdk.util.WindowingStrategy;
-import org.apache.beam.sdk.values.KV;
-import org.apache.beam.sdk.values.PCollectionView;
-import org.apache.beam.sdk.values.TupleTag;
-import org.apache.flink.api.common.functions.RichGroupReduceFunction;
-import org.apache.flink.api.common.functions.RuntimeContext;
-import org.apache.flink.configuration.Configuration;
-import org.apache.flink.util.Collector;
-import org.joda.time.Instant;
-
-/**
- * A {@link RichGroupReduceFunction} for stateful {@link ParDo} in Flink Batch Runner.
- */
-public class FlinkStatefulDoFnFunction<K, V, OutputT>
- extends RichGroupReduceFunction<WindowedValue<KV<K, V>>, WindowedValue<OutputT>> {
-
- private final DoFn<KV<K, V>, OutputT> dofn;
- private final WindowingStrategy<?, ?> windowingStrategy;
- private final Map<PCollectionView<?>, WindowingStrategy<?, ?>> sideInputs;
- private final SerializedPipelineOptions serializedOptions;
- private final Map<TupleTag<?>, Integer> outputMap;
- private final TupleTag<OutputT> mainOutputTag;
- private transient DoFnInvoker doFnInvoker;
-
- public FlinkStatefulDoFnFunction(
- DoFn<KV<K, V>, OutputT> dofn,
- WindowingStrategy<?, ?> windowingStrategy,
- Map<PCollectionView<?>, WindowingStrategy<?, ?>> sideInputs,
- PipelineOptions pipelineOptions,
- Map<TupleTag<?>, Integer> outputMap,
- TupleTag<OutputT> mainOutputTag) {
-
- this.dofn = dofn;
- this.windowingStrategy = windowingStrategy;
- this.sideInputs = sideInputs;
- this.serializedOptions = new SerializedPipelineOptions(pipelineOptions);
- this.outputMap = outputMap;
- this.mainOutputTag = mainOutputTag;
- }
-
- @Override
- public void reduce(
- Iterable<WindowedValue<KV<K, V>>> values,
- Collector<WindowedValue<OutputT>> out) throws Exception {
- RuntimeContext runtimeContext = getRuntimeContext();
-
- DoFnRunners.OutputManager outputManager;
- if (outputMap == null) {
- outputManager = new FlinkDoFnFunction.DoFnOutputManager(out);
- } else {
- // it has some additional Outputs
- outputManager =
- new FlinkDoFnFunction.MultiDoFnOutputManager((Collector) out, outputMap);
- }
-
- final Iterator<WindowedValue<KV<K, V>>> iterator = values.iterator();
-
- // get the first value, we need this for initializing the state internals with the key.
- // we are guaranteed to have a first value, otherwise reduce() would not have been called.
- WindowedValue<KV<K, V>> currentValue = iterator.next();
- final K key = currentValue.getValue().getKey();
-
- final InMemoryStateInternals<K> stateInternals = InMemoryStateInternals.forKey(key);
-
- // Used with Batch, we know that all the data is available for this key. We can't use the
- // timer manager from the context because it doesn't exist. So we create one and advance
- // time to the end after processing all elements.
- final InMemoryTimerInternals timerInternals = new InMemoryTimerInternals();
- timerInternals.advanceProcessingTime(Instant.now());
- timerInternals.advanceSynchronizedProcessingTime(Instant.now());
-
- DoFnRunner<KV<K, V>, OutputT> doFnRunner = DoFnRunners.simpleRunner(
- serializedOptions.getPipelineOptions(), dofn,
- new FlinkSideInputReader(sideInputs, runtimeContext),
- outputManager,
- mainOutputTag,
- // see SimpleDoFnRunner, just use it to limit number of additional outputs
- Collections.<TupleTag<?>>emptyList(),
- new FlinkNoOpStepContext() {
- @Override
- public StateInternals<?> stateInternals() {
- return stateInternals;
- }
- @Override
- public TimerInternals timerInternals() {
- return timerInternals;
- }
- },
- new FlinkAggregatorFactory(runtimeContext),
- windowingStrategy);
-
- doFnRunner.startBundle();
-
- doFnRunner.processElement(currentValue);
- while (iterator.hasNext()) {
- currentValue = iterator.next();
- doFnRunner.processElement(currentValue);
- }
-
- // Finish any pending windows by advancing the input watermark to infinity.
- timerInternals.advanceInputWatermark(BoundedWindow.TIMESTAMP_MAX_VALUE);
-
- // Finally, advance the processing time to infinity to fire any timers.
- timerInternals.advanceProcessingTime(BoundedWindow.TIMESTAMP_MAX_VALUE);
- timerInternals.advanceSynchronizedProcessingTime(BoundedWindow.TIMESTAMP_MAX_VALUE);
-
- fireEligibleTimers(timerInternals, doFnRunner);
-
- doFnRunner.finishBundle();
- }
-
- private void fireEligibleTimers(
- InMemoryTimerInternals timerInternals, DoFnRunner<KV<K, V>, OutputT> runner)
- throws Exception {
-
- while (true) {
-
- TimerInternals.TimerData timer;
- boolean hasFired = false;
-
- while ((timer = timerInternals.removeNextEventTimer()) != null) {
- hasFired = true;
- fireTimer(timer, runner);
- }
- while ((timer = timerInternals.removeNextProcessingTimer()) != null) {
- hasFired = true;
- fireTimer(timer, runner);
- }
- while ((timer = timerInternals.removeNextSynchronizedProcessingTimer()) != null) {
- hasFired = true;
- fireTimer(timer, runner);
- }
- if (!hasFired) {
- break;
- }
- }
- }
-
- private void fireTimer(
- TimerInternals.TimerData timer, DoFnRunner<KV<K, V>, OutputT> doFnRunner) {
- StateNamespace namespace = timer.getNamespace();
- checkArgument(namespace instanceof StateNamespaces.WindowNamespace);
- BoundedWindow window = ((StateNamespaces.WindowNamespace) namespace).getWindow();
- doFnRunner.onTimer(timer.getTimerId(), window, timer.getTimestamp(), timer.getDomain());
- }
-
- @Override
- public void open(Configuration parameters) throws Exception {
- doFnInvoker = DoFnInvokers.invokerFor(dofn);
- doFnInvoker.invokeSetup();
- }
-
- @Override
- public void close() throws Exception {
- doFnInvoker.invokeTeardown();
- }
-
-}
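FlinkStatefulDoFnFunction fires timers in repeated passes, because a firing timer may set further timers that are already eligible. A minimal sketch of that drain-until-quiet loop, with a single queue standing in for the event-time, processing-time, and synchronized-processing-time queues of InMemoryTimerInternals:

import java.util.ArrayDeque;
import java.util.Deque;
import java.util.function.Consumer;

public class TimerDrainSketch {
  private final Deque<String> eligibleTimers = new ArrayDeque<>();

  void setTimer(String id) { eligibleTimers.add(id); }

  void fireEligibleTimers(Consumer<String> onTimer) {
    while (true) {
      boolean hasFired = false;
      String timer;
      while ((timer = eligibleTimers.poll()) != null) {
        hasFired = true;
        onTimer.accept(timer); // the callback may call setTimer(...) again
      }
      if (!hasFired) {
        break; // nothing fired in this pass, so nothing new can be eligible
      }
    }
  }

  public static void main(String[] args) {
    TimerDrainSketch timers = new TimerDrainSketch();
    timers.setTimer("cleanup-w1");
    timers.fireEligibleTimers(id -> {
      System.out.println("fired " + id);
      if (id.equals("cleanup-w1")) {
        timers.setTimer("cleanup-w2"); // chained timer, drained in the same call
      }
    });
  }
}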
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/SideInputInitializer.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/SideInputInitializer.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/SideInputInitializer.java
deleted file mode 100644
index 12222b4..0000000
--- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/SideInputInitializer.java
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.beam.runners.flink.translation.functions;
-
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
-import org.apache.beam.sdk.util.WindowedValue;
-import org.apache.beam.sdk.values.PCollectionView;
-import org.apache.flink.api.common.functions.BroadcastVariableInitializer;
-
-/**
- * {@link BroadcastVariableInitializer} that initializes the broadcast input as a {@code Map}
- * from window to side input.
- */
-public class SideInputInitializer<ElemT, ViewT, W extends BoundedWindow>
- implements BroadcastVariableInitializer<WindowedValue<ElemT>, Map<BoundedWindow, ViewT>> {
-
- PCollectionView<ViewT> view;
-
- public SideInputInitializer(PCollectionView<ViewT> view) {
- this.view = view;
- }
-
- @Override
- public Map<BoundedWindow, ViewT> initializeBroadcastVariable(
- Iterable<WindowedValue<ElemT>> inputValues) {
-
- // first partition into windows
- Map<BoundedWindow, List<WindowedValue<ElemT>>> partitionedElements = new HashMap<>();
- for (WindowedValue<ElemT> value: inputValues) {
- for (BoundedWindow window: value.getWindows()) {
- List<WindowedValue<ElemT>> windowedValues = partitionedElements.get(window);
- if (windowedValues == null) {
- windowedValues = new ArrayList<>();
- partitionedElements.put(window, windowedValues);
- }
- windowedValues.add(value);
- }
- }
-
- Map<BoundedWindow, ViewT> resultMap = new HashMap<>();
-
- for (Map.Entry<BoundedWindow, List<WindowedValue<ElemT>>> elements:
- partitionedElements.entrySet()) {
-
- @SuppressWarnings("unchecked")
- Iterable<WindowedValue<?>> elementsIterable =
- (List<WindowedValue<?>>) (List<?>) elements.getValue();
-
- resultMap.put(elements.getKey(), view.getViewFn().apply(elementsIterable));
- }
-
- return resultMap;
- }
-}
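SideInputInitializer files each windowed element under every window it belongs to, producing the window-to-contents map that the side input reader consults. A sketch of that partitioning step, with a tiny Windowed placeholder instead of Beam's WindowedValue and the raw list used in place of a ViewFn application:

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class PartitionByWindowSketch {
  static final class Windowed {
    final String value; final List<String> windows;
    Windowed(String value, List<String> windows) { this.value = value; this.windows = windows; }
  }

  static Map<String, List<String>> partitionByWindow(List<Windowed> input) {
    Map<String, List<String>> partitioned = new HashMap<>();
    for (Windowed wv : input) {
      for (String window : wv.windows) {
        partitioned.computeIfAbsent(window, w -> new ArrayList<>()).add(wv.value);
      }
    }
    return partitioned;
  }

  public static void main(String[] args) {
    List<Windowed> input = List.of(
        new Windowed("a", List.of("w1")),
        new Windowed("b", List.of("w1", "w2"))); // "b" falls into two windows
    System.out.println(partitionByWindow(input)); // e.g. {w1=[a, b], w2=[b]} (map order unspecified)
  }
}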
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/package-info.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/package-info.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/package-info.java
deleted file mode 100644
index 9f11212..0000000
--- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/functions/package-info.java
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * Internal implementation of the Beam runner for Apache Flink.
- */
-package org.apache.beam.runners.flink.translation.functions;
[04/50] [abbrv] beam git commit: ProcessFn remembers more info about
its application context
Posted by dh...@apache.org.
ProcessFn remembers more info about its application context
Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/3fd88901
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/3fd88901
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/3fd88901
Branch: refs/heads/DSL_SQL
Commit: 3fd889015afa8528801d2c35c8c9f72b944ea472
Parents: a51bdd2
Author: Eugene Kirpichov <ki...@google.com>
Authored: Sat Apr 15 16:39:51 2017 -0700
Committer: Eugene Kirpichov <ki...@google.com>
Committed: Tue Apr 18 18:02:06 2017 -0700
----------------------------------------------------------------------
.../beam/runners/core/SplittableParDo.java | 35 +++++++++++++++-----
.../beam/runners/core/SplittableParDoTest.java | 8 ++++-
2 files changed, 34 insertions(+), 9 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/beam/blob/3fd88901/runners/core-java/src/main/java/org/apache/beam/runners/core/SplittableParDo.java
----------------------------------------------------------------------
diff --git a/runners/core-java/src/main/java/org/apache/beam/runners/core/SplittableParDo.java b/runners/core-java/src/main/java/org/apache/beam/runners/core/SplittableParDo.java
index 9cc965a..44db1f7 100644
--- a/runners/core-java/src/main/java/org/apache/beam/runners/core/SplittableParDo.java
+++ b/runners/core-java/src/main/java/org/apache/beam/runners/core/SplittableParDo.java
@@ -115,7 +115,7 @@ public class SplittableParDo<InputT, OutputT, RestrictionT>
fn,
input.getCoder(),
restrictionCoder,
- input.getWindowingStrategy(),
+ (WindowingStrategy<InputT, ?>) input.getWindowingStrategy(),
parDo.getSideInputs(),
parDo.getMainOutputTag(),
parDo.getAdditionalOutputTags()));
@@ -185,7 +185,7 @@ public class SplittableParDo<InputT, OutputT, RestrictionT>
private final DoFn<InputT, OutputT> fn;
private final Coder<InputT> elementCoder;
private final Coder<RestrictionT> restrictionCoder;
- private final WindowingStrategy<?, ?> windowingStrategy;
+ private final WindowingStrategy<InputT, ?> windowingStrategy;
private final List<PCollectionView<?>> sideInputs;
private final TupleTag<OutputT> mainOutputTag;
private final TupleTagList additionalOutputTags;
@@ -202,7 +202,7 @@ public class SplittableParDo<InputT, OutputT, RestrictionT>
DoFn<InputT, OutputT> fn,
Coder<InputT> elementCoder,
Coder<RestrictionT> restrictionCoder,
- WindowingStrategy<?, ?> windowingStrategy,
+ WindowingStrategy<InputT, ?> windowingStrategy,
List<PCollectionView<?>> sideInputs,
TupleTag<OutputT> mainOutputTag,
TupleTagList additionalOutputTags) {
@@ -234,7 +234,7 @@ public class SplittableParDo<InputT, OutputT, RestrictionT>
public ProcessFn<InputT, OutputT, RestrictionT, TrackerT> newProcessFn(
DoFn<InputT, OutputT> fn) {
return new SplittableParDo.ProcessFn<>(
- fn, elementCoder, restrictionCoder, windowingStrategy.getWindowFn().windowCoder());
+ fn, elementCoder, restrictionCoder, windowingStrategy);
}
@Override
@@ -351,7 +351,9 @@ public class SplittableParDo<InputT, OutputT, RestrictionT>
private StateTag<Object, ValueState<RestrictionT>> restrictionTag;
private final DoFn<InputT, OutputT> fn;
- private final Coder<? extends BoundedWindow> windowCoder;
+ private final Coder<InputT> elementCoder;
+ private final Coder<RestrictionT> restrictionCoder;
+ private final WindowingStrategy<InputT, ?> inputWindowingStrategy;
private transient StateInternalsFactory<String> stateInternalsFactory;
private transient TimerInternalsFactory<String> timerInternalsFactory;
@@ -364,11 +366,16 @@ public class SplittableParDo<InputT, OutputT, RestrictionT>
DoFn<InputT, OutputT> fn,
Coder<InputT> elementCoder,
Coder<RestrictionT> restrictionCoder,
- Coder<? extends BoundedWindow> windowCoder) {
+ WindowingStrategy<InputT, ?> inputWindowingStrategy) {
this.fn = fn;
- this.windowCoder = windowCoder;
+ this.elementCoder = elementCoder;
+ this.restrictionCoder = restrictionCoder;
+ this.inputWindowingStrategy = inputWindowingStrategy;
this.elementTag =
- StateTags.value("element", WindowedValue.getFullCoder(elementCoder, this.windowCoder));
+ StateTags.value(
+ "element",
+ WindowedValue.getFullCoder(
+ elementCoder, inputWindowingStrategy.getWindowFn().windowCoder()));
this.restrictionTag = StateTags.value("restriction", restrictionCoder);
}
@@ -389,6 +396,18 @@ public class SplittableParDo<InputT, OutputT, RestrictionT>
return fn;
}
+ public Coder<InputT> getElementCoder() {
+ return elementCoder;
+ }
+
+ public Coder<RestrictionT> getRestrictionCoder() {
+ return restrictionCoder;
+ }
+
+ public WindowingStrategy<InputT, ?> getInputWindowingStrategy() {
+ return inputWindowingStrategy;
+ }
+
@Setup
public void setup() throws Exception {
invoker = DoFnInvokers.invokerFor(fn);
http://git-wip-us.apache.org/repos/asf/beam/blob/3fd88901/runners/core-java/src/test/java/org/apache/beam/runners/core/SplittableParDoTest.java
----------------------------------------------------------------------
diff --git a/runners/core-java/src/test/java/org/apache/beam/runners/core/SplittableParDoTest.java b/runners/core-java/src/test/java/org/apache/beam/runners/core/SplittableParDoTest.java
index 2c89543..5629635 100644
--- a/runners/core-java/src/test/java/org/apache/beam/runners/core/SplittableParDoTest.java
+++ b/runners/core-java/src/test/java/org/apache/beam/runners/core/SplittableParDoTest.java
@@ -51,11 +51,13 @@ import org.apache.beam.sdk.transforms.splittabledofn.OffsetRange;
import org.apache.beam.sdk.transforms.splittabledofn.OffsetRangeTracker;
import org.apache.beam.sdk.transforms.splittabledofn.RestrictionTracker;
import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
+import org.apache.beam.sdk.transforms.windowing.FixedWindows;
import org.apache.beam.sdk.transforms.windowing.GlobalWindow;
import org.apache.beam.sdk.transforms.windowing.IntervalWindow;
import org.apache.beam.sdk.transforms.windowing.PaneInfo;
import org.apache.beam.sdk.util.SideInputReader;
import org.apache.beam.sdk.util.WindowedValue;
+import org.apache.beam.sdk.util.WindowingStrategy;
import org.apache.beam.sdk.values.PCollection;
import org.apache.beam.sdk.values.PCollectionView;
import org.apache.beam.sdk.values.TimestampedValue;
@@ -220,9 +222,13 @@ public class SplittableParDoTest {
int maxOutputsPerBundle,
Duration maxBundleDuration)
throws Exception {
+ // The exact windowing strategy doesn't matter in this test, but it should be able to
+ // encode IntervalWindow's because that's what all tests here use.
+ WindowingStrategy<InputT, BoundedWindow> windowingStrategy =
+ (WindowingStrategy) WindowingStrategy.of(FixedWindows.of(Duration.standardSeconds(1)));
final SplittableParDo.ProcessFn<InputT, OutputT, RestrictionT, TrackerT> processFn =
new SplittableParDo.ProcessFn<>(
- fn, inputCoder, restrictionCoder, IntervalWindow.getCoder());
+ fn, inputCoder, restrictionCoder, windowingStrategy);
this.tester = DoFnTester.of(processFn);
this.timerInternals = new InMemoryTimerInternals();
this.stateInternals = new TestInMemoryStateInternals<>("dummy");
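For readers following along, here is a minimal sketch (not part of the commit) of how the reworked ProcessFn is constructed after this change, modeled on the updated test helper above. It assumes a class placed in the org.apache.beam.runners.core package, like SplittableParDoTest, so the constructor is reachable; the class name ProcessFnContextSketch, the helper method, and the FixedWindows strategy are illustrative placeholders, while the constructor arguments and the new getters come straight from the diff.

package org.apache.beam.runners.core;

import org.apache.beam.sdk.coders.Coder;
import org.apache.beam.sdk.transforms.DoFn;
import org.apache.beam.sdk.transforms.splittabledofn.RestrictionTracker;
import org.apache.beam.sdk.transforms.windowing.FixedWindows;
import org.apache.beam.sdk.util.WindowingStrategy;
import org.joda.time.Duration;

/** Illustrative only: shows the reworked ProcessFn constructor and the new context getters. */
class ProcessFnContextSketch {

  static <InputT, OutputT, RestrictionT, TrackerT extends RestrictionTracker<RestrictionT>>
      SplittableParDo.ProcessFn<InputT, OutputT, RestrictionT, TrackerT> buildProcessFn(
          DoFn<InputT, OutputT> fn,
          Coder<InputT> elementCoder,
          Coder<RestrictionT> restrictionCoder) {

    // The full windowing strategy of the input PCollection is now passed in,
    // rather than only its window coder; FixedWindows here is just a stand-in.
    @SuppressWarnings({"unchecked", "rawtypes"})
    WindowingStrategy<InputT, ?> windowingStrategy =
        (WindowingStrategy) WindowingStrategy.of(FixedWindows.of(Duration.standardSeconds(1)));

    SplittableParDo.ProcessFn<InputT, OutputT, RestrictionT, TrackerT> processFn =
        new SplittableParDo.ProcessFn<>(fn, elementCoder, restrictionCoder, windowingStrategy);

    // The ProcessFn now remembers its application context and exposes it directly.
    assert processFn.getElementCoder() == elementCoder;
    assert processFn.getRestrictionCoder() == restrictionCoder;
    assert processFn.getInputWindowingStrategy() == windowingStrategy;

    return processFn;
  }
}

The point of the change is visible in the last three lines: the element coder, restriction coder and input windowing strategy are now retrievable from the ProcessFn itself, instead of being reduced to a window coder at construction time.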
[36/50] [abbrv] beam git commit: [BEAM-1994] Remove Flink examples package
Posted by dh...@apache.org.
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/package-info.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/package-info.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/package-info.java
deleted file mode 100644
index af4b354..0000000
--- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/package-info.java
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * Internal implementation of the Beam runner for Apache Flink.
- */
-package org.apache.beam.runners.flink.translation;
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/types/CoderTypeInformation.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/types/CoderTypeInformation.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/types/CoderTypeInformation.java
deleted file mode 100644
index 9b449aa..0000000
--- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/types/CoderTypeInformation.java
+++ /dev/null
@@ -1,120 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.beam.runners.flink.translation.types;
-
-import static com.google.common.base.Preconditions.checkNotNull;
-
-import org.apache.beam.sdk.coders.Coder;
-import org.apache.flink.api.common.ExecutionConfig;
-import org.apache.flink.api.common.typeinfo.AtomicType;
-import org.apache.flink.api.common.typeinfo.TypeInformation;
-import org.apache.flink.api.common.typeutils.TypeComparator;
-import org.apache.flink.api.common.typeutils.TypeSerializer;
-
-/**
- * Flink {@link org.apache.flink.api.common.typeinfo.TypeInformation} for
- * Dataflow {@link org.apache.beam.sdk.coders.Coder}s.
- */
-public class CoderTypeInformation<T> extends TypeInformation<T> implements AtomicType<T> {
-
- private final Coder<T> coder;
-
- public CoderTypeInformation(Coder<T> coder) {
- checkNotNull(coder);
- this.coder = coder;
- }
-
- public Coder<T> getCoder() {
- return coder;
- }
-
- @Override
- public boolean isBasicType() {
- return false;
- }
-
- @Override
- public boolean isTupleType() {
- return false;
- }
-
- @Override
- public int getArity() {
- return 1;
- }
-
- @Override
- @SuppressWarnings("unchecked")
- public Class<T> getTypeClass() {
- // We don't have the Class, so we have to pass null here. What a shame...
- return (Class<T>) Object.class;
- }
-
- @Override
- public boolean isKeyType() {
- return true;
- }
-
- @Override
- @SuppressWarnings("unchecked")
- public TypeSerializer<T> createSerializer(ExecutionConfig config) {
- return new CoderTypeSerializer<>(coder);
- }
-
- @Override
- public int getTotalFields() {
- return 2;
- }
-
- @Override
- public boolean equals(Object o) {
- if (this == o) {
- return true;
- }
- if (o == null || getClass() != o.getClass()) {
- return false;
- }
-
- CoderTypeInformation that = (CoderTypeInformation) o;
-
- return coder.equals(that.coder);
-
- }
-
- @Override
- public int hashCode() {
- return coder.hashCode();
- }
-
- @Override
- public boolean canEqual(Object obj) {
- return obj instanceof CoderTypeInformation;
- }
-
- @Override
- public String toString() {
- return "CoderTypeInformation{coder=" + coder + '}';
- }
-
- @Override
- public TypeComparator<T> createComparator(boolean sortOrderAscending, ExecutionConfig
- executionConfig) {
- throw new UnsupportedOperationException(
- "Non-encoded values cannot be compared directly.");
- }
-}
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/types/CoderTypeSerializer.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/types/CoderTypeSerializer.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/types/CoderTypeSerializer.java
deleted file mode 100644
index e210ed9..0000000
--- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/types/CoderTypeSerializer.java
+++ /dev/null
@@ -1,132 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.beam.runners.flink.translation.types;
-
-import java.io.EOFException;
-import java.io.IOException;
-import org.apache.beam.runners.flink.translation.wrappers.DataInputViewWrapper;
-import org.apache.beam.runners.flink.translation.wrappers.DataOutputViewWrapper;
-import org.apache.beam.sdk.coders.Coder;
-import org.apache.beam.sdk.coders.CoderException;
-import org.apache.beam.sdk.util.CoderUtils;
-import org.apache.flink.api.common.typeutils.TypeSerializer;
-import org.apache.flink.core.memory.DataInputView;
-import org.apache.flink.core.memory.DataOutputView;
-
-/**
- * Flink {@link org.apache.flink.api.common.typeutils.TypeSerializer} for
- * Dataflow {@link org.apache.beam.sdk.coders.Coder Coders}.
- */
-public class CoderTypeSerializer<T> extends TypeSerializer<T> {
-
- private Coder<T> coder;
-
- public CoderTypeSerializer(Coder<T> coder) {
- this.coder = coder;
- }
-
- @Override
- public boolean isImmutableType() {
- return false;
- }
-
- @Override
- public CoderTypeSerializer<T> duplicate() {
- return new CoderTypeSerializer<>(coder);
- }
-
- @Override
- public T createInstance() {
- return null;
- }
-
- @Override
- public T copy(T t) {
- try {
- return CoderUtils.clone(coder, t);
- } catch (CoderException e) {
- throw new RuntimeException("Could not clone.", e);
- }
- }
-
- @Override
- public T copy(T t, T reuse) {
- return copy(t);
- }
-
- @Override
- public int getLength() {
- return -1;
- }
-
- @Override
- public void serialize(T t, DataOutputView dataOutputView) throws IOException {
- DataOutputViewWrapper outputWrapper = new DataOutputViewWrapper(dataOutputView);
- coder.encode(t, outputWrapper, Coder.Context.NESTED);
- }
-
- @Override
- public T deserialize(DataInputView dataInputView) throws IOException {
- try {
- DataInputViewWrapper inputWrapper = new DataInputViewWrapper(dataInputView);
- return coder.decode(inputWrapper, Coder.Context.NESTED);
- } catch (CoderException e) {
- Throwable cause = e.getCause();
- if (cause instanceof EOFException) {
- throw (EOFException) cause;
- } else {
- throw e;
- }
- }
- }
-
- @Override
- public T deserialize(T t, DataInputView dataInputView) throws IOException {
- return deserialize(dataInputView);
- }
-
- @Override
- public void copy(
- DataInputView dataInputView,
- DataOutputView dataOutputView) throws IOException {
- serialize(deserialize(dataInputView), dataOutputView);
- }
-
- @Override
- public boolean equals(Object o) {
- if (this == o) {
- return true;
- }
- if (o == null || getClass() != o.getClass()) {
- return false;
- }
-
- CoderTypeSerializer that = (CoderTypeSerializer) o;
- return coder.equals(that.coder);
- }
-
- @Override
- public boolean canEqual(Object obj) {
- return obj instanceof CoderTypeSerializer;
- }
-
- @Override
- public int hashCode() {
- return coder.hashCode();
- }
-}
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/types/EncodedValueComparator.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/types/EncodedValueComparator.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/types/EncodedValueComparator.java
deleted file mode 100644
index 667ef45..0000000
--- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/types/EncodedValueComparator.java
+++ /dev/null
@@ -1,195 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.beam.runners.flink.translation.types;
-
-import java.io.IOException;
-import java.util.Arrays;
-import org.apache.beam.sdk.coders.Coder;
-import org.apache.flink.api.common.typeutils.TypeComparator;
-import org.apache.flink.core.memory.DataInputView;
-import org.apache.flink.core.memory.DataOutputView;
-import org.apache.flink.core.memory.MemorySegment;
-
-/**
- * Flink {@link org.apache.flink.api.common.typeutils.TypeComparator} for Beam values that have
- * been encoded to byte data by a {@link Coder}.
- */
-public class EncodedValueComparator extends TypeComparator<byte[]> {
-
- /** For storing the Reference in encoded form. */
- private transient byte[] encodedReferenceKey;
-
- private final boolean ascending;
-
- public EncodedValueComparator(boolean ascending) {
- this.ascending = ascending;
- }
-
- @Override
- public int hash(byte[] record) {
- return Arrays.hashCode(record);
- }
-
- @Override
- public void setReference(byte[] toCompare) {
- this.encodedReferenceKey = toCompare;
- }
-
- @Override
- public boolean equalToReference(byte[] candidate) {
- if (encodedReferenceKey.length != candidate.length) {
- return false;
- }
- int len = candidate.length;
- for (int i = 0; i < len; i++) {
- if (encodedReferenceKey[i] != candidate[i]) {
- return false;
- }
- }
- return true;
- }
-
- @Override
- public int compareToReference(TypeComparator<byte[]> other) {
- // VERY IMPORTANT: compareToReference does not behave like Comparable.compare
- // the meaning of the return value is inverted.
-
- EncodedValueComparator otherEncodedValueComparator = (EncodedValueComparator) other;
-
- int len = Math.min(
- encodedReferenceKey.length,
- otherEncodedValueComparator.encodedReferenceKey.length);
-
- for (int i = 0; i < len; i++) {
- byte b1 = encodedReferenceKey[i];
- byte b2 = otherEncodedValueComparator.encodedReferenceKey[i];
- int result = (b1 < b2 ? -1 : (b1 == b2 ? 0 : 1));
- if (result != 0) {
- return ascending ? -result : result;
- }
- }
- int result =
- encodedReferenceKey.length - otherEncodedValueComparator.encodedReferenceKey.length;
- return ascending ? -result : result;
- }
-
-
- @Override
- public int compare(byte[] first, byte[] second) {
- int len = Math.min(first.length, second.length);
- for (int i = 0; i < len; i++) {
- byte b1 = first[i];
- byte b2 = second[i];
- int result = (b1 < b2 ? -1 : (b1 == b2 ? 0 : 1));
- if (result != 0) {
- return ascending ? result : -result;
- }
- }
- int result = first.length - second.length;
- return ascending ? result : -result;
- }
-
- @Override
- public int compareSerialized(
- DataInputView firstSource,
- DataInputView secondSource) throws IOException {
- int lengthFirst = firstSource.readInt();
- int lengthSecond = secondSource.readInt();
-
- int len = Math.min(lengthFirst, lengthSecond);
- for (int i = 0; i < len; i++) {
- byte b1 = firstSource.readByte();
- byte b2 = secondSource.readByte();
- int result = (b1 < b2 ? -1 : (b1 == b2 ? 0 : 1));
- if (result != 0) {
- return ascending ? result : -result;
- }
- }
-
- int result = lengthFirst - lengthSecond;
- return ascending ? result : -result;
- }
-
-
-
- @Override
- public boolean supportsNormalizedKey() {
- // disabled because this seems to not work with some coders,
- // such as the AvroCoder
- return false;
- }
-
- @Override
- public boolean supportsSerializationWithKeyNormalization() {
- return false;
- }
-
- @Override
- public int getNormalizeKeyLen() {
- return Integer.MAX_VALUE;
- }
-
- @Override
- public boolean isNormalizedKeyPrefixOnly(int keyBytes) {
- return true;
- }
-
- @Override
- public void putNormalizedKey(byte[] record, MemorySegment target, int offset, int numBytes) {
- final int limit = offset + numBytes;
-
- target.put(offset, record, 0, Math.min(numBytes, record.length));
-
- offset += record.length;
-
- while (offset < limit) {
- target.put(offset++, (byte) 0);
- }
- }
-
- @Override
- public void writeWithKeyNormalization(byte[] record, DataOutputView target) throws IOException {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public byte[] readWithKeyDenormalization(byte[] reuse, DataInputView source) throws IOException {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public boolean invertNormalizedKey() {
- return !ascending;
- }
-
- @Override
- public TypeComparator<byte[]> duplicate() {
- return new EncodedValueComparator(ascending);
- }
-
- @Override
- public int extractKeys(Object record, Object[] target, int index) {
- target[index] = record;
- return 1;
- }
-
- @Override
- public TypeComparator[] getFlatComparators() {
- return new TypeComparator[] { this.duplicate() };
- }
-}
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/types/EncodedValueSerializer.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/types/EncodedValueSerializer.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/types/EncodedValueSerializer.java
deleted file mode 100644
index 41db61e..0000000
--- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/types/EncodedValueSerializer.java
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.beam.runners.flink.translation.types;
-
-import java.io.IOException;
-
-import org.apache.beam.sdk.coders.Coder;
-
-import org.apache.flink.api.common.typeutils.TypeSerializer;
-import org.apache.flink.core.memory.DataInputView;
-import org.apache.flink.core.memory.DataOutputView;
-
-/**
- * {@link TypeSerializer} for values that were encoded using a {@link Coder}.
- */
-public final class EncodedValueSerializer extends TypeSerializer<byte[]> {
-
- private static final long serialVersionUID = 1L;
-
- private static final byte[] EMPTY = new byte[0];
-
- @Override
- public boolean isImmutableType() {
- return true;
- }
-
- @Override
- public byte[] createInstance() {
- return EMPTY;
- }
-
- @Override
- public byte[] copy(byte[] from) {
- return from;
- }
-
- @Override
- public byte[] copy(byte[] from, byte[] reuse) {
- return copy(from);
- }
-
- @Override
- public int getLength() {
- return -1;
- }
-
-
- @Override
- public void serialize(byte[] record, DataOutputView target) throws IOException {
- if (record == null) {
- throw new IllegalArgumentException("The record must not be null.");
- }
-
- final int len = record.length;
- target.writeInt(len);
- target.write(record);
- }
-
- @Override
- public byte[] deserialize(DataInputView source) throws IOException {
- final int len = source.readInt();
- byte[] result = new byte[len];
- source.readFully(result);
- return result;
- }
-
- @Override
- public byte[] deserialize(byte[] reuse, DataInputView source) throws IOException {
- return deserialize(source);
- }
-
- @Override
- public void copy(DataInputView source, DataOutputView target) throws IOException {
- final int len = source.readInt();
- target.writeInt(len);
- target.write(source, len);
- }
-
- @Override
- public boolean canEqual(Object obj) {
- return obj instanceof EncodedValueSerializer;
- }
-
- @Override
- public int hashCode() {
- return this.getClass().hashCode();
- }
-
- @Override
- public boolean equals(Object obj) {
- return obj instanceof EncodedValueSerializer;
- }
-
- @Override
- public TypeSerializer<byte[]> duplicate() {
- return this;
- }
-}
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/types/EncodedValueTypeInformation.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/types/EncodedValueTypeInformation.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/types/EncodedValueTypeInformation.java
deleted file mode 100644
index e24bf31..0000000
--- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/types/EncodedValueTypeInformation.java
+++ /dev/null
@@ -1,98 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.beam.runners.flink.translation.types;
-
-import org.apache.beam.sdk.coders.Coder;
-import org.apache.flink.api.common.ExecutionConfig;
-import org.apache.flink.api.common.typeinfo.AtomicType;
-import org.apache.flink.api.common.typeinfo.TypeInformation;
-import org.apache.flink.api.common.typeutils.TypeComparator;
-import org.apache.flink.api.common.typeutils.TypeSerializer;
-
-/**
- * Flink {@link TypeInformation} for Beam values that have been encoded to byte data
- * by a {@link Coder}.
- */
-public class EncodedValueTypeInformation
- extends TypeInformation<byte[]>
- implements AtomicType<byte[]> {
-
- private static final long serialVersionUID = 1L;
-
- @Override
- public boolean isBasicType() {
- return false;
- }
-
- @Override
- public boolean isTupleType() {
- return false;
- }
-
- @Override
- public int getArity() {
- return 0;
- }
-
- @Override
- public int getTotalFields() {
- return 0;
- }
-
- @Override
- public Class<byte[]> getTypeClass() {
- return byte[].class;
- }
-
- @Override
- public boolean isKeyType() {
- return true;
- }
-
- @Override
- public TypeSerializer<byte[]> createSerializer(ExecutionConfig executionConfig) {
- return new EncodedValueSerializer();
- }
-
- @Override
- public boolean equals(Object other) {
- return other instanceof EncodedValueTypeInformation;
- }
-
- @Override
- public int hashCode() {
- return this.getClass().hashCode();
- }
-
- @Override
- public boolean canEqual(Object obj) {
- return obj instanceof EncodedValueTypeInformation;
- }
-
- @Override
- public String toString() {
- return "EncodedValueTypeInformation";
- }
-
- @Override
- public TypeComparator<byte[]> createComparator(
- boolean sortOrderAscending,
- ExecutionConfig executionConfig) {
- return new EncodedValueComparator(sortOrderAscending);
- }
-}
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/types/InspectableByteArrayOutputStream.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/types/InspectableByteArrayOutputStream.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/types/InspectableByteArrayOutputStream.java
deleted file mode 100644
index 36b5ba3..0000000
--- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/types/InspectableByteArrayOutputStream.java
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.beam.runners.flink.translation.types;
-
-import java.io.ByteArrayOutputStream;
-
-/**
- * Version of {@link java.io.ByteArrayOutputStream} that allows to retrieve the internal
- * byte[] buffer without incurring an array copy.
- */
-public class InspectableByteArrayOutputStream extends ByteArrayOutputStream {
-
- /**
- * Get the underlying byte array.
- */
- public byte[] getBuffer() {
- return buf;
- }
-}
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/types/KvKeySelector.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/types/KvKeySelector.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/types/KvKeySelector.java
deleted file mode 100644
index 9df6836..0000000
--- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/types/KvKeySelector.java
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.beam.runners.flink.translation.types;
-
-import org.apache.beam.sdk.coders.Coder;
-import org.apache.beam.sdk.util.CoderUtils;
-import org.apache.beam.sdk.util.WindowedValue;
-import org.apache.beam.sdk.values.KV;
-import org.apache.flink.api.common.typeinfo.TypeInformation;
-import org.apache.flink.api.java.functions.KeySelector;
-import org.apache.flink.api.java.typeutils.ResultTypeQueryable;
-
-/**
- * {@link KeySelector} that extracts the key from a {@link KV} and returns
- * it in encoded form as a {@code byte} array.
- */
-public class KvKeySelector<InputT, K>
- implements KeySelector<WindowedValue<KV<K, InputT>>, byte[]>, ResultTypeQueryable<byte[]> {
-
- private final Coder<K> keyCoder;
-
- public KvKeySelector(Coder<K> keyCoder) {
- this.keyCoder = keyCoder;
- }
-
- @Override
- public byte[] getKey(WindowedValue<KV<K, InputT>> value) throws Exception {
- return CoderUtils.encodeToByteArray(keyCoder, value.getValue().getKey());
- }
-
- @Override
- public TypeInformation<byte[]> getProducedType() {
- return new EncodedValueTypeInformation();
- }
-}
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/types/package-info.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/types/package-info.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/types/package-info.java
deleted file mode 100644
index 6fb3182..0000000
--- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/types/package-info.java
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * Internal implementation of the Beam runner for Apache Flink.
- */
-package org.apache.beam.runners.flink.translation.types;
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/utils/SerializedPipelineOptions.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/utils/SerializedPipelineOptions.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/utils/SerializedPipelineOptions.java
deleted file mode 100644
index 2256bb1..0000000
--- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/utils/SerializedPipelineOptions.java
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.beam.runners.flink.translation.utils;
-
-import static com.google.common.base.Preconditions.checkNotNull;
-
-import com.fasterxml.jackson.databind.ObjectMapper;
-import java.io.ByteArrayOutputStream;
-import java.io.IOException;
-import java.io.Serializable;
-import org.apache.beam.sdk.io.FileSystems;
-import org.apache.beam.sdk.options.PipelineOptions;
-import org.apache.beam.sdk.util.IOChannelUtils;
-
-/**
- * Encapsulates the PipelineOptions in serialized form to ship them to the cluster.
- */
-public class SerializedPipelineOptions implements Serializable {
-
- private final byte[] serializedOptions;
-
- /** Lazily initialized copy of deserialized options. */
- private transient PipelineOptions pipelineOptions;
-
- public SerializedPipelineOptions(PipelineOptions options) {
- checkNotNull(options, "PipelineOptions must not be null.");
-
- try (ByteArrayOutputStream baos = new ByteArrayOutputStream()) {
- new ObjectMapper().writeValue(baos, options);
- this.serializedOptions = baos.toByteArray();
- } catch (Exception e) {
- throw new RuntimeException("Couldn't serialize PipelineOptions.", e);
- }
-
- }
-
- public PipelineOptions getPipelineOptions() {
- if (pipelineOptions == null) {
- try {
- pipelineOptions = new ObjectMapper().readValue(serializedOptions, PipelineOptions.class);
-
- IOChannelUtils.registerIOFactoriesAllowOverride(pipelineOptions);
- FileSystems.setDefaultConfigInWorkers(pipelineOptions);
- } catch (IOException e) {
- throw new RuntimeException("Couldn't deserialize the PipelineOptions.", e);
- }
- }
-
- return pipelineOptions;
- }
-}
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/utils/package-info.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/utils/package-info.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/utils/package-info.java
deleted file mode 100644
index 5dedd53..0000000
--- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/utils/package-info.java
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * Internal implementation of the Beam runner for Apache Flink.
- */
-package org.apache.beam.runners.flink.translation.utils;
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/DataInputViewWrapper.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/DataInputViewWrapper.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/DataInputViewWrapper.java
deleted file mode 100644
index 82a2c4e..0000000
--- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/DataInputViewWrapper.java
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.beam.runners.flink.translation.wrappers;
-
-import java.io.EOFException;
-import java.io.IOException;
-import java.io.InputStream;
-import org.apache.flink.core.memory.DataInputView;
-
-/**
- * Wrapper for {@link DataInputView}. We need this because Flink reads data using a
- * {@link org.apache.flink.core.memory.DataInputView} while
- * Dataflow {@link org.apache.beam.sdk.coders.Coder}s expect an
- * {@link java.io.InputStream}.
- */
-public class DataInputViewWrapper extends InputStream {
-
- private DataInputView inputView;
-
- public DataInputViewWrapper(DataInputView inputView) {
- this.inputView = inputView;
- }
-
- public void setInputView(DataInputView inputView) {
- this.inputView = inputView;
- }
-
- @Override
- public int read() throws IOException {
- try {
- return inputView.readUnsignedByte();
- } catch (EOFException e) {
- // translate between DataInput and InputStream,
- // DataInput signals EOF by exception, InputStream does it by returning -1
- return -1;
- }
- }
-
- @Override
- public int read(byte[] b, int off, int len) throws IOException {
- return inputView.read(b, off, len);
- }
-}
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/DataOutputViewWrapper.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/DataOutputViewWrapper.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/DataOutputViewWrapper.java
deleted file mode 100644
index f2d9db2..0000000
--- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/DataOutputViewWrapper.java
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.beam.runners.flink.translation.wrappers;
-
-import java.io.IOException;
-import java.io.OutputStream;
-import org.apache.flink.core.memory.DataOutputView;
-
-/**
- * Wrapper for {@link org.apache.flink.core.memory.DataOutputView}. We need this because
- * Flink writes data using a {@link org.apache.flink.core.memory.DataInputView} while
- * Dataflow {@link org.apache.beam.sdk.coders.Coder}s expect an
- * {@link java.io.OutputStream}.
- */
-public class DataOutputViewWrapper extends OutputStream {
-
- private DataOutputView outputView;
-
- public DataOutputViewWrapper(DataOutputView outputView) {
- this.outputView = outputView;
- }
-
- public void setOutputView(DataOutputView outputView) {
- this.outputView = outputView;
- }
-
- @Override
- public void write(int b) throws IOException {
- outputView.write(b);
- }
-
- @Override
- public void write(byte[] b, int off, int len) throws IOException {
- outputView.write(b, off, len);
- }
-}
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/SerializableFnAggregatorWrapper.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/SerializableFnAggregatorWrapper.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/SerializableFnAggregatorWrapper.java
deleted file mode 100644
index 70d97e3..0000000
--- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/SerializableFnAggregatorWrapper.java
+++ /dev/null
@@ -1,98 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.beam.runners.flink.translation.wrappers;
-
-import com.google.common.collect.ImmutableList;
-import com.google.common.collect.Lists;
-import java.io.Serializable;
-import org.apache.beam.sdk.transforms.Aggregator;
-import org.apache.beam.sdk.transforms.Combine;
-import org.apache.flink.api.common.accumulators.Accumulator;
-
-/**
- * Wrapper that wraps a {@link org.apache.beam.sdk.transforms.Combine.CombineFn}
- * in a Flink {@link org.apache.flink.api.common.accumulators.Accumulator} for using
- * the function as an aggregator in a {@link org.apache.beam.sdk.transforms.ParDo}
- * operation.
- */
-public class SerializableFnAggregatorWrapper<InputT, OutputT>
- implements Aggregator<InputT, OutputT>, Accumulator<InputT, Serializable> {
-
- private OutputT aa;
- private Combine.CombineFn<InputT, ?, OutputT> combiner;
-
- public SerializableFnAggregatorWrapper(Combine.CombineFn<InputT, ?, OutputT> combiner) {
- this.combiner = combiner;
- resetLocal();
- }
-
- @Override
- @SuppressWarnings("unchecked")
- public void add(InputT value) {
- this.aa = combiner.apply(ImmutableList.of((InputT) aa, value));
- }
-
- @Override
- public Serializable getLocalValue() {
- return (Serializable) aa;
- }
-
- @Override
- public void resetLocal() {
- this.aa = combiner.apply(ImmutableList.<InputT>of());
- }
-
- @Override
- @SuppressWarnings("unchecked")
- public void merge(Accumulator<InputT, Serializable> other) {
- this.aa = combiner.apply(ImmutableList.of((InputT) aa, (InputT) other.getLocalValue()));
- }
-
- @Override
- public void addValue(InputT value) {
- add(value);
- }
-
- @Override
- public String getName() {
- return "Aggregator :" + combiner.toString();
- }
-
- @Override
- public Combine.CombineFn<InputT, ?, OutputT> getCombineFn() {
- return combiner;
- }
-
- @Override
- public Accumulator<InputT, Serializable> clone() {
- try {
- super.clone();
- } catch (CloneNotSupportedException e) {
- // Flink Accumulators cannot throw CloneNotSupportedException, work around that.
- throw new RuntimeException(e);
- }
-
- // copy it by merging
- OutputT resultCopy = combiner.apply(Lists.newArrayList((InputT) aa));
- SerializableFnAggregatorWrapper<InputT, OutputT> result = new
- SerializableFnAggregatorWrapper<>(combiner);
-
- result.aa = resultCopy;
- return result;
- }
-}
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/SourceInputFormat.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/SourceInputFormat.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/SourceInputFormat.java
deleted file mode 100644
index a87472b..0000000
--- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/SourceInputFormat.java
+++ /dev/null
@@ -1,150 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.beam.runners.flink.translation.wrappers;
-
-import java.io.IOException;
-import java.util.List;
-import org.apache.beam.runners.flink.translation.utils.SerializedPipelineOptions;
-import org.apache.beam.sdk.io.BoundedSource;
-import org.apache.beam.sdk.io.Source;
-import org.apache.beam.sdk.options.PipelineOptions;
-import org.apache.beam.sdk.transforms.windowing.GlobalWindow;
-import org.apache.beam.sdk.transforms.windowing.PaneInfo;
-import org.apache.beam.sdk.util.WindowedValue;
-import org.apache.flink.api.common.io.DefaultInputSplitAssigner;
-import org.apache.flink.api.common.io.InputFormat;
-import org.apache.flink.api.common.io.statistics.BaseStatistics;
-import org.apache.flink.configuration.Configuration;
-import org.apache.flink.core.io.InputSplitAssigner;
-import org.joda.time.Instant;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-
-/**
- * Wrapper for executing a {@link Source} as a Flink {@link InputFormat}.
- */
-public class SourceInputFormat<T>
- implements InputFormat<WindowedValue<T>, SourceInputSplit<T>> {
- private static final Logger LOG = LoggerFactory.getLogger(SourceInputFormat.class);
-
- private final BoundedSource<T> initialSource;
-
- private transient PipelineOptions options;
- private final SerializedPipelineOptions serializedOptions;
-
- private transient BoundedSource.BoundedReader<T> reader;
- private boolean inputAvailable = false;
-
- public SourceInputFormat(BoundedSource<T> initialSource, PipelineOptions options) {
- this.initialSource = initialSource;
- this.serializedOptions = new SerializedPipelineOptions(options);
- }
-
- @Override
- public void configure(Configuration configuration) {
- options = serializedOptions.getPipelineOptions();
- }
-
- @Override
- public void open(SourceInputSplit<T> sourceInputSplit) throws IOException {
- reader = ((BoundedSource<T>) sourceInputSplit.getSource()).createReader(options);
- inputAvailable = reader.start();
- }
-
- @Override
- public BaseStatistics getStatistics(BaseStatistics baseStatistics) throws IOException {
- try {
- final long estimatedSize = initialSource.getEstimatedSizeBytes(options);
-
- return new BaseStatistics() {
- @Override
- public long getTotalInputSize() {
- return estimatedSize;
- }
-
- @Override
- public long getNumberOfRecords() {
- return BaseStatistics.NUM_RECORDS_UNKNOWN;
- }
-
- @Override
- public float getAverageRecordWidth() {
- return BaseStatistics.AVG_RECORD_BYTES_UNKNOWN;
- }
- };
- } catch (Exception e) {
- LOG.warn("Could not read Source statistics: {}", e);
- }
-
- return null;
- }
-
- @Override
- @SuppressWarnings("unchecked")
- public SourceInputSplit<T>[] createInputSplits(int numSplits) throws IOException {
- try {
- long desiredSizeBytes = initialSource.getEstimatedSizeBytes(options) / numSplits;
- List<? extends Source<T>> shards =
- initialSource.split(desiredSizeBytes, options);
- int numShards = shards.size();
- SourceInputSplit<T>[] sourceInputSplits = new SourceInputSplit[numShards];
- for (int i = 0; i < numShards; i++) {
- sourceInputSplits[i] = new SourceInputSplit<>(shards.get(i), i);
- }
- return sourceInputSplits;
- } catch (Exception e) {
- throw new IOException("Could not create input splits from Source.", e);
- }
- }
-
- @Override
- public InputSplitAssigner getInputSplitAssigner(final SourceInputSplit[] sourceInputSplits) {
- return new DefaultInputSplitAssigner(sourceInputSplits);
- }
-
-
- @Override
- public boolean reachedEnd() throws IOException {
- return !inputAvailable;
- }
-
- @Override
- public WindowedValue<T> nextRecord(WindowedValue<T> t) throws IOException {
- if (inputAvailable) {
- final T current = reader.getCurrent();
- final Instant timestamp = reader.getCurrentTimestamp();
- // advance reader to have a record ready next time
- inputAvailable = reader.advance();
- return WindowedValue.of(
- current,
- timestamp,
- GlobalWindow.INSTANCE, PaneInfo.NO_FIRING);
- }
-
- return null;
- }
-
- @Override
- public void close() throws IOException {
- // TODO null check can be removed once FLINK-3796 is fixed
- if (reader != null) {
- reader.close();
- }
- }
-}
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/SourceInputSplit.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/SourceInputSplit.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/SourceInputSplit.java
deleted file mode 100644
index e4a7386..0000000
--- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/SourceInputSplit.java
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.beam.runners.flink.translation.wrappers;
-
-import org.apache.beam.sdk.io.Source;
-import org.apache.flink.core.io.InputSplit;
-
-/**
- * {@link org.apache.flink.core.io.InputSplit} for
- * {@link org.apache.beam.runners.flink.translation.wrappers.SourceInputFormat}. We pass
- * the sharded Source around in the input split because Sources simply split up into several
- * Sources for sharding. This is different to how Flink creates a separate InputSplit from
- * an InputFormat.
- */
-public class SourceInputSplit<T> implements InputSplit {
-
- private Source<T> source;
- private int splitNumber;
-
- public SourceInputSplit() {
- }
-
- public SourceInputSplit(Source<T> source, int splitNumber) {
- this.source = source;
- this.splitNumber = splitNumber;
- }
-
- @Override
- public int getSplitNumber() {
- return splitNumber;
- }
-
- public Source<T> getSource() {
- return source;
- }
-
-}
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/package-info.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/package-info.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/package-info.java
deleted file mode 100644
index 72f7deb..0000000
--- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/package-info.java
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * Internal implementation of the Beam runner for Apache Flink.
- */
-package org.apache.beam.runners.flink.translation.wrappers;
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/DoFnOperator.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/DoFnOperator.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/DoFnOperator.java
deleted file mode 100644
index 8a09286..0000000
--- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/DoFnOperator.java
+++ /dev/null
@@ -1,774 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.beam.runners.flink.translation.wrappers.streaming;
-
-import static org.apache.flink.util.Preconditions.checkArgument;
-
-import com.google.common.base.Optional;
-import com.google.common.collect.Iterables;
-import java.io.DataInputStream;
-import java.io.DataOutputStream;
-import java.io.IOException;
-import java.io.Serializable;
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.List;
-import java.util.Map;
-import javax.annotation.Nullable;
-import org.apache.beam.runners.core.AggregatorFactory;
-import org.apache.beam.runners.core.DoFnRunner;
-import org.apache.beam.runners.core.DoFnRunners;
-import org.apache.beam.runners.core.ExecutionContext;
-import org.apache.beam.runners.core.GroupAlsoByWindowViaWindowSetNewDoFn;
-import org.apache.beam.runners.core.PushbackSideInputDoFnRunner;
-import org.apache.beam.runners.core.SideInputHandler;
-import org.apache.beam.runners.core.SimplePushbackSideInputDoFnRunner;
-import org.apache.beam.runners.core.StateInternals;
-import org.apache.beam.runners.core.StateNamespace;
-import org.apache.beam.runners.core.StateNamespaces;
-import org.apache.beam.runners.core.StateNamespaces.WindowNamespace;
-import org.apache.beam.runners.core.StateTag;
-import org.apache.beam.runners.core.StateTags;
-import org.apache.beam.runners.core.StatefulDoFnRunner;
-import org.apache.beam.runners.core.TimerInternals;
-import org.apache.beam.runners.core.TimerInternals.TimerData;
-import org.apache.beam.runners.flink.translation.types.CoderTypeSerializer;
-import org.apache.beam.runners.flink.translation.utils.SerializedPipelineOptions;
-import org.apache.beam.runners.flink.translation.wrappers.SerializableFnAggregatorWrapper;
-import org.apache.beam.runners.flink.translation.wrappers.streaming.state.FlinkBroadcastStateInternals;
-import org.apache.beam.runners.flink.translation.wrappers.streaming.state.FlinkKeyGroupStateInternals;
-import org.apache.beam.runners.flink.translation.wrappers.streaming.state.FlinkSplitStateInternals;
-import org.apache.beam.runners.flink.translation.wrappers.streaming.state.FlinkStateInternals;
-import org.apache.beam.runners.flink.translation.wrappers.streaming.state.KeyGroupCheckpointedOperator;
-import org.apache.beam.sdk.coders.Coder;
-import org.apache.beam.sdk.options.PipelineOptions;
-import org.apache.beam.sdk.transforms.Aggregator;
-import org.apache.beam.sdk.transforms.Combine;
-import org.apache.beam.sdk.transforms.DoFn;
-import org.apache.beam.sdk.transforms.join.RawUnionValue;
-import org.apache.beam.sdk.transforms.reflect.DoFnInvoker;
-import org.apache.beam.sdk.transforms.reflect.DoFnInvokers;
-import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
-import org.apache.beam.sdk.util.NullSideInputReader;
-import org.apache.beam.sdk.util.SideInputReader;
-import org.apache.beam.sdk.util.TimeDomain;
-import org.apache.beam.sdk.util.WindowedValue;
-import org.apache.beam.sdk.util.WindowingStrategy;
-import org.apache.beam.sdk.util.state.BagState;
-import org.apache.beam.sdk.values.PCollectionView;
-import org.apache.beam.sdk.values.TupleTag;
-import org.apache.flink.core.memory.DataInputViewStreamWrapper;
-import org.apache.flink.core.memory.DataOutputViewStreamWrapper;
-import org.apache.flink.runtime.state.KeyGroupStatePartitionStreamProvider;
-import org.apache.flink.runtime.state.KeyGroupsList;
-import org.apache.flink.runtime.state.KeyedStateBackend;
-import org.apache.flink.runtime.state.KeyedStateCheckpointOutputStream;
-import org.apache.flink.runtime.state.StateInitializationContext;
-import org.apache.flink.runtime.state.StateSnapshotContext;
-import org.apache.flink.streaming.api.operators.AbstractStreamOperator;
-import org.apache.flink.streaming.api.operators.ChainingStrategy;
-import org.apache.flink.streaming.api.operators.HeapInternalTimerService;
-import org.apache.flink.streaming.api.operators.InternalTimer;
-import org.apache.flink.streaming.api.operators.OneInputStreamOperator;
-import org.apache.flink.streaming.api.operators.Output;
-import org.apache.flink.streaming.api.operators.Triggerable;
-import org.apache.flink.streaming.api.operators.TwoInputStreamOperator;
-import org.apache.flink.streaming.api.watermark.Watermark;
-import org.apache.flink.streaming.runtime.streamrecord.StreamRecord;
-import org.joda.time.Instant;
-
-/**
- * Flink operator for executing {@link DoFn DoFns}.
- *
- * @param <InputT> the input type of the {@link DoFn}
- * @param <FnOutputT> the output type of the {@link DoFn}
- * @param <OutputT> the output type of the operator; this can be different from the fn output
- * type when we have additional tagged outputs
- */
-public class DoFnOperator<InputT, FnOutputT, OutputT>
- extends AbstractStreamOperator<OutputT>
- implements OneInputStreamOperator<WindowedValue<InputT>, OutputT>,
- TwoInputStreamOperator<WindowedValue<InputT>, RawUnionValue, OutputT>,
- KeyGroupCheckpointedOperator, Triggerable<Object, TimerData> {
-
- protected DoFn<InputT, FnOutputT> doFn;
-
- protected final SerializedPipelineOptions serializedOptions;
-
- protected final TupleTag<FnOutputT> mainOutputTag;
- protected final List<TupleTag<?>> additionalOutputTags;
-
- protected final Collection<PCollectionView<?>> sideInputs;
- protected final Map<Integer, PCollectionView<?>> sideInputTagMapping;
-
- protected final WindowingStrategy<?, ?> windowingStrategy;
-
- protected final OutputManagerFactory<OutputT> outputManagerFactory;
-
- protected transient DoFnRunner<InputT, FnOutputT> doFnRunner;
- protected transient PushbackSideInputDoFnRunner<InputT, FnOutputT> pushbackDoFnRunner;
-
- protected transient SideInputHandler sideInputHandler;
-
- protected transient SideInputReader sideInputReader;
-
- protected transient DoFnRunners.OutputManager outputManager;
-
- private transient DoFnInvoker<InputT, FnOutputT> doFnInvoker;
-
- protected transient long currentInputWatermark;
-
- protected transient long currentOutputWatermark;
-
- private transient StateTag<Object, BagState<WindowedValue<InputT>>> pushedBackTag;
-
- protected transient FlinkStateInternals<?> stateInternals;
-
- private Coder<WindowedValue<InputT>> inputCoder;
-
- private final Coder<?> keyCoder;
-
- private final TimerInternals.TimerDataCoder timerCoder;
-
- protected transient HeapInternalTimerService<?, TimerInternals.TimerData> timerService;
-
- protected transient FlinkTimerInternals timerInternals;
-
- private transient StateInternals<?> pushbackStateInternals;
-
- private transient Optional<Long> pushedBackWatermark;
-
- public DoFnOperator(
- DoFn<InputT, FnOutputT> doFn,
- Coder<WindowedValue<InputT>> inputCoder,
- TupleTag<FnOutputT> mainOutputTag,
- List<TupleTag<?>> additionalOutputTags,
- OutputManagerFactory<OutputT> outputManagerFactory,
- WindowingStrategy<?, ?> windowingStrategy,
- Map<Integer, PCollectionView<?>> sideInputTagMapping,
- Collection<PCollectionView<?>> sideInputs,
- PipelineOptions options,
- Coder<?> keyCoder) {
- this.doFn = doFn;
- this.inputCoder = inputCoder;
- this.mainOutputTag = mainOutputTag;
- this.additionalOutputTags = additionalOutputTags;
- this.sideInputTagMapping = sideInputTagMapping;
- this.sideInputs = sideInputs;
- this.serializedOptions = new SerializedPipelineOptions(options);
- this.windowingStrategy = windowingStrategy;
- this.outputManagerFactory = outputManagerFactory;
-
- setChainingStrategy(ChainingStrategy.ALWAYS);
-
- this.keyCoder = keyCoder;
-
- this.timerCoder =
- TimerInternals.TimerDataCoder.of(windowingStrategy.getWindowFn().windowCoder());
- }
-
- private ExecutionContext.StepContext createStepContext() {
- return new StepContext();
- }
-
- // allow overriding this in WindowDoFnOperator because this one dynamically creates
- // the DoFn
- protected DoFn<InputT, FnOutputT> getDoFn() {
- return doFn;
- }
-
- @Override
- public void open() throws Exception {
- super.open();
-
- currentInputWatermark = Long.MIN_VALUE;
- currentOutputWatermark = Long.MIN_VALUE;
-
- AggregatorFactory aggregatorFactory = new AggregatorFactory() {
- @Override
- public <InputT, AccumT, OutputT> Aggregator<InputT, OutputT> createAggregatorForDoFn(
- Class<?> fnClass,
- ExecutionContext.StepContext stepContext,
- String aggregatorName,
- Combine.CombineFn<InputT, AccumT, OutputT> combine) {
-
- @SuppressWarnings("unchecked")
- SerializableFnAggregatorWrapper<InputT, OutputT> result =
- (SerializableFnAggregatorWrapper<InputT, OutputT>)
- getRuntimeContext().getAccumulator(aggregatorName);
-
- if (result == null) {
- result = new SerializableFnAggregatorWrapper<>(combine);
- getRuntimeContext().addAccumulator(aggregatorName, result);
- }
- return result;
- }
- };
-
- sideInputReader = NullSideInputReader.of(sideInputs);
-
- if (!sideInputs.isEmpty()) {
-
- pushedBackTag = StateTags.bag("pushed-back-values", inputCoder);
-
- FlinkBroadcastStateInternals sideInputStateInternals =
- new FlinkBroadcastStateInternals<>(
- getContainingTask().getIndexInSubtaskGroup(), getOperatorStateBackend());
-
- sideInputHandler = new SideInputHandler(sideInputs, sideInputStateInternals);
- sideInputReader = sideInputHandler;
-
- // maybe init by initializeState
- if (pushbackStateInternals == null) {
- if (keyCoder != null) {
- pushbackStateInternals = new FlinkKeyGroupStateInternals<>(keyCoder,
- getKeyedStateBackend());
- } else {
- pushbackStateInternals =
- new FlinkSplitStateInternals<Object>(getOperatorStateBackend());
- }
- }
-
- pushedBackWatermark = Optional.absent();
-
- }
-
- outputManager = outputManagerFactory.create(output);
-
- // StatefulParDo or WindowDoFn
- if (keyCoder != null) {
- stateInternals = new FlinkStateInternals<>((KeyedStateBackend) getKeyedStateBackend(),
- keyCoder);
-
- timerService = (HeapInternalTimerService<?, TimerInternals.TimerData>)
- getInternalTimerService("beam-timer", new CoderTypeSerializer<>(timerCoder), this);
-
- timerInternals = new FlinkTimerInternals();
-
- }
-
- // WindowDoFnOperator needs state and timers to construct its DoFn,
- // so this must wait until StateInternals and TimerInternals are ready.
- this.doFn = getDoFn();
- doFnInvoker = DoFnInvokers.invokerFor(doFn);
-
- doFnInvoker.invokeSetup();
-
- ExecutionContext.StepContext stepContext = createStepContext();
-
- doFnRunner = DoFnRunners.simpleRunner(
- serializedOptions.getPipelineOptions(),
- doFn,
- sideInputReader,
- outputManager,
- mainOutputTag,
- additionalOutputTags,
- stepContext,
- aggregatorFactory,
- windowingStrategy);
-
- if (doFn instanceof GroupAlsoByWindowViaWindowSetNewDoFn) {
- // When the doFn is this, we know it came from WindowDoFnOperator and
- // InputT = KeyedWorkItem<K, V>
- // OutputT = KV<K, V>
- //
- // for some K, V
-
-
- doFnRunner = DoFnRunners.lateDataDroppingRunner(
- (DoFnRunner) doFnRunner,
- stepContext,
- windowingStrategy,
- ((GroupAlsoByWindowViaWindowSetNewDoFn) doFn).getDroppedDueToLatenessAggregator());
- } else if (keyCoder != null) {
- // It is a stateful DoFn
-
- StatefulDoFnRunner.CleanupTimer cleanupTimer =
- new StatefulDoFnRunner.TimeInternalsCleanupTimer(
- stepContext.timerInternals(), windowingStrategy);
-
- // we don't know the window type
- @SuppressWarnings({"unchecked", "rawtypes"})
- Coder windowCoder = windowingStrategy.getWindowFn().windowCoder();
-
- @SuppressWarnings({"unchecked", "rawtypes"})
- StatefulDoFnRunner.StateCleaner<?> stateCleaner =
- new StatefulDoFnRunner.StateInternalsStateCleaner<>(
- doFn, stepContext.stateInternals(), windowCoder);
-
- doFnRunner = DoFnRunners.defaultStatefulDoFnRunner(
- doFn,
- doFnRunner,
- stepContext,
- aggregatorFactory,
- windowingStrategy,
- cleanupTimer,
- stateCleaner);
- }
-
- pushbackDoFnRunner =
- SimplePushbackSideInputDoFnRunner.create(doFnRunner, sideInputs, sideInputHandler);
- }
-
- @Override
- public void close() throws Exception {
- super.close();
- doFnInvoker.invokeTeardown();
- }
-
- protected final long getPushbackWatermarkHold() {
- // if we don't have side inputs we never hold the watermark
- if (sideInputs.isEmpty()) {
- return BoundedWindow.TIMESTAMP_MAX_VALUE.getMillis();
- }
-
- try {
- checkInitPushedBackWatermark();
- return pushedBackWatermark.get();
- } catch (Exception e) {
- throw new RuntimeException("Error retrieving pushed back watermark state.", e);
- }
- }
-
- private void checkInitPushedBackWatermark() {
- // init and restore from pushedBack state.
- // Not done in initializeState, because OperatorState is not ready.
- if (!pushedBackWatermark.isPresent()) {
-
- BagState<WindowedValue<InputT>> pushedBack =
- pushbackStateInternals.state(StateNamespaces.global(), pushedBackTag);
-
- long min = BoundedWindow.TIMESTAMP_MAX_VALUE.getMillis();
- for (WindowedValue<InputT> value : pushedBack.read()) {
- min = Math.min(min, value.getTimestamp().getMillis());
- }
- setPushedBackWatermark(min);
- }
- }
-
- @Override
- public final void processElement(
- StreamRecord<WindowedValue<InputT>> streamRecord) throws Exception {
- doFnRunner.startBundle();
- doFnRunner.processElement(streamRecord.getValue());
- doFnRunner.finishBundle();
- }
-
- private void setPushedBackWatermark(long watermark) {
- pushedBackWatermark = Optional.fromNullable(watermark);
- }
-
- @Override
- public final void processElement1(
- StreamRecord<WindowedValue<InputT>> streamRecord) throws Exception {
- pushbackDoFnRunner.startBundle();
- Iterable<WindowedValue<InputT>> justPushedBack =
- pushbackDoFnRunner.processElementInReadyWindows(streamRecord.getValue());
-
- BagState<WindowedValue<InputT>> pushedBack =
- pushbackStateInternals.state(StateNamespaces.global(), pushedBackTag);
-
- checkInitPushedBackWatermark();
-
- long min = pushedBackWatermark.get();
- for (WindowedValue<InputT> pushedBackValue : justPushedBack) {
- min = Math.min(min, pushedBackValue.getTimestamp().getMillis());
- pushedBack.add(pushedBackValue);
- }
- setPushedBackWatermark(min);
- pushbackDoFnRunner.finishBundle();
- }
-
- @Override
- public final void processElement2(
- StreamRecord<RawUnionValue> streamRecord) throws Exception {
- pushbackDoFnRunner.startBundle();
-
- @SuppressWarnings("unchecked")
- WindowedValue<Iterable<?>> value =
- (WindowedValue<Iterable<?>>) streamRecord.getValue().getValue();
-
- PCollectionView<?> sideInput = sideInputTagMapping.get(streamRecord.getValue().getUnionTag());
- sideInputHandler.addSideInputValue(sideInput, value);
-
- BagState<WindowedValue<InputT>> pushedBack =
- pushbackStateInternals.state(StateNamespaces.global(), pushedBackTag);
-
- List<WindowedValue<InputT>> newPushedBack = new ArrayList<>();
-
- Iterable<WindowedValue<InputT>> pushedBackContents = pushedBack.read();
- if (pushedBackContents != null) {
- for (WindowedValue<InputT> elem : pushedBackContents) {
-
- // we need to set the correct key in case the operator is
- // a (keyed) window operator
- setKeyContextElement1(new StreamRecord<>(elem));
-
- Iterable<WindowedValue<InputT>> justPushedBack =
- pushbackDoFnRunner.processElementInReadyWindows(elem);
- Iterables.addAll(newPushedBack, justPushedBack);
- }
- }
-
- pushedBack.clear();
- long min = BoundedWindow.TIMESTAMP_MAX_VALUE.getMillis();
- for (WindowedValue<InputT> pushedBackValue : newPushedBack) {
- min = Math.min(min, pushedBackValue.getTimestamp().getMillis());
- pushedBack.add(pushedBackValue);
- }
- setPushedBackWatermark(min);
-
- pushbackDoFnRunner.finishBundle();
-
- // maybe output a new watermark
- processWatermark1(new Watermark(currentInputWatermark));
- }
-
- @Override
- public void processWatermark(Watermark mark) throws Exception {
- processWatermark1(mark);
- }
-
- @Override
- public void processWatermark1(Watermark mark) throws Exception {
- if (keyCoder == null) {
- this.currentInputWatermark = mark.getTimestamp();
- long potentialOutputWatermark =
- Math.min(getPushbackWatermarkHold(), currentInputWatermark);
- if (potentialOutputWatermark > currentOutputWatermark) {
- currentOutputWatermark = potentialOutputWatermark;
- output.emitWatermark(new Watermark(currentOutputWatermark));
- }
- } else {
- // fireTimers, so we need startBundle.
- pushbackDoFnRunner.startBundle();
-
- this.currentInputWatermark = mark.getTimestamp();
-
- // hold back by the pushed back values waiting for side inputs
- long actualInputWatermark = Math.min(getPushbackWatermarkHold(), mark.getTimestamp());
-
- timerService.advanceWatermark(actualInputWatermark);
-
- Instant watermarkHold = stateInternals.watermarkHold();
-
- long combinedWatermarkHold = Math.min(watermarkHold.getMillis(), getPushbackWatermarkHold());
-
- long potentialOutputWatermark = Math.min(currentInputWatermark, combinedWatermarkHold);
-
- if (potentialOutputWatermark > currentOutputWatermark) {
- currentOutputWatermark = potentialOutputWatermark;
- output.emitWatermark(new Watermark(currentOutputWatermark));
- }
- pushbackDoFnRunner.finishBundle();
- }
- }
-
- @Override
- public void processWatermark2(Watermark mark) throws Exception {
- // ignore watermarks from the side-input input
- }
-
- @Override
- public void snapshotState(StateSnapshotContext context) throws Exception {
- // copy from AbstractStreamOperator
- if (getKeyedStateBackend() != null) {
- KeyedStateCheckpointOutputStream out;
-
- try {
- out = context.getRawKeyedOperatorStateOutput();
- } catch (Exception exception) {
- throw new Exception("Could not open raw keyed operator state stream for "
- + getOperatorName() + '.', exception);
- }
-
- try {
- KeyGroupsList allKeyGroups = out.getKeyGroupList();
- for (int keyGroupIdx : allKeyGroups) {
- out.startNewKeyGroup(keyGroupIdx);
-
- DataOutputViewStreamWrapper dov = new DataOutputViewStreamWrapper(out);
-
- // if (this instanceof KeyGroupCheckpointedOperator)
- snapshotKeyGroupState(keyGroupIdx, dov);
-
- // We can't get hold of all timer services, so we only snapshot our own.
- // This may be a plain DoFn that has no timer service at all.
- if (keyCoder != null) {
- timerService.snapshotTimersForKeyGroup(dov, keyGroupIdx);
- }
-
- }
- } catch (Exception exception) {
- throw new Exception("Could not write timer service of " + getOperatorName()
- + " to checkpoint state stream.", exception);
- } finally {
- try {
- out.close();
- } catch (Exception closeException) {
- LOG.warn("Could not close raw keyed operator state stream for {}. This "
- + "might have prevented deleting some state data.", getOperatorName(),
- closeException);
- }
- }
- }
- }
-
- @Override
- public void snapshotKeyGroupState(int keyGroupIndex, DataOutputStream out) throws Exception {
- if (!sideInputs.isEmpty() && keyCoder != null) {
- ((FlinkKeyGroupStateInternals) pushbackStateInternals).snapshotKeyGroupState(
- keyGroupIndex, out);
- }
- }
-
- @Override
- public void initializeState(StateInitializationContext context) throws Exception {
- if (getKeyedStateBackend() != null) {
- int totalKeyGroups = getKeyedStateBackend().getNumberOfKeyGroups();
- KeyGroupsList localKeyGroupRange = getKeyedStateBackend().getKeyGroupRange();
-
- for (KeyGroupStatePartitionStreamProvider streamProvider : context.getRawKeyedStateInputs()) {
- DataInputViewStreamWrapper div = new DataInputViewStreamWrapper(streamProvider.getStream());
-
- int keyGroupIdx = streamProvider.getKeyGroupId();
- checkArgument(localKeyGroupRange.contains(keyGroupIdx),
- "Key Group " + keyGroupIdx + " does not belong to the local range.");
-
- // if (this instanceof KeyGroupRestoringOperator)
- restoreKeyGroupState(keyGroupIdx, div);
-
- // We just initialize our timerService
- if (keyCoder != null) {
- if (timerService == null) {
- timerService = new HeapInternalTimerService<>(
- totalKeyGroups,
- localKeyGroupRange,
- this,
- getRuntimeContext().getProcessingTimeService());
- }
- timerService.restoreTimersForKeyGroup(div, keyGroupIdx, getUserCodeClassloader());
- }
- }
- }
- }
-
- @Override
- public void restoreKeyGroupState(int keyGroupIndex, DataInputStream in) throws Exception {
- if (!sideInputs.isEmpty() && keyCoder != null) {
- if (pushbackStateInternals == null) {
- pushbackStateInternals = new FlinkKeyGroupStateInternals<>(keyCoder,
- getKeyedStateBackend());
- }
- ((FlinkKeyGroupStateInternals) pushbackStateInternals)
- .restoreKeyGroupState(keyGroupIndex, in, getUserCodeClassloader());
- }
- }
-
- @Override
- public void onEventTime(InternalTimer<Object, TimerData> timer) throws Exception {
- fireTimer(timer);
- }
-
- @Override
- public void onProcessingTime(InternalTimer<Object, TimerData> timer) throws Exception {
- fireTimer(timer);
- }
-
- // allow overriding this in WindowDoFnOperator
- public void fireTimer(InternalTimer<?, TimerData> timer) {
- TimerInternals.TimerData timerData = timer.getNamespace();
- StateNamespace namespace = timerData.getNamespace();
- // This is a user timer, so namespace must be WindowNamespace
- checkArgument(namespace instanceof WindowNamespace);
- BoundedWindow window = ((WindowNamespace) namespace).getWindow();
- pushbackDoFnRunner.onTimer(timerData.getTimerId(), window,
- timerData.getTimestamp(), timerData.getDomain());
- }
-
- /**
- * Factory for creating an {@link DoFnRunners.OutputManager} from
- * a Flink {@link Output}.
- */
- interface OutputManagerFactory<OutputT> extends Serializable {
- DoFnRunners.OutputManager create(Output<StreamRecord<OutputT>> output);
- }
-
- /**
- * Default implementation of {@link OutputManagerFactory} that creates an
- * {@link DoFnRunners.OutputManager} that only writes to
- * a single logical output.
- */
- public static class DefaultOutputManagerFactory<OutputT>
- implements OutputManagerFactory<OutputT> {
- @Override
- public DoFnRunners.OutputManager create(final Output<StreamRecord<OutputT>> output) {
- return new DoFnRunners.OutputManager() {
- @Override
- public <T> void output(TupleTag<T> tag, WindowedValue<T> value) {
- // with tagged outputs we can't get around this because we don't
- // know our own output type...
- @SuppressWarnings("unchecked")
- OutputT castValue = (OutputT) value;
- output.collect(new StreamRecord<>(castValue));
- }
- };
- }
- }
-
- /**
- * Implementation of {@link OutputManagerFactory} that creates an
- * {@link DoFnRunners.OutputManager} that can write to multiple logical
- * outputs by unioning them in a {@link RawUnionValue}.
- */
- public static class MultiOutputOutputManagerFactory
- implements OutputManagerFactory<RawUnionValue> {
-
- Map<TupleTag<?>, Integer> mapping;
-
- public MultiOutputOutputManagerFactory(Map<TupleTag<?>, Integer> mapping) {
- this.mapping = mapping;
- }
-
- @Override
- public DoFnRunners.OutputManager create(final Output<StreamRecord<RawUnionValue>> output) {
- return new DoFnRunners.OutputManager() {
- @Override
- public <T> void output(TupleTag<T> tag, WindowedValue<T> value) {
- int intTag = mapping.get(tag);
- output.collect(new StreamRecord<>(new RawUnionValue(intTag, value)));
- }
- };
- }
- }
-
- /**
- * {@link StepContext} for running {@link DoFn DoFns} on Flink. This does not allow
- * accessing state or timer internals.
- */
- protected class StepContext implements ExecutionContext.StepContext {
-
- @Override
- public String getStepName() {
- return null;
- }
-
- @Override
- public String getTransformName() {
- return null;
- }
-
- @Override
- public void noteOutput(WindowedValue<?> output) {}
-
- @Override
- public void noteOutput(TupleTag<?> tag, WindowedValue<?> output) {}
-
- @Override
- public <T, W extends BoundedWindow> void writePCollectionViewData(
- TupleTag<?> tag,
- Iterable<WindowedValue<T>> data,
- Coder<Iterable<WindowedValue<T>>> dataCoder,
- W window,
- Coder<W> windowCoder) throws IOException {
- throw new UnsupportedOperationException("Writing side-input data is not supported.");
- }
-
- @Override
- public StateInternals<?> stateInternals() {
- return stateInternals;
- }
-
- @Override
- public TimerInternals timerInternals() {
- return timerInternals;
- }
- }
-
- private class FlinkTimerInternals implements TimerInternals {
-
- @Override
- public void setTimer(
- StateNamespace namespace, String timerId, Instant target, TimeDomain timeDomain) {
- setTimer(TimerData.of(timerId, namespace, target, timeDomain));
- }
-
- @Deprecated
- @Override
- public void setTimer(TimerData timerKey) {
- long time = timerKey.getTimestamp().getMillis();
- if (timerKey.getDomain().equals(TimeDomain.EVENT_TIME)) {
- timerService.registerEventTimeTimer(timerKey, time);
- } else if (timerKey.getDomain().equals(TimeDomain.PROCESSING_TIME)) {
- timerService.registerProcessingTimeTimer(timerKey, time);
- } else {
- throw new UnsupportedOperationException(
- "Unsupported time domain: " + timerKey.getDomain());
- }
- }
-
- @Deprecated
- @Override
- public void deleteTimer(StateNamespace namespace, String timerId) {
- throw new UnsupportedOperationException(
- "Canceling of a timer by ID is not yet supported.");
- }
-
- @Override
- public void deleteTimer(StateNamespace namespace, String timerId, TimeDomain timeDomain) {
- throw new UnsupportedOperationException(
- "Canceling of a timer by ID is not yet supported.");
- }
-
- @Deprecated
- @Override
- public void deleteTimer(TimerData timerKey) {
- long time = timerKey.getTimestamp().getMillis();
- if (timerKey.getDomain().equals(TimeDomain.EVENT_TIME)) {
- timerService.deleteEventTimeTimer(timerKey, time);
- } else if (timerKey.getDomain().equals(TimeDomain.PROCESSING_TIME)) {
- timerService.deleteProcessingTimeTimer(timerKey, time);
- } else {
- throw new UnsupportedOperationException(
- "Unsupported time domain: " + timerKey.getDomain());
- }
- }
-
- @Override
- public Instant currentProcessingTime() {
- return new Instant(timerService.currentProcessingTime());
- }
-
- @Nullable
- @Override
- public Instant currentSynchronizedProcessingTime() {
- return new Instant(timerService.currentProcessingTime());
- }
-
- @Override
- public Instant currentInputWatermarkTime() {
- return new Instant(Math.min(currentInputWatermark, getPushbackWatermarkHold()));
- }
-
- @Nullable
- @Override
- public Instant currentOutputWatermarkTime() {
- return new Instant(currentOutputWatermark);
- }
- }
-}
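The pushback bookkeeping above always keeps the watermark hold equal to the minimum timestamp across elements pushed back while their side inputs are not yet ready, falling back to TIMESTAMP_MAX_VALUE when nothing is pushed back. A condensed, self-contained sketch of that computation (not part of the commit, using a plain list instead of BagState):

import java.util.List;
import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
import org.apache.beam.sdk.util.WindowedValue;

class PushbackHoldSketch {
  // Minimum timestamp over pushed-back elements, or TIMESTAMP_MAX_VALUE if none:
  // this is the value the operator uses to hold back the output watermark.
  static <T> long pushbackWatermarkHold(List<WindowedValue<T>> pushedBack) {
    long min = BoundedWindow.TIMESTAMP_MAX_VALUE.getMillis();
    for (WindowedValue<T> value : pushedBack) {
      min = Math.min(min, value.getTimestamp().getMillis());
    }
    return min;
  }
}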
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/KvToByteBufferKeySelector.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/KvToByteBufferKeySelector.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/KvToByteBufferKeySelector.java
deleted file mode 100644
index dce2e68..0000000
--- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/KvToByteBufferKeySelector.java
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.beam.runners.flink.translation.wrappers.streaming;
-
-import java.nio.ByteBuffer;
-import org.apache.beam.sdk.coders.Coder;
-import org.apache.beam.sdk.util.CoderUtils;
-import org.apache.beam.sdk.util.WindowedValue;
-import org.apache.beam.sdk.values.KV;
-import org.apache.flink.api.common.typeinfo.TypeInformation;
-import org.apache.flink.api.java.functions.KeySelector;
-import org.apache.flink.api.java.typeutils.GenericTypeInfo;
-import org.apache.flink.api.java.typeutils.ResultTypeQueryable;
-
-/**
- * {@link KeySelector} that retrieves a key from a {@link KV}. This will return
- * the key as encoded by the provided {@link Coder} in a {@link ByteBuffer}. This ensures
- * that all key comparisons/hashing happen on the encoded form.
- */
-public class KvToByteBufferKeySelector<K, V>
- implements KeySelector<WindowedValue<KV<K, V>>, ByteBuffer>,
- ResultTypeQueryable<ByteBuffer> {
-
- private final Coder<K> keyCoder;
-
- public KvToByteBufferKeySelector(Coder<K> keyCoder) {
- this.keyCoder = keyCoder;
- }
-
- @Override
- public ByteBuffer getKey(WindowedValue<KV<K, V>> value) throws Exception {
- K key = value.getValue().getKey();
- byte[] keyBytes = CoderUtils.encodeToByteArray(keyCoder, key);
- return ByteBuffer.wrap(keyBytes);
- }
-
- @Override
- public TypeInformation<ByteBuffer> getProducedType() {
- return new GenericTypeInfo<>(ByteBuffer.class);
- }
-}
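As a usage sketch (not part of the commit), the selector is typically handed to Flink's keyBy so that grouping happens on the coder-encoded key bytes; the stream element type and key coder below are assumptions:

import java.nio.ByteBuffer;
import org.apache.beam.sdk.coders.StringUtf8Coder;
import org.apache.beam.sdk.util.WindowedValue;
import org.apache.beam.sdk.values.KV;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.KeyedStream;

class KeyByEncodedKeyExample {
  // Group a stream of windowed KVs by the encoded String key, so hashing and
  // comparison happen on bytes rather than on Java object equality.
  static KeyedStream<WindowedValue<KV<String, Long>>, ByteBuffer> keyByEncodedKey(
      DataStream<WindowedValue<KV<String, Long>>> input) {
    return input.keyBy(new KvToByteBufferKeySelector<String, Long>(StringUtf8Coder.of()));
  }
}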
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/SingletonKeyedWorkItem.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/SingletonKeyedWorkItem.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/SingletonKeyedWorkItem.java
deleted file mode 100644
index e843660..0000000
--- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/SingletonKeyedWorkItem.java
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.beam.runners.flink.translation.wrappers.streaming;
-
-import java.util.Collections;
-import org.apache.beam.runners.core.KeyedWorkItem;
-import org.apache.beam.runners.core.TimerInternals;
-import org.apache.beam.sdk.util.WindowedValue;
-
-/**
- * Singleton keyed work item.
- */
-public class SingletonKeyedWorkItem<K, ElemT> implements KeyedWorkItem<K, ElemT> {
-
- final K key;
- final WindowedValue<ElemT> value;
-
- public SingletonKeyedWorkItem(K key, WindowedValue<ElemT> value) {
- this.key = key;
- this.value = value;
- }
-
- @Override
- public K key() {
- return key;
- }
-
- public WindowedValue<ElemT> value() {
- return value;
- }
-
- @Override
- public Iterable<TimerInternals.TimerData> timersIterable() {
- return Collections.EMPTY_LIST;
- }
-
- @Override
- public Iterable<WindowedValue<ElemT>> elementsIterable() {
- return Collections.singletonList(value);
- }
-}
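For illustration (not part of the commit), wrapping a single element for a given key is a one-liner; the key and value below are assumptions:

import org.apache.beam.sdk.util.WindowedValue;

class SingletonWorkItemExample {
  // One value for key "user-1" becomes a one-element work item: elementsIterable()
  // yields exactly that value and timersIterable() is empty.
  static SingletonKeyedWorkItem<String, Long> wrap() {
    WindowedValue<Long> value = WindowedValue.valueInGlobalWindow(42L);
    return new SingletonKeyedWorkItem<>("user-1", value);
  }
}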
[03/50] [abbrv] beam git commit: Minor cleanups in ParDoEvaluator
Posted by dh...@apache.org.
Minor cleanups in ParDoEvaluator
Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/1cc16b0d
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/1cc16b0d
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/1cc16b0d
Branch: refs/heads/DSL_SQL
Commit: 1cc16b0d6cea7b01b01427758eaf427cc29635b6
Parents: 3fd8890
Author: Eugene Kirpichov <ki...@google.com>
Authored: Mon Apr 17 12:25:02 2017 -0700
Committer: Eugene Kirpichov <ki...@google.com>
Committed: Tue Apr 18 18:02:06 2017 -0700
----------------------------------------------------------------------
...oFnLifecycleManagerRemovingTransformEvaluator.java | 6 +++---
.../apache/beam/runners/direct/ParDoEvaluator.java | 14 +++++---------
.../beam/runners/direct/ParDoEvaluatorFactory.java | 2 +-
.../SplittableProcessElementsEvaluatorFactory.java | 2 +-
...ifecycleManagerRemovingTransformEvaluatorTest.java | 8 ++++----
.../beam/runners/direct/ParDoEvaluatorTest.java | 4 ++--
6 files changed, 16 insertions(+), 20 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/beam/blob/1cc16b0d/runners/direct-java/src/main/java/org/apache/beam/runners/direct/DoFnLifecycleManagerRemovingTransformEvaluator.java
----------------------------------------------------------------------
diff --git a/runners/direct-java/src/main/java/org/apache/beam/runners/direct/DoFnLifecycleManagerRemovingTransformEvaluator.java b/runners/direct-java/src/main/java/org/apache/beam/runners/direct/DoFnLifecycleManagerRemovingTransformEvaluator.java
index 9bcd569..e537962 100644
--- a/runners/direct-java/src/main/java/org/apache/beam/runners/direct/DoFnLifecycleManagerRemovingTransformEvaluator.java
+++ b/runners/direct-java/src/main/java/org/apache/beam/runners/direct/DoFnLifecycleManagerRemovingTransformEvaluator.java
@@ -31,16 +31,16 @@ import org.slf4j.LoggerFactory;
class DoFnLifecycleManagerRemovingTransformEvaluator<InputT> implements TransformEvaluator<InputT> {
private static final Logger LOG =
LoggerFactory.getLogger(DoFnLifecycleManagerRemovingTransformEvaluator.class);
- private final ParDoEvaluator<InputT, ?> underlying;
+ private final ParDoEvaluator<InputT> underlying;
private final DoFnLifecycleManager lifecycleManager;
public static <InputT> DoFnLifecycleManagerRemovingTransformEvaluator<InputT> wrapping(
- ParDoEvaluator<InputT, ?> underlying, DoFnLifecycleManager lifecycleManager) {
+ ParDoEvaluator<InputT> underlying, DoFnLifecycleManager lifecycleManager) {
return new DoFnLifecycleManagerRemovingTransformEvaluator<>(underlying, lifecycleManager);
}
private DoFnLifecycleManagerRemovingTransformEvaluator(
- ParDoEvaluator<InputT, ?> underlying, DoFnLifecycleManager lifecycleManager) {
+ ParDoEvaluator<InputT> underlying, DoFnLifecycleManager lifecycleManager) {
this.underlying = underlying;
this.lifecycleManager = lifecycleManager;
}
http://git-wip-us.apache.org/repos/asf/beam/blob/1cc16b0d/runners/direct-java/src/main/java/org/apache/beam/runners/direct/ParDoEvaluator.java
----------------------------------------------------------------------
diff --git a/runners/direct-java/src/main/java/org/apache/beam/runners/direct/ParDoEvaluator.java b/runners/direct-java/src/main/java/org/apache/beam/runners/direct/ParDoEvaluator.java
index 49d0723..131716f 100644
--- a/runners/direct-java/src/main/java/org/apache/beam/runners/direct/ParDoEvaluator.java
+++ b/runners/direct-java/src/main/java/org/apache/beam/runners/direct/ParDoEvaluator.java
@@ -40,9 +40,9 @@ import org.apache.beam.sdk.values.PCollection;
import org.apache.beam.sdk.values.PCollectionView;
import org.apache.beam.sdk.values.TupleTag;
-class ParDoEvaluator<InputT, OutputT> implements TransformEvaluator<InputT> {
+class ParDoEvaluator<InputT> implements TransformEvaluator<InputT> {
- public static <InputT, OutputT> ParDoEvaluator<InputT, OutputT> create(
+ public static <InputT, OutputT> ParDoEvaluator<InputT> create(
EvaluationContext evaluationContext,
DirectStepContext stepContext,
AppliedPTransform<?, ?, ?> application,
@@ -93,13 +93,11 @@ class ParDoEvaluator<InputT, OutputT> implements TransformEvaluator<InputT> {
throw UserCodeException.wrap(e);
}
- return new ParDoEvaluator<>(
- evaluationContext, runner, application, aggregatorChanges, outputManager, stepContext);
+ return new ParDoEvaluator<>(runner, application, aggregatorChanges, outputManager, stepContext);
}
////////////////////////////////////////////////////////////////////////////////////////////////
- private final EvaluationContext evaluationContext;
private final PushbackSideInputDoFnRunner<InputT, ?> fnRunner;
private final AppliedPTransform<?, ?, ?> transform;
private final AggregatorContainer.Mutator aggregatorChanges;
@@ -109,13 +107,11 @@ class ParDoEvaluator<InputT, OutputT> implements TransformEvaluator<InputT> {
private final ImmutableList.Builder<WindowedValue<InputT>> unprocessedElements;
private ParDoEvaluator(
- EvaluationContext evaluationContext,
PushbackSideInputDoFnRunner<InputT, ?> fnRunner,
AppliedPTransform<?, ?, ?> transform,
AggregatorContainer.Mutator aggregatorChanges,
BundleOutputManager outputManager,
DirectStepContext stepContext) {
- this.evaluationContext = evaluationContext;
this.fnRunner = fnRunner;
this.transform = transform;
this.outputManager = outputManager;
@@ -153,11 +149,11 @@ class ParDoEvaluator<InputT, OutputT> implements TransformEvaluator<InputT> {
} catch (Exception e) {
throw UserCodeException.wrap(e);
}
- StepTransformResult.Builder resultBuilder;
+ StepTransformResult.Builder<InputT> resultBuilder;
CopyOnAccessInMemoryStateInternals<?> state = stepContext.commitState();
if (state != null) {
resultBuilder =
- StepTransformResult.withHold(transform, state.getEarliestWatermarkHold())
+ StepTransformResult.<InputT>withHold(transform, state.getEarliestWatermarkHold())
.withState(state);
} else {
resultBuilder = StepTransformResult.withoutHold(transform);
http://git-wip-us.apache.org/repos/asf/beam/blob/1cc16b0d/runners/direct-java/src/main/java/org/apache/beam/runners/direct/ParDoEvaluatorFactory.java
----------------------------------------------------------------------
diff --git a/runners/direct-java/src/main/java/org/apache/beam/runners/direct/ParDoEvaluatorFactory.java b/runners/direct-java/src/main/java/org/apache/beam/runners/direct/ParDoEvaluatorFactory.java
index 0372295..93f204a 100644
--- a/runners/direct-java/src/main/java/org/apache/beam/runners/direct/ParDoEvaluatorFactory.java
+++ b/runners/direct-java/src/main/java/org/apache/beam/runners/direct/ParDoEvaluatorFactory.java
@@ -126,7 +126,7 @@ final class ParDoEvaluatorFactory<InputT, OutputT> implements TransformEvaluator
fnManager);
}
- ParDoEvaluator<InputT, OutputT> createParDoEvaluator(
+ ParDoEvaluator<InputT> createParDoEvaluator(
AppliedPTransform<PCollection<InputT>, PCollectionTuple, ?> application,
StructuralKey<?> key,
List<PCollectionView<?>> sideInputs,
http://git-wip-us.apache.org/repos/asf/beam/blob/1cc16b0d/runners/direct-java/src/main/java/org/apache/beam/runners/direct/SplittableProcessElementsEvaluatorFactory.java
----------------------------------------------------------------------
diff --git a/runners/direct-java/src/main/java/org/apache/beam/runners/direct/SplittableProcessElementsEvaluatorFactory.java b/runners/direct-java/src/main/java/org/apache/beam/runners/direct/SplittableProcessElementsEvaluatorFactory.java
index 64cef35..00b16dd 100644
--- a/runners/direct-java/src/main/java/org/apache/beam/runners/direct/SplittableProcessElementsEvaluatorFactory.java
+++ b/runners/direct-java/src/main/java/org/apache/beam/runners/direct/SplittableProcessElementsEvaluatorFactory.java
@@ -98,7 +98,7 @@ class SplittableProcessElementsEvaluatorFactory<
.getExecutionContext(application, inputBundle.getKey())
.getOrCreateStepContext(stepName, stepName);
- ParDoEvaluator<KeyedWorkItem<String, ElementAndRestriction<InputT, RestrictionT>>, OutputT>
+ ParDoEvaluator<KeyedWorkItem<String, ElementAndRestriction<InputT, RestrictionT>>>
parDoEvaluator =
delegateFactory.createParDoEvaluator(
application,
http://git-wip-us.apache.org/repos/asf/beam/blob/1cc16b0d/runners/direct-java/src/test/java/org/apache/beam/runners/direct/DoFnLifecycleManagerRemovingTransformEvaluatorTest.java
----------------------------------------------------------------------
diff --git a/runners/direct-java/src/test/java/org/apache/beam/runners/direct/DoFnLifecycleManagerRemovingTransformEvaluatorTest.java b/runners/direct-java/src/test/java/org/apache/beam/runners/direct/DoFnLifecycleManagerRemovingTransformEvaluatorTest.java
index d046ce5..1ac4d6d 100644
--- a/runners/direct-java/src/test/java/org/apache/beam/runners/direct/DoFnLifecycleManagerRemovingTransformEvaluatorTest.java
+++ b/runners/direct-java/src/test/java/org/apache/beam/runners/direct/DoFnLifecycleManagerRemovingTransformEvaluatorTest.java
@@ -53,7 +53,7 @@ public class DoFnLifecycleManagerRemovingTransformEvaluatorTest {
@Test
public void delegatesToUnderlying() throws Exception {
- ParDoEvaluator<Object, Object> underlying = mock(ParDoEvaluator.class);
+ ParDoEvaluator<Object> underlying = mock(ParDoEvaluator.class);
DoFn<?, ?> original = lifecycleManager.get();
TransformEvaluator<Object> evaluator =
DoFnLifecycleManagerRemovingTransformEvaluator.wrapping(underlying, lifecycleManager);
@@ -72,7 +72,7 @@ public class DoFnLifecycleManagerRemovingTransformEvaluatorTest {
@Test
public void removesOnExceptionInProcessElement() throws Exception {
- ParDoEvaluator<Object, Object> underlying = mock(ParDoEvaluator.class);
+ ParDoEvaluator<Object> underlying = mock(ParDoEvaluator.class);
doThrow(Exception.class).when(underlying).processElement(any(WindowedValue.class));
DoFn<?, ?> original = lifecycleManager.get();
@@ -91,7 +91,7 @@ public class DoFnLifecycleManagerRemovingTransformEvaluatorTest {
@Test
public void removesOnExceptionInOnTimer() throws Exception {
- ParDoEvaluator<Object, Object> underlying = mock(ParDoEvaluator.class);
+ ParDoEvaluator<Object> underlying = mock(ParDoEvaluator.class);
doThrow(Exception.class)
.when(underlying)
.onTimer(any(TimerData.class), any(BoundedWindow.class));
@@ -114,7 +114,7 @@ public class DoFnLifecycleManagerRemovingTransformEvaluatorTest {
@Test
public void removesOnExceptionInFinishBundle() throws Exception {
- ParDoEvaluator<Object, Object> underlying = mock(ParDoEvaluator.class);
+ ParDoEvaluator<Object> underlying = mock(ParDoEvaluator.class);
doThrow(Exception.class).when(underlying).finishBundle();
DoFn<?, ?> original = lifecycleManager.get();
http://git-wip-us.apache.org/repos/asf/beam/blob/1cc16b0d/runners/direct-java/src/test/java/org/apache/beam/runners/direct/ParDoEvaluatorTest.java
----------------------------------------------------------------------
diff --git a/runners/direct-java/src/test/java/org/apache/beam/runners/direct/ParDoEvaluatorTest.java b/runners/direct-java/src/test/java/org/apache/beam/runners/direct/ParDoEvaluatorTest.java
index 65a1248..2be0f9d 100644
--- a/runners/direct-java/src/test/java/org/apache/beam/runners/direct/ParDoEvaluatorTest.java
+++ b/runners/direct-java/src/test/java/org/apache/beam/runners/direct/ParDoEvaluatorTest.java
@@ -98,7 +98,7 @@ public class ParDoEvaluatorTest {
UncommittedBundle<Integer> outputBundle = bundleFactory.createBundle(output);
when(evaluationContext.createBundle(output)).thenReturn(outputBundle);
- ParDoEvaluator<Integer, Integer> evaluator =
+ ParDoEvaluator<Integer> evaluator =
createEvaluator(singletonView, fn, output);
IntervalWindow nonGlobalWindow = new IntervalWindow(new Instant(0), new Instant(10_000L));
@@ -130,7 +130,7 @@ public class ParDoEvaluatorTest {
WindowedValue.timestampedValueInGlobalWindow(6, new Instant(2468L))));
}
- private ParDoEvaluator<Integer, Integer> createEvaluator(
+ private ParDoEvaluator<Integer> createEvaluator(
PCollectionView<Integer> singletonView,
RecorderFn fn,
PCollection<Integer> output) {
[28/50] [abbrv] beam git commit: [BEAM-1994] Remove Flink examples
package
Posted by dh...@apache.org.
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/types/EncodedValueTypeInformation.java
----------------------------------------------------------------------
diff --git a/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/types/EncodedValueTypeInformation.java b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/types/EncodedValueTypeInformation.java
new file mode 100644
index 0000000..e24bf31
--- /dev/null
+++ b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/types/EncodedValueTypeInformation.java
@@ -0,0 +1,98 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.runners.flink.translation.types;
+
+import org.apache.beam.sdk.coders.Coder;
+import org.apache.flink.api.common.ExecutionConfig;
+import org.apache.flink.api.common.typeinfo.AtomicType;
+import org.apache.flink.api.common.typeinfo.TypeInformation;
+import org.apache.flink.api.common.typeutils.TypeComparator;
+import org.apache.flink.api.common.typeutils.TypeSerializer;
+
+/**
+ * Flink {@link TypeInformation} for Beam values that have been encoded to byte data
+ * by a {@link Coder}.
+ */
+public class EncodedValueTypeInformation
+ extends TypeInformation<byte[]>
+ implements AtomicType<byte[]> {
+
+ private static final long serialVersionUID = 1L;
+
+ @Override
+ public boolean isBasicType() {
+ return false;
+ }
+
+ @Override
+ public boolean isTupleType() {
+ return false;
+ }
+
+ @Override
+ public int getArity() {
+ return 0;
+ }
+
+ @Override
+ public int getTotalFields() {
+ return 0;
+ }
+
+ @Override
+ public Class<byte[]> getTypeClass() {
+ return byte[].class;
+ }
+
+ @Override
+ public boolean isKeyType() {
+ return true;
+ }
+
+ @Override
+ public TypeSerializer<byte[]> createSerializer(ExecutionConfig executionConfig) {
+ return new EncodedValueSerializer();
+ }
+
+ @Override
+ public boolean equals(Object other) {
+ return other instanceof EncodedValueTypeInformation;
+ }
+
+ @Override
+ public int hashCode() {
+ return this.getClass().hashCode();
+ }
+
+ @Override
+ public boolean canEqual(Object obj) {
+ return obj instanceof EncodedValueTypeInformation;
+ }
+
+ @Override
+ public String toString() {
+ return "EncodedValueTypeInformation";
+ }
+
+ @Override
+ public TypeComparator<byte[]> createComparator(
+ boolean sortOrderAscending,
+ ExecutionConfig executionConfig) {
+ return new EncodedValueComparator(sortOrderAscending);
+ }
+}
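As an illustration of where this type information is used (not part of the commit): values are first encoded to byte[] with a Beam Coder, and the resulting byte arrays are described to Flink by EncodedValueTypeInformation so it picks the matching serializer and comparator. The coder below is an assumption:

import org.apache.beam.sdk.coders.StringUtf8Coder;
import org.apache.beam.sdk.util.CoderUtils;
import org.apache.flink.api.common.typeinfo.TypeInformation;

class EncodedValueExample {
  // Encode a Beam value to its byte form...
  static byte[] encode(String value) throws Exception {
    return CoderUtils.encodeToByteArray(StringUtf8Coder.of(), value);
  }

  // ...and describe such byte arrays to Flink with the matching type information.
  static TypeInformation<byte[]> encodedType() {
    return new EncodedValueTypeInformation();
  }
}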
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/types/InspectableByteArrayOutputStream.java
----------------------------------------------------------------------
diff --git a/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/types/InspectableByteArrayOutputStream.java b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/types/InspectableByteArrayOutputStream.java
new file mode 100644
index 0000000..36b5ba3
--- /dev/null
+++ b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/types/InspectableByteArrayOutputStream.java
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.runners.flink.translation.types;
+
+import java.io.ByteArrayOutputStream;
+
+/**
+ * Version of {@link java.io.ByteArrayOutputStream} that allows retrieving the internal
+ * byte[] buffer without incurring an array copy.
+ */
+public class InspectableByteArrayOutputStream extends ByteArrayOutputStream {
+
+ /**
+ * Get the underlying byte array.
+ */
+ public byte[] getBuffer() {
+ return buf;
+ }
+}
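A small usage sketch (not part of the commit): encode a value into the stream and write the backing buffer out directly, avoiding the copy that toByteArray() would make. The coder and target are assumptions:

import java.io.IOException;
import org.apache.beam.sdk.coders.Coder;
import org.apache.beam.sdk.coders.StringUtf8Coder;
import org.apache.flink.core.memory.DataOutputView;

class InspectableStreamExample {
  static void writeEncoded(DataOutputView target, String value) throws IOException {
    InspectableByteArrayOutputStream bos = new InspectableByteArrayOutputStream();
    StringUtf8Coder.of().encode(value, bos, Coder.Context.OUTER);
    target.writeInt(bos.size());                  // length of the valid prefix
    target.write(bos.getBuffer(), 0, bos.size()); // getBuffer() exposes the array without copying
  }
}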
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/types/KvKeySelector.java
----------------------------------------------------------------------
diff --git a/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/types/KvKeySelector.java b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/types/KvKeySelector.java
new file mode 100644
index 0000000..9df6836
--- /dev/null
+++ b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/types/KvKeySelector.java
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.runners.flink.translation.types;
+
+import org.apache.beam.sdk.coders.Coder;
+import org.apache.beam.sdk.util.CoderUtils;
+import org.apache.beam.sdk.util.WindowedValue;
+import org.apache.beam.sdk.values.KV;
+import org.apache.flink.api.common.typeinfo.TypeInformation;
+import org.apache.flink.api.java.functions.KeySelector;
+import org.apache.flink.api.java.typeutils.ResultTypeQueryable;
+
+/**
+ * {@link KeySelector} that extracts the key from a {@link KV} and returns
+ * it in encoded form as a {@code byte} array.
+ */
+public class KvKeySelector<InputT, K>
+ implements KeySelector<WindowedValue<KV<K, InputT>>, byte[]>, ResultTypeQueryable<byte[]> {
+
+ private final Coder<K> keyCoder;
+
+ public KvKeySelector(Coder<K> keyCoder) {
+ this.keyCoder = keyCoder;
+ }
+
+ @Override
+ public byte[] getKey(WindowedValue<KV<K, InputT>> value) throws Exception {
+ return CoderUtils.encodeToByteArray(keyCoder, value.getValue().getKey());
+ }
+
+ @Override
+ public TypeInformation<byte[]> getProducedType() {
+ return new EncodedValueTypeInformation();
+ }
+}
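A usage sketch (not part of the commit), grouping a batch DataSet of windowed KVs by the encoded key bytes; the element type and key coder are assumptions:

import org.apache.beam.sdk.coders.StringUtf8Coder;
import org.apache.beam.sdk.util.WindowedValue;
import org.apache.beam.sdk.values.KV;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.operators.UnsortedGrouping;

class KvKeySelectorExample {
  // Group by the coder-encoded key so equality and hashing are on bytes, not objects.
  static UnsortedGrouping<WindowedValue<KV<String, Long>>> groupByEncodedKey(
      DataSet<WindowedValue<KV<String, Long>>> input) {
    return input.groupBy(new KvKeySelector<Long, String>(StringUtf8Coder.of()));
  }
}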
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/types/package-info.java
----------------------------------------------------------------------
diff --git a/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/types/package-info.java b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/types/package-info.java
new file mode 100644
index 0000000..6fb3182
--- /dev/null
+++ b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/types/package-info.java
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Internal implementation of the Beam runner for Apache Flink.
+ */
+package org.apache.beam.runners.flink.translation.types;
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/utils/SerializedPipelineOptions.java
----------------------------------------------------------------------
diff --git a/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/utils/SerializedPipelineOptions.java b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/utils/SerializedPipelineOptions.java
new file mode 100644
index 0000000..2256bb1
--- /dev/null
+++ b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/utils/SerializedPipelineOptions.java
@@ -0,0 +1,67 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.beam.runners.flink.translation.utils;
+
+import static com.google.common.base.Preconditions.checkNotNull;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.Serializable;
+import org.apache.beam.sdk.io.FileSystems;
+import org.apache.beam.sdk.options.PipelineOptions;
+import org.apache.beam.sdk.util.IOChannelUtils;
+
+/**
+ * Encapsulates the PipelineOptions in serialized form to ship them to the cluster.
+ */
+public class SerializedPipelineOptions implements Serializable {
+
+ private final byte[] serializedOptions;
+
+ /** Lazily initialized copy of deserialized options. */
+ private transient PipelineOptions pipelineOptions;
+
+ public SerializedPipelineOptions(PipelineOptions options) {
+ checkNotNull(options, "PipelineOptions must not be null.");
+
+ try (ByteArrayOutputStream baos = new ByteArrayOutputStream()) {
+ new ObjectMapper().writeValue(baos, options);
+ this.serializedOptions = baos.toByteArray();
+ } catch (Exception e) {
+ throw new RuntimeException("Couldn't serialize PipelineOptions.", e);
+ }
+
+ }
+
+ public PipelineOptions getPipelineOptions() {
+ if (pipelineOptions == null) {
+ try {
+ pipelineOptions = new ObjectMapper().readValue(serializedOptions, PipelineOptions.class);
+
+ IOChannelUtils.registerIOFactoriesAllowOverride(pipelineOptions);
+ FileSystems.setDefaultConfigInWorkers(pipelineOptions);
+ } catch (IOException e) {
+ throw new RuntimeException("Couldn't deserialize the PipelineOptions.", e);
+ }
+ }
+
+ return pipelineOptions;
+ }
+}
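A minimal usage sketch (not part of this diff; exceptions and the usual Beam imports elided): the options are captured and serialized on the client when the operator is built, then lazily deserialized and cached the first time getPipelineOptions() is called on a task manager. PipelineOptionsFactory.create() below just stands in for whatever options the pipeline was actually built with.

    PipelineOptions options = PipelineOptionsFactory.create();
    // serialized once on the client and shipped as part of the serialized operator
    SerializedPipelineOptions shipped = new SerializedPipelineOptions(options);
    // deserialized (and cached) on first access inside the task manager
    PipelineOptions onWorker = shipped.getPipelineOptions();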
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/utils/package-info.java
----------------------------------------------------------------------
diff --git a/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/utils/package-info.java b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/utils/package-info.java
new file mode 100644
index 0000000..5dedd53
--- /dev/null
+++ b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/utils/package-info.java
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Internal implementation of the Beam runner for Apache Flink.
+ */
+package org.apache.beam.runners.flink.translation.utils;
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/DataInputViewWrapper.java
----------------------------------------------------------------------
diff --git a/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/DataInputViewWrapper.java b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/DataInputViewWrapper.java
new file mode 100644
index 0000000..82a2c4e
--- /dev/null
+++ b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/DataInputViewWrapper.java
@@ -0,0 +1,58 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.runners.flink.translation.wrappers;
+
+import java.io.EOFException;
+import java.io.IOException;
+import java.io.InputStream;
+import org.apache.flink.core.memory.DataInputView;
+
+/**
+ * Wrapper for {@link DataInputView}. We need this because Flink reads data using a
+ * {@link org.apache.flink.core.memory.DataInputView} while
+ * Beam {@link org.apache.beam.sdk.coders.Coder}s expect an
+ * {@link java.io.InputStream}.
+ */
+public class DataInputViewWrapper extends InputStream {
+
+ private DataInputView inputView;
+
+ public DataInputViewWrapper(DataInputView inputView) {
+ this.inputView = inputView;
+ }
+
+ public void setInputView(DataInputView inputView) {
+ this.inputView = inputView;
+ }
+
+ @Override
+ public int read() throws IOException {
+ try {
+ return inputView.readUnsignedByte();
+ } catch (EOFException e) {
+ // translate between DataInput and InputStream,
+ // DataInput signals EOF by exception, InputStream does it by returning -1
+ return -1;
+ }
+ }
+
+ @Override
+ public int read(byte[] b, int off, int len) throws IOException {
+ return inputView.read(b, off, len);
+ }
+}

http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/DataOutputViewWrapper.java
----------------------------------------------------------------------
diff --git a/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/DataOutputViewWrapper.java b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/DataOutputViewWrapper.java
new file mode 100644
index 0000000..f2d9db2
--- /dev/null
+++ b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/DataOutputViewWrapper.java
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.runners.flink.translation.wrappers;
+
+import java.io.IOException;
+import java.io.OutputStream;
+import org.apache.flink.core.memory.DataOutputView;
+
+/**
+ * Wrapper for {@link org.apache.flink.core.memory.DataOutputView}. We need this because
+ * Flink writes data using a {@link org.apache.flink.core.memory.DataOutputView} while
+ * Beam {@link org.apache.beam.sdk.coders.Coder}s expect an
+ * {@link java.io.OutputStream}.
+ */
+public class DataOutputViewWrapper extends OutputStream {
+
+ private DataOutputView outputView;
+
+ public DataOutputViewWrapper(DataOutputView outputView) {
+ this.outputView = outputView;
+ }
+
+ public void setOutputView(DataOutputView outputView) {
+ this.outputView = outputView;
+ }
+
+ @Override
+ public void write(int b) throws IOException {
+ outputView.write(b);
+ }
+
+ @Override
+ public void write(byte[] b, int off, int len) throws IOException {
+ outputView.write(b, off, len);
+ }
+}
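For orientation, a rough round-trip sketch (not part of this change; exceptions elided) that pushes a value through both wrappers, using Flink's stream-backed DataOutputViewStreamWrapper/DataInputViewStreamWrapper and Beam's StringUtf8Coder as assumed conveniences:

    ByteArrayOutputStream bytes = new ByteArrayOutputStream();
    DataOutputView outView = new DataOutputViewStreamWrapper(bytes);
    StringUtf8Coder coder = StringUtf8Coder.of();
    // the Coder writes to an OutputStream; the wrapper forwards the bytes to the DataOutputView
    coder.encode("hello", new DataOutputViewWrapper(outView), Coder.Context.OUTER);

    DataInputView inView =
        new DataInputViewStreamWrapper(new ByteArrayInputStream(bytes.toByteArray()));
    // the Coder reads from an InputStream; the wrapper pulls the bytes from the DataInputView
    String decoded = coder.decode(new DataInputViewWrapper(inView), Coder.Context.OUTER);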
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/SerializableFnAggregatorWrapper.java
----------------------------------------------------------------------
diff --git a/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/SerializableFnAggregatorWrapper.java b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/SerializableFnAggregatorWrapper.java
new file mode 100644
index 0000000..70d97e3
--- /dev/null
+++ b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/SerializableFnAggregatorWrapper.java
@@ -0,0 +1,98 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.runners.flink.translation.wrappers;
+
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.Lists;
+import java.io.Serializable;
+import org.apache.beam.sdk.transforms.Aggregator;
+import org.apache.beam.sdk.transforms.Combine;
+import org.apache.flink.api.common.accumulators.Accumulator;
+
+/**
+ * Wraps a {@link org.apache.beam.sdk.transforms.Combine.CombineFn}
+ * in a Flink {@link org.apache.flink.api.common.accumulators.Accumulator} so that
+ * the function can be used as an aggregator in a {@link org.apache.beam.sdk.transforms.ParDo}
+ * operation.
+ */
+public class SerializableFnAggregatorWrapper<InputT, OutputT>
+ implements Aggregator<InputT, OutputT>, Accumulator<InputT, Serializable> {
+
+ private OutputT aa;
+ private Combine.CombineFn<InputT, ?, OutputT> combiner;
+
+ public SerializableFnAggregatorWrapper(Combine.CombineFn<InputT, ?, OutputT> combiner) {
+ this.combiner = combiner;
+ resetLocal();
+ }
+
+ @Override
+ @SuppressWarnings("unchecked")
+ public void add(InputT value) {
+ this.aa = combiner.apply(ImmutableList.of((InputT) aa, value));
+ }
+
+ @Override
+ public Serializable getLocalValue() {
+ return (Serializable) aa;
+ }
+
+ @Override
+ public void resetLocal() {
+ this.aa = combiner.apply(ImmutableList.<InputT>of());
+ }
+
+ @Override
+ @SuppressWarnings("unchecked")
+ public void merge(Accumulator<InputT, Serializable> other) {
+ this.aa = combiner.apply(ImmutableList.of((InputT) aa, (InputT) other.getLocalValue()));
+ }
+
+ @Override
+ public void addValue(InputT value) {
+ add(value);
+ }
+
+ @Override
+ public String getName() {
+ return "Aggregator :" + combiner.toString();
+ }
+
+ @Override
+ public Combine.CombineFn<InputT, ?, OutputT> getCombineFn() {
+ return combiner;
+ }
+
+ @Override
+ public Accumulator<InputT, Serializable> clone() {
+ try {
+ super.clone();
+ } catch (CloneNotSupportedException e) {
+ // Flink Accumulators cannot throw CloneNotSupportedException, work around that.
+ throw new RuntimeException(e);
+ }
+
+ // copy it by merging
+ OutputT resultCopy = combiner.apply(Lists.newArrayList((InputT) aa));
+ SerializableFnAggregatorWrapper<InputT, OutputT> result =
+ new SerializableFnAggregatorWrapper<>(combiner);
+
+ result.aa = resultCopy;
+ return result;
+ }
+}
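A small sketch of how such a wrapper behaves (not part of the diff; Sum.ofLongs() is only an assumed example of a summing CombineFn):

    SerializableFnAggregatorWrapper<Long, Long> agg =
        new SerializableFnAggregatorWrapper<>(Sum.ofLongs());
    agg.add(3L);
    agg.add(4L);
    // getLocalValue() now holds 7; Flink later combines per-subtask instances via merge()
    Serializable localSum = agg.getLocalValue();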
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/SourceInputFormat.java
----------------------------------------------------------------------
diff --git a/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/SourceInputFormat.java b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/SourceInputFormat.java
new file mode 100644
index 0000000..a87472b
--- /dev/null
+++ b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/SourceInputFormat.java
@@ -0,0 +1,150 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.runners.flink.translation.wrappers;
+
+import java.io.IOException;
+import java.util.List;
+import org.apache.beam.runners.flink.translation.utils.SerializedPipelineOptions;
+import org.apache.beam.sdk.io.BoundedSource;
+import org.apache.beam.sdk.io.Source;
+import org.apache.beam.sdk.options.PipelineOptions;
+import org.apache.beam.sdk.transforms.windowing.GlobalWindow;
+import org.apache.beam.sdk.transforms.windowing.PaneInfo;
+import org.apache.beam.sdk.util.WindowedValue;
+import org.apache.flink.api.common.io.DefaultInputSplitAssigner;
+import org.apache.flink.api.common.io.InputFormat;
+import org.apache.flink.api.common.io.statistics.BaseStatistics;
+import org.apache.flink.configuration.Configuration;
+import org.apache.flink.core.io.InputSplitAssigner;
+import org.joda.time.Instant;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+/**
+ * Wrapper for executing a {@link Source} as a Flink {@link InputFormat}.
+ */
+public class SourceInputFormat<T>
+ implements InputFormat<WindowedValue<T>, SourceInputSplit<T>> {
+ private static final Logger LOG = LoggerFactory.getLogger(SourceInputFormat.class);
+
+ private final BoundedSource<T> initialSource;
+
+ private transient PipelineOptions options;
+ private final SerializedPipelineOptions serializedOptions;
+
+ private transient BoundedSource.BoundedReader<T> reader;
+ private boolean inputAvailable = false;
+
+ public SourceInputFormat(BoundedSource<T> initialSource, PipelineOptions options) {
+ this.initialSource = initialSource;
+ this.serializedOptions = new SerializedPipelineOptions(options);
+ }
+
+ @Override
+ public void configure(Configuration configuration) {
+ options = serializedOptions.getPipelineOptions();
+ }
+
+ @Override
+ public void open(SourceInputSplit<T> sourceInputSplit) throws IOException {
+ reader = ((BoundedSource<T>) sourceInputSplit.getSource()).createReader(options);
+ inputAvailable = reader.start();
+ }
+
+ @Override
+ public BaseStatistics getStatistics(BaseStatistics baseStatistics) throws IOException {
+ try {
+ final long estimatedSize = initialSource.getEstimatedSizeBytes(options);
+
+ return new BaseStatistics() {
+ @Override
+ public long getTotalInputSize() {
+ return estimatedSize;
+ }
+
+ @Override
+ public long getNumberOfRecords() {
+ return BaseStatistics.NUM_RECORDS_UNKNOWN;
+ }
+
+ @Override
+ public float getAverageRecordWidth() {
+ return BaseStatistics.AVG_RECORD_BYTES_UNKNOWN;
+ }
+ };
+ } catch (Exception e) {
+ LOG.warn("Could not read Source statistics: {}", e);
+ }
+
+ return null;
+ }
+
+ @Override
+ @SuppressWarnings("unchecked")
+ public SourceInputSplit<T>[] createInputSplits(int numSplits) throws IOException {
+ try {
+ long desiredSizeBytes = initialSource.getEstimatedSizeBytes(options) / numSplits;
+ List<? extends Source<T>> shards =
+ initialSource.split(desiredSizeBytes, options);
+ int numShards = shards.size();
+ SourceInputSplit<T>[] sourceInputSplits = new SourceInputSplit[numShards];
+ for (int i = 0; i < numShards; i++) {
+ sourceInputSplits[i] = new SourceInputSplit<>(shards.get(i), i);
+ }
+ return sourceInputSplits;
+ } catch (Exception e) {
+ throw new IOException("Could not create input splits from Source.", e);
+ }
+ }
+
+ @Override
+ public InputSplitAssigner getInputSplitAssigner(final SourceInputSplit[] sourceInputSplits) {
+ return new DefaultInputSplitAssigner(sourceInputSplits);
+ }
+
+
+ @Override
+ public boolean reachedEnd() throws IOException {
+ return !inputAvailable;
+ }
+
+ @Override
+ public WindowedValue<T> nextRecord(WindowedValue<T> t) throws IOException {
+ if (inputAvailable) {
+ final T current = reader.getCurrent();
+ final Instant timestamp = reader.getCurrentTimestamp();
+ // advance reader to have a record ready next time
+ inputAvailable = reader.advance();
+ return WindowedValue.of(
+ current,
+ timestamp,
+ GlobalWindow.INSTANCE, PaneInfo.NO_FIRING);
+ }
+
+ return null;
+ }
+
+ @Override
+ public void close() throws IOException {
+ // TODO null check can be removed once FLINK-3796 is fixed
+ if (reader != null) {
+ reader.close();
+ }
+ }
+}
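As a rough sketch of the lifecycle Flink drives on this InputFormat (not part of this change; CountingSource.upTo is assumed as a convenient bounded source and exceptions are elided):

    PipelineOptions options = PipelineOptionsFactory.create();
    SourceInputFormat<Long> format =
        new SourceInputFormat<>(CountingSource.upTo(10), options);

    format.configure(new Configuration());
    for (SourceInputSplit<Long> split : format.createInputSplits(2)) {
      format.open(split);                         // creates the BoundedReader for this split
      while (!format.reachedEnd()) {
        WindowedValue<Long> record = format.nextRecord(null);
        // ... hand record off to downstream processing ...
      }
      format.close();
    }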
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/SourceInputSplit.java
----------------------------------------------------------------------
diff --git a/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/SourceInputSplit.java b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/SourceInputSplit.java
new file mode 100644
index 0000000..e4a7386
--- /dev/null
+++ b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/SourceInputSplit.java
@@ -0,0 +1,52 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.runners.flink.translation.wrappers;
+
+import org.apache.beam.sdk.io.Source;
+import org.apache.flink.core.io.InputSplit;
+
+/**
+ * {@link org.apache.flink.core.io.InputSplit} for
+ * {@link org.apache.beam.runners.flink.translation.wrappers.SourceInputFormat}. We pass
+ * the sharded Source around in the input split because a Beam Source simply splits into
+ * several sub-Sources for sharding. This differs from how Flink normally derives a separate
+ * InputSplit from an InputFormat.
+ */
+public class SourceInputSplit<T> implements InputSplit {
+
+ private Source<T> source;
+ private int splitNumber;
+
+ public SourceInputSplit() {
+ }
+
+ public SourceInputSplit(Source<T> source, int splitNumber) {
+ this.source = source;
+ this.splitNumber = splitNumber;
+ }
+
+ @Override
+ public int getSplitNumber() {
+ return splitNumber;
+ }
+
+ public Source<T> getSource() {
+ return source;
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/package-info.java
----------------------------------------------------------------------
diff --git a/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/package-info.java b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/package-info.java
new file mode 100644
index 0000000..72f7deb
--- /dev/null
+++ b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/package-info.java
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Internal implementation of the Beam runner for Apache Flink.
+ */
+package org.apache.beam.runners.flink.translation.wrappers;
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/DoFnOperator.java
----------------------------------------------------------------------
diff --git a/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/DoFnOperator.java b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/DoFnOperator.java
new file mode 100644
index 0000000..8a09286
--- /dev/null
+++ b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/DoFnOperator.java
@@ -0,0 +1,774 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.runners.flink.translation.wrappers.streaming;
+
+import static org.apache.flink.util.Preconditions.checkArgument;
+
+import com.google.common.base.Optional;
+import com.google.common.collect.Iterables;
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+import java.util.Map;
+import javax.annotation.Nullable;
+import org.apache.beam.runners.core.AggregatorFactory;
+import org.apache.beam.runners.core.DoFnRunner;
+import org.apache.beam.runners.core.DoFnRunners;
+import org.apache.beam.runners.core.ExecutionContext;
+import org.apache.beam.runners.core.GroupAlsoByWindowViaWindowSetNewDoFn;
+import org.apache.beam.runners.core.PushbackSideInputDoFnRunner;
+import org.apache.beam.runners.core.SideInputHandler;
+import org.apache.beam.runners.core.SimplePushbackSideInputDoFnRunner;
+import org.apache.beam.runners.core.StateInternals;
+import org.apache.beam.runners.core.StateNamespace;
+import org.apache.beam.runners.core.StateNamespaces;
+import org.apache.beam.runners.core.StateNamespaces.WindowNamespace;
+import org.apache.beam.runners.core.StateTag;
+import org.apache.beam.runners.core.StateTags;
+import org.apache.beam.runners.core.StatefulDoFnRunner;
+import org.apache.beam.runners.core.TimerInternals;
+import org.apache.beam.runners.core.TimerInternals.TimerData;
+import org.apache.beam.runners.flink.translation.types.CoderTypeSerializer;
+import org.apache.beam.runners.flink.translation.utils.SerializedPipelineOptions;
+import org.apache.beam.runners.flink.translation.wrappers.SerializableFnAggregatorWrapper;
+import org.apache.beam.runners.flink.translation.wrappers.streaming.state.FlinkBroadcastStateInternals;
+import org.apache.beam.runners.flink.translation.wrappers.streaming.state.FlinkKeyGroupStateInternals;
+import org.apache.beam.runners.flink.translation.wrappers.streaming.state.FlinkSplitStateInternals;
+import org.apache.beam.runners.flink.translation.wrappers.streaming.state.FlinkStateInternals;
+import org.apache.beam.runners.flink.translation.wrappers.streaming.state.KeyGroupCheckpointedOperator;
+import org.apache.beam.sdk.coders.Coder;
+import org.apache.beam.sdk.options.PipelineOptions;
+import org.apache.beam.sdk.transforms.Aggregator;
+import org.apache.beam.sdk.transforms.Combine;
+import org.apache.beam.sdk.transforms.DoFn;
+import org.apache.beam.sdk.transforms.join.RawUnionValue;
+import org.apache.beam.sdk.transforms.reflect.DoFnInvoker;
+import org.apache.beam.sdk.transforms.reflect.DoFnInvokers;
+import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
+import org.apache.beam.sdk.util.NullSideInputReader;
+import org.apache.beam.sdk.util.SideInputReader;
+import org.apache.beam.sdk.util.TimeDomain;
+import org.apache.beam.sdk.util.WindowedValue;
+import org.apache.beam.sdk.util.WindowingStrategy;
+import org.apache.beam.sdk.util.state.BagState;
+import org.apache.beam.sdk.values.PCollectionView;
+import org.apache.beam.sdk.values.TupleTag;
+import org.apache.flink.core.memory.DataInputViewStreamWrapper;
+import org.apache.flink.core.memory.DataOutputViewStreamWrapper;
+import org.apache.flink.runtime.state.KeyGroupStatePartitionStreamProvider;
+import org.apache.flink.runtime.state.KeyGroupsList;
+import org.apache.flink.runtime.state.KeyedStateBackend;
+import org.apache.flink.runtime.state.KeyedStateCheckpointOutputStream;
+import org.apache.flink.runtime.state.StateInitializationContext;
+import org.apache.flink.runtime.state.StateSnapshotContext;
+import org.apache.flink.streaming.api.operators.AbstractStreamOperator;
+import org.apache.flink.streaming.api.operators.ChainingStrategy;
+import org.apache.flink.streaming.api.operators.HeapInternalTimerService;
+import org.apache.flink.streaming.api.operators.InternalTimer;
+import org.apache.flink.streaming.api.operators.OneInputStreamOperator;
+import org.apache.flink.streaming.api.operators.Output;
+import org.apache.flink.streaming.api.operators.Triggerable;
+import org.apache.flink.streaming.api.operators.TwoInputStreamOperator;
+import org.apache.flink.streaming.api.watermark.Watermark;
+import org.apache.flink.streaming.runtime.streamrecord.StreamRecord;
+import org.joda.time.Instant;
+
+/**
+ * Flink operator for executing {@link DoFn DoFns}.
+ *
+ * @param <InputT> the input type of the {@link DoFn}
+ * @param <FnOutputT> the output type of the {@link DoFn}
+ * @param <OutputT> the output type of the operator, this can be different from the fn output
+ * type when we have additional tagged outputs
+ */
+public class DoFnOperator<InputT, FnOutputT, OutputT>
+ extends AbstractStreamOperator<OutputT>
+ implements OneInputStreamOperator<WindowedValue<InputT>, OutputT>,
+ TwoInputStreamOperator<WindowedValue<InputT>, RawUnionValue, OutputT>,
+ KeyGroupCheckpointedOperator, Triggerable<Object, TimerData> {
+
+ protected DoFn<InputT, FnOutputT> doFn;
+
+ protected final SerializedPipelineOptions serializedOptions;
+
+ protected final TupleTag<FnOutputT> mainOutputTag;
+ protected final List<TupleTag<?>> additionalOutputTags;
+
+ protected final Collection<PCollectionView<?>> sideInputs;
+ protected final Map<Integer, PCollectionView<?>> sideInputTagMapping;
+
+ protected final WindowingStrategy<?, ?> windowingStrategy;
+
+ protected final OutputManagerFactory<OutputT> outputManagerFactory;
+
+ protected transient DoFnRunner<InputT, FnOutputT> doFnRunner;
+ protected transient PushbackSideInputDoFnRunner<InputT, FnOutputT> pushbackDoFnRunner;
+
+ protected transient SideInputHandler sideInputHandler;
+
+ protected transient SideInputReader sideInputReader;
+
+ protected transient DoFnRunners.OutputManager outputManager;
+
+ private transient DoFnInvoker<InputT, FnOutputT> doFnInvoker;
+
+ protected transient long currentInputWatermark;
+
+ protected transient long currentOutputWatermark;
+
+ private transient StateTag<Object, BagState<WindowedValue<InputT>>> pushedBackTag;
+
+ protected transient FlinkStateInternals<?> stateInternals;
+
+ private Coder<WindowedValue<InputT>> inputCoder;
+
+ private final Coder<?> keyCoder;
+
+ private final TimerInternals.TimerDataCoder timerCoder;
+
+ protected transient HeapInternalTimerService<?, TimerInternals.TimerData> timerService;
+
+ protected transient FlinkTimerInternals timerInternals;
+
+ private transient StateInternals<?> pushbackStateInternals;
+
+ private transient Optional<Long> pushedBackWatermark;
+
+ public DoFnOperator(
+ DoFn<InputT, FnOutputT> doFn,
+ Coder<WindowedValue<InputT>> inputCoder,
+ TupleTag<FnOutputT> mainOutputTag,
+ List<TupleTag<?>> additionalOutputTags,
+ OutputManagerFactory<OutputT> outputManagerFactory,
+ WindowingStrategy<?, ?> windowingStrategy,
+ Map<Integer, PCollectionView<?>> sideInputTagMapping,
+ Collection<PCollectionView<?>> sideInputs,
+ PipelineOptions options,
+ Coder<?> keyCoder) {
+ this.doFn = doFn;
+ this.inputCoder = inputCoder;
+ this.mainOutputTag = mainOutputTag;
+ this.additionalOutputTags = additionalOutputTags;
+ this.sideInputTagMapping = sideInputTagMapping;
+ this.sideInputs = sideInputs;
+ this.serializedOptions = new SerializedPipelineOptions(options);
+ this.windowingStrategy = windowingStrategy;
+ this.outputManagerFactory = outputManagerFactory;
+
+ setChainingStrategy(ChainingStrategy.ALWAYS);
+
+ this.keyCoder = keyCoder;
+
+ this.timerCoder =
+ TimerInternals.TimerDataCoder.of(windowingStrategy.getWindowFn().windowCoder());
+ }
+
+ private ExecutionContext.StepContext createStepContext() {
+ return new StepContext();
+ }
+
+ // allow overriding this in WindowDoFnOperator because this one dynamically creates
+ // the DoFn
+ protected DoFn<InputT, FnOutputT> getDoFn() {
+ return doFn;
+ }
+
+ @Override
+ public void open() throws Exception {
+ super.open();
+
+ currentInputWatermark = Long.MIN_VALUE;
+ currentOutputWatermark = Long.MIN_VALUE;
+
+ AggregatorFactory aggregatorFactory = new AggregatorFactory() {
+ @Override
+ public <InputT, AccumT, OutputT> Aggregator<InputT, OutputT> createAggregatorForDoFn(
+ Class<?> fnClass,
+ ExecutionContext.StepContext stepContext,
+ String aggregatorName,
+ Combine.CombineFn<InputT, AccumT, OutputT> combine) {
+
+ @SuppressWarnings("unchecked")
+ SerializableFnAggregatorWrapper<InputT, OutputT> result =
+ (SerializableFnAggregatorWrapper<InputT, OutputT>)
+ getRuntimeContext().getAccumulator(aggregatorName);
+
+ if (result == null) {
+ result = new SerializableFnAggregatorWrapper<>(combine);
+ getRuntimeContext().addAccumulator(aggregatorName, result);
+ }
+ return result;
+ }
+ };
+
+ sideInputReader = NullSideInputReader.of(sideInputs);
+
+ if (!sideInputs.isEmpty()) {
+
+ pushedBackTag = StateTags.bag("pushed-back-values", inputCoder);
+
+ FlinkBroadcastStateInternals sideInputStateInternals =
+ new FlinkBroadcastStateInternals<>(
+ getContainingTask().getIndexInSubtaskGroup(), getOperatorStateBackend());
+
+ sideInputHandler = new SideInputHandler(sideInputs, sideInputStateInternals);
+ sideInputReader = sideInputHandler;
+
+ // may already have been initialized in initializeState()
+ if (pushbackStateInternals == null) {
+ if (keyCoder != null) {
+ pushbackStateInternals = new FlinkKeyGroupStateInternals<>(keyCoder,
+ getKeyedStateBackend());
+ } else {
+ pushbackStateInternals =
+ new FlinkSplitStateInternals<Object>(getOperatorStateBackend());
+ }
+ }
+
+ pushedBackWatermark = Optional.absent();
+
+ }
+
+ outputManager = outputManagerFactory.create(output);
+
+ // StatefulParDo or WindowDoFn
+ if (keyCoder != null) {
+ stateInternals = new FlinkStateInternals<>((KeyedStateBackend) getKeyedStateBackend(),
+ keyCoder);
+
+ timerService = (HeapInternalTimerService<?, TimerInternals.TimerData>)
+ getInternalTimerService("beam-timer", new CoderTypeSerializer<>(timerCoder), this);
+
+ timerInternals = new FlinkTimerInternals();
+
+ }
+
+ // WindowDoFnOperator needs the state and timer internals to create its DoFn,
+ // so we must wait until StateInternals and TimerInternals are ready.
+ this.doFn = getDoFn();
+ doFnInvoker = DoFnInvokers.invokerFor(doFn);
+
+ doFnInvoker.invokeSetup();
+
+ ExecutionContext.StepContext stepContext = createStepContext();
+
+ doFnRunner = DoFnRunners.simpleRunner(
+ serializedOptions.getPipelineOptions(),
+ doFn,
+ sideInputReader,
+ outputManager,
+ mainOutputTag,
+ additionalOutputTags,
+ stepContext,
+ aggregatorFactory,
+ windowingStrategy);
+
+ if (doFn instanceof GroupAlsoByWindowViaWindowSetNewDoFn) {
+ // When the DoFn is a GroupAlsoByWindowViaWindowSetNewDoFn, we know it came from
+ // WindowDoFnOperator and that
+ // InputT = KeyedWorkItem<K, V>
+ // OutputT = KV<K, V>
+ // for some K and V.
+
+
+ doFnRunner = DoFnRunners.lateDataDroppingRunner(
+ (DoFnRunner) doFnRunner,
+ stepContext,
+ windowingStrategy,
+ ((GroupAlsoByWindowViaWindowSetNewDoFn) doFn).getDroppedDueToLatenessAggregator());
+ } else if (keyCoder != null) {
+ // It is a stateful DoFn
+
+ StatefulDoFnRunner.CleanupTimer cleanupTimer =
+ new StatefulDoFnRunner.TimeInternalsCleanupTimer(
+ stepContext.timerInternals(), windowingStrategy);
+
+ // we don't know the window type
+ @SuppressWarnings({"unchecked", "rawtypes"})
+ Coder windowCoder = windowingStrategy.getWindowFn().windowCoder();
+
+ @SuppressWarnings({"unchecked", "rawtypes"})
+ StatefulDoFnRunner.StateCleaner<?> stateCleaner =
+ new StatefulDoFnRunner.StateInternalsStateCleaner<>(
+ doFn, stepContext.stateInternals(), windowCoder);
+
+ doFnRunner = DoFnRunners.defaultStatefulDoFnRunner(
+ doFn,
+ doFnRunner,
+ stepContext,
+ aggregatorFactory,
+ windowingStrategy,
+ cleanupTimer,
+ stateCleaner);
+ }
+
+ pushbackDoFnRunner =
+ SimplePushbackSideInputDoFnRunner.create(doFnRunner, sideInputs, sideInputHandler);
+ }
+
+ @Override
+ public void close() throws Exception {
+ super.close();
+ doFnInvoker.invokeTeardown();
+ }
+
+ protected final long getPushbackWatermarkHold() {
+ // if we don't have side inputs we never hold the watermark
+ if (sideInputs.isEmpty()) {
+ return BoundedWindow.TIMESTAMP_MAX_VALUE.getMillis();
+ }
+
+ try {
+ checkInitPushedBackWatermark();
+ return pushedBackWatermark.get();
+ } catch (Exception e) {
+ throw new RuntimeException("Error retrieving pushed back watermark state.", e);
+ }
+ }
+
+ private void checkInitPushedBackWatermark() {
+ // init and restore from pushedBack state.
+ // Not done in initializeState, because OperatorState is not ready.
+ if (!pushedBackWatermark.isPresent()) {
+
+ BagState<WindowedValue<InputT>> pushedBack =
+ pushbackStateInternals.state(StateNamespaces.global(), pushedBackTag);
+
+ long min = BoundedWindow.TIMESTAMP_MAX_VALUE.getMillis();
+ for (WindowedValue<InputT> value : pushedBack.read()) {
+ min = Math.min(min, value.getTimestamp().getMillis());
+ }
+ setPushedBackWatermark(min);
+ }
+ }
+
+ @Override
+ public final void processElement(
+ StreamRecord<WindowedValue<InputT>> streamRecord) throws Exception {
+ doFnRunner.startBundle();
+ doFnRunner.processElement(streamRecord.getValue());
+ doFnRunner.finishBundle();
+ }
+
+ private void setPushedBackWatermark(long watermark) {
+ pushedBackWatermark = Optional.fromNullable(watermark);
+ }
+
+ @Override
+ public final void processElement1(
+ StreamRecord<WindowedValue<InputT>> streamRecord) throws Exception {
+ pushbackDoFnRunner.startBundle();
+ Iterable<WindowedValue<InputT>> justPushedBack =
+ pushbackDoFnRunner.processElementInReadyWindows(streamRecord.getValue());
+
+ BagState<WindowedValue<InputT>> pushedBack =
+ pushbackStateInternals.state(StateNamespaces.global(), pushedBackTag);
+
+ checkInitPushedBackWatermark();
+
+ long min = pushedBackWatermark.get();
+ for (WindowedValue<InputT> pushedBackValue : justPushedBack) {
+ min = Math.min(min, pushedBackValue.getTimestamp().getMillis());
+ pushedBack.add(pushedBackValue);
+ }
+ setPushedBackWatermark(min);
+ pushbackDoFnRunner.finishBundle();
+ }
+
+ @Override
+ public final void processElement2(
+ StreamRecord<RawUnionValue> streamRecord) throws Exception {
+ pushbackDoFnRunner.startBundle();
+
+ @SuppressWarnings("unchecked")
+ WindowedValue<Iterable<?>> value =
+ (WindowedValue<Iterable<?>>) streamRecord.getValue().getValue();
+
+ PCollectionView<?> sideInput = sideInputTagMapping.get(streamRecord.getValue().getUnionTag());
+ sideInputHandler.addSideInputValue(sideInput, value);
+
+ BagState<WindowedValue<InputT>> pushedBack =
+ pushbackStateInternals.state(StateNamespaces.global(), pushedBackTag);
+
+ List<WindowedValue<InputT>> newPushedBack = new ArrayList<>();
+
+ Iterable<WindowedValue<InputT>> pushedBackContents = pushedBack.read();
+ if (pushedBackContents != null) {
+ for (WindowedValue<InputT> elem : pushedBackContents) {
+
+ // we need to set the correct key in case the operator is
+ // a (keyed) window operator
+ setKeyContextElement1(new StreamRecord<>(elem));
+
+ Iterable<WindowedValue<InputT>> justPushedBack =
+ pushbackDoFnRunner.processElementInReadyWindows(elem);
+ Iterables.addAll(newPushedBack, justPushedBack);
+ }
+ }
+
+ pushedBack.clear();
+ long min = BoundedWindow.TIMESTAMP_MAX_VALUE.getMillis();
+ for (WindowedValue<InputT> pushedBackValue : newPushedBack) {
+ min = Math.min(min, pushedBackValue.getTimestamp().getMillis());
+ pushedBack.add(pushedBackValue);
+ }
+ setPushedBackWatermark(min);
+
+ pushbackDoFnRunner.finishBundle();
+
+ // maybe output a new watermark
+ processWatermark1(new Watermark(currentInputWatermark));
+ }
+
+ @Override
+ public void processWatermark(Watermark mark) throws Exception {
+ processWatermark1(mark);
+ }
+
+ @Override
+ public void processWatermark1(Watermark mark) throws Exception {
+ if (keyCoder == null) {
+ this.currentInputWatermark = mark.getTimestamp();
+ long potentialOutputWatermark =
+ Math.min(getPushbackWatermarkHold(), currentInputWatermark);
+ if (potentialOutputWatermark > currentOutputWatermark) {
+ currentOutputWatermark = potentialOutputWatermark;
+ output.emitWatermark(new Watermark(currentOutputWatermark));
+ }
+ } else {
+ // firing timers may emit output, so we need startBundle/finishBundle around it.
+ pushbackDoFnRunner.startBundle();
+
+ this.currentInputWatermark = mark.getTimestamp();
+
+ // hold back by the pushed back values waiting for side inputs
+ long actualInputWatermark = Math.min(getPushbackWatermarkHold(), mark.getTimestamp());
+
+ timerService.advanceWatermark(actualInputWatermark);
+
+ Instant watermarkHold = stateInternals.watermarkHold();
+
+ long combinedWatermarkHold = Math.min(watermarkHold.getMillis(), getPushbackWatermarkHold());
+
+ long potentialOutputWatermark = Math.min(currentInputWatermark, combinedWatermarkHold);
+
+ if (potentialOutputWatermark > currentOutputWatermark) {
+ currentOutputWatermark = potentialOutputWatermark;
+ output.emitWatermark(new Watermark(currentOutputWatermark));
+ }
+ pushbackDoFnRunner.finishBundle();
+ }
+ }
+
+ @Override
+ public void processWatermark2(Watermark mark) throws Exception {
+ // ignore watermarks from the side-input input
+ }
+
+ @Override
+ public void snapshotState(StateSnapshotContext context) throws Exception {
+ // copy from AbstractStreamOperator
+ if (getKeyedStateBackend() != null) {
+ KeyedStateCheckpointOutputStream out;
+
+ try {
+ out = context.getRawKeyedOperatorStateOutput();
+ } catch (Exception exception) {
+ throw new Exception("Could not open raw keyed operator state stream for "
+ + getOperatorName() + '.', exception);
+ }
+
+ try {
+ KeyGroupsList allKeyGroups = out.getKeyGroupList();
+ for (int keyGroupIdx : allKeyGroups) {
+ out.startNewKeyGroup(keyGroupIdx);
+
+ DataOutputViewStreamWrapper dov = new DataOutputViewStreamWrapper(out);
+
+ // if (this instanceof KeyGroupCheckpointedOperator)
+ snapshotKeyGroupState(keyGroupIdx, dov);
+
+ // We can't access all timer services here, so we only snapshot our own timerService.
+ // This may also be a plain DoFn that has no timerService at all (keyCoder == null).
+ if (keyCoder != null) {
+ timerService.snapshotTimersForKeyGroup(dov, keyGroupIdx);
+ }
+
+ }
+ } catch (Exception exception) {
+ throw new Exception("Could not write timer service of " + getOperatorName()
+ + " to checkpoint state stream.", exception);
+ } finally {
+ try {
+ out.close();
+ } catch (Exception closeException) {
+ LOG.warn("Could not close raw keyed operator state stream for {}. This "
+ + "might have prevented deleting some state data.", getOperatorName(),
+ closeException);
+ }
+ }
+ }
+ }
+
+ @Override
+ public void snapshotKeyGroupState(int keyGroupIndex, DataOutputStream out) throws Exception {
+ if (!sideInputs.isEmpty() && keyCoder != null) {
+ ((FlinkKeyGroupStateInternals) pushbackStateInternals).snapshotKeyGroupState(
+ keyGroupIndex, out);
+ }
+ }
+
+ @Override
+ public void initializeState(StateInitializationContext context) throws Exception {
+ if (getKeyedStateBackend() != null) {
+ int totalKeyGroups = getKeyedStateBackend().getNumberOfKeyGroups();
+ KeyGroupsList localKeyGroupRange = getKeyedStateBackend().getKeyGroupRange();
+
+ for (KeyGroupStatePartitionStreamProvider streamProvider : context.getRawKeyedStateInputs()) {
+ DataInputViewStreamWrapper div = new DataInputViewStreamWrapper(streamProvider.getStream());
+
+ int keyGroupIdx = streamProvider.getKeyGroupId();
+ checkArgument(localKeyGroupRange.contains(keyGroupIdx),
+ "Key Group " + keyGroupIdx + " does not belong to the local range.");
+
+ // if (this instanceof KeyGroupRestoringOperator)
+ restoreKeyGroupState(keyGroupIdx, div);
+
+ // We just initialize our timerService
+ if (keyCoder != null) {
+ if (timerService == null) {
+ timerService = new HeapInternalTimerService<>(
+ totalKeyGroups,
+ localKeyGroupRange,
+ this,
+ getRuntimeContext().getProcessingTimeService());
+ }
+ timerService.restoreTimersForKeyGroup(div, keyGroupIdx, getUserCodeClassloader());
+ }
+ }
+ }
+ }
+
+ @Override
+ public void restoreKeyGroupState(int keyGroupIndex, DataInputStream in) throws Exception {
+ if (!sideInputs.isEmpty() && keyCoder != null) {
+ if (pushbackStateInternals == null) {
+ pushbackStateInternals = new FlinkKeyGroupStateInternals<>(keyCoder,
+ getKeyedStateBackend());
+ }
+ ((FlinkKeyGroupStateInternals) pushbackStateInternals)
+ .restoreKeyGroupState(keyGroupIndex, in, getUserCodeClassloader());
+ }
+ }
+
+ @Override
+ public void onEventTime(InternalTimer<Object, TimerData> timer) throws Exception {
+ fireTimer(timer);
+ }
+
+ @Override
+ public void onProcessingTime(InternalTimer<Object, TimerData> timer) throws Exception {
+ fireTimer(timer);
+ }
+
+ // allow overriding this in WindowDoFnOperator
+ public void fireTimer(InternalTimer<?, TimerData> timer) {
+ TimerInternals.TimerData timerData = timer.getNamespace();
+ StateNamespace namespace = timerData.getNamespace();
+ // This is a user timer, so namespace must be WindowNamespace
+ checkArgument(namespace instanceof WindowNamespace);
+ BoundedWindow window = ((WindowNamespace) namespace).getWindow();
+ pushbackDoFnRunner.onTimer(timerData.getTimerId(), window,
+ timerData.getTimestamp(), timerData.getDomain());
+ }
+
+ /**
+ * Factory for creating an {@link DoFnRunners.OutputManager} from
+ * a Flink {@link Output}.
+ */
+ interface OutputManagerFactory<OutputT> extends Serializable {
+ DoFnRunners.OutputManager create(Output<StreamRecord<OutputT>> output);
+ }
+
+ /**
+ * Default implementation of {@link OutputManagerFactory} that creates an
+ * {@link DoFnRunners.OutputManager} that only writes to
+ * a single logical output.
+ */
+ public static class DefaultOutputManagerFactory<OutputT>
+ implements OutputManagerFactory<OutputT> {
+ @Override
+ public DoFnRunners.OutputManager create(final Output<StreamRecord<OutputT>> output) {
+ return new DoFnRunners.OutputManager() {
+ @Override
+ public <T> void output(TupleTag<T> tag, WindowedValue<T> value) {
+ // with tagged outputs we can't get around this because we don't
+ // know our own output type...
+ @SuppressWarnings("unchecked")
+ OutputT castValue = (OutputT) value;
+ output.collect(new StreamRecord<>(castValue));
+ }
+ };
+ }
+ }
+
+ /**
+ * Implementation of {@link OutputManagerFactory} that creates an
+ * {@link DoFnRunners.OutputManager} that can write to multiple logical
+ * outputs by unioning them in a {@link RawUnionValue}.
+ */
+ public static class MultiOutputOutputManagerFactory
+ implements OutputManagerFactory<RawUnionValue> {
+
+ Map<TupleTag<?>, Integer> mapping;
+
+ public MultiOutputOutputManagerFactory(Map<TupleTag<?>, Integer> mapping) {
+ this.mapping = mapping;
+ }
+
+ @Override
+ public DoFnRunners.OutputManager create(final Output<StreamRecord<RawUnionValue>> output) {
+ return new DoFnRunners.OutputManager() {
+ @Override
+ public <T> void output(TupleTag<T> tag, WindowedValue<T> value) {
+ int intTag = mapping.get(tag);
+ output.collect(new StreamRecord<>(new RawUnionValue(intTag, value)));
+ }
+ };
+ }
+ }
+
+ /**
+ * {@link StepContext} for running {@link DoFn DoFns} on Flink. It exposes the operator's state
+ * and timer internals but does not support writing side-input data.
+ */
+ protected class StepContext implements ExecutionContext.StepContext {
+
+ @Override
+ public String getStepName() {
+ return null;
+ }
+
+ @Override
+ public String getTransformName() {
+ return null;
+ }
+
+ @Override
+ public void noteOutput(WindowedValue<?> output) {}
+
+ @Override
+ public void noteOutput(TupleTag<?> tag, WindowedValue<?> output) {}
+
+ @Override
+ public <T, W extends BoundedWindow> void writePCollectionViewData(
+ TupleTag<?> tag,
+ Iterable<WindowedValue<T>> data,
+ Coder<Iterable<WindowedValue<T>>> dataCoder,
+ W window,
+ Coder<W> windowCoder) throws IOException {
+ throw new UnsupportedOperationException("Writing side-input data is not supported.");
+ }
+
+ @Override
+ public StateInternals<?> stateInternals() {
+ return stateInternals;
+ }
+
+ @Override
+ public TimerInternals timerInternals() {
+ return timerInternals;
+ }
+ }
+
+ private class FlinkTimerInternals implements TimerInternals {
+
+ @Override
+ public void setTimer(
+ StateNamespace namespace, String timerId, Instant target, TimeDomain timeDomain) {
+ setTimer(TimerData.of(timerId, namespace, target, timeDomain));
+ }
+
+ @Deprecated
+ @Override
+ public void setTimer(TimerData timerKey) {
+ long time = timerKey.getTimestamp().getMillis();
+ if (timerKey.getDomain().equals(TimeDomain.EVENT_TIME)) {
+ timerService.registerEventTimeTimer(timerKey, time);
+ } else if (timerKey.getDomain().equals(TimeDomain.PROCESSING_TIME)) {
+ timerService.registerProcessingTimeTimer(timerKey, time);
+ } else {
+ throw new UnsupportedOperationException(
+ "Unsupported time domain: " + timerKey.getDomain());
+ }
+ }
+
+ @Deprecated
+ @Override
+ public void deleteTimer(StateNamespace namespace, String timerId) {
+ throw new UnsupportedOperationException(
+ "Canceling of a timer by ID is not yet supported.");
+ }
+
+ @Override
+ public void deleteTimer(StateNamespace namespace, String timerId, TimeDomain timeDomain) {
+ throw new UnsupportedOperationException(
+ "Canceling of a timer by ID is not yet supported.");
+ }
+
+ @Deprecated
+ @Override
+ public void deleteTimer(TimerData timerKey) {
+ long time = timerKey.getTimestamp().getMillis();
+ if (timerKey.getDomain().equals(TimeDomain.EVENT_TIME)) {
+ timerService.deleteEventTimeTimer(timerKey, time);
+ } else if (timerKey.getDomain().equals(TimeDomain.PROCESSING_TIME)) {
+ timerService.deleteProcessingTimeTimer(timerKey, time);
+ } else {
+ throw new UnsupportedOperationException(
+ "Unsupported time domain: " + timerKey.getDomain());
+ }
+ }
+
+ @Override
+ public Instant currentProcessingTime() {
+ return new Instant(timerService.currentProcessingTime());
+ }
+
+ @Nullable
+ @Override
+ public Instant currentSynchronizedProcessingTime() {
+ return new Instant(timerService.currentProcessingTime());
+ }
+
+ @Override
+ public Instant currentInputWatermarkTime() {
+ return new Instant(Math.min(currentInputWatermark, getPushbackWatermarkHold()));
+ }
+
+ @Nullable
+ @Override
+ public Instant currentOutputWatermarkTime() {
+ return new Instant(currentOutputWatermark);
+ }
+ }
+}
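To make the wiring concrete, a hedged sketch of how a translator might instantiate this operator for a simple, non-keyed ParDo with no side inputs (myDoFn and options are placeholders, not names from this commit):

    TupleTag<String> mainTag = new TupleTag<>("main-output");
    DoFnOperator<String, String, WindowedValue<String>> operator =
        new DoFnOperator<>(
            myDoFn,                                                  // some DoFn<String, String>
            WindowedValue.getValueOnlyCoder(StringUtf8Coder.of()),   // input coder
            mainTag,
            Collections.<TupleTag<?>>emptyList(),                    // no additional outputs
            new DoFnOperator.DefaultOutputManagerFactory<WindowedValue<String>>(),
            WindowingStrategy.globalDefault(),
            Collections.<Integer, PCollectionView<?>>emptyMap(),     // no side-input tag mapping
            Collections.<PCollectionView<?>>emptyList(),             // no side inputs
            options,
            null);                                                   // null key coder: not stateful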
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/KvToByteBufferKeySelector.java
----------------------------------------------------------------------
diff --git a/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/KvToByteBufferKeySelector.java b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/KvToByteBufferKeySelector.java
new file mode 100644
index 0000000..dce2e68
--- /dev/null
+++ b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/KvToByteBufferKeySelector.java
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.runners.flink.translation.wrappers.streaming;
+
+import java.nio.ByteBuffer;
+import org.apache.beam.sdk.coders.Coder;
+import org.apache.beam.sdk.util.CoderUtils;
+import org.apache.beam.sdk.util.WindowedValue;
+import org.apache.beam.sdk.values.KV;
+import org.apache.flink.api.common.typeinfo.TypeInformation;
+import org.apache.flink.api.java.functions.KeySelector;
+import org.apache.flink.api.java.typeutils.GenericTypeInfo;
+import org.apache.flink.api.java.typeutils.ResultTypeQueryable;
+
+/**
+ * {@link KeySelector} that retrieves a key from a {@link KV}. This will return
+ * the key as encoded by the provided {@link Coder} in a {@link ByteBuffer}. This ensures
+ * that all key comparisons/hashing happen on the encoded form.
+ */
+public class KvToByteBufferKeySelector<K, V>
+ implements KeySelector<WindowedValue<KV<K, V>>, ByteBuffer>,
+ ResultTypeQueryable<ByteBuffer> {
+
+ private final Coder<K> keyCoder;
+
+ public KvToByteBufferKeySelector(Coder<K> keyCoder) {
+ this.keyCoder = keyCoder;
+ }
+
+ @Override
+ public ByteBuffer getKey(WindowedValue<KV<K, V>> value) throws Exception {
+ K key = value.getValue().getKey();
+ byte[] keyBytes = CoderUtils.encodeToByteArray(keyCoder, key);
+ return ByteBuffer.wrap(keyBytes);
+ }
+
+ @Override
+ public TypeInformation<ByteBuffer> getProducedType() {
+ return new GenericTypeInfo<>(ByteBuffer.class);
+ }
+}
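A minimal sketch of how this selector would typically be used (not part of the diff); the stream of windowed KVs is assumed to come from an upstream operator:

    KeyedStream<WindowedValue<KV<String, Long>>, ByteBuffer> keyByEncodedKey(
        DataStream<WindowedValue<KV<String, Long>>> input) {
      // group by the ByteBuffer of encoded key bytes so hashing is done on the encoded form
      return input.keyBy(new KvToByteBufferKeySelector<String, Long>(StringUtf8Coder.of()));
    }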
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/SingletonKeyedWorkItem.java
----------------------------------------------------------------------
diff --git a/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/SingletonKeyedWorkItem.java b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/SingletonKeyedWorkItem.java
new file mode 100644
index 0000000..e843660
--- /dev/null
+++ b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/SingletonKeyedWorkItem.java
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.runners.flink.translation.wrappers.streaming;
+
+import java.util.Collections;
+import org.apache.beam.runners.core.KeyedWorkItem;
+import org.apache.beam.runners.core.TimerInternals;
+import org.apache.beam.sdk.util.WindowedValue;
+
+/**
+ * Singleton keyed work item.
+ */
+public class SingletonKeyedWorkItem<K, ElemT> implements KeyedWorkItem<K, ElemT> {
+
+ final K key;
+ final WindowedValue<ElemT> value;
+
+ public SingletonKeyedWorkItem(K key, WindowedValue<ElemT> value) {
+ this.key = key;
+ this.value = value;
+ }
+
+ @Override
+ public K key() {
+ return key;
+ }
+
+ public WindowedValue<ElemT> value() {
+ return value;
+ }
+
+ @Override
+ public Iterable<TimerInternals.TimerData> timersIterable() {
+ return Collections.<TimerInternals.TimerData>emptyList();
+ }
+
+ @Override
+ public Iterable<WindowedValue<ElemT>> elementsIterable() {
+ return Collections.singletonList(value);
+ }
+}
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/SingletonKeyedWorkItemCoder.java
----------------------------------------------------------------------
diff --git a/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/SingletonKeyedWorkItemCoder.java b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/SingletonKeyedWorkItemCoder.java
new file mode 100644
index 0000000..9a52330
--- /dev/null
+++ b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/SingletonKeyedWorkItemCoder.java
@@ -0,0 +1,126 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.runners.flink.translation.wrappers.streaming;
+
+import static com.google.common.base.Preconditions.checkArgument;
+
+import com.fasterxml.jackson.annotation.JsonCreator;
+import com.fasterxml.jackson.annotation.JsonProperty;
+import com.google.common.collect.ImmutableList;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.util.List;
+import org.apache.beam.runners.core.KeyedWorkItem;
+import org.apache.beam.runners.core.KeyedWorkItemCoder;
+import org.apache.beam.sdk.coders.Coder;
+import org.apache.beam.sdk.coders.CoderException;
+import org.apache.beam.sdk.coders.StandardCoder;
+import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
+import org.apache.beam.sdk.util.PropertyNames;
+import org.apache.beam.sdk.util.WindowedValue;
+
+/**
+ * Singleton keyed work item coder.
+ */
+public class SingletonKeyedWorkItemCoder<K, ElemT>
+ extends StandardCoder<SingletonKeyedWorkItem<K, ElemT>> {
+ /**
+ * Create a new {@link SingletonKeyedWorkItemCoder} with the provided key coder, element coder,
+ * and window coder.
+ */
+ public static <K, ElemT> SingletonKeyedWorkItemCoder<K, ElemT> of(
+ Coder<K> keyCoder, Coder<ElemT> elemCoder, Coder<? extends BoundedWindow> windowCoder) {
+ return new SingletonKeyedWorkItemCoder<>(keyCoder, elemCoder, windowCoder);
+ }
+
+ @JsonCreator
+ public static <K, ElemT> SingletonKeyedWorkItemCoder<K, ElemT> of(
+ @JsonProperty(PropertyNames.COMPONENT_ENCODINGS) List<Coder<?>> components) {
+ checkArgument(components.size() == 3, "Expecting 3 components, got %s", components.size());
+ @SuppressWarnings("unchecked")
+ Coder<K> keyCoder = (Coder<K>) components.get(0);
+ @SuppressWarnings("unchecked")
+ Coder<ElemT> elemCoder = (Coder<ElemT>) components.get(1);
+ @SuppressWarnings("unchecked")
+ Coder<? extends BoundedWindow> windowCoder = (Coder<? extends BoundedWindow>) components.get(2);
+ return new SingletonKeyedWorkItemCoder<>(keyCoder, elemCoder, windowCoder);
+ }
+
+ private final Coder<K> keyCoder;
+ private final Coder<ElemT> elemCoder;
+ private final Coder<? extends BoundedWindow> windowCoder;
+ private final WindowedValue.FullWindowedValueCoder<ElemT> valueCoder;
+
+ private SingletonKeyedWorkItemCoder(
+ Coder<K> keyCoder, Coder<ElemT> elemCoder, Coder<? extends BoundedWindow> windowCoder) {
+ this.keyCoder = keyCoder;
+ this.elemCoder = elemCoder;
+ this.windowCoder = windowCoder;
+ valueCoder = WindowedValue.FullWindowedValueCoder.of(elemCoder, windowCoder);
+ }
+
+ public Coder<K> getKeyCoder() {
+ return keyCoder;
+ }
+
+ public Coder<ElemT> getElementCoder() {
+ return elemCoder;
+ }
+
+ @Override
+ public void encode(SingletonKeyedWorkItem<K, ElemT> value,
+ OutputStream outStream,
+ Context context)
+ throws CoderException, IOException {
+ keyCoder.encode(value.key(), outStream, context.nested());
+ valueCoder.encode(value.value, outStream, context);
+ }
+
+ @Override
+ public SingletonKeyedWorkItem<K, ElemT> decode(InputStream inStream, Context context)
+ throws CoderException, IOException {
+ K key = keyCoder.decode(inStream, context.nested());
+ WindowedValue<ElemT> value = valueCoder.decode(inStream, context);
+ return new SingletonKeyedWorkItem<>(key, value);
+ }
+
+ @Override
+ public List<? extends Coder<?>> getCoderArguments() {
+ return ImmutableList.of(keyCoder, elemCoder, windowCoder);
+ }
+
+ @Override
+ public void verifyDeterministic() throws NonDeterministicException {
+ keyCoder.verifyDeterministic();
+ elemCoder.verifyDeterministic();
+ windowCoder.verifyDeterministic();
+ }
+
+ /**
+ * {@inheritDoc}
+ *
+ * <p>{@link SingletonKeyedWorkItemCoder} is not consistent with equals as it can return a
+ * {@link KeyedWorkItem} of a type different from the originally encoded type.
+ */
+ @Override
+ public boolean consistentWithEquals() {
+ return false;
+ }
+
+}
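For reference, a minimal round trip through the new coder shows how the key, element, and window coders compose. This is only a sketch, not part of the commit; it assumes it lives in the same streaming wrappers package (so the SingletonKeyedWorkItem constructor used by decode() is accessible) and it picks StringUtf8Coder, VarLongCoder, and the global window coder purely for illustration.

package org.apache.beam.runners.flink.translation.wrappers.streaming;

import org.apache.beam.sdk.coders.StringUtf8Coder;
import org.apache.beam.sdk.coders.VarLongCoder;
import org.apache.beam.sdk.transforms.windowing.GlobalWindow;
import org.apache.beam.sdk.util.CoderUtils;
import org.apache.beam.sdk.util.WindowedValue;

public class SingletonKeyedWorkItemCoderRoundTrip {
  public static void main(String[] args) throws Exception {
    SingletonKeyedWorkItemCoder<String, Long> coder =
        SingletonKeyedWorkItemCoder.of(
            StringUtf8Coder.of(), VarLongCoder.of(), GlobalWindow.Coder.INSTANCE);

    // Pair a key with a value in the global window.
    SingletonKeyedWorkItem<String, Long> item =
        new SingletonKeyedWorkItem<>("user-1", WindowedValue.valueInGlobalWindow(42L));

    // Encode and decode; the decoded item carries the same key and windowed value.
    byte[] bytes = CoderUtils.encodeToByteArray(coder, item);
    SingletonKeyedWorkItem<String, Long> decoded = CoderUtils.decodeFromByteArray(coder, bytes);

    System.out.println(decoded.key());               // user-1
    System.out.println(decoded.value().getValue());  // 42
  }
}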
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/SplittableDoFnOperator.java
----------------------------------------------------------------------
diff --git a/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/SplittableDoFnOperator.java b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/SplittableDoFnOperator.java
new file mode 100644
index 0000000..40f70e4
--- /dev/null
+++ b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/SplittableDoFnOperator.java
@@ -0,0 +1,150 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.runners.flink.translation.wrappers.streaming;
+
+import static com.google.common.base.Preconditions.checkState;
+
+import java.util.Collection;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.Executors;
+import org.apache.beam.runners.core.ElementAndRestriction;
+import org.apache.beam.runners.core.KeyedWorkItem;
+import org.apache.beam.runners.core.KeyedWorkItems;
+import org.apache.beam.runners.core.OutputAndTimeBoundedSplittableProcessElementInvoker;
+import org.apache.beam.runners.core.OutputWindowedValue;
+import org.apache.beam.runners.core.SplittableParDo;
+import org.apache.beam.runners.core.StateInternals;
+import org.apache.beam.runners.core.StateInternalsFactory;
+import org.apache.beam.runners.core.TimerInternals;
+import org.apache.beam.runners.core.TimerInternalsFactory;
+import org.apache.beam.sdk.coders.Coder;
+import org.apache.beam.sdk.options.PipelineOptions;
+import org.apache.beam.sdk.transforms.DoFn;
+import org.apache.beam.sdk.transforms.splittabledofn.RestrictionTracker;
+import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
+import org.apache.beam.sdk.transforms.windowing.PaneInfo;
+import org.apache.beam.sdk.util.WindowedValue;
+import org.apache.beam.sdk.util.WindowingStrategy;
+import org.apache.beam.sdk.values.PCollectionView;
+import org.apache.beam.sdk.values.TupleTag;
+import org.apache.flink.streaming.api.operators.InternalTimer;
+import org.joda.time.Duration;
+import org.joda.time.Instant;
+
+/**
+ * Flink operator for executing splittable {@link DoFn DoFns}. Specifically, for executing
+ * the {@code @ProcessElement} method of a splittable {@link DoFn}.
+ */
+public class SplittableDoFnOperator<
+ InputT, FnOutputT, OutputT, RestrictionT, TrackerT extends RestrictionTracker<RestrictionT>>
+ extends DoFnOperator<
+ KeyedWorkItem<String, ElementAndRestriction<InputT, RestrictionT>>, FnOutputT, OutputT> {
+
+ public SplittableDoFnOperator(
+ DoFn<KeyedWorkItem<String, ElementAndRestriction<InputT, RestrictionT>>, FnOutputT> doFn,
+ Coder<
+ WindowedValue<
+ KeyedWorkItem<String, ElementAndRestriction<InputT, RestrictionT>>>> inputCoder,
+ TupleTag<FnOutputT> mainOutputTag,
+ List<TupleTag<?>> additionalOutputTags,
+ OutputManagerFactory<OutputT> outputManagerFactory,
+ WindowingStrategy<?, ?> windowingStrategy,
+ Map<Integer, PCollectionView<?>> sideInputTagMapping,
+ Collection<PCollectionView<?>> sideInputs,
+ PipelineOptions options,
+ Coder<?> keyCoder) {
+ super(
+ doFn,
+ inputCoder,
+ mainOutputTag,
+ additionalOutputTags,
+ outputManagerFactory,
+ windowingStrategy,
+ sideInputTagMapping,
+ sideInputs,
+ options,
+ keyCoder);
+
+ }
+
+ @Override
+ public void open() throws Exception {
+ super.open();
+
+ checkState(doFn instanceof SplittableParDo.ProcessFn);
+
+ StateInternalsFactory<String> stateInternalsFactory = new StateInternalsFactory<String>() {
+ @Override
+ public StateInternals<String> stateInternalsForKey(String key) {
+ //this will implicitly be keyed by the key of the incoming
+ // element or by the key of a firing timer
+ return (StateInternals<String>) stateInternals;
+ }
+ };
+ TimerInternalsFactory<String> timerInternalsFactory = new TimerInternalsFactory<String>() {
+ @Override
+ public TimerInternals timerInternalsForKey(String key) {
+ //this will implicitly be keyed like the StateInternalsFactory
+ return timerInternals;
+ }
+ };
+
+ ((SplittableParDo.ProcessFn) doFn).setStateInternalsFactory(stateInternalsFactory);
+ ((SplittableParDo.ProcessFn) doFn).setTimerInternalsFactory(timerInternalsFactory);
+ ((SplittableParDo.ProcessFn) doFn).setProcessElementInvoker(
+ new OutputAndTimeBoundedSplittableProcessElementInvoker<>(
+ doFn,
+ serializedOptions.getPipelineOptions(),
+ new OutputWindowedValue<FnOutputT>() {
+ @Override
+ public void outputWindowedValue(
+ FnOutputT output,
+ Instant timestamp,
+ Collection<? extends BoundedWindow> windows,
+ PaneInfo pane) {
+ outputManager.output(
+ mainOutputTag,
+ WindowedValue.of(output, timestamp, windows, pane));
+ }
+
+ @Override
+ public <AdditionalOutputT> void outputWindowedValue(
+ TupleTag<AdditionalOutputT> tag,
+ AdditionalOutputT output,
+ Instant timestamp,
+ Collection<? extends BoundedWindow> windows,
+ PaneInfo pane) {
+ outputManager.output(tag, WindowedValue.of(output, timestamp, windows, pane));
+ }
+ },
+ sideInputReader,
+ Executors.newSingleThreadScheduledExecutor(Executors.defaultThreadFactory()),
+ 10000,
+ Duration.standardSeconds(10)));
+ }
+
+ @Override
+ public void fireTimer(InternalTimer<?, TimerInternals.TimerData> timer) {
+ doFnRunner.processElement(WindowedValue.valueInGlobalWindow(
+ KeyedWorkItems.<String, ElementAndRestriction<InputT, RestrictionT>>timersWorkItem(
+ (String) stateInternals.getKey(),
+ Collections.singletonList(timer.getNamespace()))));
+ }
+}
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/WindowDoFnOperator.java
----------------------------------------------------------------------
diff --git a/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/WindowDoFnOperator.java b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/WindowDoFnOperator.java
new file mode 100644
index 0000000..9b2136c
--- /dev/null
+++ b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/WindowDoFnOperator.java
@@ -0,0 +1,117 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.runners.flink.translation.wrappers.streaming;
+
+import static org.apache.beam.runners.core.TimerInternals.TimerData;
+
+import java.util.Collection;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import org.apache.beam.runners.core.GroupAlsoByWindowViaWindowSetNewDoFn;
+import org.apache.beam.runners.core.KeyedWorkItem;
+import org.apache.beam.runners.core.KeyedWorkItems;
+import org.apache.beam.runners.core.StateInternals;
+import org.apache.beam.runners.core.StateInternalsFactory;
+import org.apache.beam.runners.core.SystemReduceFn;
+import org.apache.beam.runners.core.TimerInternals;
+import org.apache.beam.runners.core.TimerInternalsFactory;
+import org.apache.beam.sdk.coders.Coder;
+import org.apache.beam.sdk.options.PipelineOptions;
+import org.apache.beam.sdk.transforms.DoFn;
+import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
+import org.apache.beam.sdk.util.WindowedValue;
+import org.apache.beam.sdk.util.WindowingStrategy;
+import org.apache.beam.sdk.values.KV;
+import org.apache.beam.sdk.values.PCollectionView;
+import org.apache.beam.sdk.values.TupleTag;
+import org.apache.flink.streaming.api.operators.InternalTimer;
+
+/**
+ * Flink operator for executing window {@link DoFn DoFns}, i.e. the GroupAlsoByWindow part of
+ * a {@code GroupByKey} or {@code Combine.PerKey}.
+ */
+public class WindowDoFnOperator<K, InputT, OutputT>
+ extends DoFnOperator<KeyedWorkItem<K, InputT>, KV<K, OutputT>, WindowedValue<KV<K, OutputT>>> {
+
+ private final SystemReduceFn<K, InputT, ?, OutputT, BoundedWindow> systemReduceFn;
+
+ public WindowDoFnOperator(
+ SystemReduceFn<K, InputT, ?, OutputT, BoundedWindow> systemReduceFn,
+ Coder<WindowedValue<KeyedWorkItem<K, InputT>>> inputCoder,
+ TupleTag<KV<K, OutputT>> mainOutputTag,
+ List<TupleTag<?>> additionalOutputTags,
+ OutputManagerFactory<WindowedValue<KV<K, OutputT>>> outputManagerFactory,
+ WindowingStrategy<?, ?> windowingStrategy,
+ Map<Integer, PCollectionView<?>> sideInputTagMapping,
+ Collection<PCollectionView<?>> sideInputs,
+ PipelineOptions options,
+ Coder<K> keyCoder) {
+ super(
+ null,
+ inputCoder,
+ mainOutputTag,
+ additionalOutputTags,
+ outputManagerFactory,
+ windowingStrategy,
+ sideInputTagMapping,
+ sideInputs,
+ options,
+ keyCoder);
+
+ this.systemReduceFn = systemReduceFn;
+
+ }
+
+ @Override
+ protected DoFn<KeyedWorkItem<K, InputT>, KV<K, OutputT>> getDoFn() {
+ StateInternalsFactory<K> stateInternalsFactory = new StateInternalsFactory<K>() {
+ @Override
+ public StateInternals<K> stateInternalsForKey(K key) {
+ //this will implicitly be keyed by the key of the incoming
+ // element or by the key of a firing timer
+ return (StateInternals<K>) stateInternals;
+ }
+ };
+ TimerInternalsFactory<K> timerInternalsFactory = new TimerInternalsFactory<K>() {
+ @Override
+ public TimerInternals timerInternalsForKey(K key) {
+ //this will implicitly be keyed like the StateInternalsFactory
+ return timerInternals;
+ }
+ };
+
+ // we have to do the unchecked cast because GroupAlsoByWindowViaWindowSetNewDoFn.create
+ // has the window type as a generic parameter while WindowingStrategy is almost always
+ // untyped.
+ @SuppressWarnings("unchecked")
+ DoFn<KeyedWorkItem<K, InputT>, KV<K, OutputT>> doFn =
+ GroupAlsoByWindowViaWindowSetNewDoFn.create(
+ windowingStrategy, stateInternalsFactory, timerInternalsFactory, sideInputReader,
+ (SystemReduceFn) systemReduceFn, outputManager, mainOutputTag);
+ return doFn;
+ }
+
+ @Override
+ public void fireTimer(InternalTimer<?, TimerData> timer) {
+ doFnRunner.processElement(WindowedValue.valueInGlobalWindow(
+ KeyedWorkItems.<K, InputT>timersWorkItem(
+ (K) stateInternals.getKey(),
+ Collections.singletonList(timer.getNamespace()))));
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/WorkItemKeySelector.java
----------------------------------------------------------------------
diff --git a/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/WorkItemKeySelector.java b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/WorkItemKeySelector.java
new file mode 100644
index 0000000..1dff367
--- /dev/null
+++ b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/WorkItemKeySelector.java
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.runners.flink.translation.wrappers.streaming;
+
+import java.nio.ByteBuffer;
+import org.apache.beam.runners.core.KeyedWorkItem;
+import org.apache.beam.sdk.coders.Coder;
+import org.apache.beam.sdk.util.CoderUtils;
+import org.apache.beam.sdk.util.WindowedValue;
+import org.apache.flink.api.common.typeinfo.TypeInformation;
+import org.apache.flink.api.java.functions.KeySelector;
+import org.apache.flink.api.java.typeutils.GenericTypeInfo;
+import org.apache.flink.api.java.typeutils.ResultTypeQueryable;
+
+/**
+ * {@link KeySelector} that retrieves a key from a {@link KeyedWorkItem}. This will return
+ * the key as encoded by the provided {@link Coder} in a {@link ByteBuffer}. This ensures
+ * that all key comparisons/hashing happen on the encoded form.
+ */
+public class WorkItemKeySelector<K, V>
+ implements KeySelector<WindowedValue<SingletonKeyedWorkItem<K, V>>, ByteBuffer>,
+ ResultTypeQueryable<ByteBuffer> {
+
+ private final Coder<K> keyCoder;
+
+ public WorkItemKeySelector(Coder<K> keyCoder) {
+ this.keyCoder = keyCoder;
+ }
+
+ @Override
+ public ByteBuffer getKey(WindowedValue<SingletonKeyedWorkItem<K, V>> value) throws Exception {
+ K key = value.getValue().key();
+ byte[] keyBytes = CoderUtils.encodeToByteArray(keyCoder, key);
+ return ByteBuffer.wrap(keyBytes);
+ }
+
+ @Override
+ public TypeInformation<ByteBuffer> getProducedType() {
+ return new GenericTypeInfo<>(ByteBuffer.class);
+ }
+}
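As a rough illustration (not part of the commit), the selector can be exercised directly to show that key extraction happens on the coder-encoded bytes; in a real job it would be passed to DataStream#keyBy. The demo assumes same-package access to the SingletonKeyedWorkItem constructor and uses StringUtf8Coder only as an example.

package org.apache.beam.runners.flink.translation.wrappers.streaming;

import java.nio.ByteBuffer;
import org.apache.beam.sdk.coders.StringUtf8Coder;
import org.apache.beam.sdk.util.WindowedValue;

public class WorkItemKeySelectorDemo {
  public static void main(String[] args) throws Exception {
    WorkItemKeySelector<String, Long> selector =
        new WorkItemKeySelector<>(StringUtf8Coder.of());

    SingletonKeyedWorkItem<String, Long> item =
        new SingletonKeyedWorkItem<>("user-1", WindowedValue.valueInGlobalWindow(42L));

    // Keys extracted from logically equal work items are identical ByteBuffers,
    // so Flink partitions/hashes on the encoded form rather than on Java equality.
    ByteBuffer k1 = selector.getKey(WindowedValue.valueInGlobalWindow(item));
    ByteBuffer k2 = selector.getKey(WindowedValue.valueInGlobalWindow(item));
    System.out.println(k1.equals(k2));  // true
  }
}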
[29/50] [abbrv] beam git commit: [BEAM-1994] Remove Flink examples package
Posted by dh...@apache.org.
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkMergingNonShuffleReduceFunction.java
----------------------------------------------------------------------
diff --git a/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkMergingNonShuffleReduceFunction.java b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkMergingNonShuffleReduceFunction.java
new file mode 100644
index 0000000..26fd0b4
--- /dev/null
+++ b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkMergingNonShuffleReduceFunction.java
@@ -0,0 +1,228 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.runners.flink.translation.functions;
+
+import com.google.common.collect.Iterables;
+import com.google.common.collect.Lists;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import org.apache.beam.runners.core.PerKeyCombineFnRunner;
+import org.apache.beam.runners.core.PerKeyCombineFnRunners;
+import org.apache.beam.runners.flink.translation.utils.SerializedPipelineOptions;
+import org.apache.beam.sdk.options.PipelineOptions;
+import org.apache.beam.sdk.transforms.CombineFnBase;
+import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
+import org.apache.beam.sdk.transforms.windowing.IntervalWindow;
+import org.apache.beam.sdk.transforms.windowing.OutputTimeFn;
+import org.apache.beam.sdk.transforms.windowing.PaneInfo;
+import org.apache.beam.sdk.util.WindowedValue;
+import org.apache.beam.sdk.util.WindowingStrategy;
+import org.apache.beam.sdk.values.KV;
+import org.apache.beam.sdk.values.PCollectionView;
+import org.apache.flink.api.common.functions.RichGroupReduceFunction;
+import org.apache.flink.util.Collector;
+import org.joda.time.Instant;
+
+/**
+ * Special version of {@link FlinkReduceFunction} that supports merging windows. This
+ * assumes that the windows are {@link IntervalWindow IntervalWindows} and exhibits the
+ * same behaviour as {@code MergeOverlappingIntervalWindows}.
+ *
+ * <p>This is different from the pair of functions for the non-merging windows case
+ * in that we cannot do combining before the shuffle because elements would not
+ * yet be in their correct windows for side-input access.
+ */
+public class FlinkMergingNonShuffleReduceFunction<
+ K, InputT, AccumT, OutputT, W extends IntervalWindow>
+ extends RichGroupReduceFunction<WindowedValue<KV<K, InputT>>, WindowedValue<KV<K, OutputT>>> {
+
+ private final CombineFnBase.PerKeyCombineFn<K, InputT, AccumT, OutputT> combineFn;
+
+ private final WindowingStrategy<?, W> windowingStrategy;
+
+ private final Map<PCollectionView<?>, WindowingStrategy<?, ?>> sideInputs;
+
+ private final SerializedPipelineOptions serializedOptions;
+
+ public FlinkMergingNonShuffleReduceFunction(
+ CombineFnBase.PerKeyCombineFn<K, InputT, AccumT, OutputT> keyedCombineFn,
+ WindowingStrategy<?, W> windowingStrategy,
+ Map<PCollectionView<?>, WindowingStrategy<?, ?>> sideInputs,
+ PipelineOptions pipelineOptions) {
+
+ this.combineFn = keyedCombineFn;
+
+ this.windowingStrategy = windowingStrategy;
+ this.sideInputs = sideInputs;
+
+ this.serializedOptions = new SerializedPipelineOptions(pipelineOptions);
+
+ }
+
+ @Override
+ public void reduce(
+ Iterable<WindowedValue<KV<K, InputT>>> elements,
+ Collector<WindowedValue<KV<K, OutputT>>> out) throws Exception {
+
+ PipelineOptions options = serializedOptions.getPipelineOptions();
+
+ FlinkSideInputReader sideInputReader =
+ new FlinkSideInputReader(sideInputs, getRuntimeContext());
+
+ PerKeyCombineFnRunner<K, InputT, AccumT, OutputT> combineFnRunner =
+ PerKeyCombineFnRunners.create(combineFn);
+
+ @SuppressWarnings("unchecked")
+ OutputTimeFn<? super BoundedWindow> outputTimeFn =
+ (OutputTimeFn<? super BoundedWindow>) windowingStrategy.getOutputTimeFn();
+
+ // get all elements so that we can sort them; they have to fit into
+ // memory. This seems very imprudent, but it is correct, for now.
+ List<WindowedValue<KV<K, InputT>>> sortedInput = Lists.newArrayList();
+ for (WindowedValue<KV<K, InputT>> inputValue : elements) {
+ for (WindowedValue<KV<K, InputT>> exploded : inputValue.explodeWindows()) {
+ sortedInput.add(exploded);
+ }
+ }
+ Collections.sort(sortedInput, new Comparator<WindowedValue<KV<K, InputT>>>() {
+ @Override
+ public int compare(
+ WindowedValue<KV<K, InputT>> o1,
+ WindowedValue<KV<K, InputT>> o2) {
+ return Iterables.getOnlyElement(o1.getWindows()).maxTimestamp()
+ .compareTo(Iterables.getOnlyElement(o2.getWindows()).maxTimestamp());
+ }
+ });
+
+ // merge windows, we have to do it in an extra pre-processing step and
+ // can't do it as we go since the window of early elements would not
+ // be correct when calling the CombineFn
+ mergeWindow(sortedInput);
+
+ // iterate over the elements that are sorted by window timestamp
+ final Iterator<WindowedValue<KV<K, InputT>>> iterator = sortedInput.iterator();
+
+ // create accumulator using the first element's key
+ WindowedValue<KV<K, InputT>> currentValue = iterator.next();
+ K key = currentValue.getValue().getKey();
+ IntervalWindow currentWindow =
+ (IntervalWindow) Iterables.getOnlyElement(currentValue.getWindows());
+ InputT firstValue = currentValue.getValue().getValue();
+ AccumT accumulator =
+ combineFnRunner.createAccumulator(key, options, sideInputReader, currentValue.getWindows());
+ accumulator = combineFnRunner.addInput(key, accumulator, firstValue,
+ options, sideInputReader, currentValue.getWindows());
+
+ // we use this to keep track of the timestamps assigned by the OutputTimeFn
+ Instant windowTimestamp =
+ outputTimeFn.assignOutputTime(currentValue.getTimestamp(), currentWindow);
+
+ while (iterator.hasNext()) {
+ WindowedValue<KV<K, InputT>> nextValue = iterator.next();
+ IntervalWindow nextWindow =
+ (IntervalWindow) Iterables.getOnlyElement(nextValue.getWindows());
+
+ if (currentWindow.equals(nextWindow)) {
+ // continue accumulating and merge windows
+
+ InputT value = nextValue.getValue().getValue();
+ accumulator = combineFnRunner.addInput(key, accumulator, value,
+ options, sideInputReader, currentValue.getWindows());
+
+ windowTimestamp = outputTimeFn.combine(
+ windowTimestamp,
+ outputTimeFn.assignOutputTime(nextValue.getTimestamp(), currentWindow));
+
+ } else {
+ // emit the value that we currently have
+ out.collect(
+ WindowedValue.of(
+ KV.of(key, combineFnRunner.extractOutput(key, accumulator,
+ options, sideInputReader, currentValue.getWindows())),
+ windowTimestamp,
+ currentWindow,
+ PaneInfo.NO_FIRING));
+
+ currentWindow = nextWindow;
+ currentValue = nextValue;
+ InputT value = nextValue.getValue().getValue();
+ accumulator = combineFnRunner.createAccumulator(key,
+ options, sideInputReader, currentValue.getWindows());
+ accumulator = combineFnRunner.addInput(key, accumulator, value,
+ options, sideInputReader, currentValue.getWindows());
+ windowTimestamp = outputTimeFn.assignOutputTime(nextValue.getTimestamp(), currentWindow);
+ }
+
+ }
+
+ // emit the final accumulator
+ out.collect(
+ WindowedValue.of(
+ KV.of(key, combineFnRunner.extractOutput(key, accumulator,
+ options, sideInputReader, currentValue.getWindows())),
+ windowTimestamp,
+ currentWindow,
+ PaneInfo.NO_FIRING));
+ }
+
+ /**
+ * Merge windows. This assumes that the list of elements is sorted by window-end timestamp.
+ * This replaces windows in the input list.
+ */
+ private void mergeWindow(List<WindowedValue<KV<K, InputT>>> elements) {
+ int currentStart = 0;
+ IntervalWindow currentWindow =
+ (IntervalWindow) Iterables.getOnlyElement(elements.get(0).getWindows());
+
+ for (int i = 1; i < elements.size(); i++) {
+ WindowedValue<KV<K, InputT>> nextValue = elements.get(i);
+ IntervalWindow nextWindow =
+ (IntervalWindow) Iterables.getOnlyElement(nextValue.getWindows());
+ if (currentWindow.intersects(nextWindow)) {
+ // we continue
+ currentWindow = currentWindow.span(nextWindow);
+ } else {
+ // retrofit the merged window to all windows up to "currentStart"
+ for (int j = i - 1; j >= currentStart; j--) {
+ WindowedValue<KV<K, InputT>> value = elements.get(j);
+ elements.set(
+ j,
+ WindowedValue.of(
+ value.getValue(), value.getTimestamp(), currentWindow, value.getPane()));
+ }
+ currentStart = i;
+ currentWindow = nextWindow;
+ }
+ }
+ if (currentStart < elements.size() - 1) {
+ // we have to retrofit the last batch
+ for (int j = elements.size() - 1; j >= currentStart; j--) {
+ WindowedValue<KV<K, InputT>> value = elements.get(j);
+ elements.set(
+ j,
+ WindowedValue.of(
+ value.getValue(), value.getTimestamp(), currentWindow, value.getPane()));
+ }
+ }
+ }
+
+}
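The mergeWindow pre-processing step above follows the same overlap-and-span idea as MergeOverlappingIntervalWindows. A standalone sketch of that idea (not part of the commit, using only the public IntervalWindow API) looks like this:

import com.google.common.collect.ImmutableList;
import java.util.ArrayList;
import java.util.List;
import org.apache.beam.sdk.transforms.windowing.IntervalWindow;
import org.joda.time.Instant;

public class MergeSortedIntervalWindows {
  // Collapses IntervalWindows (sorted by end timestamp) into non-overlapping
  // spans, mirroring the merge that mergeWindow() retrofits onto the elements.
  static List<IntervalWindow> merge(List<IntervalWindow> sorted) {
    List<IntervalWindow> merged = new ArrayList<>();
    IntervalWindow current = sorted.get(0);
    for (int i = 1; i < sorted.size(); i++) {
      IntervalWindow next = sorted.get(i);
      if (current.intersects(next)) {
        current = current.span(next);   // grow the current merged window
      } else {
        merged.add(current);            // close it out and start a new span
        current = next;
      }
    }
    merged.add(current);
    return merged;
  }

  public static void main(String[] args) {
    IntervalWindow a = new IntervalWindow(new Instant(0), new Instant(10));
    IntervalWindow b = new IntervalWindow(new Instant(5), new Instant(15));
    IntervalWindow c = new IntervalWindow(new Instant(20), new Instant(30));
    // a and b overlap and merge into one window spanning [0, 15); c stays separate.
    System.out.println(merge(ImmutableList.of(a, b, c)));
  }
}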
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkMergingPartialReduceFunction.java
----------------------------------------------------------------------
diff --git a/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkMergingPartialReduceFunction.java b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkMergingPartialReduceFunction.java
new file mode 100644
index 0000000..c68f155
--- /dev/null
+++ b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkMergingPartialReduceFunction.java
@@ -0,0 +1,201 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.runners.flink.translation.functions;
+
+import com.google.common.collect.Iterables;
+import com.google.common.collect.Lists;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import org.apache.beam.runners.core.PerKeyCombineFnRunner;
+import org.apache.beam.runners.core.PerKeyCombineFnRunners;
+import org.apache.beam.sdk.options.PipelineOptions;
+import org.apache.beam.sdk.transforms.CombineFnBase;
+import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
+import org.apache.beam.sdk.transforms.windowing.IntervalWindow;
+import org.apache.beam.sdk.transforms.windowing.OutputTimeFn;
+import org.apache.beam.sdk.transforms.windowing.PaneInfo;
+import org.apache.beam.sdk.util.WindowedValue;
+import org.apache.beam.sdk.util.WindowingStrategy;
+import org.apache.beam.sdk.values.KV;
+import org.apache.beam.sdk.values.PCollectionView;
+import org.apache.flink.util.Collector;
+import org.joda.time.Instant;
+
+/**
+ * Special version of {@link FlinkPartialReduceFunction} that supports merging windows. This
+ * assumes that the windows are {@link IntervalWindow IntervalWindows} and exhibits the
+ * same behaviour as {@code MergeOverlappingIntervalWindows}.
+ */
+public class FlinkMergingPartialReduceFunction<K, InputT, AccumT, W extends IntervalWindow>
+ extends FlinkPartialReduceFunction<K, InputT, AccumT, W> {
+
+ public FlinkMergingPartialReduceFunction(
+ CombineFnBase.PerKeyCombineFn<K, InputT, AccumT, ?> combineFn,
+ WindowingStrategy<?, W> windowingStrategy,
+ Map<PCollectionView<?>, WindowingStrategy<?, ?>> sideInputs,
+ PipelineOptions pipelineOptions) {
+ super(combineFn, windowingStrategy, sideInputs, pipelineOptions);
+ }
+
+ @Override
+ public void combine(
+ Iterable<WindowedValue<KV<K, InputT>>> elements,
+ Collector<WindowedValue<KV<K, AccumT>>> out) throws Exception {
+
+ PipelineOptions options = serializedOptions.getPipelineOptions();
+
+ FlinkSideInputReader sideInputReader =
+ new FlinkSideInputReader(sideInputs, getRuntimeContext());
+
+ PerKeyCombineFnRunner<K, InputT, AccumT, ?> combineFnRunner =
+ PerKeyCombineFnRunners.create(combineFn);
+
+ @SuppressWarnings("unchecked")
+ OutputTimeFn<? super BoundedWindow> outputTimeFn =
+ (OutputTimeFn<? super BoundedWindow>) windowingStrategy.getOutputTimeFn();
+
+ // get all elements so that we can sort them; they have to fit into
+ // memory. This seems very imprudent, but it is correct, for now.
+ List<WindowedValue<KV<K, InputT>>> sortedInput = Lists.newArrayList();
+ for (WindowedValue<KV<K, InputT>> inputValue : elements) {
+ for (WindowedValue<KV<K, InputT>> exploded : inputValue.explodeWindows()) {
+ sortedInput.add(exploded);
+ }
+ }
+ Collections.sort(sortedInput, new Comparator<WindowedValue<KV<K, InputT>>>() {
+ @Override
+ public int compare(
+ WindowedValue<KV<K, InputT>> o1,
+ WindowedValue<KV<K, InputT>> o2) {
+ return Iterables.getOnlyElement(o1.getWindows()).maxTimestamp()
+ .compareTo(Iterables.getOnlyElement(o2.getWindows()).maxTimestamp());
+ }
+ });
+
+ // merge windows, we have to do it in an extra pre-processing step and
+ // can't do it as we go since the window of early elements would not
+ // be correct when calling the CombineFn
+ mergeWindow(sortedInput);
+
+ // iterate over the elements that are sorted by window timestamp
+ final Iterator<WindowedValue<KV<K, InputT>>> iterator = sortedInput.iterator();
+
+ // create accumulator using the first element's key
+ WindowedValue<KV<K, InputT>> currentValue = iterator.next();
+ K key = currentValue.getValue().getKey();
+ IntervalWindow currentWindow =
+ (IntervalWindow) Iterables.getOnlyElement(currentValue.getWindows());
+ InputT firstValue = currentValue.getValue().getValue();
+ AccumT accumulator = combineFnRunner.createAccumulator(key,
+ options, sideInputReader, currentValue.getWindows());
+ accumulator = combineFnRunner.addInput(key, accumulator, firstValue,
+ options, sideInputReader, currentValue.getWindows());
+
+ // we use this to keep track of the timestamps assigned by the OutputTimeFn
+ Instant windowTimestamp =
+ outputTimeFn.assignOutputTime(currentValue.getTimestamp(), currentWindow);
+
+ while (iterator.hasNext()) {
+ WindowedValue<KV<K, InputT>> nextValue = iterator.next();
+ IntervalWindow nextWindow = (IntervalWindow) Iterables.getOnlyElement(nextValue.getWindows());
+
+ if (currentWindow.equals(nextWindow)) {
+ // continue accumulating and merge windows
+
+ InputT value = nextValue.getValue().getValue();
+ accumulator = combineFnRunner.addInput(key, accumulator, value,
+ options, sideInputReader, currentValue.getWindows());
+
+ windowTimestamp = outputTimeFn.combine(
+ windowTimestamp,
+ outputTimeFn.assignOutputTime(nextValue.getTimestamp(), currentWindow));
+
+ } else {
+ // emit the value that we currently have
+ out.collect(
+ WindowedValue.of(
+ KV.of(key, accumulator),
+ windowTimestamp,
+ currentWindow,
+ PaneInfo.NO_FIRING));
+
+ currentWindow = nextWindow;
+ currentValue = nextValue;
+ InputT value = nextValue.getValue().getValue();
+ accumulator = combineFnRunner.createAccumulator(key,
+ options, sideInputReader, currentValue.getWindows());
+ accumulator = combineFnRunner.addInput(key, accumulator, value,
+ options, sideInputReader, currentValue.getWindows());
+ windowTimestamp = outputTimeFn.assignOutputTime(nextValue.getTimestamp(), currentWindow);
+ }
+ }
+
+ // emit the final accumulator
+ out.collect(
+ WindowedValue.of(
+ KV.of(key, accumulator),
+ windowTimestamp,
+ currentWindow,
+ PaneInfo.NO_FIRING));
+ }
+
+ /**
+ * Merge windows. This assumes that the list of elements is sorted by window-end timestamp.
+ * This replaces windows in the input list.
+ */
+ private void mergeWindow(List<WindowedValue<KV<K, InputT>>> elements) {
+ int currentStart = 0;
+ IntervalWindow currentWindow =
+ (IntervalWindow) Iterables.getOnlyElement(elements.get(0).getWindows());
+
+ for (int i = 1; i < elements.size(); i++) {
+ WindowedValue<KV<K, InputT>> nextValue = elements.get(i);
+ IntervalWindow nextWindow =
+ (IntervalWindow) Iterables.getOnlyElement(nextValue.getWindows());
+ if (currentWindow.intersects(nextWindow)) {
+ // we continue
+ currentWindow = currentWindow.span(nextWindow);
+ } else {
+ // retrofit the merged window to all windows up to "currentStart"
+ for (int j = i - 1; j >= currentStart; j--) {
+ WindowedValue<KV<K, InputT>> value = elements.get(j);
+ elements.set(
+ j,
+ WindowedValue.of(
+ value.getValue(), value.getTimestamp(), currentWindow, value.getPane()));
+ }
+ currentStart = i;
+ currentWindow = nextWindow;
+ }
+ }
+ if (currentStart < elements.size() - 1) {
+ // we have to retrofit the last batch
+ for (int j = elements.size() - 1; j >= currentStart; j--) {
+ WindowedValue<KV<K, InputT>> value = elements.get(j);
+ elements.set(
+ j,
+ WindowedValue.of(
+ value.getValue(), value.getTimestamp(), currentWindow, value.getPane()));
+ }
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkMergingReduceFunction.java
----------------------------------------------------------------------
diff --git a/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkMergingReduceFunction.java b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkMergingReduceFunction.java
new file mode 100644
index 0000000..84b3adc
--- /dev/null
+++ b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkMergingReduceFunction.java
@@ -0,0 +1,199 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.runners.flink.translation.functions;
+
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.Iterables;
+import com.google.common.collect.Lists;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import org.apache.beam.runners.core.PerKeyCombineFnRunner;
+import org.apache.beam.runners.core.PerKeyCombineFnRunners;
+import org.apache.beam.sdk.options.PipelineOptions;
+import org.apache.beam.sdk.transforms.CombineFnBase;
+import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
+import org.apache.beam.sdk.transforms.windowing.IntervalWindow;
+import org.apache.beam.sdk.transforms.windowing.OutputTimeFn;
+import org.apache.beam.sdk.transforms.windowing.PaneInfo;
+import org.apache.beam.sdk.util.WindowedValue;
+import org.apache.beam.sdk.util.WindowingStrategy;
+import org.apache.beam.sdk.values.KV;
+import org.apache.beam.sdk.values.PCollectionView;
+import org.apache.flink.util.Collector;
+import org.joda.time.Instant;
+
+/**
+ * Special version of {@link FlinkReduceFunction} that supports merging windows. This
+ * assumes that the windows are {@link IntervalWindow IntervalWindows} and exhibits the
+ * same behaviour as {@code MergeOverlappingIntervalWindows}.
+ */
+public class FlinkMergingReduceFunction<K, AccumT, OutputT, W extends IntervalWindow>
+ extends FlinkReduceFunction<K, AccumT, OutputT, W> {
+
+ public FlinkMergingReduceFunction(
+ CombineFnBase.PerKeyCombineFn<K, ?, AccumT, OutputT> keyedCombineFn,
+ WindowingStrategy<?, W> windowingStrategy,
+ Map<PCollectionView<?>, WindowingStrategy<?, ?>> sideInputs,
+ PipelineOptions pipelineOptions) {
+ super(keyedCombineFn, windowingStrategy, sideInputs, pipelineOptions);
+ }
+
+ @Override
+ public void reduce(
+ Iterable<WindowedValue<KV<K, AccumT>>> elements,
+ Collector<WindowedValue<KV<K, OutputT>>> out) throws Exception {
+
+ PipelineOptions options = serializedOptions.getPipelineOptions();
+
+ FlinkSideInputReader sideInputReader =
+ new FlinkSideInputReader(sideInputs, getRuntimeContext());
+
+ PerKeyCombineFnRunner<K, ?, AccumT, OutputT> combineFnRunner =
+ PerKeyCombineFnRunners.create(combineFn);
+
+ @SuppressWarnings("unchecked")
+ OutputTimeFn<? super BoundedWindow> outputTimeFn =
+ (OutputTimeFn<? super BoundedWindow>) windowingStrategy.getOutputTimeFn();
+
+ // get all elements so that we can sort them; they have to fit into
+ // memory. This seems very imprudent, but it is correct, for now.
+ ArrayList<WindowedValue<KV<K, AccumT>>> sortedInput = Lists.newArrayList();
+ for (WindowedValue<KV<K, AccumT>> inputValue : elements) {
+ for (WindowedValue<KV<K, AccumT>> exploded : inputValue.explodeWindows()) {
+ sortedInput.add(exploded);
+ }
+ }
+ Collections.sort(sortedInput, new Comparator<WindowedValue<KV<K, AccumT>>>() {
+ @Override
+ public int compare(
+ WindowedValue<KV<K, AccumT>> o1,
+ WindowedValue<KV<K, AccumT>> o2) {
+ return Iterables.getOnlyElement(o1.getWindows()).maxTimestamp()
+ .compareTo(Iterables.getOnlyElement(o2.getWindows()).maxTimestamp());
+ }
+ });
+
+ // merge windows, we have to do it in an extra pre-processing step and
+ // can't do it as we go since the window of early elements would not
+ // be correct when calling the CombineFn
+ mergeWindow(sortedInput);
+
+ // iterate over the elements that are sorted by window timestamp
+ final Iterator<WindowedValue<KV<K, AccumT>>> iterator = sortedInput.iterator();
+
+ // get the first accumulator
+ WindowedValue<KV<K, AccumT>> currentValue = iterator.next();
+ K key = currentValue.getValue().getKey();
+ IntervalWindow currentWindow =
+ (IntervalWindow) Iterables.getOnlyElement(currentValue.getWindows());
+ AccumT accumulator = currentValue.getValue().getValue();
+
+ // we use this to keep track of the timestamps assigned by the OutputTimeFn;
+ // FlinkPartialReduceFunction already combined the timestamps assigned to
+ // individual elements, so here we only have to merge them per window
+ List<Instant> windowTimestamps = new ArrayList<>();
+ windowTimestamps.add(currentValue.getTimestamp());
+
+ while (iterator.hasNext()) {
+ WindowedValue<KV<K, AccumT>> nextValue = iterator.next();
+ IntervalWindow nextWindow =
+ (IntervalWindow) Iterables.getOnlyElement(nextValue.getWindows());
+
+ if (nextWindow.equals(currentWindow)) {
+ // continue accumulating and merge windows
+
+ accumulator = combineFnRunner.mergeAccumulators(
+ key, ImmutableList.of(accumulator, nextValue.getValue().getValue()),
+ options, sideInputReader, currentValue.getWindows());
+
+ windowTimestamps.add(nextValue.getTimestamp());
+ } else {
+ out.collect(
+ WindowedValue.of(
+ KV.of(key, combineFnRunner.extractOutput(key, accumulator,
+ options, sideInputReader, currentValue.getWindows())),
+ outputTimeFn.merge(currentWindow, windowTimestamps),
+ currentWindow,
+ PaneInfo.NO_FIRING));
+
+ windowTimestamps.clear();
+
+ currentWindow = nextWindow;
+ currentValue = nextValue;
+ accumulator = nextValue.getValue().getValue();
+ windowTimestamps.add(nextValue.getTimestamp());
+ }
+ }
+
+ // emit the final accumulator
+ out.collect(
+ WindowedValue.of(
+ KV.of(key, combineFnRunner.extractOutput(key, accumulator,
+ options, sideInputReader, currentValue.getWindows())),
+ outputTimeFn.merge(currentWindow, windowTimestamps),
+ currentWindow,
+ PaneInfo.NO_FIRING));
+ }
+
+ /**
+ * Merge windows. This assumes that the list of elements is sorted by window-end timestamp.
+ * This replaces windows in the input list.
+ */
+ private void mergeWindow(List<WindowedValue<KV<K, AccumT>>> elements) {
+ int currentStart = 0;
+ IntervalWindow currentWindow =
+ (IntervalWindow) Iterables.getOnlyElement(elements.get(0).getWindows());
+
+ for (int i = 1; i < elements.size(); i++) {
+ WindowedValue<KV<K, AccumT>> nextValue = elements.get(i);
+ IntervalWindow nextWindow =
+ (IntervalWindow) Iterables.getOnlyElement(nextValue.getWindows());
+ if (currentWindow.intersects(nextWindow)) {
+ // we continue
+ currentWindow = currentWindow.span(nextWindow);
+ } else {
+ // retrofit the merged window to all windows up to "currentStart"
+ for (int j = i - 1; j >= currentStart; j--) {
+ WindowedValue<KV<K, AccumT>> value = elements.get(j);
+ elements.set(
+ j,
+ WindowedValue.of(
+ value.getValue(), value.getTimestamp(), currentWindow, value.getPane()));
+ }
+ currentStart = i;
+ currentWindow = nextWindow;
+ }
+ }
+ if (currentStart < elements.size() - 1) {
+ // we have to retrofit the last batch
+ for (int j = elements.size() - 1; j >= currentStart; j--) {
+ WindowedValue<KV<K, AccumT>> value = elements.get(j);
+ elements.set(
+ j,
+ WindowedValue.of(
+ value.getValue(), value.getTimestamp(), currentWindow, value.getPane()));
+ }
+ }
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkMultiOutputPruningFunction.java
----------------------------------------------------------------------
diff --git a/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkMultiOutputPruningFunction.java b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkMultiOutputPruningFunction.java
new file mode 100644
index 0000000..9071cc5
--- /dev/null
+++ b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkMultiOutputPruningFunction.java
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.runners.flink.translation.functions;
+
+import org.apache.beam.sdk.transforms.join.RawUnionValue;
+import org.apache.beam.sdk.util.WindowedValue;
+import org.apache.flink.api.common.functions.FlatMapFunction;
+import org.apache.flink.util.Collector;
+
+/**
+ * A {@link FlatMapFunction} that filters out those elements that don't belong in this
+ * output. We need this to implement MultiOutput ParDo functions in combination with
+ * {@link FlinkDoFnFunction}.
+ */
+public class FlinkMultiOutputPruningFunction<T>
+ implements FlatMapFunction<WindowedValue<RawUnionValue>, WindowedValue<T>> {
+
+ private final int ourOutputTag;
+
+ public FlinkMultiOutputPruningFunction(int ourOutputTag) {
+ this.ourOutputTag = ourOutputTag;
+ }
+
+ @Override
+ @SuppressWarnings("unchecked")
+ public void flatMap(
+ WindowedValue<RawUnionValue> windowedValue,
+ Collector<WindowedValue<T>> collector) throws Exception {
+ int unionTag = windowedValue.getValue().getUnionTag();
+ if (unionTag == ourOutputTag) {
+ collector.collect(
+ (WindowedValue<T>) windowedValue.withValue(windowedValue.getValue().getValue()));
+ }
+ }
+}
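A small usage sketch (not part of the commit): feeding tagged RawUnionValues through the function and collecting into a list via Flink's ListCollector helper, assumed to be on the classpath, shows that only the matching union tag survives.

import java.util.ArrayList;
import java.util.List;
import org.apache.beam.runners.flink.translation.functions.FlinkMultiOutputPruningFunction;
import org.apache.beam.sdk.transforms.join.RawUnionValue;
import org.apache.beam.sdk.util.WindowedValue;
import org.apache.flink.api.common.functions.util.ListCollector;

public class PruningDemo {
  public static void main(String[] args) throws Exception {
    // Keep only elements tagged with union tag 1.
    FlinkMultiOutputPruningFunction<String> prune = new FlinkMultiOutputPruningFunction<>(1);

    List<WindowedValue<String>> collected = new ArrayList<>();
    ListCollector<WindowedValue<String>> out = new ListCollector<>(collected);

    prune.flatMap(WindowedValue.valueInGlobalWindow(new RawUnionValue(1, "kept")), out);
    prune.flatMap(WindowedValue.valueInGlobalWindow(new RawUnionValue(2, "dropped")), out);

    System.out.println(collected.size());             // 1
    System.out.println(collected.get(0).getValue());  // kept
  }
}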
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkNoOpStepContext.java
----------------------------------------------------------------------
diff --git a/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkNoOpStepContext.java b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkNoOpStepContext.java
new file mode 100644
index 0000000..847a00a
--- /dev/null
+++ b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkNoOpStepContext.java
@@ -0,0 +1,73 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.runners.flink.translation.functions;
+
+import java.io.IOException;
+import org.apache.beam.runners.core.ExecutionContext.StepContext;
+import org.apache.beam.runners.core.StateInternals;
+import org.apache.beam.runners.core.TimerInternals;
+import org.apache.beam.sdk.coders.Coder;
+import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
+import org.apache.beam.sdk.util.WindowedValue;
+import org.apache.beam.sdk.values.TupleTag;
+
+/**
+ * A {@link StepContext} for Flink Batch Runner execution.
+ */
+public class FlinkNoOpStepContext implements StepContext {
+
+ @Override
+ public String getStepName() {
+ return null;
+ }
+
+ @Override
+ public String getTransformName() {
+ return null;
+ }
+
+ @Override
+ public void noteOutput(WindowedValue<?> output) {
+
+ }
+
+ @Override
+ public void noteOutput(TupleTag<?> tag, WindowedValue<?> output) {
+
+ }
+
+ @Override
+ public <T, W extends BoundedWindow> void writePCollectionViewData(
+ TupleTag<?> tag,
+ Iterable<WindowedValue<T>> data,
+ Coder<Iterable<WindowedValue<T>>> dataCoder,
+ W window,
+ Coder<W> windowCoder) throws IOException {
+ }
+
+ @Override
+ public StateInternals<?> stateInternals() {
+ return null;
+ }
+
+ @Override
+ public TimerInternals timerInternals() {
+ return null;
+ }
+}
+
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkPartialReduceFunction.java
----------------------------------------------------------------------
diff --git a/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkPartialReduceFunction.java b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkPartialReduceFunction.java
new file mode 100644
index 0000000..1d1ff9f
--- /dev/null
+++ b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkPartialReduceFunction.java
@@ -0,0 +1,172 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.runners.flink.translation.functions;
+
+import com.google.common.collect.Iterables;
+import com.google.common.collect.Lists;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.Iterator;
+import java.util.Map;
+import org.apache.beam.runners.core.PerKeyCombineFnRunner;
+import org.apache.beam.runners.core.PerKeyCombineFnRunners;
+import org.apache.beam.runners.flink.translation.utils.SerializedPipelineOptions;
+import org.apache.beam.sdk.options.PipelineOptions;
+import org.apache.beam.sdk.transforms.CombineFnBase;
+import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
+import org.apache.beam.sdk.transforms.windowing.OutputTimeFn;
+import org.apache.beam.sdk.transforms.windowing.PaneInfo;
+import org.apache.beam.sdk.util.WindowedValue;
+import org.apache.beam.sdk.util.WindowingStrategy;
+import org.apache.beam.sdk.values.KV;
+import org.apache.beam.sdk.values.PCollectionView;
+import org.apache.flink.api.common.functions.RichGroupCombineFunction;
+import org.apache.flink.util.Collector;
+import org.joda.time.Instant;
+
+/**
+ * This is the first step for executing a {@link org.apache.beam.sdk.transforms.Combine.PerKey}
+ * on Flink. The second part is {@link FlinkReduceFunction}. This function performs a local
+ * combine step before shuffling while the latter does the final combination after a shuffle.
+ *
+ * <p>The inputs to {@link #combine(Iterable, Collector)} are elements of the same key but
+ * for different windows. We have to ensure that we only combine elements of matching
+ * windows.
+ */
+public class FlinkPartialReduceFunction<K, InputT, AccumT, W extends BoundedWindow>
+ extends RichGroupCombineFunction<WindowedValue<KV<K, InputT>>, WindowedValue<KV<K, AccumT>>> {
+
+ protected final CombineFnBase.PerKeyCombineFn<K, InputT, AccumT, ?> combineFn;
+
+ protected final WindowingStrategy<?, W> windowingStrategy;
+
+ protected final SerializedPipelineOptions serializedOptions;
+
+ protected final Map<PCollectionView<?>, WindowingStrategy<?, ?>> sideInputs;
+
+ public FlinkPartialReduceFunction(
+ CombineFnBase.PerKeyCombineFn<K, InputT, AccumT, ?> combineFn,
+ WindowingStrategy<?, W> windowingStrategy,
+ Map<PCollectionView<?>, WindowingStrategy<?, ?>> sideInputs,
+ PipelineOptions pipelineOptions) {
+
+ this.combineFn = combineFn;
+ this.windowingStrategy = windowingStrategy;
+ this.sideInputs = sideInputs;
+ this.serializedOptions = new SerializedPipelineOptions(pipelineOptions);
+
+ }
+
+ @Override
+ public void combine(
+ Iterable<WindowedValue<KV<K, InputT>>> elements,
+ Collector<WindowedValue<KV<K, AccumT>>> out) throws Exception {
+
+ PipelineOptions options = serializedOptions.getPipelineOptions();
+
+ FlinkSideInputReader sideInputReader =
+ new FlinkSideInputReader(sideInputs, getRuntimeContext());
+
+ PerKeyCombineFnRunner<K, InputT, AccumT, ?> combineFnRunner =
+ PerKeyCombineFnRunners.create(combineFn);
+
+ @SuppressWarnings("unchecked")
+ OutputTimeFn<? super BoundedWindow> outputTimeFn =
+ (OutputTimeFn<? super BoundedWindow>) windowingStrategy.getOutputTimeFn();
+
+ // get all elements so that we can sort them; they have to fit into
+ // memory. This seems very imprudent, but it is correct, for now.
+ ArrayList<WindowedValue<KV<K, InputT>>> sortedInput = Lists.newArrayList();
+ for (WindowedValue<KV<K, InputT>> inputValue : elements) {
+ for (WindowedValue<KV<K, InputT>> exploded : inputValue.explodeWindows()) {
+ sortedInput.add(exploded);
+ }
+ }
+ Collections.sort(sortedInput, new Comparator<WindowedValue<KV<K, InputT>>>() {
+ @Override
+ public int compare(
+ WindowedValue<KV<K, InputT>> o1,
+ WindowedValue<KV<K, InputT>> o2) {
+ return Iterables.getOnlyElement(o1.getWindows()).maxTimestamp()
+ .compareTo(Iterables.getOnlyElement(o2.getWindows()).maxTimestamp());
+ }
+ });
+
+ // iterate over the elements that are sorted by window timestamp
+ //
+ final Iterator<WindowedValue<KV<K, InputT>>> iterator = sortedInput.iterator();
+
+ // create accumulator using the first element's key
+ WindowedValue<KV<K, InputT>> currentValue = iterator.next();
+ K key = currentValue.getValue().getKey();
+ BoundedWindow currentWindow = Iterables.getFirst(currentValue.getWindows(), null);
+ InputT firstValue = currentValue.getValue().getValue();
+ AccumT accumulator = combineFnRunner.createAccumulator(key,
+ options, sideInputReader, currentValue.getWindows());
+ accumulator = combineFnRunner.addInput(key, accumulator, firstValue,
+ options, sideInputReader, currentValue.getWindows());
+
+ // we use this to keep track of the timestamps assigned by the OutputTimeFn
+ Instant windowTimestamp =
+ outputTimeFn.assignOutputTime(currentValue.getTimestamp(), currentWindow);
+
+ while (iterator.hasNext()) {
+ WindowedValue<KV<K, InputT>> nextValue = iterator.next();
+ BoundedWindow nextWindow = Iterables.getOnlyElement(nextValue.getWindows());
+
+ if (nextWindow.equals(currentWindow)) {
+ // continue accumulating
+ InputT value = nextValue.getValue().getValue();
+ accumulator = combineFnRunner.addInput(key, accumulator, value,
+ options, sideInputReader, currentValue.getWindows());
+
+ windowTimestamp = outputTimeFn.combine(
+ windowTimestamp,
+ outputTimeFn.assignOutputTime(nextValue.getTimestamp(), currentWindow));
+
+ } else {
+ // emit the value that we currently have
+ out.collect(
+ WindowedValue.of(
+ KV.of(key, accumulator),
+ windowTimestamp,
+ currentWindow,
+ PaneInfo.NO_FIRING));
+
+ currentWindow = nextWindow;
+ currentValue = nextValue;
+ InputT value = nextValue.getValue().getValue();
+ accumulator = combineFnRunner.createAccumulator(key,
+ options, sideInputReader, currentValue.getWindows());
+ accumulator = combineFnRunner.addInput(key, accumulator, value,
+ options, sideInputReader, currentValue.getWindows());
+ windowTimestamp = outputTimeFn.assignOutputTime(nextValue.getTimestamp(), currentWindow);
+ }
+ }
+
+ // emit the final accumulator
+ out.collect(
+ WindowedValue.of(
+ KV.of(key, accumulator),
+ windowTimestamp,
+ currentWindow,
+ PaneInfo.NO_FIRING));
+ }
+}
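To make the partial/final split concrete without the internal PerKeyCombineFnRunner plumbing, here is a hedged sketch with a hand-rolled CombineFn standing in for the user's combiner: the pre-shuffle side folds inputs into accumulators, the post-shuffle side merges accumulators and extracts the output.

import java.util.Arrays;
import org.apache.beam.sdk.transforms.Combine;

public class PartialVsFinalCombine {
  // A trivial sum CombineFn, standing in for the user's combiner.
  static class SumFn extends Combine.CombineFn<Long, Long, Long> {
    @Override public Long createAccumulator() { return 0L; }
    @Override public Long addInput(Long acc, Long in) { return acc + in; }
    @Override public Long mergeAccumulators(Iterable<Long> accs) {
      long sum = 0L;
      for (long a : accs) { sum += a; }
      return sum;
    }
    @Override public Long extractOutput(Long acc) { return acc; }
  }

  public static void main(String[] args) {
    SumFn fn = new SumFn();

    // Pre-shuffle side (FlinkPartialReduceFunction): fold raw inputs of one
    // key/window into an accumulator and emit the accumulator.
    long partialA = fn.addInput(fn.addInput(fn.createAccumulator(), 1L), 2L);  // 3
    long partialB = fn.addInput(fn.createAccumulator(), 4L);                   // 4

    // Post-shuffle side (FlinkReduceFunction): merge the shuffled accumulators
    // of that key/window and extract the final output.
    long result = fn.extractOutput(fn.mergeAccumulators(Arrays.asList(partialA, partialB)));
    System.out.println(result);  // 7
  }
}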
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkReduceFunction.java
----------------------------------------------------------------------
diff --git a/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkReduceFunction.java b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkReduceFunction.java
new file mode 100644
index 0000000..3e4f742
--- /dev/null
+++ b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkReduceFunction.java
@@ -0,0 +1,173 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.runners.flink.translation.functions;
+
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.Iterables;
+import com.google.common.collect.Lists;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import org.apache.beam.runners.core.PerKeyCombineFnRunner;
+import org.apache.beam.runners.core.PerKeyCombineFnRunners;
+import org.apache.beam.runners.flink.translation.utils.SerializedPipelineOptions;
+import org.apache.beam.sdk.options.PipelineOptions;
+import org.apache.beam.sdk.transforms.CombineFnBase;
+import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
+import org.apache.beam.sdk.transforms.windowing.OutputTimeFn;
+import org.apache.beam.sdk.transforms.windowing.PaneInfo;
+import org.apache.beam.sdk.util.WindowedValue;
+import org.apache.beam.sdk.util.WindowingStrategy;
+import org.apache.beam.sdk.values.KV;
+import org.apache.beam.sdk.values.PCollectionView;
+import org.apache.flink.api.common.functions.RichGroupReduceFunction;
+import org.apache.flink.util.Collector;
+import org.joda.time.Instant;
+
+/**
+ * This is the second part of executing a {@link org.apache.beam.sdk.transforms.Combine.PerKey}
+ * on Flink; the first part is {@link FlinkPartialReduceFunction}. This function performs the
+ * final combination of the pre-combined values after a shuffle.
+ *
+ * <p>The input to {@link #reduce(Iterable, Collector)} consists of elements of the same key but
+ * for different windows. We have to ensure that we only combine elements of matching
+ * windows.
+ */
+public class FlinkReduceFunction<K, AccumT, OutputT, W extends BoundedWindow>
+ extends RichGroupReduceFunction<WindowedValue<KV<K, AccumT>>, WindowedValue<KV<K, OutputT>>> {
+
+ protected final CombineFnBase.PerKeyCombineFn<K, ?, AccumT, OutputT> combineFn;
+
+ protected final WindowingStrategy<?, W> windowingStrategy;
+
+ protected final Map<PCollectionView<?>, WindowingStrategy<?, ?>> sideInputs;
+
+ protected final SerializedPipelineOptions serializedOptions;
+
+ public FlinkReduceFunction(
+ CombineFnBase.PerKeyCombineFn<K, ?, AccumT, OutputT> keyedCombineFn,
+ WindowingStrategy<?, W> windowingStrategy,
+ Map<PCollectionView<?>, WindowingStrategy<?, ?>> sideInputs,
+ PipelineOptions pipelineOptions) {
+
+ this.combineFn = keyedCombineFn;
+
+ this.windowingStrategy = windowingStrategy;
+ this.sideInputs = sideInputs;
+
+ this.serializedOptions = new SerializedPipelineOptions(pipelineOptions);
+
+ }
+
+ @Override
+ public void reduce(
+ Iterable<WindowedValue<KV<K, AccumT>>> elements,
+ Collector<WindowedValue<KV<K, OutputT>>> out) throws Exception {
+
+ PipelineOptions options = serializedOptions.getPipelineOptions();
+
+ FlinkSideInputReader sideInputReader =
+ new FlinkSideInputReader(sideInputs, getRuntimeContext());
+
+ PerKeyCombineFnRunner<K, ?, AccumT, OutputT> combineFnRunner =
+ PerKeyCombineFnRunners.create(combineFn);
+
+ @SuppressWarnings("unchecked")
+ OutputTimeFn<? super BoundedWindow> outputTimeFn =
+ (OutputTimeFn<? super BoundedWindow>) windowingStrategy.getOutputTimeFn();
+
+
+ // gather all elements so that we can sort them; this has to fit into
+ // memory
+ // buffering everything is imprudent, but it is correct for now
+ ArrayList<WindowedValue<KV<K, AccumT>>> sortedInput = Lists.newArrayList();
+ for (WindowedValue<KV<K, AccumT>> inputValue: elements) {
+ for (WindowedValue<KV<K, AccumT>> exploded: inputValue.explodeWindows()) {
+ sortedInput.add(exploded);
+ }
+ }
+ Collections.sort(sortedInput, new Comparator<WindowedValue<KV<K, AccumT>>>() {
+ @Override
+ public int compare(
+ WindowedValue<KV<K, AccumT>> o1,
+ WindowedValue<KV<K, AccumT>> o2) {
+ return Iterables.getOnlyElement(o1.getWindows()).maxTimestamp()
+ .compareTo(Iterables.getOnlyElement(o2.getWindows()).maxTimestamp());
+ }
+ });
+
+ // iterate over the elements that are sorted by window timestamp
+ //
+ final Iterator<WindowedValue<KV<K, AccumT>>> iterator = sortedInput.iterator();
+
+ // get the first accumulator
+ WindowedValue<KV<K, AccumT>> currentValue = iterator.next();
+ K key = currentValue.getValue().getKey();
+ BoundedWindow currentWindow = Iterables.getFirst(currentValue.getWindows(), null);
+ AccumT accumulator = currentValue.getValue().getValue();
+
+ // we use this to keep track of the timestamps assigned by the OutputTimeFn;
+ // FlinkPartialReduceFunction already combined the timestamps assigned to
+ // individual elements, so here we only merge the per-window timestamps
+ List<Instant> windowTimestamps = new ArrayList<>();
+ windowTimestamps.add(currentValue.getTimestamp());
+
+ while (iterator.hasNext()) {
+ WindowedValue<KV<K, AccumT>> nextValue = iterator.next();
+ BoundedWindow nextWindow = Iterables.getOnlyElement(nextValue.getWindows());
+
+ if (nextWindow.equals(currentWindow)) {
+ // continue accumulating
+ accumulator = combineFnRunner.mergeAccumulators(
+ key, ImmutableList.of(accumulator, nextValue.getValue().getValue()),
+ options, sideInputReader, currentValue.getWindows());
+
+ windowTimestamps.add(nextValue.getTimestamp());
+ } else {
+ // emit the value that we currently have
+ out.collect(
+ WindowedValue.of(
+ KV.of(key, combineFnRunner.extractOutput(key, accumulator,
+ options, sideInputReader, currentValue.getWindows())),
+ outputTimeFn.merge(currentWindow, windowTimestamps),
+ currentWindow,
+ PaneInfo.NO_FIRING));
+
+ windowTimestamps.clear();
+
+ currentWindow = nextWindow;
+ currentValue = nextValue;
+ accumulator = nextValue.getValue().getValue();
+ windowTimestamps.add(nextValue.getTimestamp());
+ }
+
+ }
+
+ // emit the final accumulator
+ out.collect(
+ WindowedValue.of(
+ KV.of(key, combineFnRunner.extractOutput(key, accumulator,
+ options, sideInputReader, currentValue.getWindows())),
+ outputTimeFn.merge(currentWindow, windowTimestamps),
+ currentWindow,
+ PaneInfo.NO_FIRING));
+ }
+}
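The final reduce differs from the pre-combine in that it receives pre-combined accumulators, so it merges accumulators rather than adding raw inputs, and only extracts the output once per window. A small sketch of that two-step shape with a toy mean combine (sum/count accumulators; all names are illustrative):

import java.util.Arrays;
import java.util.List;

public class FinalMergeSketch {

  // Toy accumulator: running sum and count, as a partial mean combine would produce.
  static class SumCount {
    final long sum;
    final long count;
    SumCount(long sum, long count) { this.sum = sum; this.count = count; }
  }

  static SumCount mergeAccumulators(SumCount a, SumCount b) {
    return new SumCount(a.sum + b.sum, a.count + b.count);
  }

  static double extractOutput(SumCount acc) {
    return acc.count == 0 ? 0.0 : (double) acc.sum / acc.count;
  }

  public static void main(String[] args) {
    // Pre-combined accumulators for one key and one window, as seen after the shuffle.
    List<SumCount> partials = Arrays.asList(new SumCount(10, 2), new SumCount(5, 1));

    SumCount merged = partials.get(0);
    for (int i = 1; i < partials.size(); i++) {
      merged = mergeAccumulators(merged, partials.get(i));   // merge, do not re-add raw inputs
    }
    System.out.println("mean = " + extractOutput(merged));   // prints mean = 5.0
  }
}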
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkSideInputReader.java
----------------------------------------------------------------------
diff --git a/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkSideInputReader.java b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkSideInputReader.java
new file mode 100644
index 0000000..c317182
--- /dev/null
+++ b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkSideInputReader.java
@@ -0,0 +1,80 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.runners.flink.translation.functions;
+
+import static com.google.common.base.Preconditions.checkNotNull;
+
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+import javax.annotation.Nullable;
+import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
+import org.apache.beam.sdk.util.SideInputReader;
+import org.apache.beam.sdk.util.WindowedValue;
+import org.apache.beam.sdk.util.WindowingStrategy;
+import org.apache.beam.sdk.values.PCollectionView;
+import org.apache.beam.sdk.values.TupleTag;
+import org.apache.flink.api.common.functions.RuntimeContext;
+
+/**
+ * A {@link SideInputReader} for the Flink Batch Runner.
+ */
+public class FlinkSideInputReader implements SideInputReader {
+
+ private final Map<TupleTag<?>, WindowingStrategy<?, ?>> sideInputs;
+
+ private RuntimeContext runtimeContext;
+
+ public FlinkSideInputReader(Map<PCollectionView<?>, WindowingStrategy<?, ?>> indexByView,
+ RuntimeContext runtimeContext) {
+ sideInputs = new HashMap<>();
+ for (Map.Entry<PCollectionView<?>, WindowingStrategy<?, ?>> entry : indexByView.entrySet()) {
+ sideInputs.put(entry.getKey().getTagInternal(), entry.getValue());
+ }
+ this.runtimeContext = runtimeContext;
+ }
+
+ @Nullable
+ @Override
+ public <T> T get(PCollectionView<T> view, BoundedWindow window) {
+ checkNotNull(view, "View passed to sideInput cannot be null");
+ TupleTag<Iterable<WindowedValue<?>>> tag = view.getTagInternal();
+ checkNotNull(
+ sideInputs.get(tag),
+ "Side input for " + view + " not available.");
+
+ Map<BoundedWindow, T> sideInputs =
+ runtimeContext.getBroadcastVariableWithInitializer(
+ tag.getId(), new SideInputInitializer<>(view));
+ T result = sideInputs.get(window);
+ if (result == null) {
+ result = view.getViewFn().apply(Collections.<WindowedValue<?>>emptyList());
+ }
+ return result;
+ }
+
+ @Override
+ public <T> boolean contains(PCollectionView<T> view) {
+ return sideInputs.containsKey(view.getTagInternal());
+ }
+
+ @Override
+ public boolean isEmpty() {
+ return sideInputs.isEmpty();
+ }
+}
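The essential behavior of the reader's get() is a per-window lookup with a fallback to the view applied to an empty collection when nothing was broadcast for that window. A condensed standalone sketch of that lookup logic, using plain maps instead of the Flink broadcast-variable machinery (hypothetical names):

import java.util.HashMap;
import java.util.Map;
import java.util.function.Supplier;

public class SideInputLookupSketch {

  // Materialized side input: one value per window, keyed here by a window identifier.
  static <T> T lookup(Map<String, T> materialized, String window, Supplier<T> emptyDefault) {
    T result = materialized.get(window);
    // Fall back to the "view of an empty collection" when no element landed in this window.
    return result != null ? result : emptyDefault.get();
  }

  public static void main(String[] args) {
    Map<String, Long> sideInputByWindow = new HashMap<>();
    sideInputByWindow.put("window-A", 42L);

    System.out.println(lookup(sideInputByWindow, "window-A", () -> 0L)); // 42
    System.out.println(lookup(sideInputByWindow, "window-B", () -> 0L)); // 0 (default)
  }
}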
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkStatefulDoFnFunction.java
----------------------------------------------------------------------
diff --git a/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkStatefulDoFnFunction.java b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkStatefulDoFnFunction.java
new file mode 100644
index 0000000..c8193d2
--- /dev/null
+++ b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkStatefulDoFnFunction.java
@@ -0,0 +1,198 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.runners.flink.translation.functions;
+
+import static org.apache.flink.util.Preconditions.checkArgument;
+
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.Map;
+import org.apache.beam.runners.core.DoFnRunner;
+import org.apache.beam.runners.core.DoFnRunners;
+import org.apache.beam.runners.core.InMemoryStateInternals;
+import org.apache.beam.runners.core.InMemoryTimerInternals;
+import org.apache.beam.runners.core.StateInternals;
+import org.apache.beam.runners.core.StateNamespace;
+import org.apache.beam.runners.core.StateNamespaces;
+import org.apache.beam.runners.core.TimerInternals;
+import org.apache.beam.runners.flink.translation.utils.SerializedPipelineOptions;
+import org.apache.beam.sdk.options.PipelineOptions;
+import org.apache.beam.sdk.transforms.DoFn;
+import org.apache.beam.sdk.transforms.ParDo;
+import org.apache.beam.sdk.transforms.reflect.DoFnInvoker;
+import org.apache.beam.sdk.transforms.reflect.DoFnInvokers;
+import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
+import org.apache.beam.sdk.util.WindowedValue;
+import org.apache.beam.sdk.util.WindowingStrategy;
+import org.apache.beam.sdk.values.KV;
+import org.apache.beam.sdk.values.PCollectionView;
+import org.apache.beam.sdk.values.TupleTag;
+import org.apache.flink.api.common.functions.RichGroupReduceFunction;
+import org.apache.flink.api.common.functions.RuntimeContext;
+import org.apache.flink.configuration.Configuration;
+import org.apache.flink.util.Collector;
+import org.joda.time.Instant;
+
+/**
+ * A {@link RichGroupReduceFunction} for stateful {@link ParDo} in Flink Batch Runner.
+ */
+public class FlinkStatefulDoFnFunction<K, V, OutputT>
+ extends RichGroupReduceFunction<WindowedValue<KV<K, V>>, WindowedValue<OutputT>> {
+
+ private final DoFn<KV<K, V>, OutputT> dofn;
+ private final WindowingStrategy<?, ?> windowingStrategy;
+ private final Map<PCollectionView<?>, WindowingStrategy<?, ?>> sideInputs;
+ private final SerializedPipelineOptions serializedOptions;
+ private final Map<TupleTag<?>, Integer> outputMap;
+ private final TupleTag<OutputT> mainOutputTag;
+ private transient DoFnInvoker doFnInvoker;
+
+ public FlinkStatefulDoFnFunction(
+ DoFn<KV<K, V>, OutputT> dofn,
+ WindowingStrategy<?, ?> windowingStrategy,
+ Map<PCollectionView<?>, WindowingStrategy<?, ?>> sideInputs,
+ PipelineOptions pipelineOptions,
+ Map<TupleTag<?>, Integer> outputMap,
+ TupleTag<OutputT> mainOutputTag) {
+
+ this.dofn = dofn;
+ this.windowingStrategy = windowingStrategy;
+ this.sideInputs = sideInputs;
+ this.serializedOptions = new SerializedPipelineOptions(pipelineOptions);
+ this.outputMap = outputMap;
+ this.mainOutputTag = mainOutputTag;
+ }
+
+ @Override
+ public void reduce(
+ Iterable<WindowedValue<KV<K, V>>> values,
+ Collector<WindowedValue<OutputT>> out) throws Exception {
+ RuntimeContext runtimeContext = getRuntimeContext();
+
+ DoFnRunners.OutputManager outputManager;
+ if (outputMap == null) {
+ outputManager = new FlinkDoFnFunction.DoFnOutputManager(out);
+ } else {
+ // it has some additional outputs
+ outputManager =
+ new FlinkDoFnFunction.MultiDoFnOutputManager((Collector) out, outputMap);
+ }
+
+ final Iterator<WindowedValue<KV<K, V>>> iterator = values.iterator();
+
+ // get the first value, we need this for initializing the state internals with the key.
+ // we are guaranteed to have a first value, otherwise reduce() would not have been called.
+ WindowedValue<KV<K, V>> currentValue = iterator.next();
+ final K key = currentValue.getValue().getKey();
+
+ final InMemoryStateInternals<K> stateInternals = InMemoryStateInternals.forKey(key);
+
+ // In batch mode we know that all the data is available for this key. We can't use the
+ // timer manager from the context because it doesn't exist, so we create one and advance
+ // time to the end after processing all elements.
+ final InMemoryTimerInternals timerInternals = new InMemoryTimerInternals();
+ timerInternals.advanceProcessingTime(Instant.now());
+ timerInternals.advanceSynchronizedProcessingTime(Instant.now());
+
+ DoFnRunner<KV<K, V>, OutputT> doFnRunner = DoFnRunners.simpleRunner(
+ serializedOptions.getPipelineOptions(), dofn,
+ new FlinkSideInputReader(sideInputs, runtimeContext),
+ outputManager,
+ mainOutputTag,
+ // see SimpleDoFnRunner, just use it to limit number of additional outputs
+ Collections.<TupleTag<?>>emptyList(),
+ new FlinkNoOpStepContext() {
+ @Override
+ public StateInternals<?> stateInternals() {
+ return stateInternals;
+ }
+ @Override
+ public TimerInternals timerInternals() {
+ return timerInternals;
+ }
+ },
+ new FlinkAggregatorFactory(runtimeContext),
+ windowingStrategy);
+
+ doFnRunner.startBundle();
+
+ doFnRunner.processElement(currentValue);
+ while (iterator.hasNext()) {
+ currentValue = iterator.next();
+ doFnRunner.processElement(currentValue);
+ }
+
+ // Finish any pending windows by advancing the input watermark to infinity.
+ timerInternals.advanceInputWatermark(BoundedWindow.TIMESTAMP_MAX_VALUE);
+
+ // Finally, advance the processing time to infinity to fire any timers.
+ timerInternals.advanceProcessingTime(BoundedWindow.TIMESTAMP_MAX_VALUE);
+ timerInternals.advanceSynchronizedProcessingTime(BoundedWindow.TIMESTAMP_MAX_VALUE);
+
+ fireEligibleTimers(timerInternals, doFnRunner);
+
+ doFnRunner.finishBundle();
+ }
+
+ private void fireEligibleTimers(
+ InMemoryTimerInternals timerInternals, DoFnRunner<KV<K, V>, OutputT> runner)
+ throws Exception {
+
+ while (true) {
+
+ TimerInternals.TimerData timer;
+ boolean hasFired = false;
+
+ while ((timer = timerInternals.removeNextEventTimer()) != null) {
+ hasFired = true;
+ fireTimer(timer, runner);
+ }
+ while ((timer = timerInternals.removeNextProcessingTimer()) != null) {
+ hasFired = true;
+ fireTimer(timer, runner);
+ }
+ while ((timer = timerInternals.removeNextSynchronizedProcessingTimer()) != null) {
+ hasFired = true;
+ fireTimer(timer, runner);
+ }
+ if (!hasFired) {
+ break;
+ }
+ }
+ }
+
+ private void fireTimer(
+ TimerInternals.TimerData timer, DoFnRunner<KV<K, V>, OutputT> doFnRunner) {
+ StateNamespace namespace = timer.getNamespace();
+ checkArgument(namespace instanceof StateNamespaces.WindowNamespace);
+ BoundedWindow window = ((StateNamespaces.WindowNamespace) namespace).getWindow();
+ doFnRunner.onTimer(timer.getTimerId(), window, timer.getTimestamp(), timer.getDomain());
+ }
+
+ @Override
+ public void open(Configuration parameters) throws Exception {
+ doFnInvoker = DoFnInvokers.invokerFor(dofn);
+ doFnInvoker.invokeSetup();
+ }
+
+ @Override
+ public void close() throws Exception {
+ doFnInvoker.invokeTeardown();
+ }
+
+}
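The batch pattern in reduce() is: feed every element for the key to the runner, push the input watermark and processing time to the end of time, and then drain whatever timers become eligible, looping until a full pass fires nothing (since firing a timer may set new timers). A stripped-down sketch of that drain loop with a toy timer queue (illustrative names only, not the Beam timer API):

import java.util.ArrayDeque;
import java.util.Deque;

public class DrainTimersSketch {

  // Toy timer queue: timers become eligible once "time" has advanced past them.
  static class TimerQueue {
    private final Deque<Long> timers = new ArrayDeque<>();
    private long now = 0;

    void set(long timestamp) { timers.add(timestamp); }
    void advanceTo(long t) { now = t; }

    Long removeNextEligible() {
      Long head = timers.peek();
      return (head != null && head <= now) ? timers.poll() : null;
    }
  }

  public static void main(String[] args) {
    TimerQueue queue = new TimerQueue();
    queue.set(10);
    queue.set(25);

    // After processing all elements for the key, advance time to "the end".
    queue.advanceTo(Long.MAX_VALUE);

    // Keep draining until a full pass fires nothing; firing a timer may set new ones.
    while (true) {
      boolean hasFired = false;
      Long timer;
      while ((timer = queue.removeNextEligible()) != null) {
        hasFired = true;
        System.out.println("firing timer at " + timer);  // stand-in for doFnRunner.onTimer(...)
      }
      if (!hasFired) {
        break;
      }
    }
  }
}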
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/functions/SideInputInitializer.java
----------------------------------------------------------------------
diff --git a/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/functions/SideInputInitializer.java b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/functions/SideInputInitializer.java
new file mode 100644
index 0000000..12222b4
--- /dev/null
+++ b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/functions/SideInputInitializer.java
@@ -0,0 +1,73 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.runners.flink.translation.functions;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
+import org.apache.beam.sdk.util.WindowedValue;
+import org.apache.beam.sdk.values.PCollectionView;
+import org.apache.flink.api.common.functions.BroadcastVariableInitializer;
+
+/**
+ * {@link BroadcastVariableInitializer} that initializes the broadcast input as a {@code Map}
+ * from window to side input.
+ */
+public class SideInputInitializer<ElemT, ViewT, W extends BoundedWindow>
+ implements BroadcastVariableInitializer<WindowedValue<ElemT>, Map<BoundedWindow, ViewT>> {
+
+ PCollectionView<ViewT> view;
+
+ public SideInputInitializer(PCollectionView<ViewT> view) {
+ this.view = view;
+ }
+
+ @Override
+ public Map<BoundedWindow, ViewT> initializeBroadcastVariable(
+ Iterable<WindowedValue<ElemT>> inputValues) {
+
+ // first partition into windows
+ Map<BoundedWindow, List<WindowedValue<ElemT>>> partitionedElements = new HashMap<>();
+ for (WindowedValue<ElemT> value: inputValues) {
+ for (BoundedWindow window: value.getWindows()) {
+ List<WindowedValue<ElemT>> windowedValues = partitionedElements.get(window);
+ if (windowedValues == null) {
+ windowedValues = new ArrayList<>();
+ partitionedElements.put(window, windowedValues);
+ }
+ windowedValues.add(value);
+ }
+ }
+
+ Map<BoundedWindow, ViewT> resultMap = new HashMap<>();
+
+ for (Map.Entry<BoundedWindow, List<WindowedValue<ElemT>>> elements:
+ partitionedElements.entrySet()) {
+
+ @SuppressWarnings("unchecked")
+ Iterable<WindowedValue<?>> elementsIterable =
+ (List<WindowedValue<?>>) (List<?>) elements.getValue();
+
+ resultMap.put(elements.getKey(), view.getViewFn().apply(elementsIterable));
+ }
+
+ return resultMap;
+ }
+}
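Conceptually the initializer groups the broadcast elements by window and then materializes each group with the view's ViewFn. A toy version of that grouping, using a sum as the stand-in view function (names here are made up for the example):

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class GroupByWindowSketch {

  public static void main(String[] args) {
    // (windowId, value) pairs as they arrive from the broadcast input.
    String[][] elements = { {"w1", "3"}, {"w2", "5"}, {"w1", "4"} };

    // First partition into windows.
    Map<String, List<Long>> partitioned = new HashMap<>();
    for (String[] e : elements) {
      partitioned.computeIfAbsent(e[0], k -> new ArrayList<>()).add(Long.parseLong(e[1]));
    }

    // Then apply the "view function" (here: a sum) once per window.
    Map<String, Long> materialized = new HashMap<>();
    for (Map.Entry<String, List<Long>> entry : partitioned.entrySet()) {
      materialized.put(entry.getKey(), entry.getValue().stream().mapToLong(Long::longValue).sum());
    }
    System.out.println(materialized);  // e.g. {w1=7, w2=5}
  }
}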
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/functions/package-info.java
----------------------------------------------------------------------
diff --git a/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/functions/package-info.java b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/functions/package-info.java
new file mode 100644
index 0000000..9f11212
--- /dev/null
+++ b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/functions/package-info.java
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Internal implementation of the Beam runner for Apache Flink.
+ */
+package org.apache.beam.runners.flink.translation.functions;
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/package-info.java
----------------------------------------------------------------------
diff --git a/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/package-info.java b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/package-info.java
new file mode 100644
index 0000000..af4b354
--- /dev/null
+++ b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/package-info.java
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Internal implementation of the Beam runner for Apache Flink.
+ */
+package org.apache.beam.runners.flink.translation;
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/types/CoderTypeInformation.java
----------------------------------------------------------------------
diff --git a/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/types/CoderTypeInformation.java b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/types/CoderTypeInformation.java
new file mode 100644
index 0000000..9b449aa
--- /dev/null
+++ b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/types/CoderTypeInformation.java
@@ -0,0 +1,120 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.runners.flink.translation.types;
+
+import static com.google.common.base.Preconditions.checkNotNull;
+
+import org.apache.beam.sdk.coders.Coder;
+import org.apache.flink.api.common.ExecutionConfig;
+import org.apache.flink.api.common.typeinfo.AtomicType;
+import org.apache.flink.api.common.typeinfo.TypeInformation;
+import org.apache.flink.api.common.typeutils.TypeComparator;
+import org.apache.flink.api.common.typeutils.TypeSerializer;
+
+/**
+ * Flink {@link org.apache.flink.api.common.typeinfo.TypeInformation} for
+ * Beam {@link org.apache.beam.sdk.coders.Coder}s.
+ */
+public class CoderTypeInformation<T> extends TypeInformation<T> implements AtomicType<T> {
+
+ private final Coder<T> coder;
+
+ public CoderTypeInformation(Coder<T> coder) {
+ checkNotNull(coder);
+ this.coder = coder;
+ }
+
+ public Coder<T> getCoder() {
+ return coder;
+ }
+
+ @Override
+ public boolean isBasicType() {
+ return false;
+ }
+
+ @Override
+ public boolean isTupleType() {
+ return false;
+ }
+
+ @Override
+ public int getArity() {
+ return 1;
+ }
+
+ @Override
+ @SuppressWarnings("unchecked")
+ public Class<T> getTypeClass() {
+ // We don't have access to the actual type's Class, so we fall back to Object.class here.
+ return (Class<T>) Object.class;
+ }
+
+ @Override
+ public boolean isKeyType() {
+ return true;
+ }
+
+ @Override
+ @SuppressWarnings("unchecked")
+ public TypeSerializer<T> createSerializer(ExecutionConfig config) {
+ return new CoderTypeSerializer<>(coder);
+ }
+
+ @Override
+ public int getTotalFields() {
+ return 2;
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) {
+ return true;
+ }
+ if (o == null || getClass() != o.getClass()) {
+ return false;
+ }
+
+ CoderTypeInformation that = (CoderTypeInformation) o;
+
+ return coder.equals(that.coder);
+
+ }
+
+ @Override
+ public int hashCode() {
+ return coder.hashCode();
+ }
+
+ @Override
+ public boolean canEqual(Object obj) {
+ return obj instanceof CoderTypeInformation;
+ }
+
+ @Override
+ public String toString() {
+ return "CoderTypeInformation{coder=" + coder + '}';
+ }
+
+ @Override
+ public TypeComparator<T> createComparator(boolean sortOrderAscending, ExecutionConfig
+ executionConfig) {
+ throw new UnsupportedOperationException(
+ "Non-encoded values cannot be compared directly.");
+ }
+}
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/types/CoderTypeSerializer.java
----------------------------------------------------------------------
diff --git a/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/types/CoderTypeSerializer.java b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/types/CoderTypeSerializer.java
new file mode 100644
index 0000000..e210ed9
--- /dev/null
+++ b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/types/CoderTypeSerializer.java
@@ -0,0 +1,132 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.runners.flink.translation.types;
+
+import java.io.EOFException;
+import java.io.IOException;
+import org.apache.beam.runners.flink.translation.wrappers.DataInputViewWrapper;
+import org.apache.beam.runners.flink.translation.wrappers.DataOutputViewWrapper;
+import org.apache.beam.sdk.coders.Coder;
+import org.apache.beam.sdk.coders.CoderException;
+import org.apache.beam.sdk.util.CoderUtils;
+import org.apache.flink.api.common.typeutils.TypeSerializer;
+import org.apache.flink.core.memory.DataInputView;
+import org.apache.flink.core.memory.DataOutputView;
+
+/**
+ * Flink {@link org.apache.flink.api.common.typeutils.TypeSerializer} for
+ * Beam {@link org.apache.beam.sdk.coders.Coder Coders}.
+ */
+public class CoderTypeSerializer<T> extends TypeSerializer<T> {
+
+ private Coder<T> coder;
+
+ public CoderTypeSerializer(Coder<T> coder) {
+ this.coder = coder;
+ }
+
+ @Override
+ public boolean isImmutableType() {
+ return false;
+ }
+
+ @Override
+ public CoderTypeSerializer<T> duplicate() {
+ return new CoderTypeSerializer<>(coder);
+ }
+
+ @Override
+ public T createInstance() {
+ return null;
+ }
+
+ @Override
+ public T copy(T t) {
+ try {
+ return CoderUtils.clone(coder, t);
+ } catch (CoderException e) {
+ throw new RuntimeException("Could not clone.", e);
+ }
+ }
+
+ @Override
+ public T copy(T t, T reuse) {
+ return copy(t);
+ }
+
+ @Override
+ public int getLength() {
+ return -1;
+ }
+
+ @Override
+ public void serialize(T t, DataOutputView dataOutputView) throws IOException {
+ DataOutputViewWrapper outputWrapper = new DataOutputViewWrapper(dataOutputView);
+ coder.encode(t, outputWrapper, Coder.Context.NESTED);
+ }
+
+ @Override
+ public T deserialize(DataInputView dataInputView) throws IOException {
+ try {
+ DataInputViewWrapper inputWrapper = new DataInputViewWrapper(dataInputView);
+ return coder.decode(inputWrapper, Coder.Context.NESTED);
+ } catch (CoderException e) {
+ Throwable cause = e.getCause();
+ if (cause instanceof EOFException) {
+ throw (EOFException) cause;
+ } else {
+ throw e;
+ }
+ }
+ }
+
+ @Override
+ public T deserialize(T t, DataInputView dataInputView) throws IOException {
+ return deserialize(dataInputView);
+ }
+
+ @Override
+ public void copy(
+ DataInputView dataInputView,
+ DataOutputView dataOutputView) throws IOException {
+ serialize(deserialize(dataInputView), dataOutputView);
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) {
+ return true;
+ }
+ if (o == null || getClass() != o.getClass()) {
+ return false;
+ }
+
+ CoderTypeSerializer that = (CoderTypeSerializer) o;
+ return coder.equals(that.coder);
+ }
+
+ @Override
+ public boolean canEqual(Object obj) {
+ return obj instanceof CoderTypeSerializer;
+ }
+
+ @Override
+ public int hashCode() {
+ return coder.hashCode();
+ }
+}
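What the serializer delegates to is a plain coder round trip: encode a value to bytes with the Beam Coder and decode the same bytes back. A short sketch of that round trip using the SDK's StringUtf8Coder and the CoderUtils helpers that this commit also imports (the value is arbitrary):

import org.apache.beam.sdk.coders.StringUtf8Coder;
import org.apache.beam.sdk.util.CoderUtils;

public class CoderRoundTripSketch {
  public static void main(String[] args) throws Exception {
    StringUtf8Coder coder = StringUtf8Coder.of();

    // Encode to a byte[], much like serialize() writes the coder's output to a DataOutputView...
    byte[] bytes = CoderUtils.encodeToByteArray(coder, "hello beam");

    // ...and decode it back, as deserialize() reads it from a DataInputView.
    String decoded = CoderUtils.decodeFromByteArray(coder, bytes);
    System.out.println(decoded);  // hello beam
  }
}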
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/types/EncodedValueComparator.java
----------------------------------------------------------------------
diff --git a/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/types/EncodedValueComparator.java b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/types/EncodedValueComparator.java
new file mode 100644
index 0000000..667ef45
--- /dev/null
+++ b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/types/EncodedValueComparator.java
@@ -0,0 +1,195 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.runners.flink.translation.types;
+
+import java.io.IOException;
+import java.util.Arrays;
+import org.apache.beam.sdk.coders.Coder;
+import org.apache.flink.api.common.typeutils.TypeComparator;
+import org.apache.flink.core.memory.DataInputView;
+import org.apache.flink.core.memory.DataOutputView;
+import org.apache.flink.core.memory.MemorySegment;
+
+/**
+ * Flink {@link org.apache.flink.api.common.typeutils.TypeComparator} for Beam values that have
+ * been encoded to byte data by a {@link Coder}.
+ */
+public class EncodedValueComparator extends TypeComparator<byte[]> {
+
+ /** For storing the Reference in encoded form. */
+ private transient byte[] encodedReferenceKey;
+
+ private final boolean ascending;
+
+ public EncodedValueComparator(boolean ascending) {
+ this.ascending = ascending;
+ }
+
+ @Override
+ public int hash(byte[] record) {
+ return Arrays.hashCode(record);
+ }
+
+ @Override
+ public void setReference(byte[] toCompare) {
+ this.encodedReferenceKey = toCompare;
+ }
+
+ @Override
+ public boolean equalToReference(byte[] candidate) {
+ if (encodedReferenceKey.length != candidate.length) {
+ return false;
+ }
+ int len = candidate.length;
+ for (int i = 0; i < len; i++) {
+ if (encodedReferenceKey[i] != candidate[i]) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ @Override
+ public int compareToReference(TypeComparator<byte[]> other) {
+ // VERY IMPORTANT: compareToReference does not behave like Comparable.compare
+ // the meaning of the return value is inverted.
+
+ EncodedValueComparator otherEncodedValueComparator = (EncodedValueComparator) other;
+
+ int len = Math.min(
+ encodedReferenceKey.length,
+ otherEncodedValueComparator.encodedReferenceKey.length);
+
+ for (int i = 0; i < len; i++) {
+ byte b1 = encodedReferenceKey[i];
+ byte b2 = otherEncodedValueComparator.encodedReferenceKey[i];
+ int result = (b1 < b2 ? -1 : (b1 == b2 ? 0 : 1));
+ if (result != 0) {
+ return ascending ? -result : result;
+ }
+ }
+ int result =
+ encodedReferenceKey.length - otherEncodedValueComparator.encodedReferenceKey.length;
+ return ascending ? -result : result;
+ }
+
+
+ @Override
+ public int compare(byte[] first, byte[] second) {
+ int len = Math.min(first.length, second.length);
+ for (int i = 0; i < len; i++) {
+ byte b1 = first[i];
+ byte b2 = second[i];
+ int result = (b1 < b2 ? -1 : (b1 == b2 ? 0 : 1));
+ if (result != 0) {
+ return ascending ? result : -result;
+ }
+ }
+ int result = first.length - second.length;
+ return ascending ? result : -result;
+ }
+
+ @Override
+ public int compareSerialized(
+ DataInputView firstSource,
+ DataInputView secondSource) throws IOException {
+ int lengthFirst = firstSource.readInt();
+ int lengthSecond = secondSource.readInt();
+
+ int len = Math.min(lengthFirst, lengthSecond);
+ for (int i = 0; i < len; i++) {
+ byte b1 = firstSource.readByte();
+ byte b2 = secondSource.readByte();
+ int result = (b1 < b2 ? -1 : (b1 == b2 ? 0 : 1));
+ if (result != 0) {
+ return ascending ? result : -result;
+ }
+ }
+
+ int result = lengthFirst - lengthSecond;
+ return ascending ? result : -result;
+ }
+
+
+
+ @Override
+ public boolean supportsNormalizedKey() {
+ // disabled because this seems to not work with some coders,
+ // such as the AvroCoder
+ return false;
+ }
+
+ @Override
+ public boolean supportsSerializationWithKeyNormalization() {
+ return false;
+ }
+
+ @Override
+ public int getNormalizeKeyLen() {
+ return Integer.MAX_VALUE;
+ }
+
+ @Override
+ public boolean isNormalizedKeyPrefixOnly(int keyBytes) {
+ return true;
+ }
+
+ @Override
+ public void putNormalizedKey(byte[] record, MemorySegment target, int offset, int numBytes) {
+ final int limit = offset + numBytes;
+
+ target.put(offset, record, 0, Math.min(numBytes, record.length));
+
+ offset += record.length;
+
+ while (offset < limit) {
+ target.put(offset++, (byte) 0);
+ }
+ }
+
+ @Override
+ public void writeWithKeyNormalization(byte[] record, DataOutputView target) throws IOException {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public byte[] readWithKeyDenormalization(byte[] reuse, DataInputView source) throws IOException {
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public boolean invertNormalizedKey() {
+ return !ascending;
+ }
+
+ @Override
+ public TypeComparator<byte[]> duplicate() {
+ return new EncodedValueComparator(ascending);
+ }
+
+ @Override
+ public int extractKeys(Object record, Object[] target, int index) {
+ target[index] = record;
+ return 1;
+ }
+
+ @Override
+ public TypeComparator[] getFlatComparators() {
+ return new TypeComparator[] { this.duplicate() };
+ }
+}
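The comparison itself is a byte-wise (signed) lexicographic order with array length as the tiebreaker, optionally inverted for descending sorts. A tiny standalone version of compare() that makes the semantics concrete:

public class LexicographicCompareSketch {

  // Same shape as the comparator's compare(): byte-wise, then by length, sign-flipped if descending.
  static int compare(byte[] first, byte[] second, boolean ascending) {
    int len = Math.min(first.length, second.length);
    for (int i = 0; i < len; i++) {
      int result = Byte.compare(first[i], second[i]);
      if (result != 0) {
        return ascending ? result : -result;
      }
    }
    int result = first.length - second.length;
    return ascending ? result : -result;
  }

  public static void main(String[] args) {
    byte[] a = {1, 2};
    byte[] b = {1, 2, 3};
    System.out.println(compare(a, b, true));   // negative: a sorts before b (shorter prefix wins)
    System.out.println(compare(a, b, false));  // positive: the order is inverted when descending
  }
}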
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/types/EncodedValueSerializer.java
----------------------------------------------------------------------
diff --git a/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/types/EncodedValueSerializer.java b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/types/EncodedValueSerializer.java
new file mode 100644
index 0000000..41db61e
--- /dev/null
+++ b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/types/EncodedValueSerializer.java
@@ -0,0 +1,113 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.runners.flink.translation.types;
+
+import java.io.IOException;
+
+import org.apache.beam.sdk.coders.Coder;
+
+import org.apache.flink.api.common.typeutils.TypeSerializer;
+import org.apache.flink.core.memory.DataInputView;
+import org.apache.flink.core.memory.DataOutputView;
+
+/**
+ * {@link TypeSerializer} for values that were encoded using a {@link Coder}.
+ */
+public final class EncodedValueSerializer extends TypeSerializer<byte[]> {
+
+ private static final long serialVersionUID = 1L;
+
+ private static final byte[] EMPTY = new byte[0];
+
+ @Override
+ public boolean isImmutableType() {
+ return true;
+ }
+
+ @Override
+ public byte[] createInstance() {
+ return EMPTY;
+ }
+
+ @Override
+ public byte[] copy(byte[] from) {
+ return from;
+ }
+
+ @Override
+ public byte[] copy(byte[] from, byte[] reuse) {
+ return copy(from);
+ }
+
+ @Override
+ public int getLength() {
+ return -1;
+ }
+
+
+ @Override
+ public void serialize(byte[] record, DataOutputView target) throws IOException {
+ if (record == null) {
+ throw new IllegalArgumentException("The record must not be null.");
+ }
+
+ final int len = record.length;
+ target.writeInt(len);
+ target.write(record);
+ }
+
+ @Override
+ public byte[] deserialize(DataInputView source) throws IOException {
+ final int len = source.readInt();
+ byte[] result = new byte[len];
+ source.readFully(result);
+ return result;
+ }
+
+ @Override
+ public byte[] deserialize(byte[] reuse, DataInputView source) throws IOException {
+ return deserialize(source);
+ }
+
+ @Override
+ public void copy(DataInputView source, DataOutputView target) throws IOException {
+ final int len = source.readInt();
+ target.writeInt(len);
+ target.write(source, len);
+ }
+
+ @Override
+ public boolean canEqual(Object obj) {
+ return obj instanceof EncodedValueSerializer;
+ }
+
+ @Override
+ public int hashCode() {
+ return this.getClass().hashCode();
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ return obj instanceof EncodedValueSerializer;
+ }
+
+ @Override
+ public TypeSerializer<byte[]> duplicate() {
+ return this;
+ }
+}
[45/50] [abbrv] beam git commit: This closes #2587
Posted by dh...@apache.org.
This closes #2587
Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/29e054a8
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/29e054a8
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/29e054a8
Branch: refs/heads/DSL_SQL
Commit: 29e054a8d7ffe6a061dbbe9a1885185b02f4e8ec
Parents: 714fdd2 418c304
Author: Thomas Groh <tg...@google.com>
Authored: Wed Apr 19 10:53:30 2017 -0700
Committer: Thomas Groh <tg...@google.com>
Committed: Wed Apr 19 10:53:30 2017 -0700
----------------------------------------------------------------------
.../core/construction/UnconsumedReads.java | 72 +++++++++++++
.../core/construction/UnconsumedReadsTest.java | 105 +++++++++++++++++++
.../beam/runners/dataflow/DataflowRunner.java | 4 +
.../runners/dataflow/DataflowRunnerTest.java | 24 +++++
4 files changed, 205 insertions(+)
----------------------------------------------------------------------
[08/50] [abbrv] beam git commit: Creates ProcessFnRunner and wires it
through ParDoEvaluator
Posted by dh...@apache.org.
Creates ProcessFnRunner and wires it through ParDoEvaluator
Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/b93de58f
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/b93de58f
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/b93de58f
Branch: refs/heads/DSL_SQL
Commit: b93de58f5a3a10877997815a793725cb0e53cc2d
Parents: 7e1a267
Author: Eugene Kirpichov <ki...@google.com>
Authored: Mon Apr 17 14:52:23 2017 -0700
Committer: Eugene Kirpichov <ki...@google.com>
Committed: Tue Apr 18 18:02:07 2017 -0700
----------------------------------------------------------------------
.../apache/beam/runners/core/DoFnRunners.java | 32 +++++
.../beam/runners/core/ProcessFnRunner.java | 127 +++++++++++++++++++
.../beam/runners/direct/ParDoEvaluator.java | 114 +++++++++++++----
.../runners/direct/ParDoEvaluatorFactory.java | 11 +-
...littableProcessElementsEvaluatorFactory.java | 106 ++++++++++++----
.../direct/StatefulParDoEvaluatorFactory.java | 4 +-
.../direct/TransformEvaluatorRegistry.java | 4 +-
.../beam/runners/direct/ParDoEvaluatorTest.java | 3 +-
8 files changed, 341 insertions(+), 60 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/beam/blob/b93de58f/runners/core-java/src/main/java/org/apache/beam/runners/core/DoFnRunners.java
----------------------------------------------------------------------
diff --git a/runners/core-java/src/main/java/org/apache/beam/runners/core/DoFnRunners.java b/runners/core-java/src/main/java/org/apache/beam/runners/core/DoFnRunners.java
index b09ee08..8501e72 100644
--- a/runners/core-java/src/main/java/org/apache/beam/runners/core/DoFnRunners.java
+++ b/runners/core-java/src/main/java/org/apache/beam/runners/core/DoFnRunners.java
@@ -17,8 +17,10 @@
*/
package org.apache.beam.runners.core;
+import java.util.Collection;
import java.util.List;
import org.apache.beam.runners.core.ExecutionContext.StepContext;
+import org.apache.beam.runners.core.SplittableParDo.ProcessFn;
import org.apache.beam.runners.core.StatefulDoFnRunner.CleanupTimer;
import org.apache.beam.runners.core.StatefulDoFnRunner.StateCleaner;
import org.apache.beam.sdk.options.PipelineOptions;
@@ -26,10 +28,12 @@ import org.apache.beam.sdk.transforms.Aggregator;
import org.apache.beam.sdk.transforms.DoFn;
import org.apache.beam.sdk.transforms.Sum;
import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
+import org.apache.beam.sdk.util.ReadyCheckingSideInputReader;
import org.apache.beam.sdk.util.SideInputReader;
import org.apache.beam.sdk.util.WindowedValue;
import org.apache.beam.sdk.util.WindowingStrategy;
import org.apache.beam.sdk.values.KV;
+import org.apache.beam.sdk.values.PCollectionView;
import org.apache.beam.sdk.values.TupleTag;
/**
@@ -146,4 +150,32 @@ public class DoFnRunners {
stateCleaner,
droppedDueToLateness);
}
+
+ public static <InputT, OutputT, RestrictionT>
+ ProcessFnRunner<InputT, OutputT, RestrictionT>
+ newProcessFnRunner(
+ ProcessFn<InputT, OutputT, RestrictionT, ?> fn,
+ PipelineOptions options,
+ Collection<PCollectionView<?>> views,
+ ReadyCheckingSideInputReader sideInputReader,
+ OutputManager outputManager,
+ TupleTag<OutputT> mainOutputTag,
+ List<TupleTag<?>> additionalOutputTags,
+ StepContext stepContext,
+ AggregatorFactory aggregatorFactory,
+ WindowingStrategy<?, ?> windowingStrategy) {
+ return new ProcessFnRunner<>(
+ simpleRunner(
+ options,
+ fn,
+ sideInputReader,
+ outputManager,
+ mainOutputTag,
+ additionalOutputTags,
+ stepContext,
+ aggregatorFactory,
+ windowingStrategy),
+ views,
+ sideInputReader);
+ }
}
http://git-wip-us.apache.org/repos/asf/beam/blob/b93de58f/runners/core-java/src/main/java/org/apache/beam/runners/core/ProcessFnRunner.java
----------------------------------------------------------------------
diff --git a/runners/core-java/src/main/java/org/apache/beam/runners/core/ProcessFnRunner.java b/runners/core-java/src/main/java/org/apache/beam/runners/core/ProcessFnRunner.java
new file mode 100644
index 0000000..3ae3f50
--- /dev/null
+++ b/runners/core-java/src/main/java/org/apache/beam/runners/core/ProcessFnRunner.java
@@ -0,0 +1,127 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.runners.core;
+
+import static com.google.common.base.Preconditions.checkArgument;
+import static org.apache.beam.runners.core.SplittableParDo.ProcessFn;
+
+import com.google.common.collect.Iterables;
+import java.util.Collection;
+import java.util.Collections;
+import org.apache.beam.runners.core.StateNamespaces.WindowNamespace;
+import org.apache.beam.runners.core.TimerInternals.TimerData;
+import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
+import org.apache.beam.sdk.transforms.windowing.GlobalWindow;
+import org.apache.beam.sdk.util.ReadyCheckingSideInputReader;
+import org.apache.beam.sdk.util.TimeDomain;
+import org.apache.beam.sdk.util.WindowedValue;
+import org.apache.beam.sdk.values.PCollectionView;
+import org.joda.time.Instant;
+
+/** Runs a {@link ProcessFn} by constructing the appropriate contexts and passing them in. */
+public class ProcessFnRunner<InputT, OutputT, RestrictionT>
+ implements PushbackSideInputDoFnRunner<
+ KeyedWorkItem<String, ElementAndRestriction<InputT, RestrictionT>>, OutputT> {
+ private final DoFnRunner<
+ KeyedWorkItem<String, ElementAndRestriction<InputT, RestrictionT>>, OutputT>
+ underlying;
+ private final Collection<PCollectionView<?>> views;
+ private final ReadyCheckingSideInputReader sideInputReader;
+
+ ProcessFnRunner(
+ DoFnRunner<KeyedWorkItem<String, ElementAndRestriction<InputT, RestrictionT>>, OutputT>
+ underlying,
+ Collection<PCollectionView<?>> views,
+ ReadyCheckingSideInputReader sideInputReader) {
+ this.underlying = underlying;
+ this.views = views;
+ this.sideInputReader = sideInputReader;
+ }
+
+ @Override
+ public void startBundle() {
+ underlying.startBundle();
+ }
+
+ @Override
+ public Iterable<WindowedValue<KeyedWorkItem<String, ElementAndRestriction<InputT, RestrictionT>>>>
+ processElementInReadyWindows(
+ WindowedValue<KeyedWorkItem<String, ElementAndRestriction<InputT, RestrictionT>>>
+ windowedKWI) {
+ checkTrivialOuterWindows(windowedKWI);
+ BoundedWindow window = getUnderlyingWindow(windowedKWI.getValue());
+ if (!isReady(window)) {
+ return Collections.singletonList(windowedKWI);
+ }
+ underlying.processElement(windowedKWI);
+ return Collections.emptyList();
+ }
+
+ @Override
+ public void finishBundle() {
+ underlying.finishBundle();
+ }
+
+ @Override
+ public void onTimer(
+ String timerId, BoundedWindow window, Instant timestamp, TimeDomain timeDomain) {
+ throw new UnsupportedOperationException("User timers unsupported in ProcessFn");
+ }
+
+ private static <T> void checkTrivialOuterWindows(
+ WindowedValue<KeyedWorkItem<String, T>> windowedKWI) {
+ // In practice it will be in 0 or 1 windows (ValueInEmptyWindows or ValueInGlobalWindow)
+ Collection<? extends BoundedWindow> outerWindows = windowedKWI.getWindows();
+ if (!outerWindows.isEmpty()) {
+ checkArgument(
+ outerWindows.size() == 1,
+ "The KeyedWorkItem itself must not be in multiple windows, but was in: %s",
+ outerWindows);
+ BoundedWindow onlyWindow = Iterables.getOnlyElement(outerWindows);
+ checkArgument(
+ onlyWindow instanceof GlobalWindow,
+ "KeyedWorkItem must be in the Global window, but was in: %s",
+ onlyWindow);
+ }
+ }
+
+ private static <T> BoundedWindow getUnderlyingWindow(KeyedWorkItem<String, T> kwi) {
+ if (Iterables.isEmpty(kwi.elementsIterable())) {
+ // ProcessFn sets only a single timer.
+ TimerData timer = Iterables.getOnlyElement(kwi.timersIterable());
+ return ((WindowNamespace) timer.getNamespace()).getWindow();
+ } else {
+ // KWI must have a single element in elementsIterable, because it follows a GBK by a
+ // uniquely generated key.
+ // Additionally, windows must be exploded before GBKIntoKeyedWorkItems, so there's also
+ // only a single window.
+ WindowedValue<T> value = Iterables.getOnlyElement(kwi.elementsIterable());
+ return Iterables.getOnlyElement(value.getWindows());
+ }
+ }
+
+ private boolean isReady(BoundedWindow mainInputWindow) {
+ for (PCollectionView<?> view : views) {
+ BoundedWindow sideInputWindow = view.getWindowMappingFn().getSideInputWindow(mainInputWindow);
+ if (!sideInputReader.isReady(view, sideInputWindow)) {
+ return false;
+ }
+ }
+ return true;
+ }
+}
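The pushback contract implemented here is: if every side-input window the element needs is ready, hand the element to the underlying runner and return nothing; otherwise return the element itself so the caller can retry it later. A minimal sketch of that contract with hypothetical names (not the actual runner API):

import java.util.Collections;
import java.util.List;
import java.util.Set;
import java.util.function.Consumer;

public class PushbackSketch {

  // Process the element if its window's side inputs are ready; otherwise push it back.
  static <T> List<T> processOrPushBack(
      T element, String window, Set<String> readyWindows, Consumer<T> process) {
    if (!readyWindows.contains(window)) {
      return Collections.singletonList(element);  // pushed back, to be retried later
    }
    process.accept(element);
    return Collections.emptyList();               // fully processed, nothing to retry
  }

  public static void main(String[] args) {
    Set<String> ready = Collections.singleton("w1");
    System.out.println(
        processOrPushBack("a", "w1", ready, e -> System.out.println("processed " + e)));  // []
    System.out.println(processOrPushBack("b", "w2", ready, e -> {}));                     // [b]
  }
}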
http://git-wip-us.apache.org/repos/asf/beam/blob/b93de58f/runners/direct-java/src/main/java/org/apache/beam/runners/direct/ParDoEvaluator.java
----------------------------------------------------------------------
diff --git a/runners/direct-java/src/main/java/org/apache/beam/runners/direct/ParDoEvaluator.java b/runners/direct-java/src/main/java/org/apache/beam/runners/direct/ParDoEvaluator.java
index bab7b2c..cab11db 100644
--- a/runners/direct-java/src/main/java/org/apache/beam/runners/direct/ParDoEvaluator.java
+++ b/runners/direct-java/src/main/java/org/apache/beam/runners/direct/ParDoEvaluator.java
@@ -30,6 +30,7 @@ import org.apache.beam.runners.core.SimplePushbackSideInputDoFnRunner;
import org.apache.beam.runners.core.TimerInternals.TimerData;
import org.apache.beam.runners.direct.DirectExecutionContext.DirectStepContext;
import org.apache.beam.runners.direct.DirectRunner.UncommittedBundle;
+import org.apache.beam.sdk.options.PipelineOptions;
import org.apache.beam.sdk.transforms.AppliedPTransform;
import org.apache.beam.sdk.transforms.DoFn;
import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
@@ -43,6 +44,50 @@ import org.apache.beam.sdk.values.TupleTag;
class ParDoEvaluator<InputT> implements TransformEvaluator<InputT> {
+ public interface DoFnRunnerFactory<InputT, OutputT> {
+ PushbackSideInputDoFnRunner<InputT, OutputT> createRunner(
+ PipelineOptions options,
+ DoFn<InputT, OutputT> fn,
+ List<PCollectionView<?>> sideInputs,
+ ReadyCheckingSideInputReader sideInputReader,
+ OutputManager outputManager,
+ TupleTag<OutputT> mainOutputTag,
+ List<TupleTag<?>> additionalOutputTags,
+ DirectStepContext stepContext,
+ AggregatorContainer.Mutator aggregatorChanges,
+ WindowingStrategy<?, ? extends BoundedWindow> windowingStrategy);
+ }
+
+ public static <InputT, OutputT> DoFnRunnerFactory<InputT, OutputT> defaultRunnerFactory() {
+ return new DoFnRunnerFactory<InputT, OutputT>() {
+ @Override
+ public PushbackSideInputDoFnRunner<InputT, OutputT> createRunner(
+ PipelineOptions options,
+ DoFn<InputT, OutputT> fn,
+ List<PCollectionView<?>> sideInputs,
+ ReadyCheckingSideInputReader sideInputReader,
+ OutputManager outputManager,
+ TupleTag<OutputT> mainOutputTag,
+ List<TupleTag<?>> additionalOutputTags,
+ DirectStepContext stepContext,
+ AggregatorContainer.Mutator aggregatorChanges,
+ WindowingStrategy<?, ? extends BoundedWindow> windowingStrategy) {
+ DoFnRunner<InputT, OutputT> underlying =
+ DoFnRunners.simpleRunner(
+ options,
+ fn,
+ sideInputReader,
+ outputManager,
+ mainOutputTag,
+ additionalOutputTags,
+ stepContext,
+ aggregatorChanges,
+ windowingStrategy);
+ return SimplePushbackSideInputDoFnRunner.create(underlying, sideInputs, sideInputReader);
+ }
+ };
+ }
+
public static <InputT, OutputT> ParDoEvaluator<InputT> create(
EvaluationContext evaluationContext,
DirectStepContext stepContext,
@@ -53,9 +98,43 @@ class ParDoEvaluator<InputT> implements TransformEvaluator<InputT> {
List<PCollectionView<?>> sideInputs,
TupleTag<OutputT> mainOutputTag,
List<TupleTag<?>> additionalOutputTags,
- Map<TupleTag<?>, PCollection<?>> outputs) {
+ Map<TupleTag<?>, PCollection<?>> outputs,
+ DoFnRunnerFactory<InputT, OutputT> runnerFactory) {
AggregatorContainer.Mutator aggregatorChanges = evaluationContext.getAggregatorMutator();
+ BundleOutputManager outputManager = createOutputManager(evaluationContext, key, outputs);
+
+ ReadyCheckingSideInputReader sideInputReader =
+ evaluationContext.createSideInputReader(sideInputs);
+
+ PushbackSideInputDoFnRunner<InputT, OutputT> runner = runnerFactory.createRunner(
+ evaluationContext.getPipelineOptions(),
+ fn,
+ sideInputs,
+ sideInputReader,
+ outputManager,
+ mainOutputTag,
+ additionalOutputTags,
+ stepContext,
+ aggregatorChanges,
+ windowingStrategy);
+
+ return create(runner, stepContext, application, aggregatorChanges, outputManager);
+ }
+
+ public static <InputT, OutputT> ParDoEvaluator<InputT> create(
+ PushbackSideInputDoFnRunner<InputT, OutputT> runner,
+ DirectStepContext stepContext,
+ AppliedPTransform<?, ?, ?> application,
+ AggregatorContainer.Mutator aggregatorChanges,
+ BundleOutputManager outputManager) {
+ return new ParDoEvaluator<>(runner, application, aggregatorChanges, outputManager, stepContext);
+ }
+
+ static BundleOutputManager createOutputManager(
+ EvaluationContext evaluationContext,
+ StructuralKey<?> key,
+ Map<TupleTag<?>, PCollection<?>> outputs) {
Map<TupleTag<?>, UncommittedBundle<?>> outputBundles = new HashMap<>();
for (Map.Entry<TupleTag<?>, PCollection<?>> outputEntry : outputs.entrySet()) {
// Just trust the context's decision as to whether the output should be keyed.
@@ -69,32 +148,7 @@ class ParDoEvaluator<InputT> implements TransformEvaluator<InputT> {
outputEntry.getKey(), evaluationContext.createBundle(outputEntry.getValue()));
}
}
- BundleOutputManager outputManager = BundleOutputManager.create(outputBundles);
-
- ReadyCheckingSideInputReader sideInputReader =
- evaluationContext.createSideInputReader(sideInputs);
-
- DoFnRunner<InputT, OutputT> underlying =
- DoFnRunners.simpleRunner(
- evaluationContext.getPipelineOptions(),
- fn,
- sideInputReader,
- outputManager,
- mainOutputTag,
- additionalOutputTags,
- stepContext,
- aggregatorChanges,
- windowingStrategy);
- PushbackSideInputDoFnRunner<InputT, OutputT> runner =
- SimplePushbackSideInputDoFnRunner.create(underlying, sideInputs, sideInputReader);
-
- try {
- runner.startBundle();
- } catch (Exception e) {
- throw UserCodeException.wrap(e);
- }
-
- return new ParDoEvaluator<>(runner, application, aggregatorChanges, outputManager, stepContext);
+ return BundleOutputManager.create(outputBundles);
}
////////////////////////////////////////////////////////////////////////////////////////////////
@@ -119,6 +173,12 @@ class ParDoEvaluator<InputT> implements TransformEvaluator<InputT> {
this.stepContext = stepContext;
this.aggregatorChanges = aggregatorChanges;
this.unprocessedElements = ImmutableList.builder();
+
+ try {
+ fnRunner.startBundle();
+ } catch (Exception e) {
+ throw UserCodeException.wrap(e);
+ }
}
public BundleOutputManager getOutputManager() {
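
The DoFnRunnerFactory hook introduced above makes runner construction in ParDoEvaluator pluggable: callers pass a factory instead of relying on the SimplePushbackSideInputDoFnRunner wiring that used to be hard-coded inside create(). As a rough sketch of what a caller-supplied factory can look like (the class name DelegatingDoFnRunnerFactory is illustrative and not part of this commit; it assumes the org.apache.beam.runners.direct package so the package-private types resolve), a factory may simply delegate to defaultRunnerFactory():

package org.apache.beam.runners.direct;

import java.util.List;
import org.apache.beam.runners.core.DoFnRunners.OutputManager;
import org.apache.beam.runners.core.PushbackSideInputDoFnRunner;
import org.apache.beam.runners.direct.DirectExecutionContext.DirectStepContext;
import org.apache.beam.sdk.options.PipelineOptions;
import org.apache.beam.sdk.transforms.DoFn;
import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
import org.apache.beam.sdk.util.ReadyCheckingSideInputReader;
import org.apache.beam.sdk.util.WindowingStrategy;
import org.apache.beam.sdk.values.PCollectionView;
import org.apache.beam.sdk.values.TupleTag;

/** Illustrative sketch only: a factory that reuses the default runner wiring. */
class DelegatingDoFnRunnerFactory<InputT, OutputT>
    implements ParDoEvaluator.DoFnRunnerFactory<InputT, OutputT> {
  @Override
  public PushbackSideInputDoFnRunner<InputT, OutputT> createRunner(
      PipelineOptions options,
      DoFn<InputT, OutputT> fn,
      List<PCollectionView<?>> sideInputs,
      ReadyCheckingSideInputReader sideInputReader,
      OutputManager outputManager,
      TupleTag<OutputT> mainOutputTag,
      List<TupleTag<?>> additionalOutputTags,
      DirectStepContext stepContext,
      AggregatorContainer.Mutator aggregatorChanges,
      WindowingStrategy<?, ? extends BoundedWindow> windowingStrategy) {
    // A real implementation could build or wrap the runner differently (the
    // splittable-DoFn factory further below is one such example); here we just
    // fall back to the default SimplePushbackSideInputDoFnRunner wiring.
    return ParDoEvaluator.<InputT, OutputT>defaultRunnerFactory()
        .createRunner(
            options, fn, sideInputs, sideInputReader, outputManager, mainOutputTag,
            additionalOutputTags, stepContext, aggregatorChanges, windowingStrategy);
  }
}

Such a factory would then be handed to new ParDoEvaluatorFactory<>(ctxt, factory), mirroring how TransformEvaluatorRegistry passes ParDoEvaluator.defaultRunnerFactory() in the diff below.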
http://git-wip-us.apache.org/repos/asf/beam/blob/b93de58f/runners/direct-java/src/main/java/org/apache/beam/runners/direct/ParDoEvaluatorFactory.java
----------------------------------------------------------------------
diff --git a/runners/direct-java/src/main/java/org/apache/beam/runners/direct/ParDoEvaluatorFactory.java b/runners/direct-java/src/main/java/org/apache/beam/runners/direct/ParDoEvaluatorFactory.java
index 93f204a..b00c2b6 100644
--- a/runners/direct-java/src/main/java/org/apache/beam/runners/direct/ParDoEvaluatorFactory.java
+++ b/runners/direct-java/src/main/java/org/apache/beam/runners/direct/ParDoEvaluatorFactory.java
@@ -43,9 +43,13 @@ final class ParDoEvaluatorFactory<InputT, OutputT> implements TransformEvaluator
private static final Logger LOG = LoggerFactory.getLogger(ParDoEvaluatorFactory.class);
private final LoadingCache<DoFn<?, ?>, DoFnLifecycleManager> fnClones;
private final EvaluationContext evaluationContext;
+ private final ParDoEvaluator.DoFnRunnerFactory<InputT, OutputT> runnerFactory;
- ParDoEvaluatorFactory(EvaluationContext evaluationContext) {
+ ParDoEvaluatorFactory(
+ EvaluationContext evaluationContext,
+ ParDoEvaluator.DoFnRunnerFactory<InputT, OutputT> runnerFactory) {
this.evaluationContext = evaluationContext;
+ this.runnerFactory = runnerFactory;
fnClones =
CacheBuilder.newBuilder()
.build(
@@ -148,7 +152,8 @@ final class ParDoEvaluatorFactory<InputT, OutputT> implements TransformEvaluator
sideInputs,
mainOutputTag,
additionalOutputTags,
- pcollections(application.getOutputs()));
+ pcollections(application.getOutputs()),
+ runnerFactory);
} catch (Exception e) {
try {
fnManager.remove();
@@ -162,7 +167,7 @@ final class ParDoEvaluatorFactory<InputT, OutputT> implements TransformEvaluator
}
}
- private Map<TupleTag<?>, PCollection<?>> pcollections(Map<TupleTag<?>, PValue> outputs) {
+ static Map<TupleTag<?>, PCollection<?>> pcollections(Map<TupleTag<?>, PValue> outputs) {
Map<TupleTag<?>, PCollection<?>> pcs = new HashMap<>();
for (Map.Entry<TupleTag<?>, PValue> output : outputs.entrySet()) {
pcs.put(output.getKey(), (PCollection<?>) output.getValue());
http://git-wip-us.apache.org/repos/asf/beam/blob/b93de58f/runners/direct-java/src/main/java/org/apache/beam/runners/direct/SplittableProcessElementsEvaluatorFactory.java
----------------------------------------------------------------------
diff --git a/runners/direct-java/src/main/java/org/apache/beam/runners/direct/SplittableProcessElementsEvaluatorFactory.java b/runners/direct-java/src/main/java/org/apache/beam/runners/direct/SplittableProcessElementsEvaluatorFactory.java
index 00b16dd..7efdb52 100644
--- a/runners/direct-java/src/main/java/org/apache/beam/runners/direct/SplittableProcessElementsEvaluatorFactory.java
+++ b/runners/direct-java/src/main/java/org/apache/beam/runners/direct/SplittableProcessElementsEvaluatorFactory.java
@@ -18,25 +18,34 @@
package org.apache.beam.runners.direct;
import java.util.Collection;
+import java.util.List;
import java.util.concurrent.Executors;
+import org.apache.beam.runners.core.DoFnRunners;
import org.apache.beam.runners.core.DoFnRunners.OutputManager;
import org.apache.beam.runners.core.ElementAndRestriction;
import org.apache.beam.runners.core.KeyedWorkItem;
import org.apache.beam.runners.core.OutputAndTimeBoundedSplittableProcessElementInvoker;
import org.apache.beam.runners.core.OutputWindowedValue;
+import org.apache.beam.runners.core.PushbackSideInputDoFnRunner;
import org.apache.beam.runners.core.SplittableParDo;
+import org.apache.beam.runners.core.SplittableParDo.ProcessFn;
import org.apache.beam.runners.core.StateInternals;
import org.apache.beam.runners.core.StateInternalsFactory;
import org.apache.beam.runners.core.TimerInternals;
import org.apache.beam.runners.core.TimerInternalsFactory;
import org.apache.beam.runners.direct.DirectRunner.CommittedBundle;
+import org.apache.beam.sdk.options.PipelineOptions;
import org.apache.beam.sdk.transforms.AppliedPTransform;
+import org.apache.beam.sdk.transforms.DoFn;
import org.apache.beam.sdk.transforms.splittabledofn.RestrictionTracker;
import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
import org.apache.beam.sdk.transforms.windowing.PaneInfo;
+import org.apache.beam.sdk.util.ReadyCheckingSideInputReader;
import org.apache.beam.sdk.util.WindowedValue;
+import org.apache.beam.sdk.util.WindowingStrategy;
import org.apache.beam.sdk.values.PCollection;
import org.apache.beam.sdk.values.PCollectionTuple;
+import org.apache.beam.sdk.values.PCollectionView;
import org.apache.beam.sdk.values.TupleTag;
import org.joda.time.Duration;
import org.joda.time.Instant;
@@ -51,7 +60,11 @@ class SplittableProcessElementsEvaluatorFactory<
SplittableProcessElementsEvaluatorFactory(EvaluationContext evaluationContext) {
this.evaluationContext = evaluationContext;
- this.delegateFactory = new ParDoEvaluatorFactory<>(evaluationContext);
+ this.delegateFactory =
+ new ParDoEvaluatorFactory<>(
+ evaluationContext,
+ SplittableProcessElementsEvaluatorFactory
+ .<InputT, OutputT, RestrictionT>processFnRunnerFactory());
}
@Override
@@ -82,12 +95,12 @@ class SplittableProcessElementsEvaluatorFactory<
final SplittableParDo.ProcessElements<InputT, OutputT, RestrictionT, TrackerT> transform =
application.getTransform();
- SplittableParDo.ProcessFn<InputT, OutputT, RestrictionT, TrackerT> processFn =
+ ProcessFn<InputT, OutputT, RestrictionT, TrackerT> processFn =
transform.newProcessFn(transform.getFn());
DoFnLifecycleManager fnManager = DoFnLifecycleManager.of(processFn);
processFn =
- ((SplittableParDo.ProcessFn<InputT, OutputT, RestrictionT, TrackerT>)
+ ((ProcessFn<InputT, OutputT, RestrictionT, TrackerT>)
fnManager
.<KeyedWorkItem<String, ElementAndRestriction<InputT, RestrictionT>>, OutputT>
get());
@@ -98,7 +111,7 @@ class SplittableProcessElementsEvaluatorFactory<
.getExecutionContext(application, inputBundle.getKey())
.getOrCreateStepContext(stepName, stepName);
- ParDoEvaluator<KeyedWorkItem<String, ElementAndRestriction<InputT, RestrictionT>>>
+ final ParDoEvaluator<KeyedWorkItem<String, ElementAndRestriction<InputT, RestrictionT>>>
parDoEvaluator =
delegateFactory.createParDoEvaluator(
application,
@@ -127,34 +140,36 @@ class SplittableProcessElementsEvaluatorFactory<
}
});
- final OutputManager outputManager = parDoEvaluator.getOutputManager();
+ OutputWindowedValue<OutputT> outputWindowedValue =
+ new OutputWindowedValue<OutputT>() {
+ private final OutputManager outputManager = parDoEvaluator.getOutputManager();
+
+ @Override
+ public void outputWindowedValue(
+ OutputT output,
+ Instant timestamp,
+ Collection<? extends BoundedWindow> windows,
+ PaneInfo pane) {
+ outputManager.output(
+ transform.getMainOutputTag(), WindowedValue.of(output, timestamp, windows, pane));
+ }
+
+ @Override
+ public <AdditionalOutputT> void outputWindowedValue(
+ TupleTag<AdditionalOutputT> tag,
+ AdditionalOutputT output,
+ Instant timestamp,
+ Collection<? extends BoundedWindow> windows,
+ PaneInfo pane) {
+ outputManager.output(tag, WindowedValue.of(output, timestamp, windows, pane));
+ }
+ };
processFn.setProcessElementInvoker(
new OutputAndTimeBoundedSplittableProcessElementInvoker<
InputT, OutputT, RestrictionT, TrackerT>(
transform.getFn(),
evaluationContext.getPipelineOptions(),
- new OutputWindowedValue<OutputT>() {
- @Override
- public void outputWindowedValue(
- OutputT output,
- Instant timestamp,
- Collection<? extends BoundedWindow> windows,
- PaneInfo pane) {
- outputManager.output(
- transform.getMainOutputTag(),
- WindowedValue.of(output, timestamp, windows, pane));
- }
-
- @Override
- public <AdditionalOutputT> void outputWindowedValue(
- TupleTag<AdditionalOutputT> tag,
- AdditionalOutputT output,
- Instant timestamp,
- Collection<? extends BoundedWindow> windows,
- PaneInfo pane) {
- outputManager.output(tag, WindowedValue.of(output, timestamp, windows, pane));
- }
- },
+ outputWindowedValue,
evaluationContext.createSideInputReader(transform.getSideInputs()),
// TODO: For better performance, use a higher-level executor?
Executors.newSingleThreadScheduledExecutor(Executors.defaultThreadFactory()),
@@ -163,4 +178,41 @@ class SplittableProcessElementsEvaluatorFactory<
return DoFnLifecycleManagerRemovingTransformEvaluator.wrapping(parDoEvaluator, fnManager);
}
+
+ private static <InputT, OutputT, RestrictionT>
+ ParDoEvaluator.DoFnRunnerFactory<
+ KeyedWorkItem<String, ElementAndRestriction<InputT, RestrictionT>>, OutputT>
+ processFnRunnerFactory() {
+ return new ParDoEvaluator.DoFnRunnerFactory<
+ KeyedWorkItem<String, ElementAndRestriction<InputT, RestrictionT>>, OutputT>() {
+ @Override
+ public PushbackSideInputDoFnRunner<
+ KeyedWorkItem<String, ElementAndRestriction<InputT, RestrictionT>>, OutputT>
+ createRunner(
+ PipelineOptions options,
+ DoFn<KeyedWorkItem<String, ElementAndRestriction<InputT, RestrictionT>>, OutputT> fn,
+ List<PCollectionView<?>> sideInputs,
+ ReadyCheckingSideInputReader sideInputReader,
+ OutputManager outputManager,
+ TupleTag<OutputT> mainOutputTag,
+ List<TupleTag<?>> additionalOutputTags,
+ DirectExecutionContext.DirectStepContext stepContext,
+ AggregatorContainer.Mutator aggregatorChanges,
+ WindowingStrategy<?, ? extends BoundedWindow> windowingStrategy) {
+ ProcessFn<InputT, OutputT, RestrictionT, ?> processFn =
+ (ProcessFn) fn;
+ return DoFnRunners.newProcessFnRunner(
+ processFn,
+ options,
+ sideInputs,
+ sideInputReader,
+ outputManager,
+ mainOutputTag,
+ additionalOutputTags,
+ stepContext,
+ aggregatorChanges,
+ windowingStrategy);
+ }
+ };
+ }
}
http://git-wip-us.apache.org/repos/asf/beam/blob/b93de58f/runners/direct-java/src/main/java/org/apache/beam/runners/direct/StatefulParDoEvaluatorFactory.java
----------------------------------------------------------------------
diff --git a/runners/direct-java/src/main/java/org/apache/beam/runners/direct/StatefulParDoEvaluatorFactory.java b/runners/direct-java/src/main/java/org/apache/beam/runners/direct/StatefulParDoEvaluatorFactory.java
index be77ea1..8793ae8 100644
--- a/runners/direct-java/src/main/java/org/apache/beam/runners/direct/StatefulParDoEvaluatorFactory.java
+++ b/runners/direct-java/src/main/java/org/apache/beam/runners/direct/StatefulParDoEvaluatorFactory.java
@@ -65,7 +65,9 @@ final class StatefulParDoEvaluatorFactory<K, InputT, OutputT> implements Transfo
private final ParDoEvaluatorFactory<KV<K, InputT>, OutputT> delegateFactory;
StatefulParDoEvaluatorFactory(EvaluationContext evaluationContext) {
- this.delegateFactory = new ParDoEvaluatorFactory<>(evaluationContext);
+ this.delegateFactory =
+ new ParDoEvaluatorFactory<>(
+ evaluationContext, ParDoEvaluator.<KV<K, InputT>, OutputT>defaultRunnerFactory());
this.cleanupRegistry =
CacheBuilder.newBuilder()
.weakValues()
http://git-wip-us.apache.org/repos/asf/beam/blob/b93de58f/runners/direct-java/src/main/java/org/apache/beam/runners/direct/TransformEvaluatorRegistry.java
----------------------------------------------------------------------
diff --git a/runners/direct-java/src/main/java/org/apache/beam/runners/direct/TransformEvaluatorRegistry.java b/runners/direct-java/src/main/java/org/apache/beam/runners/direct/TransformEvaluatorRegistry.java
index ae7ad93..d06c460 100644
--- a/runners/direct-java/src/main/java/org/apache/beam/runners/direct/TransformEvaluatorRegistry.java
+++ b/runners/direct-java/src/main/java/org/apache/beam/runners/direct/TransformEvaluatorRegistry.java
@@ -52,7 +52,9 @@ class TransformEvaluatorRegistry implements TransformEvaluatorFactory {
ImmutableMap.<Class<? extends PTransform>, TransformEvaluatorFactory>builder()
.put(Read.Bounded.class, new BoundedReadEvaluatorFactory(ctxt))
.put(Read.Unbounded.class, new UnboundedReadEvaluatorFactory(ctxt))
- .put(ParDo.MultiOutput.class, new ParDoEvaluatorFactory<>(ctxt))
+ .put(
+ ParDo.MultiOutput.class,
+ new ParDoEvaluatorFactory<>(ctxt, ParDoEvaluator.defaultRunnerFactory()))
.put(StatefulParDo.class, new StatefulParDoEvaluatorFactory<>(ctxt))
.put(PCollections.class, new FlattenEvaluatorFactory(ctxt))
.put(WriteView.class, new ViewEvaluatorFactory(ctxt))
http://git-wip-us.apache.org/repos/asf/beam/blob/b93de58f/runners/direct-java/src/test/java/org/apache/beam/runners/direct/ParDoEvaluatorTest.java
----------------------------------------------------------------------
diff --git a/runners/direct-java/src/test/java/org/apache/beam/runners/direct/ParDoEvaluatorTest.java b/runners/direct-java/src/test/java/org/apache/beam/runners/direct/ParDoEvaluatorTest.java
index 2be0f9d..e99e4bf 100644
--- a/runners/direct-java/src/test/java/org/apache/beam/runners/direct/ParDoEvaluatorTest.java
+++ b/runners/direct-java/src/test/java/org/apache/beam/runners/direct/ParDoEvaluatorTest.java
@@ -169,7 +169,8 @@ public class ParDoEvaluatorTest {
ImmutableList.<PCollectionView<?>>of(singletonView),
mainOutputTag,
additionalOutputTags,
- ImmutableMap.<TupleTag<?>, PCollection<?>>of(mainOutputTag, output));
+ ImmutableMap.<TupleTag<?>, PCollection<?>>of(mainOutputTag, output),
+ ParDoEvaluator.<Integer, Integer>defaultRunnerFactory());
}
private static class RecorderFn extends DoFn<Integer, Integer> {
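
The refactor also adds a ParDoEvaluator.create overload that accepts a pre-built PushbackSideInputDoFnRunner, which is what the test above ultimately reaches through the default factory. A hedged sketch of driving that overload directly (the helper class and method names are made up for illustration; it assumes the org.apache.beam.runners.direct package and the nested ParDoEvaluator.BundleOutputManager type, with all dependencies supplied by the caller):

package org.apache.beam.runners.direct;

import java.util.List;
import org.apache.beam.runners.core.DoFnRunner;
import org.apache.beam.runners.core.PushbackSideInputDoFnRunner;
import org.apache.beam.runners.core.SimplePushbackSideInputDoFnRunner;
import org.apache.beam.runners.direct.DirectExecutionContext.DirectStepContext;
import org.apache.beam.sdk.transforms.AppliedPTransform;
import org.apache.beam.sdk.util.ReadyCheckingSideInputReader;
import org.apache.beam.sdk.values.PCollectionView;

/** Illustrative sketch only: assemble a ParDoEvaluator from an already-wired runner. */
class PrebuiltRunnerEvaluators {
  static <InputT, OutputT> ParDoEvaluator<InputT> fromUnderlyingRunner(
      DoFnRunner<InputT, OutputT> underlying,
      List<PCollectionView<?>> sideInputs,
      ReadyCheckingSideInputReader sideInputReader,
      DirectStepContext stepContext,
      AppliedPTransform<?, ?, ?> application,
      AggregatorContainer.Mutator aggregatorChanges,
      ParDoEvaluator.BundleOutputManager outputManager) {
    // Wrap the plain DoFnRunner so elements whose side inputs are not yet
    // ready get pushed back, exactly as the default factory does.
    PushbackSideInputDoFnRunner<InputT, OutputT> runner =
        SimplePushbackSideInputDoFnRunner.create(underlying, sideInputs, sideInputReader);
    // The new overload skips output-manager and side-input wiring and only
    // assembles the evaluator from the supplied pieces.
    return ParDoEvaluator.create(runner, stepContext, application, aggregatorChanges, outputManager);
  }
}

Note that startBundle() is now invoked in the ParDoEvaluator constructor rather than in create(), so callers of this overload no longer wrap it in UserCodeException handling themselves.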
[47/50] [abbrv] beam git commit: This closes #2594
Posted by dh...@apache.org.
This closes #2594
Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/391fb77c
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/391fb77c
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/391fb77c
Branch: refs/heads/DSL_SQL
Commit: 391fb77c379d271494527c6f78ef8ada6f40dc23
Parents: 29e054a 1533e2b
Author: Eugene Kirpichov <ki...@google.com>
Authored: Wed Apr 19 11:39:36 2017 -0700
Committer: Eugene Kirpichov <ki...@google.com>
Committed: Wed Apr 19 11:39:36 2017 -0700
----------------------------------------------------------------------
.../sdk/io/gcp/bigquery/BigQuerySourceBase.java | 31 +++++++++++++-------
.../sdk/io/gcp/bigquery/BigQueryIOTest.java | 18 +++++-------
.../sdk/io/gcp/bigquery/FakeJobService.java | 9 ++++++
3 files changed, 37 insertions(+), 21 deletions(-)
----------------------------------------------------------------------
[35/50] [abbrv] beam git commit: [BEAM-1994] Remove Flink examples package
Posted by dh...@apache.org.
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/SingletonKeyedWorkItemCoder.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/SingletonKeyedWorkItemCoder.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/SingletonKeyedWorkItemCoder.java
deleted file mode 100644
index 9a52330..0000000
--- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/SingletonKeyedWorkItemCoder.java
+++ /dev/null
@@ -1,126 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.beam.runners.flink.translation.wrappers.streaming;
-
-import static com.google.common.base.Preconditions.checkArgument;
-
-import com.fasterxml.jackson.annotation.JsonCreator;
-import com.fasterxml.jackson.annotation.JsonProperty;
-import com.google.common.collect.ImmutableList;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
-import java.util.List;
-import org.apache.beam.runners.core.KeyedWorkItem;
-import org.apache.beam.runners.core.KeyedWorkItemCoder;
-import org.apache.beam.sdk.coders.Coder;
-import org.apache.beam.sdk.coders.CoderException;
-import org.apache.beam.sdk.coders.StandardCoder;
-import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
-import org.apache.beam.sdk.util.PropertyNames;
-import org.apache.beam.sdk.util.WindowedValue;
-
-/**
- * Singleton keyed work item coder.
- */
-public class SingletonKeyedWorkItemCoder<K, ElemT>
- extends StandardCoder<SingletonKeyedWorkItem<K, ElemT>> {
- /**
- * Create a new {@link KeyedWorkItemCoder} with the provided key coder, element coder, and window
- * coder.
- */
- public static <K, ElemT> SingletonKeyedWorkItemCoder<K, ElemT> of(
- Coder<K> keyCoder, Coder<ElemT> elemCoder, Coder<? extends BoundedWindow> windowCoder) {
- return new SingletonKeyedWorkItemCoder<>(keyCoder, elemCoder, windowCoder);
- }
-
- @JsonCreator
- public static <K, ElemT> SingletonKeyedWorkItemCoder<K, ElemT> of(
- @JsonProperty(PropertyNames.COMPONENT_ENCODINGS) List<Coder<?>> components) {
- checkArgument(components.size() == 3, "Expecting 3 components, got %s", components.size());
- @SuppressWarnings("unchecked")
- Coder<K> keyCoder = (Coder<K>) components.get(0);
- @SuppressWarnings("unchecked")
- Coder<ElemT> elemCoder = (Coder<ElemT>) components.get(1);
- @SuppressWarnings("unchecked")
- Coder<? extends BoundedWindow> windowCoder = (Coder<? extends BoundedWindow>) components.get(2);
- return new SingletonKeyedWorkItemCoder<>(keyCoder, elemCoder, windowCoder);
- }
-
- private final Coder<K> keyCoder;
- private final Coder<ElemT> elemCoder;
- private final Coder<? extends BoundedWindow> windowCoder;
- private final WindowedValue.FullWindowedValueCoder<ElemT> valueCoder;
-
- private SingletonKeyedWorkItemCoder(
- Coder<K> keyCoder, Coder<ElemT> elemCoder, Coder<? extends BoundedWindow> windowCoder) {
- this.keyCoder = keyCoder;
- this.elemCoder = elemCoder;
- this.windowCoder = windowCoder;
- valueCoder = WindowedValue.FullWindowedValueCoder.of(elemCoder, windowCoder);
- }
-
- public Coder<K> getKeyCoder() {
- return keyCoder;
- }
-
- public Coder<ElemT> getElementCoder() {
- return elemCoder;
- }
-
- @Override
- public void encode(SingletonKeyedWorkItem<K, ElemT> value,
- OutputStream outStream,
- Context context)
- throws CoderException, IOException {
- keyCoder.encode(value.key(), outStream, context.nested());
- valueCoder.encode(value.value, outStream, context);
- }
-
- @Override
- public SingletonKeyedWorkItem<K, ElemT> decode(InputStream inStream, Context context)
- throws CoderException, IOException {
- K key = keyCoder.decode(inStream, context.nested());
- WindowedValue<ElemT> value = valueCoder.decode(inStream, context);
- return new SingletonKeyedWorkItem<>(key, value);
- }
-
- @Override
- public List<? extends Coder<?>> getCoderArguments() {
- return ImmutableList.of(keyCoder, elemCoder, windowCoder);
- }
-
- @Override
- public void verifyDeterministic() throws NonDeterministicException {
- keyCoder.verifyDeterministic();
- elemCoder.verifyDeterministic();
- windowCoder.verifyDeterministic();
- }
-
- /**
- * {@inheritDoc}.
- *
- * {@link KeyedWorkItemCoder} is not consistent with equals as it can return a
- * {@link KeyedWorkItem} of a type different from the originally encoded type.
- */
- @Override
- public boolean consistentWithEquals() {
- return false;
- }
-
-}
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/SplittableDoFnOperator.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/SplittableDoFnOperator.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/SplittableDoFnOperator.java
deleted file mode 100644
index 40f70e4..0000000
--- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/SplittableDoFnOperator.java
+++ /dev/null
@@ -1,150 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.beam.runners.flink.translation.wrappers.streaming;
-
-import static com.google.common.base.Preconditions.checkState;
-
-import java.util.Collection;
-import java.util.Collections;
-import java.util.List;
-import java.util.Map;
-import java.util.concurrent.Executors;
-import org.apache.beam.runners.core.ElementAndRestriction;
-import org.apache.beam.runners.core.KeyedWorkItem;
-import org.apache.beam.runners.core.KeyedWorkItems;
-import org.apache.beam.runners.core.OutputAndTimeBoundedSplittableProcessElementInvoker;
-import org.apache.beam.runners.core.OutputWindowedValue;
-import org.apache.beam.runners.core.SplittableParDo;
-import org.apache.beam.runners.core.StateInternals;
-import org.apache.beam.runners.core.StateInternalsFactory;
-import org.apache.beam.runners.core.TimerInternals;
-import org.apache.beam.runners.core.TimerInternalsFactory;
-import org.apache.beam.sdk.coders.Coder;
-import org.apache.beam.sdk.options.PipelineOptions;
-import org.apache.beam.sdk.transforms.DoFn;
-import org.apache.beam.sdk.transforms.splittabledofn.RestrictionTracker;
-import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
-import org.apache.beam.sdk.transforms.windowing.PaneInfo;
-import org.apache.beam.sdk.util.WindowedValue;
-import org.apache.beam.sdk.util.WindowingStrategy;
-import org.apache.beam.sdk.values.PCollectionView;
-import org.apache.beam.sdk.values.TupleTag;
-import org.apache.flink.streaming.api.operators.InternalTimer;
-import org.joda.time.Duration;
-import org.joda.time.Instant;
-
-/**
- * Flink operator for executing splittable {@link DoFn DoFns}. Specifically, for executing
- * the {@code @ProcessElement} method of a splittable {@link DoFn}.
- */
-public class SplittableDoFnOperator<
- InputT, FnOutputT, OutputT, RestrictionT, TrackerT extends RestrictionTracker<RestrictionT>>
- extends DoFnOperator<
- KeyedWorkItem<String, ElementAndRestriction<InputT, RestrictionT>>, FnOutputT, OutputT> {
-
- public SplittableDoFnOperator(
- DoFn<KeyedWorkItem<String, ElementAndRestriction<InputT, RestrictionT>>, FnOutputT> doFn,
- Coder<
- WindowedValue<
- KeyedWorkItem<String, ElementAndRestriction<InputT, RestrictionT>>>> inputCoder,
- TupleTag<FnOutputT> mainOutputTag,
- List<TupleTag<?>> additionalOutputTags,
- OutputManagerFactory<OutputT> outputManagerFactory,
- WindowingStrategy<?, ?> windowingStrategy,
- Map<Integer, PCollectionView<?>> sideInputTagMapping,
- Collection<PCollectionView<?>> sideInputs,
- PipelineOptions options,
- Coder<?> keyCoder) {
- super(
- doFn,
- inputCoder,
- mainOutputTag,
- additionalOutputTags,
- outputManagerFactory,
- windowingStrategy,
- sideInputTagMapping,
- sideInputs,
- options,
- keyCoder);
-
- }
-
- @Override
- public void open() throws Exception {
- super.open();
-
- checkState(doFn instanceof SplittableParDo.ProcessFn);
-
- StateInternalsFactory<String> stateInternalsFactory = new StateInternalsFactory<String>() {
- @Override
- public StateInternals<String> stateInternalsForKey(String key) {
- //this will implicitly be keyed by the key of the incoming
- // element or by the key of a firing timer
- return (StateInternals<String>) stateInternals;
- }
- };
- TimerInternalsFactory<String> timerInternalsFactory = new TimerInternalsFactory<String>() {
- @Override
- public TimerInternals timerInternalsForKey(String key) {
- //this will implicitly be keyed like the StateInternalsFactory
- return timerInternals;
- }
- };
-
- ((SplittableParDo.ProcessFn) doFn).setStateInternalsFactory(stateInternalsFactory);
- ((SplittableParDo.ProcessFn) doFn).setTimerInternalsFactory(timerInternalsFactory);
- ((SplittableParDo.ProcessFn) doFn).setProcessElementInvoker(
- new OutputAndTimeBoundedSplittableProcessElementInvoker<>(
- doFn,
- serializedOptions.getPipelineOptions(),
- new OutputWindowedValue<FnOutputT>() {
- @Override
- public void outputWindowedValue(
- FnOutputT output,
- Instant timestamp,
- Collection<? extends BoundedWindow> windows,
- PaneInfo pane) {
- outputManager.output(
- mainOutputTag,
- WindowedValue.of(output, timestamp, windows, pane));
- }
-
- @Override
- public <AdditionalOutputT> void outputWindowedValue(
- TupleTag<AdditionalOutputT> tag,
- AdditionalOutputT output,
- Instant timestamp,
- Collection<? extends BoundedWindow> windows,
- PaneInfo pane) {
- outputManager.output(tag, WindowedValue.of(output, timestamp, windows, pane));
- }
- },
- sideInputReader,
- Executors.newSingleThreadScheduledExecutor(Executors.defaultThreadFactory()),
- 10000,
- Duration.standardSeconds(10)));
- }
-
- @Override
- public void fireTimer(InternalTimer<?, TimerInternals.TimerData> timer) {
- doFnRunner.processElement(WindowedValue.valueInGlobalWindow(
- KeyedWorkItems.<String, ElementAndRestriction<InputT, RestrictionT>>timersWorkItem(
- (String) stateInternals.getKey(),
- Collections.singletonList(timer.getNamespace()))));
- }
-}
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/WindowDoFnOperator.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/WindowDoFnOperator.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/WindowDoFnOperator.java
deleted file mode 100644
index 9b2136c..0000000
--- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/WindowDoFnOperator.java
+++ /dev/null
@@ -1,117 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.beam.runners.flink.translation.wrappers.streaming;
-
-import static org.apache.beam.runners.core.TimerInternals.TimerData;
-
-import java.util.Collection;
-import java.util.Collections;
-import java.util.List;
-import java.util.Map;
-import org.apache.beam.runners.core.GroupAlsoByWindowViaWindowSetNewDoFn;
-import org.apache.beam.runners.core.KeyedWorkItem;
-import org.apache.beam.runners.core.KeyedWorkItems;
-import org.apache.beam.runners.core.StateInternals;
-import org.apache.beam.runners.core.StateInternalsFactory;
-import org.apache.beam.runners.core.SystemReduceFn;
-import org.apache.beam.runners.core.TimerInternals;
-import org.apache.beam.runners.core.TimerInternalsFactory;
-import org.apache.beam.sdk.coders.Coder;
-import org.apache.beam.sdk.options.PipelineOptions;
-import org.apache.beam.sdk.transforms.DoFn;
-import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
-import org.apache.beam.sdk.util.WindowedValue;
-import org.apache.beam.sdk.util.WindowingStrategy;
-import org.apache.beam.sdk.values.KV;
-import org.apache.beam.sdk.values.PCollectionView;
-import org.apache.beam.sdk.values.TupleTag;
-import org.apache.flink.streaming.api.operators.InternalTimer;
-
-/**
- * Flink operator for executing window {@link DoFn DoFns}.
- */
-public class WindowDoFnOperator<K, InputT, OutputT>
- extends DoFnOperator<KeyedWorkItem<K, InputT>, KV<K, OutputT>, WindowedValue<KV<K, OutputT>>> {
-
- private final SystemReduceFn<K, InputT, ?, OutputT, BoundedWindow> systemReduceFn;
-
- public WindowDoFnOperator(
- SystemReduceFn<K, InputT, ?, OutputT, BoundedWindow> systemReduceFn,
- Coder<WindowedValue<KeyedWorkItem<K, InputT>>> inputCoder,
- TupleTag<KV<K, OutputT>> mainOutputTag,
- List<TupleTag<?>> additionalOutputTags,
- OutputManagerFactory<WindowedValue<KV<K, OutputT>>> outputManagerFactory,
- WindowingStrategy<?, ?> windowingStrategy,
- Map<Integer, PCollectionView<?>> sideInputTagMapping,
- Collection<PCollectionView<?>> sideInputs,
- PipelineOptions options,
- Coder<K> keyCoder) {
- super(
- null,
- inputCoder,
- mainOutputTag,
- additionalOutputTags,
- outputManagerFactory,
- windowingStrategy,
- sideInputTagMapping,
- sideInputs,
- options,
- keyCoder);
-
- this.systemReduceFn = systemReduceFn;
-
- }
-
- @Override
- protected DoFn<KeyedWorkItem<K, InputT>, KV<K, OutputT>> getDoFn() {
- StateInternalsFactory<K> stateInternalsFactory = new StateInternalsFactory<K>() {
- @Override
- public StateInternals<K> stateInternalsForKey(K key) {
- //this will implicitly be keyed by the key of the incoming
- // element or by the key of a firing timer
- return (StateInternals<K>) stateInternals;
- }
- };
- TimerInternalsFactory<K> timerInternalsFactory = new TimerInternalsFactory<K>() {
- @Override
- public TimerInternals timerInternalsForKey(K key) {
- //this will implicitly be keyed like the StateInternalsFactory
- return timerInternals;
- }
- };
-
- // we have to do the unchecked cast because GroupAlsoByWindowViaWindowSetDoFn.create
- // has the window type as generic parameter while WindowingStrategy is almost always
- // untyped.
- @SuppressWarnings("unchecked")
- DoFn<KeyedWorkItem<K, InputT>, KV<K, OutputT>> doFn =
- GroupAlsoByWindowViaWindowSetNewDoFn.create(
- windowingStrategy, stateInternalsFactory, timerInternalsFactory, sideInputReader,
- (SystemReduceFn) systemReduceFn, outputManager, mainOutputTag);
- return doFn;
- }
-
- @Override
- public void fireTimer(InternalTimer<?, TimerData> timer) {
- doFnRunner.processElement(WindowedValue.valueInGlobalWindow(
- KeyedWorkItems.<K, InputT>timersWorkItem(
- (K) stateInternals.getKey(),
- Collections.singletonList(timer.getNamespace()))));
- }
-
-}
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/WorkItemKeySelector.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/WorkItemKeySelector.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/WorkItemKeySelector.java
deleted file mode 100644
index 1dff367..0000000
--- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/WorkItemKeySelector.java
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.beam.runners.flink.translation.wrappers.streaming;
-
-import java.nio.ByteBuffer;
-import org.apache.beam.runners.core.KeyedWorkItem;
-import org.apache.beam.sdk.coders.Coder;
-import org.apache.beam.sdk.util.CoderUtils;
-import org.apache.beam.sdk.util.WindowedValue;
-import org.apache.flink.api.common.typeinfo.TypeInformation;
-import org.apache.flink.api.java.functions.KeySelector;
-import org.apache.flink.api.java.typeutils.GenericTypeInfo;
-import org.apache.flink.api.java.typeutils.ResultTypeQueryable;
-
-/**
- * {@link KeySelector} that retrieves a key from a {@link KeyedWorkItem}. This will return
- * the key as encoded by the provided {@link Coder} in a {@link ByteBuffer}. This ensures
- * that all key comparisons/hashing happen on the encoded form.
- */
-public class WorkItemKeySelector<K, V>
- implements KeySelector<WindowedValue<SingletonKeyedWorkItem<K, V>>, ByteBuffer>,
- ResultTypeQueryable<ByteBuffer> {
-
- private final Coder<K> keyCoder;
-
- public WorkItemKeySelector(Coder<K> keyCoder) {
- this.keyCoder = keyCoder;
- }
-
- @Override
- public ByteBuffer getKey(WindowedValue<SingletonKeyedWorkItem<K, V>> value) throws Exception {
- K key = value.getValue().key();
- byte[] keyBytes = CoderUtils.encodeToByteArray(keyCoder, key);
- return ByteBuffer.wrap(keyBytes);
- }
-
- @Override
- public TypeInformation<ByteBuffer> getProducedType() {
- return new GenericTypeInfo<>(ByteBuffer.class);
- }
-}
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/io/BoundedSourceWrapper.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/io/BoundedSourceWrapper.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/io/BoundedSourceWrapper.java
deleted file mode 100644
index 2ed5024..0000000
--- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/io/BoundedSourceWrapper.java
+++ /dev/null
@@ -1,218 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.beam.runners.flink.translation.wrappers.streaming.io;
-
-import com.google.common.annotations.VisibleForTesting;
-import java.util.ArrayList;
-import java.util.List;
-import org.apache.beam.runners.flink.translation.utils.SerializedPipelineOptions;
-import org.apache.beam.sdk.io.BoundedSource;
-import org.apache.beam.sdk.options.PipelineOptions;
-import org.apache.beam.sdk.transforms.windowing.GlobalWindow;
-import org.apache.beam.sdk.transforms.windowing.PaneInfo;
-import org.apache.beam.sdk.util.WindowedValue;
-import org.apache.flink.api.common.functions.StoppableFunction;
-import org.apache.flink.streaming.api.functions.source.RichParallelSourceFunction;
-import org.apache.flink.streaming.api.watermark.Watermark;
-import org.joda.time.Instant;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-/**
- * Wrapper for executing {@link BoundedSource BoundedSources} as a Flink Source.
- */
-public class BoundedSourceWrapper<OutputT>
- extends RichParallelSourceFunction<WindowedValue<OutputT>>
- implements StoppableFunction {
-
- private static final Logger LOG = LoggerFactory.getLogger(BoundedSourceWrapper.class);
-
- /**
- * Keep the options so that we can initialize the readers.
- */
- private final SerializedPipelineOptions serializedOptions;
-
- /**
- * The split sources. We split them in the constructor to ensure that all parallel
- * sources are consistent about the split sources.
- */
- private List<? extends BoundedSource<OutputT>> splitSources;
-
- /**
- * Make it a field so that we can access it in {@link #close()}.
- */
- private transient List<BoundedSource.BoundedReader<OutputT>> readers;
-
- /**
- * Initialize here and not in run() to prevent races where we cancel a job before run() is
- * ever called or run() is called after cancel().
- */
- private volatile boolean isRunning = true;
-
- @SuppressWarnings("unchecked")
- public BoundedSourceWrapper(
- PipelineOptions pipelineOptions,
- BoundedSource<OutputT> source,
- int parallelism) throws Exception {
- this.serializedOptions = new SerializedPipelineOptions(pipelineOptions);
-
- long desiredBundleSize = source.getEstimatedSizeBytes(pipelineOptions) / parallelism;
-
- // get the splits early. we assume that the generated splits are stable,
- // this is necessary so that the mapping of state to source is correct
- // when restoring
- splitSources = source.split(desiredBundleSize, pipelineOptions);
- }
-
- @Override
- public void run(SourceContext<WindowedValue<OutputT>> ctx) throws Exception {
-
- // figure out which split sources we're responsible for
- int subtaskIndex = getRuntimeContext().getIndexOfThisSubtask();
- int numSubtasks = getRuntimeContext().getNumberOfParallelSubtasks();
-
- List<BoundedSource<OutputT>> localSources = new ArrayList<>();
-
- for (int i = 0; i < splitSources.size(); i++) {
- if (i % numSubtasks == subtaskIndex) {
- localSources.add(splitSources.get(i));
- }
- }
-
- LOG.info("Bounded Flink Source {}/{} is reading from sources: {}",
- subtaskIndex,
- numSubtasks,
- localSources);
-
- readers = new ArrayList<>();
- // initialize readers from scratch
- for (BoundedSource<OutputT> source : localSources) {
- readers.add(source.createReader(serializedOptions.getPipelineOptions()));
- }
-
- if (readers.size() == 1) {
- // the easy case, we just read from one reader
- BoundedSource.BoundedReader<OutputT> reader = readers.get(0);
-
- boolean dataAvailable = reader.start();
- if (dataAvailable) {
- emitElement(ctx, reader);
- }
-
- while (isRunning) {
- dataAvailable = reader.advance();
-
- if (dataAvailable) {
- emitElement(ctx, reader);
- } else {
- break;
- }
- }
- } else {
- // a bit more complicated, we are responsible for several readers
- // loop through them and sleep if none of them had any data
-
- int currentReader = 0;
-
- // start each reader and emit data if immediately available
- for (BoundedSource.BoundedReader<OutputT> reader : readers) {
- boolean dataAvailable = reader.start();
- if (dataAvailable) {
- emitElement(ctx, reader);
- }
- }
-
- // a flag telling us whether any of the readers had data
- // if no reader had data, sleep for bit
- boolean hadData = false;
- while (isRunning && !readers.isEmpty()) {
- BoundedSource.BoundedReader<OutputT> reader = readers.get(currentReader);
- boolean dataAvailable = reader.advance();
-
- if (dataAvailable) {
- emitElement(ctx, reader);
- hadData = true;
- } else {
- readers.remove(currentReader);
- currentReader--;
- if (readers.isEmpty()) {
- break;
- }
- }
-
- currentReader = (currentReader + 1) % readers.size();
- if (currentReader == 0 && !hadData) {
- Thread.sleep(50);
- } else if (currentReader == 0) {
- hadData = false;
- }
- }
-
- }
-
- // emit final Long.MAX_VALUE watermark, just to be sure
- ctx.emitWatermark(new Watermark(Long.MAX_VALUE));
- }
-
- /**
- * Emit the current element from the given Reader. The reader is guaranteed to have data.
- */
- private void emitElement(
- SourceContext<WindowedValue<OutputT>> ctx,
- BoundedSource.BoundedReader<OutputT> reader) {
- // make sure that reader state update and element emission are atomic
- // with respect to snapshots
- synchronized (ctx.getCheckpointLock()) {
-
- OutputT item = reader.getCurrent();
- Instant timestamp = reader.getCurrentTimestamp();
-
- WindowedValue<OutputT> windowedValue =
- WindowedValue.of(item, timestamp, GlobalWindow.INSTANCE, PaneInfo.NO_FIRING);
- ctx.collectWithTimestamp(windowedValue, timestamp.getMillis());
- }
- }
-
- @Override
- public void close() throws Exception {
- super.close();
- if (readers != null) {
- for (BoundedSource.BoundedReader<OutputT> reader: readers) {
- reader.close();
- }
- }
- }
-
- @Override
- public void cancel() {
- isRunning = false;
- }
-
- @Override
- public void stop() {
- this.isRunning = false;
- }
-
- /**
- * Visible so that we can check this in tests. Must not be used for anything else.
- */
- @VisibleForTesting
- public List<? extends BoundedSource<OutputT>> getSplitSources() {
- return splitSources;
- }
-}
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/io/UnboundedSocketSource.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/io/UnboundedSocketSource.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/io/UnboundedSocketSource.java
deleted file mode 100644
index 910a33f..0000000
--- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/io/UnboundedSocketSource.java
+++ /dev/null
@@ -1,249 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.beam.runners.flink.translation.wrappers.streaming.io;
-
-import static com.google.common.base.Preconditions.checkArgument;
-
-import java.io.BufferedReader;
-import java.io.IOException;
-import java.io.InputStreamReader;
-import java.net.InetSocketAddress;
-import java.net.Socket;
-import java.util.Collections;
-import java.util.List;
-import java.util.NoSuchElementException;
-import javax.annotation.Nullable;
-import org.apache.beam.sdk.coders.Coder;
-import org.apache.beam.sdk.coders.StringUtf8Coder;
-import org.apache.beam.sdk.io.UnboundedSource;
-import org.apache.beam.sdk.options.PipelineOptions;
-import org.joda.time.Instant;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-/**
- * An example unbounded Beam source that reads input from a socket.
- * This is used mainly for testing and debugging.
- * */
-public class UnboundedSocketSource<CheckpointMarkT extends UnboundedSource.CheckpointMark>
- extends UnboundedSource<String, CheckpointMarkT> {
-
- private static final Coder<String> DEFAULT_SOCKET_CODER = StringUtf8Coder.of();
-
- private static final long serialVersionUID = 1L;
-
- private static final int DEFAULT_CONNECTION_RETRY_SLEEP = 500;
-
- private static final int CONNECTION_TIMEOUT_TIME = 0;
-
- private final String hostname;
- private final int port;
- private final char delimiter;
- private final long maxNumRetries;
- private final long delayBetweenRetries;
-
- public UnboundedSocketSource(String hostname, int port, char delimiter, long maxNumRetries) {
- this(hostname, port, delimiter, maxNumRetries, DEFAULT_CONNECTION_RETRY_SLEEP);
- }
-
- public UnboundedSocketSource(String hostname,
- int port,
- char delimiter,
- long maxNumRetries,
- long delayBetweenRetries) {
- this.hostname = hostname;
- this.port = port;
- this.delimiter = delimiter;
- this.maxNumRetries = maxNumRetries;
- this.delayBetweenRetries = delayBetweenRetries;
- }
-
- public String getHostname() {
- return this.hostname;
- }
-
- public int getPort() {
- return this.port;
- }
-
- public char getDelimiter() {
- return this.delimiter;
- }
-
- public long getMaxNumRetries() {
- return this.maxNumRetries;
- }
-
- public long getDelayBetweenRetries() {
- return this.delayBetweenRetries;
- }
-
- @Override
- public List<? extends UnboundedSource<String, CheckpointMarkT>> split(
- int desiredNumSplits,
- PipelineOptions options) throws Exception {
- return Collections.<UnboundedSource<String, CheckpointMarkT>>singletonList(this);
- }
-
- @Override
- public UnboundedReader<String> createReader(PipelineOptions options,
- @Nullable CheckpointMarkT checkpointMark) {
- return new UnboundedSocketReader(this);
- }
-
- @Nullable
- @Override
- public Coder getCheckpointMarkCoder() {
- // Flink and Dataflow have different checkpointing mechanisms.
- // In our case we do not need a coder.
- return null;
- }
-
- @Override
- public void validate() {
- checkArgument(port > 0 && port < 65536, "port is out of range");
- checkArgument(maxNumRetries >= -1, "maxNumRetries must be zero or larger (num retries), "
- + "or -1 (infinite retries)");
- checkArgument(delayBetweenRetries >= 0, "delayBetweenRetries must be zero or positive");
- }
-
- @Override
- public Coder getDefaultOutputCoder() {
- return DEFAULT_SOCKET_CODER;
- }
-
- /**
- * Unbounded socket reader.
- */
- public static class UnboundedSocketReader extends UnboundedSource.UnboundedReader<String> {
-
- private static final Logger LOG = LoggerFactory.getLogger(UnboundedSocketReader.class);
-
- private final UnboundedSocketSource source;
-
- private Socket socket;
- private BufferedReader reader;
-
- private boolean isRunning;
-
- private String currentRecord;
-
- public UnboundedSocketReader(UnboundedSocketSource source) {
- this.source = source;
- }
-
- private void openConnection() throws IOException {
- this.socket = new Socket();
- this.socket.connect(new InetSocketAddress(this.source.getHostname(), this.source.getPort()),
- CONNECTION_TIMEOUT_TIME);
- this.reader = new BufferedReader(new InputStreamReader(this.socket.getInputStream()));
- this.isRunning = true;
- }
-
- @Override
- public boolean start() throws IOException {
- int attempt = 0;
- while (!isRunning) {
- try {
- openConnection();
- LOG.info("Connected to server socket " + this.source.getHostname() + ':'
- + this.source.getPort());
-
- return advance();
- } catch (IOException e) {
- LOG.info("Lost connection to server socket " + this.source.getHostname() + ':'
- + this.source.getPort() + ". Retrying in "
- + this.source.getDelayBetweenRetries() + " msecs...");
-
- if (this.source.getMaxNumRetries() == -1 || attempt++ < this.source.getMaxNumRetries()) {
- try {
- Thread.sleep(this.source.getDelayBetweenRetries());
- } catch (InterruptedException e1) {
- e1.printStackTrace();
- }
- } else {
- this.isRunning = false;
- break;
- }
- }
- }
- LOG.error("Unable to connect to host " + this.source.getHostname()
- + " : " + this.source.getPort());
- return false;
- }
-
- @Override
- public boolean advance() throws IOException {
- final StringBuilder buffer = new StringBuilder();
- int data;
- while (isRunning && (data = reader.read()) != -1) {
- // check if the string is complete
- if (data != this.source.getDelimiter()) {
- buffer.append((char) data);
- } else {
- if (buffer.length() > 0 && buffer.charAt(buffer.length() - 1) == '\r') {
- buffer.setLength(buffer.length() - 1);
- }
- this.currentRecord = buffer.toString();
- buffer.setLength(0);
- return true;
- }
- }
- return false;
- }
-
- @Override
- public byte[] getCurrentRecordId() throws NoSuchElementException {
- return new byte[0];
- }
-
- @Override
- public String getCurrent() throws NoSuchElementException {
- return this.currentRecord;
- }
-
- @Override
- public Instant getCurrentTimestamp() throws NoSuchElementException {
- return Instant.now();
- }
-
- @Override
- public void close() throws IOException {
- this.reader.close();
- this.socket.close();
- this.isRunning = false;
- LOG.info("Closed connection to server socket at " + this.source.getHostname() + ":"
- + this.source.getPort() + ".");
- }
-
- @Override
- public Instant getWatermark() {
- return Instant.now();
- }
-
- @Override
- public CheckpointMark getCheckpointMark() {
- return null;
- }
-
- @Override
- public UnboundedSource<String, ?> getCurrentSource() {
- return this.source;
- }
- }
-}
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/io/UnboundedSourceWrapper.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/io/UnboundedSourceWrapper.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/io/UnboundedSourceWrapper.java
deleted file mode 100644
index bb9b58a..0000000
--- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/io/UnboundedSourceWrapper.java
+++ /dev/null
@@ -1,476 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.beam.runners.flink.translation.wrappers.streaming.io;
-
-import com.google.common.annotations.VisibleForTesting;
-import java.util.ArrayList;
-import java.util.Iterator;
-import java.util.LinkedHashMap;
-import java.util.List;
-import org.apache.beam.runners.flink.translation.types.CoderTypeInformation;
-import org.apache.beam.runners.flink.translation.utils.SerializedPipelineOptions;
-import org.apache.beam.sdk.coders.Coder;
-import org.apache.beam.sdk.coders.KvCoder;
-import org.apache.beam.sdk.coders.SerializableCoder;
-import org.apache.beam.sdk.io.UnboundedSource;
-import org.apache.beam.sdk.options.PipelineOptions;
-import org.apache.beam.sdk.transforms.windowing.GlobalWindow;
-import org.apache.beam.sdk.transforms.windowing.PaneInfo;
-import org.apache.beam.sdk.util.WindowedValue;
-import org.apache.beam.sdk.values.KV;
-import org.apache.beam.sdk.values.TypeDescriptor;
-import org.apache.flink.api.common.ExecutionConfig;
-import org.apache.flink.api.common.functions.StoppableFunction;
-import org.apache.flink.api.common.state.ListState;
-import org.apache.flink.api.common.state.ListStateDescriptor;
-import org.apache.flink.api.common.state.OperatorStateStore;
-import org.apache.flink.configuration.Configuration;
-import org.apache.flink.runtime.state.CheckpointListener;
-import org.apache.flink.runtime.state.DefaultOperatorStateBackend;
-import org.apache.flink.runtime.state.FunctionInitializationContext;
-import org.apache.flink.runtime.state.FunctionSnapshotContext;
-import org.apache.flink.streaming.api.checkpoint.CheckpointedFunction;
-import org.apache.flink.streaming.api.functions.source.RichParallelSourceFunction;
-import org.apache.flink.streaming.api.operators.StreamingRuntimeContext;
-import org.apache.flink.streaming.api.watermark.Watermark;
-import org.apache.flink.streaming.runtime.tasks.ProcessingTimeCallback;
-import org.joda.time.Instant;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-/**
- * Wrapper for executing {@link UnboundedSource UnboundedSources} as a Flink Source.
- */
-public class UnboundedSourceWrapper<
- OutputT, CheckpointMarkT extends UnboundedSource.CheckpointMark>
- extends RichParallelSourceFunction<WindowedValue<OutputT>>
- implements ProcessingTimeCallback, StoppableFunction,
- CheckpointListener, CheckpointedFunction {
-
- private static final Logger LOG = LoggerFactory.getLogger(UnboundedSourceWrapper.class);
-
- /**
- * Keep the options so that we can initialize the localReaders.
- */
- private final SerializedPipelineOptions serializedOptions;
-
- /**
- * For snapshot and restore.
- */
- private final KvCoder<
- ? extends UnboundedSource<OutputT, CheckpointMarkT>, CheckpointMarkT> checkpointCoder;
-
- /**
- * The split sources. We split them in the constructor to ensure that all parallel
- * sources are consistent about the split sources.
- */
- private final List<? extends UnboundedSource<OutputT, CheckpointMarkT>> splitSources;
-
- /**
- * The local split sources. Assigned at runtime when the wrapper is executed in parallel.
- */
- private transient List<UnboundedSource<OutputT, CheckpointMarkT>> localSplitSources;
-
- /**
- * The local split readers. Assigned at runtime when the wrapper is executed in parallel.
- * Make it a field so that we can access it in {@link #onProcessingTime(long)} for
- * emitting watermarks.
- */
- private transient List<UnboundedSource.UnboundedReader<OutputT>> localReaders;
-
- /**
- * Flag to indicate whether the source is running.
- * Initialize here and not in run() to prevent races where we cancel a job before run() is
- * ever called or run() is called after cancel().
- */
- private volatile boolean isRunning = true;
-
- /**
- * Make it a field so that we can access it in {@link #onProcessingTime(long)} for registering new
- * triggers.
- */
- private transient StreamingRuntimeContext runtimeContext;
-
- /**
- * Make it a field so that we can access it in {@link #onProcessingTime(long)} for emitting
- * watermarks.
- */
- private transient SourceContext<WindowedValue<OutputT>> context;
-
- /**
- * Pending checkpoints which have not been acknowledged yet.
- */
- private transient LinkedHashMap<Long, List<CheckpointMarkT>> pendingCheckpoints;
- /**
- * Keep a maximum of 32 checkpoints for {@code CheckpointMark.finalizeCheckpoint()}.
- */
- private static final int MAX_NUMBER_PENDING_CHECKPOINTS = 32;
-
- private transient ListState<KV<? extends
- UnboundedSource<OutputT, CheckpointMarkT>, CheckpointMarkT>> stateForCheckpoint;
-
- /**
- * False if checkpointCoder is null or if there is no restore state on startup.
- */
- private transient boolean isRestored = false;
-
- @SuppressWarnings("unchecked")
- public UnboundedSourceWrapper(
- PipelineOptions pipelineOptions,
- UnboundedSource<OutputT, CheckpointMarkT> source,
- int parallelism) throws Exception {
- this.serializedOptions = new SerializedPipelineOptions(pipelineOptions);
-
- if (source.requiresDeduping()) {
- LOG.warn("Source {} requires deduping but Flink runner doesn't support this yet.", source);
- }
-
- Coder<CheckpointMarkT> checkpointMarkCoder = source.getCheckpointMarkCoder();
- if (checkpointMarkCoder == null) {
- LOG.info("No CheckpointMarkCoder specified for this source. Won't create snapshots.");
- checkpointCoder = null;
- } else {
-
- Coder<? extends UnboundedSource<OutputT, CheckpointMarkT>> sourceCoder =
- (Coder) SerializableCoder.of(new TypeDescriptor<UnboundedSource>() {
- });
-
- checkpointCoder = KvCoder.of(sourceCoder, checkpointMarkCoder);
- }
-
- // get the splits early. we assume that the generated splits are stable,
- // this is necessary so that the mapping of state to source is correct
- // when restoring
- splitSources = source.split(parallelism, pipelineOptions);
- }
-
-
- /**
- * Initialize and restore state before starting execution of the source.
- */
- @Override
- public void open(Configuration parameters) throws Exception {
- runtimeContext = (StreamingRuntimeContext) getRuntimeContext();
-
- // figure out which split sources we're responsible for
- int subtaskIndex = runtimeContext.getIndexOfThisSubtask();
- int numSubtasks = runtimeContext.getNumberOfParallelSubtasks();
-
- localSplitSources = new ArrayList<>();
- localReaders = new ArrayList<>();
-
- pendingCheckpoints = new LinkedHashMap<>();
-
- if (isRestored) {
- // restore the splitSources from the checkpoint to ensure consistent ordering
- for (KV<? extends UnboundedSource<OutputT, CheckpointMarkT>, CheckpointMarkT> restored:
- stateForCheckpoint.get()) {
- localSplitSources.add(restored.getKey());
- localReaders.add(restored.getKey().createReader(
- serializedOptions.getPipelineOptions(), restored.getValue()));
- }
- } else {
- // initialize localReaders and localSources from scratch
- for (int i = 0; i < splitSources.size(); i++) {
- if (i % numSubtasks == subtaskIndex) {
- UnboundedSource<OutputT, CheckpointMarkT> source =
- splitSources.get(i);
- UnboundedSource.UnboundedReader<OutputT> reader =
- source.createReader(serializedOptions.getPipelineOptions(), null);
- localSplitSources.add(source);
- localReaders.add(reader);
- }
- }
- }
-
- LOG.info("Unbounded Flink Source {}/{} is reading from sources: {}",
- subtaskIndex,
- numSubtasks,
- localSplitSources);
- }
-
- @Override
- public void run(SourceContext<WindowedValue<OutputT>> ctx) throws Exception {
-
- context = ctx;
-
- if (localReaders.size() == 0) {
- // do nothing, but still look busy ...
- // also, output a Long.MAX_VALUE watermark since we know that we're not
- // going to emit anything
- // we can't return here since Flink requires that all operators stay up,
- // otherwise checkpointing would not work correctly anymore
- ctx.emitWatermark(new Watermark(Long.MAX_VALUE));
-
- // wait until this is canceled
- final Object waitLock = new Object();
- while (isRunning) {
- try {
- // Flink will interrupt us at some point
- //noinspection SynchronizationOnLocalVariableOrMethodParameter
- synchronized (waitLock) {
- // don't wait indefinitely, in case something goes horribly wrong
- waitLock.wait(1000);
- }
- } catch (InterruptedException e) {
- if (!isRunning) {
- // restore the interrupted state, and fall through the loop
- Thread.currentThread().interrupt();
- }
- }
- }
- } else if (localReaders.size() == 1) {
- // the easy case, we just read from one reader
- UnboundedSource.UnboundedReader<OutputT> reader = localReaders.get(0);
-
- boolean dataAvailable = reader.start();
- if (dataAvailable) {
- emitElement(ctx, reader);
- }
-
- setNextWatermarkTimer(this.runtimeContext);
-
- while (isRunning) {
- dataAvailable = reader.advance();
-
- if (dataAvailable) {
- emitElement(ctx, reader);
- } else {
- Thread.sleep(50);
- }
- }
- } else {
- // a bit more complicated, we are responsible for several localReaders
- // loop through them and sleep if none of them had any data
-
- int numReaders = localReaders.size();
- int currentReader = 0;
-
- // start each reader and emit data if immediately available
- for (UnboundedSource.UnboundedReader<OutputT> reader : localReaders) {
- boolean dataAvailable = reader.start();
- if (dataAvailable) {
- emitElement(ctx, reader);
- }
- }
-
- // a flag telling us whether any of the localReaders had data
- // if no reader had data, sleep for a bit
- boolean hadData = false;
- while (isRunning) {
- UnboundedSource.UnboundedReader<OutputT> reader = localReaders.get(currentReader);
- boolean dataAvailable = reader.advance();
-
- if (dataAvailable) {
- emitElement(ctx, reader);
- hadData = true;
- }
-
- currentReader = (currentReader + 1) % numReaders;
- if (currentReader == 0 && !hadData) {
- Thread.sleep(50);
- } else if (currentReader == 0) {
- hadData = false;
- }
- }
-
- }
- }
-
- /**
- * Emit the current element from the given Reader. The reader is guaranteed to have data.
- */
- private void emitElement(
- SourceContext<WindowedValue<OutputT>> ctx,
- UnboundedSource.UnboundedReader<OutputT> reader) {
- // make sure that reader state update and element emission are atomic
- // with respect to snapshots
- synchronized (ctx.getCheckpointLock()) {
-
- OutputT item = reader.getCurrent();
- Instant timestamp = reader.getCurrentTimestamp();
-
- WindowedValue<OutputT> windowedValue =
- WindowedValue.of(item, timestamp, GlobalWindow.INSTANCE, PaneInfo.NO_FIRING);
- ctx.collectWithTimestamp(windowedValue, timestamp.getMillis());
- }
- }
-
- @Override
- public void close() throws Exception {
- super.close();
- if (localReaders != null) {
- for (UnboundedSource.UnboundedReader<OutputT> reader: localReaders) {
- reader.close();
- }
- }
- }
-
- @Override
- public void cancel() {
- isRunning = false;
- }
-
- @Override
- public void stop() {
- isRunning = false;
- }
-
- // ------------------------------------------------------------------------
- // Checkpoint and restore
- // ------------------------------------------------------------------------
-
- @Override
- public void snapshotState(FunctionSnapshotContext functionSnapshotContext) throws Exception {
- if (!isRunning) {
- LOG.debug("snapshotState() called on closed source");
- } else {
-
- if (checkpointCoder == null) {
- // no checkpoint coder available in this source
- return;
- }
-
- stateForCheckpoint.clear();
-
- long checkpointId = functionSnapshotContext.getCheckpointId();
-
- // we checkpoint the sources along with the CheckpointMarkT to ensure
- // that we have a correct mapping of checkpoints to sources when
- // restoring
- List<CheckpointMarkT> checkpointMarks = new ArrayList<>(localSplitSources.size());
-
- for (int i = 0; i < localSplitSources.size(); i++) {
- UnboundedSource<OutputT, CheckpointMarkT> source = localSplitSources.get(i);
- UnboundedSource.UnboundedReader<OutputT> reader = localReaders.get(i);
-
- @SuppressWarnings("unchecked")
- CheckpointMarkT mark = (CheckpointMarkT) reader.getCheckpointMark();
- checkpointMarks.add(mark);
- KV<UnboundedSource<OutputT, CheckpointMarkT>, CheckpointMarkT> kv =
- KV.of(source, mark);
- stateForCheckpoint.add(kv);
- }
-
- // cleanup old pending checkpoints and add new checkpoint
- int diff = pendingCheckpoints.size() - MAX_NUMBER_PENDING_CHECKPOINTS;
- if (diff >= 0) {
- for (Iterator<Long> iterator = pendingCheckpoints.keySet().iterator();
- diff >= 0;
- diff--) {
- iterator.next();
- iterator.remove();
- }
- }
- pendingCheckpoints.put(checkpointId, checkpointMarks);
-
- }
- }
-
- @Override
- public void initializeState(FunctionInitializationContext context) throws Exception {
- if (checkpointCoder == null) {
- // no checkpoint coder available in this source
- return;
- }
-
- OperatorStateStore stateStore = context.getOperatorStateStore();
- CoderTypeInformation<
- KV<? extends UnboundedSource<OutputT, CheckpointMarkT>, CheckpointMarkT>>
- typeInformation = (CoderTypeInformation) new CoderTypeInformation<>(checkpointCoder);
- stateForCheckpoint = stateStore.getOperatorState(
- new ListStateDescriptor<>(DefaultOperatorStateBackend.DEFAULT_OPERATOR_STATE_NAME,
- typeInformation.createSerializer(new ExecutionConfig())));
-
- if (context.isRestored()) {
- isRestored = true;
- LOG.info("Restore state found for the UnboundedSourceWrapper.");
- } else {
- LOG.info("No restore state for UnboundedSourceWrapper.");
- }
- }
-
- @Override
- public void onProcessingTime(long timestamp) throws Exception {
- if (this.isRunning) {
- synchronized (context.getCheckpointLock()) {
- // find minimum watermark over all localReaders
- long watermarkMillis = Long.MAX_VALUE;
- for (UnboundedSource.UnboundedReader<OutputT> reader: localReaders) {
- Instant watermark = reader.getWatermark();
- if (watermark != null) {
- watermarkMillis = Math.min(watermark.getMillis(), watermarkMillis);
- }
- }
- context.emitWatermark(new Watermark(watermarkMillis));
- }
- setNextWatermarkTimer(this.runtimeContext);
- }
- }
-
- private void setNextWatermarkTimer(StreamingRuntimeContext runtime) {
- if (this.isRunning) {
- long watermarkInterval = runtime.getExecutionConfig().getAutoWatermarkInterval();
- long timeToNextWatermark = getTimeToNextWatermark(watermarkInterval);
- runtime.getProcessingTimeService().registerTimer(timeToNextWatermark, this);
- }
- }
-
- private long getTimeToNextWatermark(long watermarkInterval) {
- return System.currentTimeMillis() + watermarkInterval;
- }
-
- /**
- * Visible so that we can check this in tests. Must not be used for anything else.
- */
- @VisibleForTesting
- public List<? extends UnboundedSource<OutputT, CheckpointMarkT>> getSplitSources() {
- return splitSources;
- }
-
- /**
- * Visible so that we can check this in tests. Must not be used for anything else.
- */
- @VisibleForTesting
- public List<? extends UnboundedSource<OutputT, CheckpointMarkT>> getLocalSplitSources() {
- return localSplitSources;
- }
-
- @Override
- public void notifyCheckpointComplete(long checkpointId) throws Exception {
-
- List<CheckpointMarkT> checkpointMarks = pendingCheckpoints.get(checkpointId);
-
- if (checkpointMarks != null) {
-
- // remove old checkpoints including the current one
- Iterator<Long> iterator = pendingCheckpoints.keySet().iterator();
- long currentId;
- do {
- currentId = iterator.next();
- iterator.remove();
- } while (currentId != checkpointId);
-
- // confirm all marks
- for (CheckpointMarkT mark : checkpointMarks) {
- mark.finalizeCheckpoint();
- }
-
- }
- }
-}
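The snapshot/acknowledge bookkeeping in the wrapper above follows a simple pattern: checkpoint marks are stored per checkpoint id in insertion order, capped at a maximum, and everything up to an acknowledged id is dropped so the caller can finalize it. Below is a minimal, self-contained sketch of just that pattern; PendingCheckpointTracker and its MarkT parameter are illustrative stand-ins, not Beam or Flink APIs.

import java.util.ArrayList;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

// Minimal sketch of the pending-checkpoint bookkeeping pattern; MarkT stands in
// for a CheckpointMark-like type and is not a real Beam/Flink API.
public class PendingCheckpointTracker<MarkT> {

  // Upper bound on retained snapshots, mirroring MAX_NUMBER_PENDING_CHECKPOINTS above.
  private static final int MAX_PENDING = 32;

  // Insertion-ordered so the oldest checkpoints are evicted first.
  private final LinkedHashMap<Long, List<MarkT>> pending = new LinkedHashMap<>();

  // Called when a snapshot is taken: remember the marks under the checkpoint id.
  public void onSnapshot(long checkpointId, List<MarkT> marks) {
    // Evict the oldest entries if the cap would be exceeded.
    int overflow = pending.size() - MAX_PENDING;
    Iterator<Long> it = pending.keySet().iterator();
    while (overflow >= 0 && it.hasNext()) {
      it.next();
      it.remove();
      overflow--;
    }
    pending.put(checkpointId, new ArrayList<>(marks));
  }

  // Called when a checkpoint is acknowledged: drop everything up to and including it
  // and return the marks so the caller can finalize them.
  public List<MarkT> onCheckpointComplete(long checkpointId) {
    List<MarkT> marks = pending.get(checkpointId);
    if (marks == null) {
      return null;
    }
    Iterator<Map.Entry<Long, List<MarkT>>> it = pending.entrySet().iterator();
    long current;
    do {
      current = it.next().getKey();
      it.remove();
    } while (current != checkpointId);
    return marks;
  }
}

Keeping the entries in a LinkedHashMap lets both eviction and acknowledgement walk the map in checkpoint order without a separate index.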
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/io/package-info.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/io/package-info.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/io/package-info.java
deleted file mode 100644
index b431ce7..0000000
--- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/io/package-info.java
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * Internal implementation of the Beam runner for Apache Flink.
- */
-package org.apache.beam.runners.flink.translation.wrappers.streaming.io;
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/package-info.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/package-info.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/package-info.java
deleted file mode 100644
index 0674871..0000000
--- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/package-info.java
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * Internal implementation of the Beam runner for Apache Flink.
- */
-package org.apache.beam.runners.flink.translation.wrappers.streaming;
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/state/FlinkBroadcastStateInternals.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/state/FlinkBroadcastStateInternals.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/state/FlinkBroadcastStateInternals.java
deleted file mode 100644
index 3203446..0000000
--- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/state/FlinkBroadcastStateInternals.java
+++ /dev/null
@@ -1,865 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.beam.runners.flink.translation.wrappers.streaming.state;
-
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-import org.apache.beam.runners.core.StateInternals;
-import org.apache.beam.runners.core.StateNamespace;
-import org.apache.beam.runners.core.StateTag;
-import org.apache.beam.runners.flink.translation.types.CoderTypeInformation;
-import org.apache.beam.sdk.coders.Coder;
-import org.apache.beam.sdk.coders.ListCoder;
-import org.apache.beam.sdk.coders.MapCoder;
-import org.apache.beam.sdk.coders.StringUtf8Coder;
-import org.apache.beam.sdk.transforms.Combine;
-import org.apache.beam.sdk.transforms.CombineWithContext;
-import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
-import org.apache.beam.sdk.transforms.windowing.OutputTimeFn;
-import org.apache.beam.sdk.util.CombineContextFactory;
-import org.apache.beam.sdk.util.state.BagState;
-import org.apache.beam.sdk.util.state.CombiningState;
-import org.apache.beam.sdk.util.state.MapState;
-import org.apache.beam.sdk.util.state.ReadableState;
-import org.apache.beam.sdk.util.state.SetState;
-import org.apache.beam.sdk.util.state.State;
-import org.apache.beam.sdk.util.state.StateContext;
-import org.apache.beam.sdk.util.state.StateContexts;
-import org.apache.beam.sdk.util.state.ValueState;
-import org.apache.beam.sdk.util.state.WatermarkHoldState;
-import org.apache.flink.api.common.ExecutionConfig;
-import org.apache.flink.api.common.state.ListState;
-import org.apache.flink.api.common.state.ListStateDescriptor;
-import org.apache.flink.runtime.state.DefaultOperatorStateBackend;
-import org.apache.flink.runtime.state.OperatorStateBackend;
-
-/**
- * {@link StateInternals} that uses a Flink {@link DefaultOperatorStateBackend}
- * to manage the broadcast state.
- * The state is the same on all parallel instances of the operator,
- * so we only need to store the state of operator-0 in the OperatorStateBackend.
- *
- * <p>Note: the index of the key is ignored.
- * This is mainly used for side inputs.
- */
-public class FlinkBroadcastStateInternals<K> implements StateInternals<K> {
-
- private int indexInSubtaskGroup;
- private final DefaultOperatorStateBackend stateBackend;
- // stateName -> <namespace, state>
- private Map<String, Map<String, ?>> stateForNonZeroOperator;
-
- public FlinkBroadcastStateInternals(int indexInSubtaskGroup, OperatorStateBackend stateBackend) {
- // TODO: Flink does not yet expose this through a public API
- this.stateBackend = (DefaultOperatorStateBackend) stateBackend;
- this.indexInSubtaskGroup = indexInSubtaskGroup;
- if (indexInSubtaskGroup != 0) {
- stateForNonZeroOperator = new HashMap<>();
- }
- }
-
- @Override
- public K getKey() {
- return null;
- }
-
- @Override
- public <T extends State> T state(
- final StateNamespace namespace,
- StateTag<? super K, T> address) {
-
- return state(namespace, address, StateContexts.nullContext());
- }
-
- @Override
- public <T extends State> T state(
- final StateNamespace namespace,
- StateTag<? super K, T> address,
- final StateContext<?> context) {
-
- return address.bind(new StateTag.StateBinder<K>() {
-
- @Override
- public <T> ValueState<T> bindValue(
- StateTag<? super K, ValueState<T>> address,
- Coder<T> coder) {
-
- return new FlinkBroadcastValueState<>(stateBackend, address, namespace, coder);
- }
-
- @Override
- public <T> BagState<T> bindBag(
- StateTag<? super K, BagState<T>> address,
- Coder<T> elemCoder) {
-
- return new FlinkBroadcastBagState<>(stateBackend, address, namespace, elemCoder);
- }
-
- @Override
- public <T> SetState<T> bindSet(
- StateTag<? super K, SetState<T>> address,
- Coder<T> elemCoder) {
- throw new UnsupportedOperationException(
- String.format("%s is not supported", SetState.class.getSimpleName()));
- }
-
- @Override
- public <KeyT, ValueT> MapState<KeyT, ValueT> bindMap(
- StateTag<? super K, MapState<KeyT, ValueT>> spec,
- Coder<KeyT> mapKeyCoder, Coder<ValueT> mapValueCoder) {
- throw new UnsupportedOperationException(
- String.format("%s is not supported", MapState.class.getSimpleName()));
- }
-
- @Override
- public <InputT, AccumT, OutputT>
- CombiningState<InputT, AccumT, OutputT>
- bindCombiningValue(
- StateTag<? super K, CombiningState<InputT, AccumT, OutputT>> address,
- Coder<AccumT> accumCoder,
- Combine.CombineFn<InputT, AccumT, OutputT> combineFn) {
-
- return new FlinkCombiningState<>(
- stateBackend, address, combineFn, namespace, accumCoder);
- }
-
- @Override
- public <InputT, AccumT, OutputT>
- CombiningState<InputT, AccumT, OutputT> bindKeyedCombiningValue(
- StateTag<? super K, CombiningState<InputT, AccumT, OutputT>> address,
- Coder<AccumT> accumCoder,
- final Combine.KeyedCombineFn<? super K, InputT, AccumT, OutputT> combineFn) {
- return new FlinkKeyedCombiningState<>(
- stateBackend,
- address,
- combineFn,
- namespace,
- accumCoder,
- FlinkBroadcastStateInternals.this);
- }
-
- @Override
- public <InputT, AccumT, OutputT>
- CombiningState<InputT, AccumT, OutputT> bindKeyedCombiningValueWithContext(
- StateTag<? super K, CombiningState<InputT, AccumT, OutputT>> address,
- Coder<AccumT> accumCoder,
- CombineWithContext.KeyedCombineFnWithContext<
- ? super K, InputT, AccumT, OutputT> combineFn) {
- return new FlinkCombiningStateWithContext<>(
- stateBackend,
- address,
- combineFn,
- namespace,
- accumCoder,
- FlinkBroadcastStateInternals.this,
- CombineContextFactory.createFromStateContext(context));
- }
-
- @Override
- public <W extends BoundedWindow> WatermarkHoldState<W> bindWatermark(
- StateTag<? super K, WatermarkHoldState<W>> address,
- OutputTimeFn<? super W> outputTimeFn) {
- throw new UnsupportedOperationException(
- String.format("%s is not supported", WatermarkHoldState.class.getSimpleName()));
- }
- });
- }
-
- /**
- * 1. Only the operator with subtask index 0 checkpoints anything, because we
- * assume that the state is the same on all parallel instances of the operator.
- *
- * <p>2. A map is used to support namespaces.
- */
- private abstract class AbstractBroadcastState<T> {
-
- private String name;
- private final StateNamespace namespace;
- private final ListStateDescriptor<Map<String, T>> flinkStateDescriptor;
- private final DefaultOperatorStateBackend flinkStateBackend;
-
- AbstractBroadcastState(
- DefaultOperatorStateBackend flinkStateBackend,
- String name,
- StateNamespace namespace,
- Coder<T> coder) {
- this.name = name;
-
- this.namespace = namespace;
- this.flinkStateBackend = flinkStateBackend;
-
- CoderTypeInformation<Map<String, T>> typeInfo =
- new CoderTypeInformation<>(MapCoder.of(StringUtf8Coder.of(), coder));
-
- flinkStateDescriptor = new ListStateDescriptor<>(name,
- typeInfo.createSerializer(new ExecutionConfig()));
- }
-
- /**
- * Get the map (namespace -> T) from index 0.
- */
- Map<String, T> getMap() throws Exception {
- if (indexInSubtaskGroup == 0) {
- return getMapFromBroadcastState();
- } else {
- Map<String, T> result = (Map<String, T>) stateForNonZeroOperator.get(name);
- // maybe restore from BroadcastState of Operator-0
- if (result == null) {
- result = getMapFromBroadcastState();
- if (result != null) {
- stateForNonZeroOperator.put(name, result);
- // we don't need it anymore, must clear it.
- flinkStateBackend.getBroadcastOperatorState(
- flinkStateDescriptor).clear();
- }
- }
- return result;
- }
- }
-
- Map<String, T> getMapFromBroadcastState() throws Exception {
- ListState<Map<String, T>> state = flinkStateBackend.getBroadcastOperatorState(
- flinkStateDescriptor);
- Iterable<Map<String, T>> iterable = state.get();
- Map<String, T> ret = null;
- if (iterable != null) {
- // just use index 0
- Iterator<Map<String, T>> iterator = iterable.iterator();
- if (iterator.hasNext()) {
- ret = iterator.next();
- }
- }
- return ret;
- }
-
- /**
- * Update the map (namespace -> T) from index 0.
- */
- void updateMap(Map<String, T> map) throws Exception {
- if (indexInSubtaskGroup == 0) {
- ListState<Map<String, T>> state = flinkStateBackend.getBroadcastOperatorState(
- flinkStateDescriptor);
- state.clear();
- if (map.size() > 0) {
- state.add(map);
- }
- } else {
- if (map.size() == 0) {
- stateForNonZeroOperator.remove(name);
- // updateMap is always called after getMap, and getMap has already
- // cleared the map in the BroadcastOperatorState,
- // so we don't need to clear it here.
- } else {
- stateForNonZeroOperator.put(name, map);
- }
- }
- }
-
- void writeInternal(T input) {
- try {
- Map<String, T> map = getMap();
- if (map == null) {
- map = new HashMap<>();
- }
- map.put(namespace.stringKey(), input);
- updateMap(map);
- } catch (Exception e) {
- throw new RuntimeException("Error updating state.", e);
- }
- }
-
- T readInternal() {
- try {
- Map<String, T> map = getMap();
- if (map == null) {
- return null;
- } else {
- return map.get(namespace.stringKey());
- }
- } catch (Exception e) {
- throw new RuntimeException("Error reading state.", e);
- }
- }
-
- void clearInternal() {
- try {
- Map<String, T> map = getMap();
- if (map != null) {
- map.remove(namespace.stringKey());
- updateMap(map);
- }
- } catch (Exception e) {
- throw new RuntimeException("Error clearing state.", e);
- }
- }
-
- }
-
- private class FlinkBroadcastValueState<K, T>
- extends AbstractBroadcastState<T> implements ValueState<T> {
-
- private final StateNamespace namespace;
- private final StateTag<? super K, ValueState<T>> address;
-
- FlinkBroadcastValueState(
- DefaultOperatorStateBackend flinkStateBackend,
- StateTag<? super K, ValueState<T>> address,
- StateNamespace namespace,
- Coder<T> coder) {
- super(flinkStateBackend, address.getId(), namespace, coder);
-
- this.namespace = namespace;
- this.address = address;
-
- }
-
- @Override
- public void write(T input) {
- writeInternal(input);
- }
-
- @Override
- public ValueState<T> readLater() {
- return this;
- }
-
- @Override
- public T read() {
- return readInternal();
- }
-
- @Override
- public boolean equals(Object o) {
- if (this == o) {
- return true;
- }
- if (o == null || getClass() != o.getClass()) {
- return false;
- }
-
- FlinkBroadcastValueState<?, ?> that = (FlinkBroadcastValueState<?, ?>) o;
-
- return namespace.equals(that.namespace) && address.equals(that.address);
-
- }
-
- @Override
- public int hashCode() {
- int result = namespace.hashCode();
- result = 31 * result + address.hashCode();
- return result;
- }
-
- @Override
- public void clear() {
- clearInternal();
- }
- }
-
- private class FlinkBroadcastBagState<K, T> extends AbstractBroadcastState<List<T>>
- implements BagState<T> {
-
- private final StateNamespace namespace;
- private final StateTag<? super K, BagState<T>> address;
-
- FlinkBroadcastBagState(
- DefaultOperatorStateBackend flinkStateBackend,
- StateTag<? super K, BagState<T>> address,
- StateNamespace namespace,
- Coder<T> coder) {
- super(flinkStateBackend, address.getId(), namespace, ListCoder.of(coder));
-
- this.namespace = namespace;
- this.address = address;
- }
-
- @Override
- public void add(T input) {
- List<T> list = readInternal();
- if (list == null) {
- list = new ArrayList<>();
- }
- list.add(input);
- writeInternal(list);
- }
-
- @Override
- public BagState<T> readLater() {
- return this;
- }
-
- @Override
- public Iterable<T> read() {
- List<T> result = readInternal();
- return result != null ? result : Collections.<T>emptyList();
- }
-
- @Override
- public ReadableState<Boolean> isEmpty() {
- return new ReadableState<Boolean>() {
- @Override
- public Boolean read() {
- try {
- List<T> result = readInternal();
- return result == null;
- } catch (Exception e) {
- throw new RuntimeException("Error reading state.", e);
- }
-
- }
-
- @Override
- public ReadableState<Boolean> readLater() {
- return this;
- }
- };
- }
-
- @Override
- public void clear() {
- clearInternal();
- }
-
- @Override
- public boolean equals(Object o) {
- if (this == o) {
- return true;
- }
- if (o == null || getClass() != o.getClass()) {
- return false;
- }
-
- FlinkBroadcastBagState<?, ?> that = (FlinkBroadcastBagState<?, ?>) o;
-
- return namespace.equals(that.namespace) && address.equals(that.address);
-
- }
-
- @Override
- public int hashCode() {
- int result = namespace.hashCode();
- result = 31 * result + address.hashCode();
- return result;
- }
- }
-
- private class FlinkCombiningState<K, InputT, AccumT, OutputT>
- extends AbstractBroadcastState<AccumT>
- implements CombiningState<InputT, AccumT, OutputT> {
-
- private final StateNamespace namespace;
- private final StateTag<? super K, CombiningState<InputT, AccumT, OutputT>> address;
- private final Combine.CombineFn<InputT, AccumT, OutputT> combineFn;
-
- FlinkCombiningState(
- DefaultOperatorStateBackend flinkStateBackend,
- StateTag<? super K, CombiningState<InputT, AccumT, OutputT>> address,
- Combine.CombineFn<InputT, AccumT, OutputT> combineFn,
- StateNamespace namespace,
- Coder<AccumT> accumCoder) {
- super(flinkStateBackend, address.getId(), namespace, accumCoder);
-
- this.namespace = namespace;
- this.address = address;
- this.combineFn = combineFn;
- }
-
- @Override
- public CombiningState<InputT, AccumT, OutputT> readLater() {
- return this;
- }
-
- @Override
- public void add(InputT value) {
- AccumT current = readInternal();
- if (current == null) {
- current = combineFn.createAccumulator();
- }
- current = combineFn.addInput(current, value);
- writeInternal(current);
- }
-
- @Override
- public void addAccum(AccumT accum) {
- AccumT current = readInternal();
-
- if (current == null) {
- writeInternal(accum);
- } else {
- current = combineFn.mergeAccumulators(Arrays.asList(current, accum));
- writeInternal(current);
- }
- }
-
- @Override
- public AccumT getAccum() {
- return readInternal();
- }
-
- @Override
- public AccumT mergeAccumulators(Iterable<AccumT> accumulators) {
- return combineFn.mergeAccumulators(accumulators);
- }
-
- @Override
- public OutputT read() {
- AccumT accum = readInternal();
- if (accum != null) {
- return combineFn.extractOutput(accum);
- } else {
- return combineFn.extractOutput(combineFn.createAccumulator());
- }
- }
-
- @Override
- public ReadableState<Boolean> isEmpty() {
- return new ReadableState<Boolean>() {
- @Override
- public Boolean read() {
- try {
- return readInternal() == null;
- } catch (Exception e) {
- throw new RuntimeException("Error reading state.", e);
- }
-
- }
-
- @Override
- public ReadableState<Boolean> readLater() {
- return this;
- }
- };
- }
-
- @Override
- public void clear() {
- clearInternal();
- }
-
- @Override
- public boolean equals(Object o) {
- if (this == o) {
- return true;
- }
- if (o == null || getClass() != o.getClass()) {
- return false;
- }
-
- FlinkCombiningState<?, ?, ?, ?> that =
- (FlinkCombiningState<?, ?, ?, ?>) o;
-
- return namespace.equals(that.namespace) && address.equals(that.address);
-
- }
-
- @Override
- public int hashCode() {
- int result = namespace.hashCode();
- result = 31 * result + address.hashCode();
- return result;
- }
- }
-
- private class FlinkKeyedCombiningState<K, InputT, AccumT, OutputT>
- extends AbstractBroadcastState<AccumT>
- implements CombiningState<InputT, AccumT, OutputT> {
-
- private final StateNamespace namespace;
- private final StateTag<? super K, CombiningState<InputT, AccumT, OutputT>> address;
- private final Combine.KeyedCombineFn<? super K, InputT, AccumT, OutputT> combineFn;
- private final FlinkBroadcastStateInternals<K> flinkStateInternals;
-
- FlinkKeyedCombiningState(
- DefaultOperatorStateBackend flinkStateBackend,
- StateTag<? super K, CombiningState<InputT, AccumT, OutputT>> address,
- Combine.KeyedCombineFn<? super K, InputT, AccumT, OutputT> combineFn,
- StateNamespace namespace,
- Coder<AccumT> accumCoder,
- FlinkBroadcastStateInternals<K> flinkStateInternals) {
- super(flinkStateBackend, address.getId(), namespace, accumCoder);
-
- this.namespace = namespace;
- this.address = address;
- this.combineFn = combineFn;
- this.flinkStateInternals = flinkStateInternals;
-
- }
-
- @Override
- public CombiningState<InputT, AccumT, OutputT> readLater() {
- return this;
- }
-
- @Override
- public void add(InputT value) {
- try {
- AccumT current = readInternal();
- if (current == null) {
- current = combineFn.createAccumulator(flinkStateInternals.getKey());
- }
- current = combineFn.addInput(flinkStateInternals.getKey(), current, value);
- writeInternal(current);
- } catch (Exception e) {
- throw new RuntimeException("Error adding to state." , e);
- }
- }
-
- @Override
- public void addAccum(AccumT accum) {
- try {
- AccumT current = readInternal();
- if (current == null) {
- writeInternal(accum);
- } else {
- current = combineFn.mergeAccumulators(
- flinkStateInternals.getKey(),
- Arrays.asList(current, accum));
- writeInternal(current);
- }
- } catch (Exception e) {
- throw new RuntimeException("Error adding to state.", e);
- }
- }
-
- @Override
- public AccumT getAccum() {
- try {
- return readInternal();
- } catch (Exception e) {
- throw new RuntimeException("Error reading state.", e);
- }
- }
-
- @Override
- public AccumT mergeAccumulators(Iterable<AccumT> accumulators) {
- return combineFn.mergeAccumulators(flinkStateInternals.getKey(), accumulators);
- }
-
- @Override
- public OutputT read() {
- try {
- AccumT accum = readInternal();
- if (accum != null) {
- return combineFn.extractOutput(flinkStateInternals.getKey(), accum);
- } else {
- return combineFn.extractOutput(
- flinkStateInternals.getKey(),
- combineFn.createAccumulator(flinkStateInternals.getKey()));
- }
- } catch (Exception e) {
- throw new RuntimeException("Error reading state.", e);
- }
- }
-
- @Override
- public ReadableState<Boolean> isEmpty() {
- return new ReadableState<Boolean>() {
- @Override
- public Boolean read() {
- try {
- return readInternal() == null;
- } catch (Exception e) {
- throw new RuntimeException("Error reading state.", e);
- }
-
- }
-
- @Override
- public ReadableState<Boolean> readLater() {
- return this;
- }
- };
- }
-
- @Override
- public void clear() {
- clearInternal();
- }
-
- @Override
- public boolean equals(Object o) {
- if (this == o) {
- return true;
- }
- if (o == null || getClass() != o.getClass()) {
- return false;
- }
-
- FlinkKeyedCombiningState<?, ?, ?, ?> that =
- (FlinkKeyedCombiningState<?, ?, ?, ?>) o;
-
- return namespace.equals(that.namespace) && address.equals(that.address);
-
- }
-
- @Override
- public int hashCode() {
- int result = namespace.hashCode();
- result = 31 * result + address.hashCode();
- return result;
- }
- }
-
- private class FlinkCombiningStateWithContext<K, InputT, AccumT, OutputT>
- extends AbstractBroadcastState<AccumT>
- implements CombiningState<InputT, AccumT, OutputT> {
-
- private final StateNamespace namespace;
- private final StateTag<? super K, CombiningState<InputT, AccumT, OutputT>> address;
- private final CombineWithContext.KeyedCombineFnWithContext<
- ? super K, InputT, AccumT, OutputT> combineFn;
- private final FlinkBroadcastStateInternals<K> flinkStateInternals;
- private final CombineWithContext.Context context;
-
- FlinkCombiningStateWithContext(
- DefaultOperatorStateBackend flinkStateBackend,
- StateTag<? super K, CombiningState<InputT, AccumT, OutputT>> address,
- CombineWithContext.KeyedCombineFnWithContext<
- ? super K, InputT, AccumT, OutputT> combineFn,
- StateNamespace namespace,
- Coder<AccumT> accumCoder,
- FlinkBroadcastStateInternals<K> flinkStateInternals,
- CombineWithContext.Context context) {
- super(flinkStateBackend, address.getId(), namespace, accumCoder);
-
- this.namespace = namespace;
- this.address = address;
- this.combineFn = combineFn;
- this.flinkStateInternals = flinkStateInternals;
- this.context = context;
-
- }
-
- @Override
- public CombiningState<InputT, AccumT, OutputT> readLater() {
- return this;
- }
-
- @Override
- public void add(InputT value) {
- try {
- AccumT current = readInternal();
- if (current == null) {
- current = combineFn.createAccumulator(flinkStateInternals.getKey(), context);
- }
- current = combineFn.addInput(flinkStateInternals.getKey(), current, value, context);
- writeInternal(current);
- } catch (Exception e) {
- throw new RuntimeException("Error adding to state." , e);
- }
- }
-
- @Override
- public void addAccum(AccumT accum) {
- try {
-
- AccumT current = readInternal();
- if (current == null) {
- writeInternal(accum);
- } else {
- current = combineFn.mergeAccumulators(
- flinkStateInternals.getKey(),
- Arrays.asList(current, accum),
- context);
- writeInternal(current);
- }
- } catch (Exception e) {
- throw new RuntimeException("Error adding to state.", e);
- }
- }
-
- @Override
- public AccumT getAccum() {
- try {
- return readInternal();
- } catch (Exception e) {
- throw new RuntimeException("Error reading state.", e);
- }
- }
-
- @Override
- public AccumT mergeAccumulators(Iterable<AccumT> accumulators) {
- return combineFn.mergeAccumulators(flinkStateInternals.getKey(), accumulators, context);
- }
-
- @Override
- public OutputT read() {
- try {
- AccumT accum = readInternal();
- return combineFn.extractOutput(flinkStateInternals.getKey(), accum, context);
- } catch (Exception e) {
- throw new RuntimeException("Error reading state.", e);
- }
- }
-
- @Override
- public ReadableState<Boolean> isEmpty() {
- return new ReadableState<Boolean>() {
- @Override
- public Boolean read() {
- try {
- return readInternal() == null;
- } catch (Exception e) {
- throw new RuntimeException("Error reading state.", e);
- }
-
- }
-
- @Override
- public ReadableState<Boolean> readLater() {
- return this;
- }
- };
- }
-
- @Override
- public void clear() {
- clearInternal();
- }
-
- @Override
- public boolean equals(Object o) {
- if (this == o) {
- return true;
- }
- if (o == null || getClass() != o.getClass()) {
- return false;
- }
-
- FlinkCombiningStateWithContext<?, ?, ?, ?> that =
- (FlinkCombiningStateWithContext<?, ?, ?, ?>) o;
-
- return namespace.equals(that.namespace) && address.equals(that.address);
-
- }
-
- @Override
- public int hashCode() {
- int result = namespace.hashCode();
- result = 31 * result + address.hashCode();
- return result;
- }
- }
-
-}
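The AbstractBroadcastState above keeps all namespaces of one state cell in a single Map<String, T> and lets only subtask 0 persist it, with the other subtasks falling back to an in-memory copy after an initial restore. A rough, self-contained sketch of that idea follows; NamespacedBroadcastCell and its Store interface are hypothetical placeholders for Flink's broadcast operator state, not real APIs.

import java.util.HashMap;
import java.util.Map;

// Sketch of the namespace-map pattern: every namespace of one state cell shares a
// single map, and only subtask 0 writes the shared, checkpointed copy.
public class NamespacedBroadcastCell<T> {

  // Placeholder for the backend holding the single broadcast copy of the map.
  public interface Store<V> {
    Map<String, V> load() throws Exception;
    void save(Map<String, V> map) throws Exception;
  }

  private final int subtaskIndex;
  private final Store<T> store;
  private Map<String, T> localCopy; // in-memory copy used on subtasks other than 0

  public NamespacedBroadcastCell(int subtaskIndex, Store<T> store) {
    this.subtaskIndex = subtaskIndex;
    this.store = store;
  }

  public T read(String namespace) throws Exception {
    Map<String, T> map = currentMap();
    return map == null ? null : map.get(namespace);
  }

  public void write(String namespace, T value) throws Exception {
    Map<String, T> map = currentMap();
    if (map == null) {
      map = new HashMap<>();
    }
    map.put(namespace, value);
    if (subtaskIndex == 0) {
      store.save(map); // only subtask 0 writes the shared, checkpointed copy
    } else {
      localCopy = map; // other subtasks only update their in-memory copy
    }
  }

  private Map<String, T> currentMap() throws Exception {
    if (subtaskIndex == 0) {
      return store.load();
    }
    if (localCopy == null) {
      localCopy = store.load(); // may have been restored from subtask 0's checkpoint
    }
    return localCopy;
  }
}

A full implementation also clears the restored broadcast copy on non-zero subtasks once it has been cached locally, as getMap() does above.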
[30/50] [abbrv] beam git commit: [BEAM-1994] Remove Flink examples
package
Posted by dh...@apache.org.
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/src/main/java/org/apache/beam/runners/flink/FlinkStreamingTransformTranslators.java
----------------------------------------------------------------------
diff --git a/runners/flink/src/main/java/org/apache/beam/runners/flink/FlinkStreamingTransformTranslators.java b/runners/flink/src/main/java/org/apache/beam/runners/flink/FlinkStreamingTransformTranslators.java
new file mode 100644
index 0000000..123d5e7
--- /dev/null
+++ b/runners/flink/src/main/java/org/apache/beam/runners/flink/FlinkStreamingTransformTranslators.java
@@ -0,0 +1,1044 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.beam.runners.flink;
+
+import static com.google.common.base.Preconditions.checkArgument;
+
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import org.apache.beam.runners.core.ElementAndRestriction;
+import org.apache.beam.runners.core.KeyedWorkItem;
+import org.apache.beam.runners.core.SplittableParDo;
+import org.apache.beam.runners.core.SystemReduceFn;
+import org.apache.beam.runners.flink.translation.functions.FlinkAssignWindows;
+import org.apache.beam.runners.flink.translation.types.CoderTypeInformation;
+import org.apache.beam.runners.flink.translation.wrappers.streaming.DoFnOperator;
+import org.apache.beam.runners.flink.translation.wrappers.streaming.KvToByteBufferKeySelector;
+import org.apache.beam.runners.flink.translation.wrappers.streaming.SingletonKeyedWorkItem;
+import org.apache.beam.runners.flink.translation.wrappers.streaming.SingletonKeyedWorkItemCoder;
+import org.apache.beam.runners.flink.translation.wrappers.streaming.SplittableDoFnOperator;
+import org.apache.beam.runners.flink.translation.wrappers.streaming.WindowDoFnOperator;
+import org.apache.beam.runners.flink.translation.wrappers.streaming.WorkItemKeySelector;
+import org.apache.beam.runners.flink.translation.wrappers.streaming.io.BoundedSourceWrapper;
+import org.apache.beam.runners.flink.translation.wrappers.streaming.io.UnboundedSourceWrapper;
+import org.apache.beam.sdk.coders.Coder;
+import org.apache.beam.sdk.coders.KvCoder;
+import org.apache.beam.sdk.coders.StringUtf8Coder;
+import org.apache.beam.sdk.coders.VoidCoder;
+import org.apache.beam.sdk.io.Read;
+import org.apache.beam.sdk.io.TextIO;
+import org.apache.beam.sdk.transforms.Combine;
+import org.apache.beam.sdk.transforms.DoFn;
+import org.apache.beam.sdk.transforms.Flatten;
+import org.apache.beam.sdk.transforms.GroupByKey;
+import org.apache.beam.sdk.transforms.PTransform;
+import org.apache.beam.sdk.transforms.ParDo;
+import org.apache.beam.sdk.transforms.join.RawUnionValue;
+import org.apache.beam.sdk.transforms.join.UnionCoder;
+import org.apache.beam.sdk.transforms.reflect.DoFnSignature;
+import org.apache.beam.sdk.transforms.reflect.DoFnSignatures;
+import org.apache.beam.sdk.transforms.splittabledofn.RestrictionTracker;
+import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
+import org.apache.beam.sdk.transforms.windowing.GlobalWindow;
+import org.apache.beam.sdk.transforms.windowing.Window;
+import org.apache.beam.sdk.transforms.windowing.WindowFn;
+import org.apache.beam.sdk.util.AppliedCombineFn;
+import org.apache.beam.sdk.util.Reshuffle;
+import org.apache.beam.sdk.util.WindowedValue;
+import org.apache.beam.sdk.util.WindowingStrategy;
+import org.apache.beam.sdk.values.KV;
+import org.apache.beam.sdk.values.PCollection;
+import org.apache.beam.sdk.values.PCollectionView;
+import org.apache.beam.sdk.values.PValue;
+import org.apache.beam.sdk.values.TupleTag;
+import org.apache.flink.api.common.functions.FlatMapFunction;
+import org.apache.flink.api.common.functions.MapFunction;
+import org.apache.flink.api.common.functions.RichFlatMapFunction;
+import org.apache.flink.api.common.typeinfo.TypeInformation;
+import org.apache.flink.api.java.tuple.Tuple2;
+import org.apache.flink.core.fs.FileSystem;
+import org.apache.flink.streaming.api.collector.selector.OutputSelector;
+import org.apache.flink.streaming.api.datastream.DataStream;
+import org.apache.flink.streaming.api.datastream.DataStreamSink;
+import org.apache.flink.streaming.api.datastream.DataStreamSource;
+import org.apache.flink.streaming.api.datastream.KeyedStream;
+import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
+import org.apache.flink.streaming.api.datastream.SplitStream;
+import org.apache.flink.streaming.api.operators.OneInputStreamOperator;
+import org.apache.flink.streaming.api.operators.TwoInputStreamOperator;
+import org.apache.flink.streaming.api.transformations.TwoInputTransformation;
+import org.apache.flink.util.Collector;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * This class contains all the mappings between Beam and Flink
+ * <b>streaming</b> transformations. The {@link FlinkStreamingPipelineTranslator}
+ * traverses the Beam job and comes here to translate the encountered Beam transformations
+ * into Flink ones, based on the mappings available in this class.
+ */
+class FlinkStreamingTransformTranslators {
+
+ // --------------------------------------------------------------------------------------------
+ // Transform Translator Registry
+ // --------------------------------------------------------------------------------------------
+
+ @SuppressWarnings("rawtypes")
+ private static final Map<
+ Class<? extends PTransform>,
+ FlinkStreamingPipelineTranslator.StreamTransformTranslator> TRANSLATORS = new HashMap<>();
+
+ // here you can find all the available translators.
+ static {
+ TRANSLATORS.put(Read.Bounded.class, new BoundedReadSourceTranslator());
+ TRANSLATORS.put(Read.Unbounded.class, new UnboundedReadSourceTranslator());
+ TRANSLATORS.put(TextIO.Write.Bound.class, new TextIOWriteBoundStreamingTranslator());
+
+ TRANSLATORS.put(ParDo.MultiOutput.class, new ParDoStreamingTranslator());
+ TRANSLATORS.put(
+ SplittableParDo.ProcessElements.class, new SplittableProcessElementsStreamingTranslator());
+ TRANSLATORS.put(
+ SplittableParDo.GBKIntoKeyedWorkItems.class, new GBKIntoKeyedWorkItemsTranslator());
+
+
+ TRANSLATORS.put(Window.Assign.class, new WindowAssignTranslator());
+ TRANSLATORS.put(Flatten.PCollections.class, new FlattenPCollectionTranslator());
+ TRANSLATORS.put(
+ FlinkStreamingViewOverrides.CreateFlinkPCollectionView.class,
+ new CreateViewStreamingTranslator());
+
+ TRANSLATORS.put(Reshuffle.class, new ReshuffleTranslatorStreaming());
+ TRANSLATORS.put(GroupByKey.class, new GroupByKeyTranslator());
+ TRANSLATORS.put(Combine.PerKey.class, new CombinePerKeyTranslator());
+ }
+
+ public static FlinkStreamingPipelineTranslator.StreamTransformTranslator<?> getTranslator(
+ PTransform<?, ?> transform) {
+ return TRANSLATORS.get(transform.getClass());
+ }
+
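+
The registry above dispatches on the concrete PTransform subclass: the static block fills a map from transform class to translator, and getTranslator simply looks up the class of the instance, returning null when a transform has no streaming translation. A small generic sketch of that dispatch, with a made-up Translator interface standing in for StreamTransformTranslator:

import java.util.HashMap;
import java.util.Map;

// Generic sketch of class-keyed translator dispatch; Translator is a stand-in type.
class TranslatorRegistry {

  interface Translator<T> {
    void translate(T transform);
  }

  private final Map<Class<?>, Translator<?>> translators = new HashMap<>();

  <T> void register(Class<T> transformClass, Translator<T> translator) {
    translators.put(transformClass, translator);
  }

  // Returns null when nothing is registered for the class, mirroring getTranslator above.
  @SuppressWarnings("unchecked")
  <T> Translator<T> lookup(T transform) {
    return (Translator<T>) translators.get(transform.getClass());
  }
}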
+ // --------------------------------------------------------------------------------------------
+ // Transformation Implementations
+ // --------------------------------------------------------------------------------------------
+
+ private static class TextIOWriteBoundStreamingTranslator
+ extends FlinkStreamingPipelineTranslator.StreamTransformTranslator<TextIO.Write.Bound> {
+
+ private static final Logger LOG =
+ LoggerFactory.getLogger(TextIOWriteBoundStreamingTranslator.class);
+
+ @Override
+ public void translateNode(
+ TextIO.Write.Bound transform,
+ FlinkStreamingTranslationContext context) {
+ PValue input = context.getInput(transform);
+ DataStream<WindowedValue<String>> inputDataStream = context.getInputDataStream(input);
+
+ String filenamePrefix = transform.getFilenamePrefix();
+ String filenameSuffix = transform.getFilenameSuffix();
+ boolean needsValidation = transform.needsValidation();
+ int numShards = transform.getNumShards();
+ String shardNameTemplate = transform.getShardNameTemplate();
+
+ // TODO: Implement these. We need Flink support for this.
+ LOG.warn(
+ "Translation of TextIO.Write.needsValidation not yet supported. Is: {}.",
+ needsValidation);
+ LOG.warn(
+ "Translation of TextIO.Write.filenameSuffix not yet supported. Is: {}.",
+ filenameSuffix);
+ LOG.warn(
+ "Translation of TextIO.Write.shardNameTemplate not yet supported. Is: {}.",
+ shardNameTemplate);
+
+ DataStream<String> dataSink = inputDataStream
+ .flatMap(new FlatMapFunction<WindowedValue<String>, String>() {
+ @Override
+ public void flatMap(
+ WindowedValue<String> value,
+ Collector<String> out)
+ throws Exception {
+ out.collect(value.getValue());
+ }
+ });
+ DataStreamSink<String> output =
+ dataSink.writeAsText(filenamePrefix, FileSystem.WriteMode.OVERWRITE);
+
+ if (numShards > 0) {
+ output.setParallelism(numShards);
+ }
+ }
+ }
+
+ private static class UnboundedReadSourceTranslator<T>
+ extends FlinkStreamingPipelineTranslator.StreamTransformTranslator<Read.Unbounded<T>> {
+
+ @Override
+ public void translateNode(
+ Read.Unbounded<T> transform,
+ FlinkStreamingTranslationContext context) {
+ PCollection<T> output = context.getOutput(transform);
+
+ TypeInformation<WindowedValue<T>> outputTypeInfo =
+ context.getTypeInfo(context.getOutput(transform));
+
+ DataStream<WindowedValue<T>> source;
+ try {
+ UnboundedSourceWrapper<T, ?> sourceWrapper =
+ new UnboundedSourceWrapper<>(
+ context.getPipelineOptions(),
+ transform.getSource(),
+ context.getExecutionEnvironment().getParallelism());
+ source = context
+ .getExecutionEnvironment()
+ .addSource(sourceWrapper).name(transform.getName()).returns(outputTypeInfo);
+ } catch (Exception e) {
+ throw new RuntimeException(
+ "Error while translating UnboundedSource: " + transform.getSource(), e);
+ }
+
+ context.setOutputDataStream(output, source);
+ }
+ }
+
+ private static class BoundedReadSourceTranslator<T>
+ extends FlinkStreamingPipelineTranslator.StreamTransformTranslator<Read.Bounded<T>> {
+
+ @Override
+ public void translateNode(
+ Read.Bounded<T> transform,
+ FlinkStreamingTranslationContext context) {
+ PCollection<T> output = context.getOutput(transform);
+
+ TypeInformation<WindowedValue<T>> outputTypeInfo =
+ context.getTypeInfo(context.getOutput(transform));
+
+
+ DataStream<WindowedValue<T>> source;
+ try {
+ BoundedSourceWrapper<T> sourceWrapper =
+ new BoundedSourceWrapper<>(
+ context.getPipelineOptions(),
+ transform.getSource(),
+ context.getExecutionEnvironment().getParallelism());
+ source = context
+ .getExecutionEnvironment()
+ .addSource(sourceWrapper).name(transform.getName()).returns(outputTypeInfo);
+ } catch (Exception e) {
+ throw new RuntimeException(
+ "Error while translating BoundedSource: " + transform.getSource(), e);
+ }
+
+ context.setOutputDataStream(output, source);
+ }
+ }
+
+ /**
+ * Wraps each element in a {@link RawUnionValue} with the given tag id.
+ */
+ private static class ToRawUnion<T> implements MapFunction<T, RawUnionValue> {
+ private final int intTag;
+
+ public ToRawUnion(int intTag) {
+ this.intTag = intTag;
+ }
+
+ @Override
+ public RawUnionValue map(T o) throws Exception {
+ return new RawUnionValue(intTag, o);
+ }
+ }
+
+ private static Tuple2<Map<Integer, PCollectionView<?>>, DataStream<RawUnionValue>>
+ transformSideInputs(
+ Collection<PCollectionView<?>> sideInputs,
+ FlinkStreamingTranslationContext context) {
+
+ // collect all side inputs
+ Map<TupleTag<?>, Integer> tagToIntMapping = new HashMap<>();
+ Map<Integer, PCollectionView<?>> intToViewMapping = new HashMap<>();
+ int count = 0;
+ for (PCollectionView<?> sideInput: sideInputs) {
+ TupleTag<?> tag = sideInput.getTagInternal();
+ intToViewMapping.put(count, sideInput);
+ tagToIntMapping.put(tag, count);
+ count++;
+ Coder<Iterable<WindowedValue<?>>> coder = sideInput.getCoderInternal();
+ }
+
+
+ List<Coder<?>> inputCoders = new ArrayList<>();
+ for (PCollectionView<?> sideInput: sideInputs) {
+ DataStream<Object> sideInputStream = context.getInputDataStream(sideInput);
+ TypeInformation<Object> tpe = sideInputStream.getType();
+ if (!(tpe instanceof CoderTypeInformation)) {
+ throw new IllegalStateException(
+ "Input Stream TypeInformation is no CoderTypeInformation.");
+ }
+
+ Coder<?> coder = ((CoderTypeInformation) tpe).getCoder();
+ inputCoders.add(coder);
+ }
+
+ UnionCoder unionCoder = UnionCoder.of(inputCoders);
+
+ CoderTypeInformation<RawUnionValue> unionTypeInformation =
+ new CoderTypeInformation<>(unionCoder);
+
+ // transform each side input to RawUnionValue and union them
+ DataStream<RawUnionValue> sideInputUnion = null;
+
+ for (PCollectionView<?> sideInput: sideInputs) {
+ TupleTag<?> tag = sideInput.getTagInternal();
+ final int intTag = tagToIntMapping.get(tag);
+ DataStream<Object> sideInputStream = context.getInputDataStream(sideInput);
+ DataStream<RawUnionValue> unionValueStream =
+ sideInputStream.map(new ToRawUnion<>(intTag)).returns(unionTypeInformation);
+
+ if (sideInputUnion == null) {
+ sideInputUnion = unionValueStream;
+ } else {
+ sideInputUnion = sideInputUnion.union(unionValueStream);
+ }
+ }
+
+ if (sideInputUnion == null) {
+ throw new IllegalStateException("No unioned side inputs, this indicates a bug.");
+ }
+
+ return new Tuple2<>(intToViewMapping, sideInputUnion);
+ }
+
+ /**
+ * Helper for translating {@link ParDo.MultiOutput} and {@link SplittableParDo.ProcessElements}.
+ */
+ static class ParDoTranslationHelper {
+
+ interface DoFnOperatorFactory<InputT, OutputT> {
+ DoFnOperator<InputT, OutputT, RawUnionValue> createDoFnOperator(
+ DoFn<InputT, OutputT> doFn,
+ List<PCollectionView<?>> sideInputs,
+ TupleTag<OutputT> mainOutputTag,
+ List<TupleTag<?>> additionalOutputTags,
+ FlinkStreamingTranslationContext context,
+ WindowingStrategy<?, ?> windowingStrategy,
+ Map<TupleTag<?>, Integer> tagsToLabels,
+ Coder<WindowedValue<InputT>> inputCoder,
+ Coder keyCoder,
+ Map<Integer, PCollectionView<?>> transformedSideInputs);
+ }
+
+ static <InputT, OutputT> void translateParDo(
+ String transformName,
+ DoFn<InputT, OutputT> doFn,
+ PCollection<InputT> input,
+ List<PCollectionView<?>> sideInputs,
+ Map<TupleTag<?>, PValue> outputs,
+ TupleTag<OutputT> mainOutputTag,
+ List<TupleTag<?>> additionalOutputTags,
+ FlinkStreamingTranslationContext context,
+ DoFnOperatorFactory<InputT, OutputT> doFnOperatorFactory) {
+
+ // we assume that the transformation does not change the windowing strategy.
+ WindowingStrategy<?, ?> windowingStrategy = input.getWindowingStrategy();
+
+ Map<TupleTag<?>, Integer> tagsToLabels =
+ transformTupleTagsToLabels(mainOutputTag, outputs);
+
+ SingleOutputStreamOperator<RawUnionValue> unionOutputStream;
+
+ Coder<WindowedValue<InputT>> inputCoder = context.getCoder(input);
+
+ DataStream<WindowedValue<InputT>> inputDataStream = context.getInputDataStream(input);
+
+ Coder keyCoder = null;
+ boolean stateful = false;
+ DoFnSignature signature = DoFnSignatures.getSignature(doFn.getClass());
+ if (signature.stateDeclarations().size() > 0
+ || signature.timerDeclarations().size() > 0) {
+ // Based on the fact that the signature is stateful, DoFnSignatures ensures
+ // that it is also keyed
+ keyCoder = ((KvCoder) input.getCoder()).getKeyCoder();
+ inputDataStream = inputDataStream.keyBy(new KvToByteBufferKeySelector(keyCoder));
+ stateful = true;
+ } else if (doFn instanceof SplittableParDo.ProcessFn) {
+ // we know that it is keyed on String
+ keyCoder = StringUtf8Coder.of();
+ stateful = true;
+ }
+
+ if (sideInputs.isEmpty()) {
+ DoFnOperator<InputT, OutputT, RawUnionValue> doFnOperator =
+ doFnOperatorFactory.createDoFnOperator(
+ doFn,
+ sideInputs,
+ mainOutputTag,
+ additionalOutputTags,
+ context,
+ windowingStrategy,
+ tagsToLabels,
+ inputCoder,
+ keyCoder,
+ new HashMap<Integer, PCollectionView<?>>() /* side-input mapping */);
+
+ UnionCoder outputUnionCoder = createUnionCoder(outputs);
+
+ CoderTypeInformation<RawUnionValue> outputUnionTypeInformation =
+ new CoderTypeInformation<>(outputUnionCoder);
+
+ unionOutputStream = inputDataStream
+ .transform(transformName, outputUnionTypeInformation, doFnOperator);
+
+ } else {
+ Tuple2<Map<Integer, PCollectionView<?>>, DataStream<RawUnionValue>> transformedSideInputs =
+ transformSideInputs(sideInputs, context);
+
+ DoFnOperator<InputT, OutputT, RawUnionValue> doFnOperator =
+ doFnOperatorFactory.createDoFnOperator(
+ doFn,
+ sideInputs,
+ mainOutputTag,
+ additionalOutputTags,
+ context,
+ windowingStrategy,
+ tagsToLabels,
+ inputCoder,
+ keyCoder,
+ transformedSideInputs.f0);
+
+ UnionCoder outputUnionCoder = createUnionCoder(outputs);
+
+ CoderTypeInformation<RawUnionValue> outputUnionTypeInformation =
+ new CoderTypeInformation<>(outputUnionCoder);
+
+ if (stateful) {
+ // we have to manually construct the two-input transform because we're not
+ // allowed to have only one input keyed, normally.
+ KeyedStream keyedStream = (KeyedStream<?, InputT>) inputDataStream;
+ TwoInputTransformation<
+ WindowedValue<KV<?, InputT>>,
+ RawUnionValue,
+ WindowedValue<OutputT>> rawFlinkTransform = new TwoInputTransformation(
+ keyedStream.getTransformation(),
+ transformedSideInputs.f1.broadcast().getTransformation(),
+ transformName,
+ (TwoInputStreamOperator) doFnOperator,
+ outputUnionTypeInformation,
+ keyedStream.getParallelism());
+
+ rawFlinkTransform.setStateKeyType(keyedStream.getKeyType());
+ rawFlinkTransform.setStateKeySelectors(keyedStream.getKeySelector(), null);
+
+ unionOutputStream = new SingleOutputStreamOperator(
+ keyedStream.getExecutionEnvironment(),
+ rawFlinkTransform) {}; // we have to cheat around the ctor being protected
+
+ keyedStream.getExecutionEnvironment().addOperator(rawFlinkTransform);
+
+ } else {
+ unionOutputStream = inputDataStream
+ .connect(transformedSideInputs.f1.broadcast())
+ .transform(transformName, outputUnionTypeInformation, doFnOperator);
+ }
+ }
+
+ SplitStream<RawUnionValue> splitStream = unionOutputStream
+ .split(new OutputSelector<RawUnionValue>() {
+ @Override
+ public Iterable<String> select(RawUnionValue value) {
+ return Collections.singletonList(Integer.toString(value.getUnionTag()));
+ }
+ });
+
+ for (Entry<TupleTag<?>, PValue> output : outputs.entrySet()) {
+ final int outputTag = tagsToLabels.get(output.getKey());
+
+ TypeInformation outputTypeInfo = context.getTypeInfo((PCollection<?>) output.getValue());
+
+ @SuppressWarnings("unchecked")
+ DataStream unwrapped = splitStream.select(String.valueOf(outputTag))
+ .flatMap(new FlatMapFunction<RawUnionValue, Object>() {
+ @Override
+ public void flatMap(RawUnionValue value, Collector<Object> out) throws Exception {
+ out.collect(value.getValue());
+ }
+ }).returns(outputTypeInfo);
+
+ context.setOutputDataStream(output.getValue(), unwrapped);
+ }
+ }
+
+ private static Map<TupleTag<?>, Integer> transformTupleTagsToLabels(
+ TupleTag<?> mainTag,
+ Map<TupleTag<?>, PValue> allTaggedValues) {
+
+ Map<TupleTag<?>, Integer> tagToLabelMap = Maps.newHashMap();
+ int count = 0;
+ tagToLabelMap.put(mainTag, count++);
+ for (TupleTag<?> key : allTaggedValues.keySet()) {
+ if (!tagToLabelMap.containsKey(key)) {
+ tagToLabelMap.put(key, count++);
+ }
+ }
+ return tagToLabelMap;
+ }
+
+ private static UnionCoder createUnionCoder(Map<TupleTag<?>, PValue> taggedCollections) {
+ List<Coder<?>> outputCoders = Lists.newArrayList();
+ for (PValue taggedColl : taggedCollections.values()) {
+ checkArgument(
+ taggedColl instanceof PCollection,
+ "A UnionCoder can only be created for a collection of tagged %ss. Got %s",
+ PCollection.class.getSimpleName(),
+ taggedColl.getClass().getSimpleName());
+ PCollection<?> coll = (PCollection<?>) taggedColl;
+ WindowedValue.FullWindowedValueCoder<?> windowedValueCoder =
+ WindowedValue.getFullCoder(
+ coll.getCoder(),
+ coll.getWindowingStrategy().getWindowFn().windowCoder());
+ outputCoders.add(windowedValueCoder);
+ }
+ return UnionCoder.of(outputCoders);
+ }
+ }
+
+ private static class ParDoStreamingTranslator<InputT, OutputT>
+ extends FlinkStreamingPipelineTranslator.StreamTransformTranslator<
+ ParDo.MultiOutput<InputT, OutputT>> {
+
+ @Override
+ public void translateNode(
+ ParDo.MultiOutput<InputT, OutputT> transform,
+ FlinkStreamingTranslationContext context) {
+
+ ParDoTranslationHelper.translateParDo(
+ transform.getName(),
+ transform.getFn(),
+ (PCollection<InputT>) context.getInput(transform),
+ transform.getSideInputs(),
+ context.getOutputs(transform),
+ transform.getMainOutputTag(),
+ transform.getAdditionalOutputTags().getAll(),
+ context,
+ new ParDoTranslationHelper.DoFnOperatorFactory<InputT, OutputT>() {
+ @Override
+ public DoFnOperator<InputT, OutputT, RawUnionValue> createDoFnOperator(
+ DoFn<InputT, OutputT> doFn,
+ List<PCollectionView<?>> sideInputs,
+ TupleTag<OutputT> mainOutputTag,
+ List<TupleTag<?>> additionalOutputTags,
+ FlinkStreamingTranslationContext context,
+ WindowingStrategy<?, ?> windowingStrategy,
+ Map<TupleTag<?>, Integer> tagsToLabels,
+ Coder<WindowedValue<InputT>> inputCoder,
+ Coder keyCoder,
+ Map<Integer, PCollectionView<?>> transformedSideInputs) {
+ return new DoFnOperator<>(
+ doFn,
+ inputCoder,
+ mainOutputTag,
+ additionalOutputTags,
+ new DoFnOperator.MultiOutputOutputManagerFactory(tagsToLabels),
+ windowingStrategy,
+ transformedSideInputs,
+ sideInputs,
+ context.getPipelineOptions(),
+ keyCoder);
+ }
+ });
+ }
+ }
+
+ private static class SplittableProcessElementsStreamingTranslator<
+ InputT, OutputT, RestrictionT, TrackerT extends RestrictionTracker<RestrictionT>>
+ extends FlinkStreamingPipelineTranslator.StreamTransformTranslator<
+ SplittableParDo.ProcessElements<InputT, OutputT, RestrictionT, TrackerT>> {
+
+ @Override
+ public void translateNode(
+ SplittableParDo.ProcessElements<InputT, OutputT, RestrictionT, TrackerT> transform,
+ FlinkStreamingTranslationContext context) {
+
+ ParDoTranslationHelper.translateParDo(
+ transform.getName(),
+ transform.newProcessFn(transform.getFn()),
+ (PCollection<KeyedWorkItem<String, ElementAndRestriction<InputT, RestrictionT>>>)
+ context.getInput(transform),
+ transform.getSideInputs(),
+ context.getOutputs(transform),
+ transform.getMainOutputTag(),
+ transform.getAdditionalOutputTags().getAll(),
+ context,
+ new ParDoTranslationHelper.DoFnOperatorFactory<
+ KeyedWorkItem<String, ElementAndRestriction<InputT, RestrictionT>>, OutputT>() {
+ @Override
+ public DoFnOperator<
+ KeyedWorkItem<String, ElementAndRestriction<InputT, RestrictionT>>,
+ OutputT,
+ RawUnionValue> createDoFnOperator(
+ DoFn<
+ KeyedWorkItem<String, ElementAndRestriction<InputT, RestrictionT>>,
+ OutputT> doFn,
+ List<PCollectionView<?>> sideInputs,
+ TupleTag<OutputT> mainOutputTag,
+ List<TupleTag<?>> additionalOutputTags,
+ FlinkStreamingTranslationContext context,
+ WindowingStrategy<?, ?> windowingStrategy,
+ Map<TupleTag<?>, Integer> tagsToLabels,
+ Coder<
+ WindowedValue<
+ KeyedWorkItem<
+ String,
+ ElementAndRestriction<InputT, RestrictionT>>>> inputCoder,
+ Coder keyCoder,
+ Map<Integer, PCollectionView<?>> transformedSideInputs) {
+ return new SplittableDoFnOperator<>(
+ doFn,
+ inputCoder,
+ mainOutputTag,
+ additionalOutputTags,
+ new DoFnOperator.MultiOutputOutputManagerFactory(tagsToLabels),
+ windowingStrategy,
+ transformedSideInputs,
+ sideInputs,
+ context.getPipelineOptions(),
+ keyCoder);
+ }
+ });
+ }
+ }
+
+ private static class CreateViewStreamingTranslator<ElemT, ViewT>
+ extends FlinkStreamingPipelineTranslator.StreamTransformTranslator<
+ FlinkStreamingViewOverrides.CreateFlinkPCollectionView<ElemT, ViewT>> {
+
+ @Override
+ public void translateNode(
+ FlinkStreamingViewOverrides.CreateFlinkPCollectionView<ElemT, ViewT> transform,
+ FlinkStreamingTranslationContext context) {
+ // just forward
+ DataStream<WindowedValue<List<ElemT>>> inputDataSet =
+ context.getInputDataStream(context.getInput(transform));
+
+ PCollectionView<ViewT> view = context.getOutput(transform);
+
+ context.setOutputDataStream(view, inputDataSet);
+ }
+ }
+
+ private static class WindowAssignTranslator<T>
+ extends FlinkStreamingPipelineTranslator.StreamTransformTranslator<Window.Assign<T>> {
+
+ @Override
+ public void translateNode(
+ Window.Assign<T> transform,
+ FlinkStreamingTranslationContext context) {
+
+ @SuppressWarnings("unchecked")
+ WindowingStrategy<T, BoundedWindow> windowingStrategy =
+ (WindowingStrategy<T, BoundedWindow>)
+ context.getOutput(transform).getWindowingStrategy();
+
+ TypeInformation<WindowedValue<T>> typeInfo =
+ context.getTypeInfo(context.getOutput(transform));
+
+ DataStream<WindowedValue<T>> inputDataStream =
+ context.getInputDataStream(context.getInput(transform));
+
+ WindowFn<T, ? extends BoundedWindow> windowFn = windowingStrategy.getWindowFn();
+
+ FlinkAssignWindows<T, ? extends BoundedWindow> assignWindowsFunction =
+ new FlinkAssignWindows<>(windowFn);
+
+ SingleOutputStreamOperator<WindowedValue<T>> outputDataStream = inputDataStream
+ .flatMap(assignWindowsFunction)
+ .name(context.getOutput(transform).getName())
+ .returns(typeInfo);
+
+ context.setOutputDataStream(context.getOutput(transform), outputDataStream);
+ }
+ }
+
+ private static class ReshuffleTranslatorStreaming<K, InputT>
+ extends FlinkStreamingPipelineTranslator.StreamTransformTranslator<Reshuffle<K, InputT>> {
+
+ @Override
+ public void translateNode(
+ Reshuffle<K, InputT> transform,
+ FlinkStreamingTranslationContext context) {
+
+ DataStream<WindowedValue<KV<K, InputT>>> inputDataSet =
+ context.getInputDataStream(context.getInput(transform));
+
+ context.setOutputDataStream(context.getOutput(transform), inputDataSet.rebalance());
+
+ }
+ }
+
+
+ private static class GroupByKeyTranslator<K, InputT>
+ extends FlinkStreamingPipelineTranslator.StreamTransformTranslator<GroupByKey<K, InputT>> {
+
+ @Override
+ public void translateNode(
+ GroupByKey<K, InputT> transform,
+ FlinkStreamingTranslationContext context) {
+
+ PCollection<KV<K, InputT>> input = context.getInput(transform);
+
+ @SuppressWarnings("unchecked")
+ WindowingStrategy<?, BoundedWindow> windowingStrategy =
+ (WindowingStrategy<?, BoundedWindow>) input.getWindowingStrategy();
+
+ KvCoder<K, InputT> inputKvCoder = (KvCoder<K, InputT>) input.getCoder();
+
+ SingletonKeyedWorkItemCoder<K, InputT> workItemCoder = SingletonKeyedWorkItemCoder.of(
+ inputKvCoder.getKeyCoder(),
+ inputKvCoder.getValueCoder(),
+ input.getWindowingStrategy().getWindowFn().windowCoder());
+
+ DataStream<WindowedValue<KV<K, InputT>>> inputDataStream = context.getInputDataStream(input);
+
+ WindowedValue.
+ FullWindowedValueCoder<SingletonKeyedWorkItem<K, InputT>> windowedWorkItemCoder =
+ WindowedValue.getFullCoder(
+ workItemCoder,
+ input.getWindowingStrategy().getWindowFn().windowCoder());
+
+ CoderTypeInformation<WindowedValue<SingletonKeyedWorkItem<K, InputT>>> workItemTypeInfo =
+ new CoderTypeInformation<>(windowedWorkItemCoder);
+
+ DataStream<WindowedValue<SingletonKeyedWorkItem<K, InputT>>> workItemStream =
+ inputDataStream
+ .flatMap(new ToKeyedWorkItem<K, InputT>())
+ .returns(workItemTypeInfo).name("ToKeyedWorkItem");
+
+ KeyedStream<
+ WindowedValue<
+ SingletonKeyedWorkItem<K, InputT>>, ByteBuffer> keyedWorkItemStream = workItemStream
+ .keyBy(new WorkItemKeySelector<K, InputT>(inputKvCoder.getKeyCoder()));
+
+ SystemReduceFn<K, InputT, Iterable<InputT>, Iterable<InputT>, BoundedWindow> reduceFn =
+ SystemReduceFn.buffering(inputKvCoder.getValueCoder());
+
+ TypeInformation<WindowedValue<KV<K, Iterable<InputT>>>> outputTypeInfo =
+ context.getTypeInfo(context.getOutput(transform));
+
+ DoFnOperator.DefaultOutputManagerFactory<
+ WindowedValue<KV<K, Iterable<InputT>>>> outputManagerFactory =
+ new DoFnOperator.DefaultOutputManagerFactory<>();
+
+ WindowDoFnOperator<K, InputT, Iterable<InputT>> doFnOperator =
+ new WindowDoFnOperator<>(
+ reduceFn,
+ (Coder) windowedWorkItemCoder,
+ new TupleTag<KV<K, Iterable<InputT>>>("main output"),
+ Collections.<TupleTag<?>>emptyList(),
+ outputManagerFactory,
+ windowingStrategy,
+ new HashMap<Integer, PCollectionView<?>>(), /* side-input mapping */
+ Collections.<PCollectionView<?>>emptyList(), /* side inputs */
+ context.getPipelineOptions(),
+ inputKvCoder.getKeyCoder());
+
+ // our operator expects WindowedValue<KeyedWorkItem> while our input stream
+ // is WindowedValue<SingletonKeyedWorkItem>, which is fine but Java doesn't like it ...
+ @SuppressWarnings("unchecked")
+ SingleOutputStreamOperator<WindowedValue<KV<K, Iterable<InputT>>>> outDataStream =
+ keyedWorkItemStream
+ .transform(
+ transform.getName(),
+ outputTypeInfo,
+ (OneInputStreamOperator) doFnOperator);
+
+ context.setOutputDataStream(context.getOutput(transform), outDataStream);
+
+ }
+ }
+
+ private static class CombinePerKeyTranslator<K, InputT, OutputT>
+ extends FlinkStreamingPipelineTranslator.StreamTransformTranslator<
+ Combine.PerKey<K, InputT, OutputT>> {
+
+ @Override
+ boolean canTranslate(
+ Combine.PerKey<K, InputT, OutputT> transform,
+ FlinkStreamingTranslationContext context) {
+
+ // if we have a merging window strategy and side inputs we cannot
+ // translate as a proper combine. We have to group and then run the combine
+ // over the final grouped values.
+ PCollection<KV<K, InputT>> input = context.getInput(transform);
+
+ @SuppressWarnings("unchecked")
+ WindowingStrategy<?, BoundedWindow> windowingStrategy =
+ (WindowingStrategy<?, BoundedWindow>) input.getWindowingStrategy();
+
+ return windowingStrategy.getWindowFn().isNonMerging() || transform.getSideInputs().isEmpty();
+ }
+
+ @Override
+ public void translateNode(
+ Combine.PerKey<K, InputT, OutputT> transform,
+ FlinkStreamingTranslationContext context) {
+
+ PCollection<KV<K, InputT>> input = context.getInput(transform);
+
+ @SuppressWarnings("unchecked")
+ WindowingStrategy<?, BoundedWindow> windowingStrategy =
+ (WindowingStrategy<?, BoundedWindow>) input.getWindowingStrategy();
+
+ KvCoder<K, InputT> inputKvCoder = (KvCoder<K, InputT>) input.getCoder();
+
+ SingletonKeyedWorkItemCoder<K, InputT> workItemCoder = SingletonKeyedWorkItemCoder.of(
+ inputKvCoder.getKeyCoder(),
+ inputKvCoder.getValueCoder(),
+ input.getWindowingStrategy().getWindowFn().windowCoder());
+
+ DataStream<WindowedValue<KV<K, InputT>>> inputDataStream = context.getInputDataStream(input);
+
+ WindowedValue.
+ FullWindowedValueCoder<SingletonKeyedWorkItem<K, InputT>> windowedWorkItemCoder =
+ WindowedValue.getFullCoder(
+ workItemCoder,
+ input.getWindowingStrategy().getWindowFn().windowCoder());
+
+ CoderTypeInformation<WindowedValue<SingletonKeyedWorkItem<K, InputT>>> workItemTypeInfo =
+ new CoderTypeInformation<>(windowedWorkItemCoder);
+
+ DataStream<WindowedValue<SingletonKeyedWorkItem<K, InputT>>> workItemStream =
+ inputDataStream
+ .flatMap(new ToKeyedWorkItem<K, InputT>())
+ .returns(workItemTypeInfo).name("ToKeyedWorkItem");
+
+ KeyedStream<
+ WindowedValue<
+ SingletonKeyedWorkItem<K, InputT>>, ByteBuffer> keyedWorkItemStream = workItemStream
+ .keyBy(new WorkItemKeySelector<K, InputT>(inputKvCoder.getKeyCoder()));
+
+ SystemReduceFn<K, InputT, ?, OutputT, BoundedWindow> reduceFn = SystemReduceFn.combining(
+ inputKvCoder.getKeyCoder(),
+ AppliedCombineFn.withInputCoder(
+ transform.getFn(), input.getPipeline().getCoderRegistry(), inputKvCoder));
+
+ TypeInformation<WindowedValue<KV<K, OutputT>>> outputTypeInfo =
+ context.getTypeInfo(context.getOutput(transform));
+
+ List<PCollectionView<?>> sideInputs = transform.getSideInputs();
+
+ if (sideInputs.isEmpty()) {
+
+ WindowDoFnOperator<K, InputT, OutputT> doFnOperator =
+ new WindowDoFnOperator<>(
+ reduceFn,
+ (Coder) windowedWorkItemCoder,
+ new TupleTag<KV<K, OutputT>>("main output"),
+ Collections.<TupleTag<?>>emptyList(),
+ new DoFnOperator.DefaultOutputManagerFactory<WindowedValue<KV<K, OutputT>>>(),
+ windowingStrategy,
+ new HashMap<Integer, PCollectionView<?>>(), /* side-input mapping */
+ Collections.<PCollectionView<?>>emptyList(), /* side inputs */
+ context.getPipelineOptions(),
+ inputKvCoder.getKeyCoder());
+
+ // our operator expects WindowedValue<KeyedWorkItem> while our input stream
+ // is WindowedValue<SingletonKeyedWorkItem>, which is fine but Java doesn't like it ...
+ @SuppressWarnings("unchecked")
+ SingleOutputStreamOperator<WindowedValue<KV<K, OutputT>>> outDataStream =
+ keyedWorkItemStream.transform(
+ transform.getName(), outputTypeInfo, (OneInputStreamOperator) doFnOperator);
+
+ context.setOutputDataStream(context.getOutput(transform), outDataStream);
+ } else {
+ Tuple2<Map<Integer, PCollectionView<?>>, DataStream<RawUnionValue>> transformSideInputs =
+ transformSideInputs(sideInputs, context);
+
+ WindowDoFnOperator<K, InputT, OutputT> doFnOperator =
+ new WindowDoFnOperator<>(
+ reduceFn,
+ (Coder) windowedWorkItemCoder,
+ new TupleTag<KV<K, OutputT>>("main output"),
+ Collections.<TupleTag<?>>emptyList(),
+ new DoFnOperator.DefaultOutputManagerFactory<WindowedValue<KV<K, OutputT>>>(),
+ windowingStrategy,
+ transformSideInputs.f0,
+ sideInputs,
+ context.getPipelineOptions(),
+ inputKvCoder.getKeyCoder());
+
+ // we have to manually construct the two-input transform because, normally,
+ // we're not allowed to have only one of the two inputs keyed.
+
+ TwoInputTransformation<
+ WindowedValue<SingletonKeyedWorkItem<K, InputT>>,
+ RawUnionValue,
+ WindowedValue<KV<K, OutputT>>> rawFlinkTransform = new TwoInputTransformation<>(
+ keyedWorkItemStream.getTransformation(),
+ transformSideInputs.f1.broadcast().getTransformation(),
+ transform.getName(),
+ (TwoInputStreamOperator) doFnOperator,
+ outputTypeInfo,
+ keyedWorkItemStream.getParallelism());
+
+ rawFlinkTransform.setStateKeyType(keyedWorkItemStream.getKeyType());
+ rawFlinkTransform.setStateKeySelectors(keyedWorkItemStream.getKeySelector(), null);
+
+ @SuppressWarnings({ "unchecked", "rawtypes" })
+ SingleOutputStreamOperator<WindowedValue<KV<K, OutputT>>> outDataStream =
+ new SingleOutputStreamOperator(
+ keyedWorkItemStream.getExecutionEnvironment(),
+ rawFlinkTransform) {}; // we have to cheat around the ctor being protected
+
+ keyedWorkItemStream.getExecutionEnvironment().addOperator(rawFlinkTransform);
+
+ context.setOutputDataStream(context.getOutput(transform), outDataStream);
+ }
+ }
+ }
+
+ private static class GBKIntoKeyedWorkItemsTranslator<K, InputT>
+ extends FlinkStreamingPipelineTranslator.StreamTransformTranslator<
+ SplittableParDo.GBKIntoKeyedWorkItems<K, InputT>> {
+
+ @Override
+ boolean canTranslate(
+ SplittableParDo.GBKIntoKeyedWorkItems<K, InputT> transform,
+ FlinkStreamingTranslationContext context) {
+ return true;
+ }
+
+ @Override
+ public void translateNode(
+ SplittableParDo.GBKIntoKeyedWorkItems<K, InputT> transform,
+ FlinkStreamingTranslationContext context) {
+
+ PCollection<KV<K, InputT>> input = context.getInput(transform);
+
+ KvCoder<K, InputT> inputKvCoder = (KvCoder<K, InputT>) input.getCoder();
+
+ SingletonKeyedWorkItemCoder<K, InputT> workItemCoder = SingletonKeyedWorkItemCoder.of(
+ inputKvCoder.getKeyCoder(),
+ inputKvCoder.getValueCoder(),
+ input.getWindowingStrategy().getWindowFn().windowCoder());
+
+
+ WindowedValue.
+ FullWindowedValueCoder<SingletonKeyedWorkItem<K, InputT>> windowedWorkItemCoder =
+ WindowedValue.getFullCoder(
+ workItemCoder,
+ input.getWindowingStrategy().getWindowFn().windowCoder());
+
+ CoderTypeInformation<WindowedValue<SingletonKeyedWorkItem<K, InputT>>> workItemTypeInfo =
+ new CoderTypeInformation<>(windowedWorkItemCoder);
+
+ DataStream<WindowedValue<KV<K, InputT>>> inputDataStream = context.getInputDataStream(input);
+
+ DataStream<WindowedValue<SingletonKeyedWorkItem<K, InputT>>> workItemStream =
+ inputDataStream
+ .flatMap(new ToKeyedWorkItem<K, InputT>())
+ .returns(workItemTypeInfo).name("ToKeyedWorkItem");
+
+ KeyedStream<
+ WindowedValue<
+ SingletonKeyedWorkItem<K, InputT>>, ByteBuffer> keyedWorkItemStream = workItemStream
+ .keyBy(new WorkItemKeySelector<K, InputT>(inputKvCoder.getKeyCoder()));
+
+ context.setOutputDataStream(context.getOutput(transform), keyedWorkItemStream);
+ }
+ }
+
+ private static class FlattenPCollectionTranslator<T>
+ extends FlinkStreamingPipelineTranslator.StreamTransformTranslator<
+ Flatten.PCollections<T>> {
+
+ @Override
+ public void translateNode(
+ Flatten.PCollections<T> transform,
+ FlinkStreamingTranslationContext context) {
+ Map<TupleTag<?>, PValue> allInputs = context.getInputs(transform);
+
+ if (allInputs.isEmpty()) {
+
+ // create a dummy source to satisfy downstream operations: we cannot
+ // create an empty source in Flink, so we add a flatMap that simply
+ // never forwards the single dummy element
+ DataStreamSource<String> dummySource =
+ context.getExecutionEnvironment().fromElements("dummy");
+
+ DataStream<WindowedValue<T>> result = dummySource.flatMap(
+ new FlatMapFunction<String, WindowedValue<T>>() {
+ @Override
+ public void flatMap(
+ String s,
+ Collector<WindowedValue<T>> collector) throws Exception {
+ // never return anything
+ }
+ }).returns(
+ new CoderTypeInformation<>(
+ WindowedValue.getFullCoder(
+ (Coder<T>) VoidCoder.of(),
+ GlobalWindow.Coder.INSTANCE)));
+ context.setOutputDataStream(context.getOutput(transform), result);
+
+ } else {
+ DataStream<T> result = null;
+ for (PValue input : allInputs.values()) {
+ DataStream<T> current = context.getInputDataStream(input);
+ result = (result == null) ? current : result.union(current);
+ }
+ context.setOutputDataStream(context.getOutput(transform), result);
+ }
+ }
+ }
+
+ private static class ToKeyedWorkItem<K, InputT>
+ extends RichFlatMapFunction<
+ WindowedValue<KV<K, InputT>>,
+ WindowedValue<SingletonKeyedWorkItem<K, InputT>>> {
+
+ @Override
+ public void flatMap(
+ WindowedValue<KV<K, InputT>> inWithMultipleWindows,
+ Collector<WindowedValue<SingletonKeyedWorkItem<K, InputT>>> out) throws Exception {
+
+ // for now we need to wrap the element as one work item per window,
+ // since otherwise the PushbackSideInputRunner will not correctly
+ // determine whether side inputs are ready
+ //
+ // this is tracked as https://issues.apache.org/jira/browse/BEAM-1850
+ for (WindowedValue<KV<K, InputT>> in : inWithMultipleWindows.explodeWindows()) {
+ SingletonKeyedWorkItem<K, InputT> workItem =
+ new SingletonKeyedWorkItem<>(
+ in.getValue().getKey(),
+ in.withValue(in.getValue().getValue()));
+
+ out.collect(in.withValue(workItem));
+ }
+ }
+ }
+
+}
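
The multi-output ParDo translation above routes every output through a single RawUnionValue stream and splits it again afterwards; which union tag an output receives is decided by transformTupleTagsToLabels, where the main output is always label 0 and additional outputs get increasing integers. A minimal, self-contained sketch of that numbering (plain Strings stand in for TupleTags; the tag names are made up for illustration):

    import java.util.Arrays;
    import java.util.LinkedHashMap;
    import java.util.List;
    import java.util.Map;

    public class TagToLabelSketch {
      // Mirrors transformTupleTagsToLabels(): main output -> 0, others -> 1, 2, ...
      static Map<String, Integer> tagsToLabels(String mainTag, List<String> allTags) {
        Map<String, Integer> labels = new LinkedHashMap<>();
        int count = 0;
        labels.put(mainTag, count++);
        for (String tag : allTags) {
          if (!labels.containsKey(tag)) {
            labels.put(tag, count++);
          }
        }
        return labels;
      }

      public static void main(String[] args) {
        // The main tag may also appear in the full output map, just like in the translator.
        System.out.println(tagsToLabels("main", Arrays.asList("main", "late", "errors")));
        // -> {main=0, late=1, errors=2}
      }
    }
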
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/src/main/java/org/apache/beam/runners/flink/FlinkStreamingTranslationContext.java
----------------------------------------------------------------------
diff --git a/runners/flink/src/main/java/org/apache/beam/runners/flink/FlinkStreamingTranslationContext.java b/runners/flink/src/main/java/org/apache/beam/runners/flink/FlinkStreamingTranslationContext.java
new file mode 100644
index 0000000..1a943a3
--- /dev/null
+++ b/runners/flink/src/main/java/org/apache/beam/runners/flink/FlinkStreamingTranslationContext.java
@@ -0,0 +1,130 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.runners.flink;
+
+import static com.google.common.base.Preconditions.checkNotNull;
+
+import com.google.common.collect.Iterables;
+import java.util.HashMap;
+import java.util.Map;
+import org.apache.beam.runners.flink.translation.types.CoderTypeInformation;
+import org.apache.beam.sdk.coders.Coder;
+import org.apache.beam.sdk.options.PipelineOptions;
+import org.apache.beam.sdk.transforms.AppliedPTransform;
+import org.apache.beam.sdk.transforms.PTransform;
+import org.apache.beam.sdk.util.WindowedValue;
+import org.apache.beam.sdk.values.PCollection;
+import org.apache.beam.sdk.values.PInput;
+import org.apache.beam.sdk.values.POutput;
+import org.apache.beam.sdk.values.PValue;
+import org.apache.beam.sdk.values.TupleTag;
+import org.apache.flink.api.common.typeinfo.TypeInformation;
+import org.apache.flink.streaming.api.datastream.DataStream;
+import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
+
+/**
+ * Helper for keeping track of which {@link DataStream DataStreams} map
+ * to which {@link PTransform PTransforms}.
+ */
+class FlinkStreamingTranslationContext {
+
+ private final StreamExecutionEnvironment env;
+ private final PipelineOptions options;
+
+ /**
+ * Keeps a mapping between the output value of a PTransform (in Beam) and the
+ * Flink operator that produced it, after the translation of the corresponding
+ * PTransform to its Flink equivalent.
+ */
+ private final Map<PValue, DataStream<?>> dataStreams;
+
+ private AppliedPTransform<?, ?, ?> currentTransform;
+
+ public FlinkStreamingTranslationContext(StreamExecutionEnvironment env, PipelineOptions options) {
+ this.env = checkNotNull(env);
+ this.options = checkNotNull(options);
+ this.dataStreams = new HashMap<>();
+ }
+
+ public StreamExecutionEnvironment getExecutionEnvironment() {
+ return env;
+ }
+
+ public PipelineOptions getPipelineOptions() {
+ return options;
+ }
+
+ @SuppressWarnings("unchecked")
+ public <T> DataStream<T> getInputDataStream(PValue value) {
+ return (DataStream<T>) dataStreams.get(value);
+ }
+
+ public void setOutputDataStream(PValue value, DataStream<?> set) {
+ if (!dataStreams.containsKey(value)) {
+ dataStreams.put(value, set);
+ }
+ }
+
+ /**
+ * Sets the {@link AppliedPTransform} that carries the inputs and outputs of the
+ * transform currently being translated.
+ * @param currentTransform the transform currently being translated
+ */
+ public void setCurrentTransform(AppliedPTransform<?, ?, ?> currentTransform) {
+ this.currentTransform = currentTransform;
+ }
+
+ public <T> Coder<WindowedValue<T>> getCoder(PCollection<T> collection) {
+ Coder<T> valueCoder = collection.getCoder();
+
+ return WindowedValue.getFullCoder(
+ valueCoder,
+ collection.getWindowingStrategy().getWindowFn().windowCoder());
+ }
+
+ @SuppressWarnings("unchecked")
+ public <T> TypeInformation<WindowedValue<T>> getTypeInfo(PCollection<T> collection) {
+ Coder<T> valueCoder = collection.getCoder();
+ WindowedValue.FullWindowedValueCoder<T> windowedValueCoder =
+ WindowedValue.getFullCoder(
+ valueCoder,
+ collection.getWindowingStrategy().getWindowFn().windowCoder());
+
+ return new CoderTypeInformation<>(windowedValueCoder);
+ }
+
+
+ @SuppressWarnings("unchecked")
+ public <T extends PValue> T getInput(PTransform<T, ?> transform) {
+ return (T) Iterables.getOnlyElement(currentTransform.getInputs().values());
+ }
+
+ public <T extends PInput> Map<TupleTag<?>, PValue> getInputs(PTransform<T, ?> transform) {
+ return currentTransform.getInputs();
+ }
+
+ @SuppressWarnings("unchecked")
+ public <T extends PValue> T getOutput(PTransform<?, T> transform) {
+ return (T) Iterables.getOnlyElement(currentTransform.getOutputs().values());
+ }
+
+ public <OutputT extends POutput> Map<TupleTag<?>, PValue> getOutputs(
+ PTransform<?, OutputT> transform) {
+ return currentTransform.getOutputs();
+ }
+
+}
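
The context above is essentially a PValue-to-DataStream registry scoped to the transform currently being visited. A hypothetical translator body (a sketch only, assuming it sits in the same org.apache.beam.runners.flink package as this package-private context) would use it as follows: look up the input stream, build whatever Flink operator implements the transform, and register the resulting stream for downstream transforms.

    // Sketch of the common translator pattern; IdentityTranslator is a made-up name.
    private static class IdentityTranslator<T>
        extends FlinkStreamingPipelineTranslator.StreamTransformTranslator<
            PTransform<PCollection<T>, PCollection<T>>> {

      @Override
      public void translateNode(
          PTransform<PCollection<T>, PCollection<T>> transform,
          FlinkStreamingTranslationContext context) {
        DataStream<WindowedValue<T>> input =
            context.getInputDataStream(context.getInput(transform));
        // ... apply the Flink operator(s) that implement the transform here ...
        context.setOutputDataStream(context.getOutput(transform), input);
      }
    }
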
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/src/main/java/org/apache/beam/runners/flink/FlinkStreamingViewOverrides.java
----------------------------------------------------------------------
diff --git a/runners/flink/src/main/java/org/apache/beam/runners/flink/FlinkStreamingViewOverrides.java b/runners/flink/src/main/java/org/apache/beam/runners/flink/FlinkStreamingViewOverrides.java
new file mode 100644
index 0000000..f955f2a
--- /dev/null
+++ b/runners/flink/src/main/java/org/apache/beam/runners/flink/FlinkStreamingViewOverrides.java
@@ -0,0 +1,372 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.runners.flink;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import org.apache.beam.sdk.coders.Coder;
+import org.apache.beam.sdk.coders.CoderRegistry;
+import org.apache.beam.sdk.coders.KvCoder;
+import org.apache.beam.sdk.coders.ListCoder;
+import org.apache.beam.sdk.transforms.Combine;
+import org.apache.beam.sdk.transforms.DoFn;
+import org.apache.beam.sdk.transforms.PTransform;
+import org.apache.beam.sdk.transforms.ParDo;
+import org.apache.beam.sdk.transforms.View;
+import org.apache.beam.sdk.util.PCollectionViews;
+import org.apache.beam.sdk.values.KV;
+import org.apache.beam.sdk.values.PCollection;
+import org.apache.beam.sdk.values.PCollectionView;
+
+/**
+ * Flink streaming overrides for various view (side input) transforms.
+ */
+class FlinkStreamingViewOverrides {
+
+ /**
+ * Specialized implementation for
+ * {@link org.apache.beam.sdk.transforms.View.AsMap View.AsMap}
+ * for the Flink runner in streaming mode.
+ */
+ static class StreamingViewAsMap<K, V>
+ extends PTransform<PCollection<KV<K, V>>, PCollectionView<Map<K, V>>> {
+
+ private final transient FlinkRunner runner;
+
+ @SuppressWarnings("unused") // used via reflection in FlinkRunner#apply()
+ public StreamingViewAsMap(FlinkRunner runner, View.AsMap<K, V> transform) {
+ this.runner = runner;
+ }
+
+ @Override
+ public PCollectionView<Map<K, V>> expand(PCollection<KV<K, V>> input) {
+ PCollectionView<Map<K, V>> view =
+ PCollectionViews.mapView(
+ input,
+ input.getWindowingStrategy(),
+ input.getCoder());
+
+ @SuppressWarnings({"rawtypes", "unchecked"})
+ KvCoder<K, V> inputCoder = (KvCoder) input.getCoder();
+ try {
+ inputCoder.getKeyCoder().verifyDeterministic();
+ } catch (Coder.NonDeterministicException e) {
+ runner.recordViewUsesNonDeterministicKeyCoder(this);
+ }
+
+ return input
+ .apply(Combine.globally(new Concatenate<KV<K, V>>()).withoutDefaults())
+ .apply(CreateFlinkPCollectionView.<KV<K, V>, Map<K, V>>of(view));
+ }
+
+ @Override
+ protected String getKindString() {
+ return "StreamingViewAsMap";
+ }
+ }
+
+ /**
+ * Specialized expansion for {@link
+ * View.AsMultimap View.AsMultimap} for the
+ * Flink runner in streaming mode.
+ */
+ static class StreamingViewAsMultimap<K, V>
+ extends PTransform<PCollection<KV<K, V>>, PCollectionView<Map<K, Iterable<V>>>> {
+
+ private final transient FlinkRunner runner;
+
+ /**
+ * Builds an instance of this class from the overridden transform.
+ */
+ @SuppressWarnings("unused") // used via reflection in FlinkRunner#apply()
+ public StreamingViewAsMultimap(FlinkRunner runner, View.AsMultimap<K, V> transform) {
+ this.runner = runner;
+ }
+
+ @Override
+ public PCollectionView<Map<K, Iterable<V>>> expand(PCollection<KV<K, V>> input) {
+ PCollectionView<Map<K, Iterable<V>>> view =
+ PCollectionViews.multimapView(
+ input,
+ input.getWindowingStrategy(),
+ input.getCoder());
+
+ @SuppressWarnings({"rawtypes", "unchecked"})
+ KvCoder<K, V> inputCoder = (KvCoder) input.getCoder();
+ try {
+ inputCoder.getKeyCoder().verifyDeterministic();
+ } catch (Coder.NonDeterministicException e) {
+ runner.recordViewUsesNonDeterministicKeyCoder(this);
+ }
+
+ return input
+ .apply(Combine.globally(new Concatenate<KV<K, V>>()).withoutDefaults())
+ .apply(CreateFlinkPCollectionView.<KV<K, V>, Map<K, Iterable<V>>>of(view));
+ }
+
+ @Override
+ protected String getKindString() {
+ return "StreamingViewAsMultimap";
+ }
+ }
+
+ /**
+ * Specialized implementation for
+ * {@link View.AsList View.AsList} for the
+ * Flink runner in streaming mode.
+ */
+ static class StreamingViewAsList<T>
+ extends PTransform<PCollection<T>, PCollectionView<List<T>>> {
+ /**
+ * Builds an instance of this class from the overridden transform.
+ */
+ @SuppressWarnings("unused") // used via reflection in FlinkRunner#apply()
+ public StreamingViewAsList(FlinkRunner runner, View.AsList<T> transform) {}
+
+ @Override
+ public PCollectionView<List<T>> expand(PCollection<T> input) {
+ PCollectionView<List<T>> view =
+ PCollectionViews.listView(
+ input,
+ input.getWindowingStrategy(),
+ input.getCoder());
+
+ return input.apply(Combine.globally(new Concatenate<T>()).withoutDefaults())
+ .apply(CreateFlinkPCollectionView.<T, List<T>>of(view));
+ }
+
+ @Override
+ protected String getKindString() {
+ return "StreamingViewAsList";
+ }
+ }
+
+ /**
+ * Specialized implementation for
+ * {@link View.AsIterable View.AsIterable} for the
+ * Flink runner in streaming mode.
+ */
+ static class StreamingViewAsIterable<T>
+ extends PTransform<PCollection<T>, PCollectionView<Iterable<T>>> {
+ /**
+ * Builds an instance of this class from the overridden transform.
+ */
+ @SuppressWarnings("unused") // used via reflection in FlinkRunner#apply()
+ public StreamingViewAsIterable(FlinkRunner runner, View.AsIterable<T> transform) { }
+
+ @Override
+ public PCollectionView<Iterable<T>> expand(PCollection<T> input) {
+ PCollectionView<Iterable<T>> view =
+ PCollectionViews.iterableView(
+ input,
+ input.getWindowingStrategy(),
+ input.getCoder());
+
+ return input.apply(Combine.globally(new Concatenate<T>()).withoutDefaults())
+ .apply(CreateFlinkPCollectionView.<T, Iterable<T>>of(view));
+ }
+
+ @Override
+ protected String getKindString() {
+ return "StreamingViewAsIterable";
+ }
+ }
+
+ /**
+ * Specialized expansion for
+ * {@link View.AsSingleton View.AsSingleton} for the
+ * Flink runner in streaming mode.
+ */
+ static class StreamingViewAsSingleton<T>
+ extends PTransform<PCollection<T>, PCollectionView<T>> {
+ private View.AsSingleton<T> transform;
+
+ /**
+ * Builds an instance of this class from the overridden transform.
+ */
+ @SuppressWarnings("unused") // used via reflection in FlinkRunner#apply()
+ public StreamingViewAsSingleton(FlinkRunner runner, View.AsSingleton<T> transform) {
+ this.transform = transform;
+ }
+
+ @Override
+ public PCollectionView<T> expand(PCollection<T> input) {
+ Combine.Globally<T, T> combine = Combine.globally(
+ new SingletonCombine<>(transform.hasDefaultValue(), transform.defaultValue()));
+ if (!transform.hasDefaultValue()) {
+ combine = combine.withoutDefaults();
+ }
+ return input.apply(combine.asSingletonView());
+ }
+
+ @Override
+ protected String getKindString() {
+ return "StreamingViewAsSingleton";
+ }
+
+ private static class SingletonCombine<T> extends Combine.BinaryCombineFn<T> {
+ private boolean hasDefaultValue;
+ private T defaultValue;
+
+ SingletonCombine(boolean hasDefaultValue, T defaultValue) {
+ this.hasDefaultValue = hasDefaultValue;
+ this.defaultValue = defaultValue;
+ }
+
+ @Override
+ public T apply(T left, T right) {
+ throw new IllegalArgumentException("PCollection with more than one element "
+ + "accessed as a singleton view. Consider using Combine.globally().asSingleton() to "
+ + "combine the PCollection into a single value");
+ }
+
+ @Override
+ public T identity() {
+ if (hasDefaultValue) {
+ return defaultValue;
+ } else {
+ throw new IllegalArgumentException(
+ "Empty PCollection accessed as a singleton view. "
+ + "Consider setting withDefault to provide a default value");
+ }
+ }
+ }
+ }
+
+ static class StreamingCombineGloballyAsSingletonView<InputT, OutputT>
+ extends PTransform<PCollection<InputT>, PCollectionView<OutputT>> {
+ Combine.GloballyAsSingletonView<InputT, OutputT> transform;
+
+ /**
+ * Builds an instance of this class from the overridden transform.
+ */
+ @SuppressWarnings("unused") // used via reflection in FlinkRunner#apply()
+ public StreamingCombineGloballyAsSingletonView(
+ FlinkRunner runner,
+ Combine.GloballyAsSingletonView<InputT, OutputT> transform) {
+ this.transform = transform;
+ }
+
+ @Override
+ public PCollectionView<OutputT> expand(PCollection<InputT> input) {
+ PCollection<OutputT> combined =
+ input.apply(Combine.globally(transform.getCombineFn())
+ .withoutDefaults()
+ .withFanout(transform.getFanout()));
+
+ PCollectionView<OutputT> view = PCollectionViews.singletonView(
+ combined,
+ combined.getWindowingStrategy(),
+ transform.getInsertDefault(),
+ transform.getInsertDefault()
+ ? transform.getCombineFn().defaultValue() : null,
+ combined.getCoder());
+ return combined
+ .apply(ParDo.of(new WrapAsList<OutputT>()))
+ .apply(CreateFlinkPCollectionView.<OutputT, OutputT>of(view));
+ }
+
+ @Override
+ protected String getKindString() {
+ return "StreamingCombineGloballyAsSingletonView";
+ }
+ }
+
+ private static class WrapAsList<T> extends DoFn<T, List<T>> {
+ @ProcessElement
+ public void processElement(ProcessContext c) {
+ c.output(Collections.singletonList(c.element()));
+ }
+ }
+
+ /**
+ * Combiner that combines {@code T}s into a single {@code List<T>} containing all inputs.
+ *
+ * <p>For internal use by {@link StreamingViewAsMap}, {@link StreamingViewAsMultimap},
+ * {@link StreamingViewAsList}, {@link StreamingViewAsIterable}.
+ * They require that the input {@link PCollection} fits in memory.
+ * For a large {@link PCollection} this is expected to crash!
+ *
+ * @param <T> the type of elements to concatenate.
+ */
+ private static class Concatenate<T> extends Combine.CombineFn<T, List<T>, List<T>> {
+ @Override
+ public List<T> createAccumulator() {
+ return new ArrayList<T>();
+ }
+
+ @Override
+ public List<T> addInput(List<T> accumulator, T input) {
+ accumulator.add(input);
+ return accumulator;
+ }
+
+ @Override
+ public List<T> mergeAccumulators(Iterable<List<T>> accumulators) {
+ List<T> result = createAccumulator();
+ for (List<T> accumulator : accumulators) {
+ result.addAll(accumulator);
+ }
+ return result;
+ }
+
+ @Override
+ public List<T> extractOutput(List<T> accumulator) {
+ return accumulator;
+ }
+
+ @Override
+ public Coder<List<T>> getAccumulatorCoder(CoderRegistry registry, Coder<T> inputCoder) {
+ return ListCoder.of(inputCoder);
+ }
+
+ @Override
+ public Coder<List<T>> getDefaultOutputCoder(CoderRegistry registry, Coder<T> inputCoder) {
+ return ListCoder.of(inputCoder);
+ }
+ }
+
+ /**
+ * Creates a primitive {@link PCollectionView}.
+ *
+ * <p>For internal use only by runner implementors.
+ *
+ * @param <ElemT> The type of the elements of the input PCollection
+ * @param <ViewT> The type associated with the {@link PCollectionView} used as a side input
+ */
+ public static class CreateFlinkPCollectionView<ElemT, ViewT>
+ extends PTransform<PCollection<List<ElemT>>, PCollectionView<ViewT>> {
+ private PCollectionView<ViewT> view;
+
+ private CreateFlinkPCollectionView(PCollectionView<ViewT> view) {
+ this.view = view;
+ }
+
+ public static <ElemT, ViewT> CreateFlinkPCollectionView<ElemT, ViewT> of(
+ PCollectionView<ViewT> view) {
+ return new CreateFlinkPCollectionView<>(view);
+ }
+
+ @Override
+ public PCollectionView<ViewT> expand(PCollection<List<ElemT>> input) {
+ return view;
+ }
+ }
+}
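
All of these streaming view overrides funnel the side input through the Concatenate combiner above, which simply accumulates every element into one in-memory list; that is why the Javadoc warns that the input PCollection has to fit in memory. A standalone illustration of the accumulate/merge/extract contract, with plain lists standing in for the combiner's accumulators:

    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.List;

    public class ConcatenateContractSketch {
      public static void main(String[] args) {
        // createAccumulator() + addInput() on two separate bundles
        List<String> acc1 = new ArrayList<>(Arrays.asList("a", "b"));
        List<String> acc2 = new ArrayList<>(Arrays.asList("c"));

        // mergeAccumulators(): plain list concatenation
        List<String> merged = new ArrayList<>(acc1);
        merged.addAll(acc2);

        // extractOutput() is the identity, so the view sees the full list
        System.out.println(merged);   // [a, b, c]
      }
    }
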
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/src/main/java/org/apache/beam/runners/flink/PipelineTranslationOptimizer.java
----------------------------------------------------------------------
diff --git a/runners/flink/src/main/java/org/apache/beam/runners/flink/PipelineTranslationOptimizer.java b/runners/flink/src/main/java/org/apache/beam/runners/flink/PipelineTranslationOptimizer.java
new file mode 100644
index 0000000..3acc3ea
--- /dev/null
+++ b/runners/flink/src/main/java/org/apache/beam/runners/flink/PipelineTranslationOptimizer.java
@@ -0,0 +1,72 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.runners.flink;
+
+import org.apache.beam.sdk.io.Read;
+import org.apache.beam.sdk.runners.TransformHierarchy;
+import org.apache.beam.sdk.transforms.PTransform;
+import org.apache.beam.sdk.values.PValue;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Traverses the Pipeline to determine the {@link TranslationMode} for this pipeline.
+ */
+class PipelineTranslationOptimizer extends FlinkPipelineTranslator {
+
+ private static final Logger LOG = LoggerFactory.getLogger(PipelineTranslationOptimizer.class);
+
+ private TranslationMode translationMode;
+
+ private final FlinkPipelineOptions options;
+
+ public PipelineTranslationOptimizer(TranslationMode defaultMode, FlinkPipelineOptions options) {
+ this.translationMode = defaultMode;
+ this.options = options;
+ }
+
+ public TranslationMode getTranslationMode() {
+
+ // a user-specified streaming option overrides the detected translation mode
+ if (options.isStreaming()) {
+ return TranslationMode.STREAMING;
+ }
+
+ return translationMode;
+ }
+
+ @Override
+ public CompositeBehavior enterCompositeTransform(TransformHierarchy.Node node) {
+ return CompositeBehavior.ENTER_TRANSFORM;
+ }
+
+ @Override
+ public void leaveCompositeTransform(TransformHierarchy.Node node) {}
+
+ @Override
+ public void visitPrimitiveTransform(TransformHierarchy.Node node) {
+ Class<? extends PTransform> transformClass = node.getTransform().getClass();
+ if (transformClass == Read.Unbounded.class) {
+ LOG.info("Found {}. Switching to streaming execution.", transformClass);
+ translationMode = TranslationMode.STREAMING;
+ }
+ }
+
+ @Override
+ public void visitValue(PValue value, TransformHierarchy.Node producer) {}
+}
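
The optimizer is a pipeline visitor: a single Read.Unbounded anywhere in the graph flips the detected mode to STREAMING, and the streaming flag on the options wins over whatever the traversal found. A rough sketch of how a runner might drive it (assuming FlinkPipelineTranslator is a Pipeline.PipelineVisitor, which is what the enter/leave/visit callbacks above imply):

    PipelineTranslationOptimizer optimizer =
        new PipelineTranslationOptimizer(TranslationMode.BATCH, options);
    pipeline.traverseTopologically(optimizer);   // any Read.Unbounded switches the mode
    if (optimizer.getTranslationMode() == TranslationMode.STREAMING) {
      // translate against a StreamExecutionEnvironment with the streaming translators
    } else {
      // translate against a batch ExecutionEnvironment with the batch translators
    }
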
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/src/main/java/org/apache/beam/runners/flink/TestFlinkRunner.java
----------------------------------------------------------------------
diff --git a/runners/flink/src/main/java/org/apache/beam/runners/flink/TestFlinkRunner.java b/runners/flink/src/main/java/org/apache/beam/runners/flink/TestFlinkRunner.java
new file mode 100644
index 0000000..8f50105
--- /dev/null
+++ b/runners/flink/src/main/java/org/apache/beam/runners/flink/TestFlinkRunner.java
@@ -0,0 +1,84 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.runners.flink;
+
+import org.apache.beam.sdk.Pipeline;
+import org.apache.beam.sdk.Pipeline.PipelineExecutionException;
+import org.apache.beam.sdk.PipelineResult;
+import org.apache.beam.sdk.options.PipelineOptions;
+import org.apache.beam.sdk.options.PipelineOptionsFactory;
+import org.apache.beam.sdk.options.PipelineOptionsValidator;
+import org.apache.beam.sdk.runners.PipelineRunner;
+import org.apache.beam.sdk.util.UserCodeException;
+
+/**
+ * Test Flink runner.
+ */
+public class TestFlinkRunner extends PipelineRunner<PipelineResult> {
+
+ private FlinkRunner delegate;
+
+ private TestFlinkRunner(FlinkPipelineOptions options) {
+ // We use [auto] for testing since this will make it pick up the Testing ExecutionEnvironment
+ options.setFlinkMaster("[auto]");
+ this.delegate = FlinkRunner.fromOptions(options);
+ }
+
+ public static TestFlinkRunner fromOptions(PipelineOptions options) {
+ FlinkPipelineOptions flinkOptions =
+ PipelineOptionsValidator.validate(FlinkPipelineOptions.class, options);
+ return new TestFlinkRunner(flinkOptions);
+ }
+
+ public static TestFlinkRunner create(boolean streaming) {
+ FlinkPipelineOptions flinkOptions = PipelineOptionsFactory.as(FlinkPipelineOptions.class);
+ flinkOptions.setRunner(TestFlinkRunner.class);
+ flinkOptions.setStreaming(streaming);
+ return TestFlinkRunner.fromOptions(flinkOptions);
+ }
+
+ @Override
+ public PipelineResult run(Pipeline pipeline) {
+ try {
+ return delegate.run(pipeline);
+ } catch (Throwable t) {
+ // Special case hack to pull out assertion errors from PAssert; instead there should
+ // probably be a better story along the lines of UserCodeException.
+ UserCodeException innermostUserCodeException = null;
+ Throwable current = t;
+ for (; current.getCause() != null; current = current.getCause()) {
+ if (current instanceof UserCodeException) {
+ innermostUserCodeException = ((UserCodeException) current);
+ }
+ }
+ if (innermostUserCodeException != null) {
+ current = innermostUserCodeException.getCause();
+ }
+ if (current instanceof AssertionError) {
+ throw (AssertionError) current;
+ }
+ throw new PipelineExecutionException(current);
+ }
+ }
+
+ public PipelineOptions getPipelineOptions() {
+ return delegate.getPipelineOptions();
+ }
+}
+
+
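
The unwrapping loop above means a failed PAssert inside user code resurfaces as a plain AssertionError from run(), which is what JUnit-style tests expect. A minimal usage sketch (the test pipeline and class name are made up for illustration):

    import org.apache.beam.runners.flink.TestFlinkRunner;
    import org.apache.beam.sdk.Pipeline;
    import org.apache.beam.sdk.options.PipelineOptions;
    import org.apache.beam.sdk.testing.PAssert;
    import org.apache.beam.sdk.transforms.Create;

    public class TestFlinkRunnerSketch {
      public static void main(String[] args) {
        // create(false) -> batch mode against the local [auto] Flink environment
        PipelineOptions options = TestFlinkRunner.create(false).getPipelineOptions();
        Pipeline p = Pipeline.create(options);
        PAssert.that(p.apply(Create.of(1, 2, 3))).containsInAnyOrder(1, 2, 3);
        p.run();   // a failing PAssert would be rethrown here as an AssertionError
      }
    }
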
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/src/main/java/org/apache/beam/runners/flink/TranslationMode.java
----------------------------------------------------------------------
diff --git a/runners/flink/src/main/java/org/apache/beam/runners/flink/TranslationMode.java b/runners/flink/src/main/java/org/apache/beam/runners/flink/TranslationMode.java
new file mode 100644
index 0000000..ad54750
--- /dev/null
+++ b/runners/flink/src/main/java/org/apache/beam/runners/flink/TranslationMode.java
@@ -0,0 +1,31 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.runners.flink;
+
+/**
+ * The translation mode of the Beam Pipeline.
+ */
+enum TranslationMode {
+
+ /** Uses the batch mode of Flink. */
+ BATCH,
+
+ /** Uses the streaming mode of Flink. */
+ STREAMING
+
+}
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/src/main/java/org/apache/beam/runners/flink/package-info.java
----------------------------------------------------------------------
diff --git a/runners/flink/src/main/java/org/apache/beam/runners/flink/package-info.java b/runners/flink/src/main/java/org/apache/beam/runners/flink/package-info.java
new file mode 100644
index 0000000..57f1e59
--- /dev/null
+++ b/runners/flink/src/main/java/org/apache/beam/runners/flink/package-info.java
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Internal implementation of the Beam runner for Apache Flink.
+ */
+package org.apache.beam.runners.flink;
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkAggregatorFactory.java
----------------------------------------------------------------------
diff --git a/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkAggregatorFactory.java b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkAggregatorFactory.java
new file mode 100644
index 0000000..fb2493b
--- /dev/null
+++ b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkAggregatorFactory.java
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.runners.flink.translation.functions;
+
+import org.apache.beam.runners.core.AggregatorFactory;
+import org.apache.beam.runners.core.ExecutionContext;
+import org.apache.beam.runners.flink.translation.wrappers.SerializableFnAggregatorWrapper;
+import org.apache.beam.sdk.transforms.Aggregator;
+import org.apache.beam.sdk.transforms.Combine;
+import org.apache.flink.api.common.functions.RuntimeContext;
+
+/**
+ * An {@link AggregatorFactory} for the Flink Batch Runner.
+ */
+public class FlinkAggregatorFactory implements AggregatorFactory {
+
+ private final RuntimeContext runtimeContext;
+
+ public FlinkAggregatorFactory(RuntimeContext runtimeContext) {
+ this.runtimeContext = runtimeContext;
+ }
+
+ @Override
+ public <InputT, AccumT, OutputT> Aggregator<InputT, OutputT> createAggregatorForDoFn(
+ Class<?> fnClass, ExecutionContext.StepContext stepContext, String aggregatorName,
+ Combine.CombineFn<InputT, AccumT, OutputT> combine) {
+ @SuppressWarnings("unchecked")
+ SerializableFnAggregatorWrapper<InputT, OutputT> result =
+ (SerializableFnAggregatorWrapper<InputT, OutputT>)
+ runtimeContext.getAccumulator(aggregatorName);
+
+ if (result == null) {
+ result = new SerializableFnAggregatorWrapper<>(combine);
+ runtimeContext.addAccumulator(aggregatorName, result);
+ }
+ return result;
+ }
+}
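
The factory's only real job is a get-or-create lookup against Flink's accumulator registry, so every request for the same aggregator name within a task shares one accumulator instance. The same pattern with a stock Flink LongCounter (purely illustrative; the real factory wraps the Beam CombineFn instead):

    import org.apache.flink.api.common.accumulators.LongCounter;
    import org.apache.flink.api.common.functions.RuntimeContext;

    public class AccumulatorLookupSketch {
      // Get-or-create against the Flink accumulator registry, mirroring
      // createAggregatorForDoFn() above but with a built-in accumulator type.
      static LongCounter getOrCreate(RuntimeContext ctx, String name) {
        LongCounter counter = (LongCounter) ctx.<Long, Long>getAccumulator(name);
        if (counter == null) {
          counter = new LongCounter();
          ctx.addAccumulator(name, counter);
        }
        return counter;
      }
    }
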
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkAssignContext.java
----------------------------------------------------------------------
diff --git a/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkAssignContext.java b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkAssignContext.java
new file mode 100644
index 0000000..447b1e5
--- /dev/null
+++ b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkAssignContext.java
@@ -0,0 +1,63 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.runners.flink.translation.functions;
+
+import static com.google.common.base.Preconditions.checkArgument;
+
+import com.google.common.collect.Iterables;
+import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
+import org.apache.beam.sdk.transforms.windowing.WindowFn;
+import org.apache.beam.sdk.util.WindowedValue;
+import org.joda.time.Instant;
+
+/**
+ * {@link org.apache.beam.sdk.transforms.windowing.WindowFn.AssignContext} for
+ * Flink functions.
+ */
+class FlinkAssignContext<InputT, W extends BoundedWindow>
+ extends WindowFn<InputT, W>.AssignContext {
+ private final WindowedValue<InputT> value;
+
+ FlinkAssignContext(WindowFn<InputT, W> fn, WindowedValue<InputT> value) {
+ fn.super();
+ checkArgument(
+ Iterables.size(value.getWindows()) == 1,
+ String.format(
+ "%s passed to window assignment must be in a single window, but it was in %s: %s",
+ WindowedValue.class.getSimpleName(),
+ Iterables.size(value.getWindows()),
+ value.getWindows()));
+ this.value = value;
+ }
+
+ @Override
+ public InputT element() {
+ return value.getValue();
+ }
+
+ @Override
+ public Instant timestamp() {
+ return value.getTimestamp();
+ }
+
+ @Override
+ public BoundedWindow window() {
+ return Iterables.getOnlyElement(value.getWindows());
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkAssignWindows.java
----------------------------------------------------------------------
diff --git a/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkAssignWindows.java b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkAssignWindows.java
new file mode 100644
index 0000000..c3a5095
--- /dev/null
+++ b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkAssignWindows.java
@@ -0,0 +1,49 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.runners.flink.translation.functions;
+
+import java.util.Collection;
+import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
+import org.apache.beam.sdk.transforms.windowing.WindowFn;
+import org.apache.beam.sdk.util.WindowedValue;
+import org.apache.flink.api.common.functions.FlatMapFunction;
+import org.apache.flink.util.Collector;
+
+/**
+ * Flink {@link FlatMapFunction} for implementing
+ * {@link org.apache.beam.sdk.transforms.windowing.Window.Assign}.
+ */
+public class FlinkAssignWindows<T, W extends BoundedWindow>
+ implements FlatMapFunction<WindowedValue<T>, WindowedValue<T>> {
+
+ private final WindowFn<T, W> windowFn;
+
+ public FlinkAssignWindows(WindowFn<T, W> windowFn) {
+ this.windowFn = windowFn;
+ }
+
+ @Override
+ public void flatMap(
+ WindowedValue<T> input, Collector<WindowedValue<T>> collector) throws Exception {
+ Collection<W> windows = windowFn.assignWindows(new FlinkAssignContext<>(windowFn, input));
+ for (W window: windows) {
+ collector.collect(
+ WindowedValue.of(input.getValue(), input.getTimestamp(), window, input.getPane()));
+ }
+ }
+}
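
Because flatMap() emits the element once per assigned window, a single input fans out under overlapping WindowFns such as sliding windows. A standalone sketch of that fan-out, with the window arithmetic spelled out for a 30-unit window sliding every 10 units (the numbers are chosen only for illustration):

    import java.util.ArrayList;
    import java.util.List;

    public class SlidingFanOutSketch {
      public static void main(String[] args) {
        long size = 30, period = 10, timestamp = 25;
        List<String> assigned = new ArrayList<>();
        // Walk backwards from the last window that starts at or before the timestamp.
        long lastStart = timestamp - (timestamp % period);
        for (long start = lastStart; start > timestamp - size; start -= period) {
          assigned.add("[" + start + ", " + (start + size) + ")");
        }
        // FlinkAssignWindows would collect one WindowedValue per entry below.
        System.out.println(assigned);   // [[20, 50), [10, 40), [0, 30)]
      }
    }
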
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkDoFnFunction.java
----------------------------------------------------------------------
diff --git a/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkDoFnFunction.java b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkDoFnFunction.java
new file mode 100644
index 0000000..51582af
--- /dev/null
+++ b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/functions/FlinkDoFnFunction.java
@@ -0,0 +1,161 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.runners.flink.translation.functions;
+
+import java.util.Collections;
+import java.util.Map;
+import org.apache.beam.runners.core.DoFnRunner;
+import org.apache.beam.runners.core.DoFnRunners;
+import org.apache.beam.runners.flink.translation.utils.SerializedPipelineOptions;
+import org.apache.beam.sdk.options.PipelineOptions;
+import org.apache.beam.sdk.transforms.DoFn;
+import org.apache.beam.sdk.transforms.join.RawUnionValue;
+import org.apache.beam.sdk.transforms.reflect.DoFnInvoker;
+import org.apache.beam.sdk.transforms.reflect.DoFnInvokers;
+import org.apache.beam.sdk.util.WindowedValue;
+import org.apache.beam.sdk.util.WindowingStrategy;
+import org.apache.beam.sdk.values.PCollectionView;
+import org.apache.beam.sdk.values.TupleTag;
+import org.apache.flink.api.common.functions.RichMapPartitionFunction;
+import org.apache.flink.api.common.functions.RuntimeContext;
+import org.apache.flink.configuration.Configuration;
+import org.apache.flink.util.Collector;
+
+/**
+ * Encapsulates a {@link DoFn}
+ * inside a Flink {@link org.apache.flink.api.common.functions.RichMapPartitionFunction}.
+ *
+ * <p>We get a mapping from {@link org.apache.beam.sdk.values.TupleTag} to output index
+ * and must tag every emitted element with its output index. A downstream filter then
+ * drops the elements that do not belong to a given output.
+ */
+public class FlinkDoFnFunction<InputT, OutputT>
+ extends RichMapPartitionFunction<WindowedValue<InputT>, WindowedValue<OutputT>> {
+
+ private final SerializedPipelineOptions serializedOptions;
+
+ private final DoFn<InputT, OutputT> doFn;
+ private final Map<PCollectionView<?>, WindowingStrategy<?, ?>> sideInputs;
+
+ private final WindowingStrategy<?, ?> windowingStrategy;
+
+ private final Map<TupleTag<?>, Integer> outputMap;
+ private final TupleTag<OutputT> mainOutputTag;
+
+ private transient DoFnInvoker<InputT, OutputT> doFnInvoker;
+
+ public FlinkDoFnFunction(
+ DoFn<InputT, OutputT> doFn,
+ WindowingStrategy<?, ?> windowingStrategy,
+ Map<PCollectionView<?>, WindowingStrategy<?, ?>> sideInputs,
+ PipelineOptions options,
+ Map<TupleTag<?>, Integer> outputMap,
+ TupleTag<OutputT> mainOutputTag) {
+
+ this.doFn = doFn;
+ this.sideInputs = sideInputs;
+ this.serializedOptions = new SerializedPipelineOptions(options);
+ this.windowingStrategy = windowingStrategy;
+ this.outputMap = outputMap;
+ this.mainOutputTag = mainOutputTag;
+
+ }
+
+ @Override
+ public void mapPartition(
+ Iterable<WindowedValue<InputT>> values,
+ Collector<WindowedValue<OutputT>> out) throws Exception {
+
+ RuntimeContext runtimeContext = getRuntimeContext();
+
+ DoFnRunners.OutputManager outputManager;
+ if (outputMap == null) {
+ outputManager = new FlinkDoFnFunction.DoFnOutputManager(out);
+ } else {
+ // the DoFn has additional outputs; route each element by its union-tag index
+ outputManager =
+ new FlinkDoFnFunction.MultiDoFnOutputManager((Collector) out, outputMap);
+ }
+
+ DoFnRunner<InputT, OutputT> doFnRunner = DoFnRunners.simpleRunner(
+ serializedOptions.getPipelineOptions(), doFn,
+ new FlinkSideInputReader(sideInputs, runtimeContext),
+ outputManager,
+ mainOutputTag,
+ // see SimpleDoFnRunner, just use it to limit number of additional outputs
+ Collections.<TupleTag<?>>emptyList(),
+ new FlinkNoOpStepContext(),
+ new FlinkAggregatorFactory(runtimeContext),
+ windowingStrategy);
+
+ doFnRunner.startBundle();
+
+ for (WindowedValue<InputT> value : values) {
+ doFnRunner.processElement(value);
+ }
+
+ doFnRunner.finishBundle();
+ }
+
+ @Override
+ public void open(Configuration parameters) throws Exception {
+ doFnInvoker = DoFnInvokers.invokerFor(doFn);
+ doFnInvoker.invokeSetup();
+ }
+
+ @Override
+ public void close() throws Exception {
+ doFnInvoker.invokeTeardown();
+ }
+
+ static class DoFnOutputManager
+ implements DoFnRunners.OutputManager {
+
+ private Collector collector;
+
+ DoFnOutputManager(Collector collector) {
+ this.collector = collector;
+ }
+
+ @Override
+ @SuppressWarnings("unchecked")
+ public <T> void output(TupleTag<T> tag, WindowedValue<T> output) {
+ collector.collect(output);
+ }
+ }
+
+ static class MultiDoFnOutputManager
+ implements DoFnRunners.OutputManager {
+
+ private Collector<WindowedValue<RawUnionValue>> collector;
+ private Map<TupleTag<?>, Integer> outputMap;
+
+ MultiDoFnOutputManager(Collector<WindowedValue<RawUnionValue>> collector,
+ Map<TupleTag<?>, Integer> outputMap) {
+ this.collector = collector;
+ this.outputMap = outputMap;
+ }
+
+ @Override
+ public <T> void output(TupleTag<T> tag, WindowedValue<T> output) {
+ collector.collect(WindowedValue.of(new RawUnionValue(outputMap.get(tag), output.getValue()),
+ output.getTimestamp(), output.getWindows(), output.getPane()));
+ }
+ }
+
+}
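
A small sketch of the tagging behavior described in the class Javadoc; the tag names, indices, and printing collector below are made up for illustration, and the class sits in the same package only because MultiDoFnOutputManager is package-private:

    package org.apache.beam.runners.flink.translation.functions;

    import java.util.HashMap;
    import java.util.Map;
    import org.apache.beam.sdk.transforms.join.RawUnionValue;
    import org.apache.beam.sdk.util.WindowedValue;
    import org.apache.beam.sdk.values.TupleTag;
    import org.apache.flink.util.Collector;

    public class MultiOutputTaggingSketch {
      public static void main(String[] args) {
        TupleTag<String> mainTag = new TupleTag<>("main");
        TupleTag<String> errorTag = new TupleTag<>("errors");

        // The translator builds this map; in this sketch index 0 is the main output.
        Map<TupleTag<?>, Integer> outputMap = new HashMap<>();
        outputMap.put(mainTag, 0);
        outputMap.put(errorTag, 1);

        Collector<WindowedValue<RawUnionValue>> collector =
            new Collector<WindowedValue<RawUnionValue>>() {
              @Override
              public void collect(WindowedValue<RawUnionValue> value) {
                // A downstream filter keys off getUnionTag() to route elements to the right output.
                System.out.println(
                    value.getValue().getUnionTag() + " -> " + value.getValue().getValue());
              }

              @Override
              public void close() {}
            };

        FlinkDoFnFunction.MultiDoFnOutputManager outputManager =
            new FlinkDoFnFunction.MultiDoFnOutputManager(collector, outputMap);

        // Emitting to errorTag wraps the element in a RawUnionValue with union tag 1.
        outputManager.output(errorTag, WindowedValue.valueInGlobalWindow("boom"));
      }
    }
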
[44/50] [abbrv] beam git commit: Ensure all Read outputs are consumed
in Dataflow
Posted by dh...@apache.org.
Ensure all Read outputs are consumed in Dataflow
Apply a no-op ParDo to any PTransform that is not consumed.
Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/418c304d
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/418c304d
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/418c304d
Branch: refs/heads/DSL_SQL
Commit: 418c304dbff1ce8c176d08c890780ec97245aaae
Parents: 714fdd2
Author: Thomas Groh <tg...@google.com>
Authored: Tue Apr 18 17:25:59 2017 -0700
Committer: Thomas Groh <tg...@google.com>
Committed: Wed Apr 19 10:53:30 2017 -0700
----------------------------------------------------------------------
.../core/construction/UnconsumedReads.java | 72 +++++++++++++
.../core/construction/UnconsumedReadsTest.java | 105 +++++++++++++++++++
.../beam/runners/dataflow/DataflowRunner.java | 4 +
.../runners/dataflow/DataflowRunnerTest.java | 24 +++++
4 files changed, 205 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/beam/blob/418c304d/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/UnconsumedReads.java
----------------------------------------------------------------------
diff --git a/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/UnconsumedReads.java b/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/UnconsumedReads.java
new file mode 100644
index 0000000..c191eeb
--- /dev/null
+++ b/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/UnconsumedReads.java
@@ -0,0 +1,72 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.beam.runners.core.construction;
+
+import java.util.HashSet;
+import java.util.Set;
+import org.apache.beam.sdk.Pipeline;
+import org.apache.beam.sdk.Pipeline.PipelineVisitor;
+import org.apache.beam.sdk.io.Read;
+import org.apache.beam.sdk.runners.TransformHierarchy.Node;
+import org.apache.beam.sdk.transforms.DoFn;
+import org.apache.beam.sdk.transforms.PTransform;
+import org.apache.beam.sdk.transforms.ParDo;
+import org.apache.beam.sdk.values.PCollection;
+import org.apache.beam.sdk.values.PValue;
+
+/**
+ * Utilities for ensuring that all {@link Read} {@link PTransform PTransforms} are consumed by some
+ * {@link PTransform}.
+ */
+public class UnconsumedReads {
+ public static void ensureAllReadsConsumed(Pipeline pipeline) {
+ final Set<PCollection<?>> unconsumed = new HashSet<>();
+ pipeline.traverseTopologically(
+ new PipelineVisitor.Defaults() {
+ @Override
+ public void visitPrimitiveTransform(Node node) {
+ unconsumed.removeAll(node.getInputs().values());
+ }
+
+ @Override
+ public void visitValue(PValue value, Node producer) {
+ if (producer.getTransform() instanceof Read.Bounded
+ || producer.getTransform() instanceof Read.Unbounded) {
+ unconsumed.add((PCollection<?>) value);
+ }
+ }
+ });
+ int i = 0;
+ for (PCollection<?> unconsumedPCollection : unconsumed) {
+ consume(unconsumedPCollection, i);
+ i++;
+ }
+ }
+
+ private static <T> void consume(PCollection<T> unconsumedPCollection, int uniq) {
+ // Multiple applications should never break due to stable unique names.
+ String uniqueName = "DropInputs" + (uniq == 0 ? "" : uniq);
+ unconsumedPCollection.apply(uniqueName, ParDo.of(new NoOpDoFn<T>()));
+ }
+
+ private static class NoOpDoFn<T> extends DoFn<T, T> {
+ @ProcessElement
+ public void doNothing(ProcessContext context) {}
+ }
+}
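
A minimal usage sketch (the pipeline below is illustrative, not from the commit): a Read whose output is never consumed gets a synthetic "DropInputs" ParDo attached, so later replacement of Read transforms always finds a downstream consumer to repair.

    import org.apache.beam.runners.core.construction.UnconsumedReads;
    import org.apache.beam.sdk.Pipeline;
    import org.apache.beam.sdk.io.CountingSource;
    import org.apache.beam.sdk.io.Read;
    import org.apache.beam.sdk.options.PipelineOptionsFactory;

    public class UnconsumedReadsSketch {
      public static void main(String[] args) {
        Pipeline pipeline = Pipeline.create(PipelineOptionsFactory.create());

        // The output of this Read is deliberately left unconsumed.
        pipeline.apply(Read.from(CountingSource.upTo(10L)));

        // Attaches a no-op "DropInputs" ParDo to the dangling PCollection.
        UnconsumedReads.ensureAllReadsConsumed(pipeline);
      }
    }
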
http://git-wip-us.apache.org/repos/asf/beam/blob/418c304d/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/UnconsumedReadsTest.java
----------------------------------------------------------------------
diff --git a/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/UnconsumedReadsTest.java b/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/UnconsumedReadsTest.java
new file mode 100644
index 0000000..1966a93
--- /dev/null
+++ b/runners/core-construction-java/src/test/java/org/apache/beam/runners/core/construction/UnconsumedReadsTest.java
@@ -0,0 +1,105 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.beam.runners.core.construction;
+
+import static org.junit.Assert.assertThat;
+
+import java.util.HashSet;
+import java.util.Set;
+import org.apache.beam.sdk.Pipeline.PipelineVisitor;
+import org.apache.beam.sdk.io.CountingSource;
+import org.apache.beam.sdk.io.Read;
+import org.apache.beam.sdk.io.Read.Bounded;
+import org.apache.beam.sdk.io.Read.Unbounded;
+import org.apache.beam.sdk.runners.TransformHierarchy.Node;
+import org.apache.beam.sdk.testing.TestPipeline;
+import org.apache.beam.sdk.transforms.Flatten;
+import org.apache.beam.sdk.transforms.PTransform;
+import org.apache.beam.sdk.values.PCollection;
+import org.apache.beam.sdk.values.PCollectionList;
+import org.apache.beam.sdk.values.PValue;
+import org.hamcrest.Matchers;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+/**
+ * Tests for {@link UnconsumedReads}.
+ */
+@RunWith(JUnit4.class)
+public class UnconsumedReadsTest {
+ @Rule public TestPipeline pipeline = TestPipeline.create().enableAbandonedNodeEnforcement(false);
+
+ @Test
+ public void matcherProducesUnconsumedValueBoundedRead() {
+ Bounded<Long> transform = Read.from(CountingSource.upTo(20L));
+ PCollection<Long> output = pipeline.apply(transform);
+ UnconsumedReads.ensureAllReadsConsumed(pipeline);
+ validateConsumed();
+ }
+
+ @Test
+ public void matcherProducesUnconsumedValueUnboundedRead() {
+ Unbounded<Long> transform = Read.from(CountingSource.unbounded());
+ PCollection<Long> output = pipeline.apply(transform);
+ UnconsumedReads.ensureAllReadsConsumed(pipeline);
+ validateConsumed();
+ }
+
+ @Test
+ public void doesNotConsumeAlreadyConsumedRead() {
+ Unbounded<Long> transform = Read.from(CountingSource.unbounded());
+ final PCollection<Long> output = pipeline.apply(transform);
+ final Flatten.PCollections<Long> consumer = Flatten.<Long>pCollections();
+ PCollectionList.of(output).apply(consumer);
+ UnconsumedReads.ensureAllReadsConsumed(pipeline);
+ pipeline.traverseTopologically(
+ new PipelineVisitor.Defaults() {
+ @Override
+ public void visitPrimitiveTransform(Node node) {
+ // The output should only be consumed by a single consumer
+ if (node.getInputs().values().contains(output)) {
+ assertThat(node.getTransform(), Matchers.<PTransform<?, ?>>is(consumer));
+ }
+ }
+ });
+ }
+
+ private void validateConsumed() {
+ final Set<PValue> consumedOutputs = new HashSet<PValue>();
+ final Set<PValue> allReadOutputs = new HashSet<PValue>();
+ pipeline.traverseTopologically(
+ new PipelineVisitor.Defaults() {
+ @Override
+ public void visitPrimitiveTransform(Node node) {
+ consumedOutputs.addAll(node.getInputs().values());
+ }
+
+ @Override
+ public void visitValue(PValue value, Node producer) {
+ if (producer.getTransform() instanceof Read.Bounded
+ || producer.getTransform() instanceof Read.Unbounded) {
+ allReadOutputs.add(value);
+ }
+ }
+ });
+ assertThat(consumedOutputs, Matchers.hasItems(allReadOutputs.toArray(new PValue[0])));
+ }
+}
http://git-wip-us.apache.org/repos/asf/beam/blob/418c304d/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/DataflowRunner.java
----------------------------------------------------------------------
diff --git a/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/DataflowRunner.java b/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/DataflowRunner.java
index 4eec6b8..2912fa7 100644
--- a/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/DataflowRunner.java
+++ b/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/DataflowRunner.java
@@ -65,6 +65,7 @@ import org.apache.beam.runners.core.construction.PTransformReplacements;
import org.apache.beam.runners.core.construction.ReplacementOutputs;
import org.apache.beam.runners.core.construction.SingleInputOutputOverrideFactory;
import org.apache.beam.runners.core.construction.UnboundedReadFromBoundedSource;
+import org.apache.beam.runners.core.construction.UnconsumedReads;
import org.apache.beam.runners.dataflow.BatchViewOverrides.BatchCombineGloballyAsSingletonViewFactory;
import org.apache.beam.runners.dataflow.DataflowPipelineTranslator.JobSpecification;
import org.apache.beam.runners.dataflow.StreamingViewOverrides.StreamingCreatePCollectionViewFactory;
@@ -690,6 +691,9 @@ public class DataflowRunner extends PipelineRunner<DataflowPipelineJob> {
@VisibleForTesting
void replaceTransforms(Pipeline pipeline) {
boolean streaming = options.isStreaming() || containsUnboundedPCollection(pipeline);
+ // Ensure all outputs of all reads are consumed before potentially replacing any
+ // Read PTransforms
+ UnconsumedReads.ensureAllReadsConsumed(pipeline);
pipeline.replaceAll(getOverrides(streaming));
}
http://git-wip-us.apache.org/repos/asf/beam/blob/418c304d/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/DataflowRunnerTest.java
----------------------------------------------------------------------
diff --git a/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/DataflowRunnerTest.java b/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/DataflowRunnerTest.java
index 79a96e7..36704bc 100644
--- a/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/DataflowRunnerTest.java
+++ b/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/DataflowRunnerTest.java
@@ -23,6 +23,7 @@ import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.hasItem;
import static org.hamcrest.Matchers.instanceOf;
+import static org.hamcrest.Matchers.is;
import static org.hamcrest.Matchers.startsWith;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
@@ -57,6 +58,7 @@ import java.util.Arrays;
import java.util.Collections;
import java.util.LinkedList;
import java.util.List;
+import java.util.concurrent.atomic.AtomicBoolean;
import java.util.regex.Pattern;
import org.apache.beam.runners.dataflow.options.DataflowPipelineDebugOptions;
import org.apache.beam.runners.dataflow.options.DataflowPipelineOptions;
@@ -65,11 +67,13 @@ import org.apache.beam.sdk.Pipeline.PipelineVisitor;
import org.apache.beam.sdk.coders.BigEndianIntegerCoder;
import org.apache.beam.sdk.coders.Coder;
import org.apache.beam.sdk.io.TextIO;
+import org.apache.beam.sdk.io.TextIO.Read;
import org.apache.beam.sdk.options.PipelineOptions;
import org.apache.beam.sdk.options.PipelineOptions.CheckEnabled;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.options.ValueProvider;
import org.apache.beam.sdk.runners.TransformHierarchy;
+import org.apache.beam.sdk.runners.TransformHierarchy.Node;
import org.apache.beam.sdk.testing.ExpectedLogs;
import org.apache.beam.sdk.testing.TestPipeline;
import org.apache.beam.sdk.transforms.Create;
@@ -331,6 +335,26 @@ public class DataflowRunnerTest {
.apply(TextIO.Write.to(options.getOutput()).withoutValidation());
}
+ /**
+ * Tests that all reads are consumed by at least one {@link PTransform}.
+ */
+ @Test
+ public void testUnconsumedReads() throws IOException {
+ DataflowPipelineOptions dataflowOptions = buildPipelineOptions();
+ RuntimeTestOptions options = dataflowOptions.as(RuntimeTestOptions.class);
+ Pipeline p = buildDataflowPipeline(dataflowOptions);
+ PCollection<String> unconsumed = p.apply(Read.from(options.getInput()).withoutValidation());
+ DataflowRunner.fromOptions(dataflowOptions).replaceTransforms(p);
+ final AtomicBoolean unconsumedSeenAsInput = new AtomicBoolean();
+ p.traverseTopologically(new PipelineVisitor.Defaults() {
+ @Override
+ public void visitPrimitiveTransform(Node node) {
+ unconsumedSeenAsInput.set(true);
+ }
+ });
+ assertThat(unconsumedSeenAsInput.get(), is(true));
+ }
+
@Test
public void testRunReturnDifferentRequestId() throws IOException {
DataflowPipelineOptions options = buildPipelineOptions();
[49/50] [abbrv] beam git commit: This closes #2592
Posted by dh...@apache.org.
This closes #2592
Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/19ae8776
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/19ae8776
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/19ae8776
Branch: refs/heads/DSL_SQL
Commit: 19ae8776261a5a78044091d9172223244a2b8042
Parents: 391fb77 546aa61
Author: Dan Halperin <dh...@google.com>
Authored: Wed Apr 19 12:07:37 2017 -0700
Committer: Dan Halperin <dh...@google.com>
Committed: Wed Apr 19 12:07:37 2017 -0700
----------------------------------------------------------------------
runners/apex/pom.xml | 1 +
runners/direct-java/pom.xml | 1 +
runners/flink/pom.xml | 2 ++
runners/google-cloud-dataflow-java/pom.xml | 43 +++++++++++++++++++++++++
runners/pom.xml | 40 -----------------------
runners/spark/pom.xml | 1 +
6 files changed, 48 insertions(+), 40 deletions(-)
----------------------------------------------------------------------
[21/50] [abbrv] beam git commit: [BEAM-1914] This closes #2558
Posted by dh...@apache.org.
[BEAM-1914] This closes #2558
Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/470808c0
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/470808c0
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/470808c0
Branch: refs/heads/DSL_SQL
Commit: 470808c06fc10ad545712d6b1831530e3d5313ad
Parents: 57929fb d0c0a60
Author: Jean-Baptiste Onofré <jb...@apache.org>
Authored: Wed Apr 19 10:58:42 2017 +0200
Committer: Jean-Baptiste Onofré <jb...@apache.org>
Committed: Wed Apr 19 10:58:42 2017 +0200
----------------------------------------------------------------------
.../apache/beam/sdk/io/CompressedSource.java | 4 +-
.../main/java/org/apache/beam/sdk/io/XmlIO.java | 477 +++++++++++++++++++
.../java/org/apache/beam/sdk/io/XmlSink.java | 226 ++-------
.../java/org/apache/beam/sdk/io/XmlSource.java | 191 +-------
.../sdk/transforms/display/DisplayData.java | 6 +
.../org/apache/beam/sdk/io/XmlSinkTest.java | 89 ++--
.../org/apache/beam/sdk/io/XmlSourceTest.java | 248 ++++++----
.../sdk/transforms/display/DisplayDataTest.java | 17 +
8 files changed, 740 insertions(+), 518 deletions(-)
----------------------------------------------------------------------
[27/50] [abbrv] beam git commit: [BEAM-1994] Remove Flink examples
package
Posted by dh...@apache.org.
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/io/BoundedSourceWrapper.java
----------------------------------------------------------------------
diff --git a/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/io/BoundedSourceWrapper.java b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/io/BoundedSourceWrapper.java
new file mode 100644
index 0000000..2ed5024
--- /dev/null
+++ b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/io/BoundedSourceWrapper.java
@@ -0,0 +1,218 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.runners.flink.translation.wrappers.streaming.io;
+
+import com.google.common.annotations.VisibleForTesting;
+import java.util.ArrayList;
+import java.util.List;
+import org.apache.beam.runners.flink.translation.utils.SerializedPipelineOptions;
+import org.apache.beam.sdk.io.BoundedSource;
+import org.apache.beam.sdk.options.PipelineOptions;
+import org.apache.beam.sdk.transforms.windowing.GlobalWindow;
+import org.apache.beam.sdk.transforms.windowing.PaneInfo;
+import org.apache.beam.sdk.util.WindowedValue;
+import org.apache.flink.api.common.functions.StoppableFunction;
+import org.apache.flink.streaming.api.functions.source.RichParallelSourceFunction;
+import org.apache.flink.streaming.api.watermark.Watermark;
+import org.joda.time.Instant;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Wrapper for executing {@link BoundedSource BoundedSources} as a Flink Source.
+ */
+public class BoundedSourceWrapper<OutputT>
+ extends RichParallelSourceFunction<WindowedValue<OutputT>>
+ implements StoppableFunction {
+
+ private static final Logger LOG = LoggerFactory.getLogger(BoundedSourceWrapper.class);
+
+ /**
+ * Keep the options so that we can initialize the readers.
+ */
+ private final SerializedPipelineOptions serializedOptions;
+
+ /**
+ * The split sources. We split them in the constructor to ensure that all parallel
+ * instances of the source agree on the same splits.
+ */
+ private List<? extends BoundedSource<OutputT>> splitSources;
+
+ /**
+ * Make it a field so that we can access it in {@link #close()}.
+ */
+ private transient List<BoundedSource.BoundedReader<OutputT>> readers;
+
+ /**
+ * Initialize here and not in run() to prevent races where we cancel a job before run() is
+ * ever called or run() is called after cancel().
+ */
+ private volatile boolean isRunning = true;
+
+ @SuppressWarnings("unchecked")
+ public BoundedSourceWrapper(
+ PipelineOptions pipelineOptions,
+ BoundedSource<OutputT> source,
+ int parallelism) throws Exception {
+ this.serializedOptions = new SerializedPipelineOptions(pipelineOptions);
+
+ long desiredBundleSize = source.getEstimatedSizeBytes(pipelineOptions) / parallelism;
+
+ // get the splits early. we assume that the generated splits are stable,
+ // this is necessary so that the mapping of state to source is correct
+ // when restoring
+ splitSources = source.split(desiredBundleSize, pipelineOptions);
+ }
+
+ @Override
+ public void run(SourceContext<WindowedValue<OutputT>> ctx) throws Exception {
+
+ // figure out which split sources we're responsible for
+ int subtaskIndex = getRuntimeContext().getIndexOfThisSubtask();
+ int numSubtasks = getRuntimeContext().getNumberOfParallelSubtasks();
+
+ List<BoundedSource<OutputT>> localSources = new ArrayList<>();
+
+ for (int i = 0; i < splitSources.size(); i++) {
+ if (i % numSubtasks == subtaskIndex) {
+ localSources.add(splitSources.get(i));
+ }
+ }
+
+ LOG.info("Bounded Flink Source {}/{} is reading from sources: {}",
+ subtaskIndex,
+ numSubtasks,
+ localSources);
+
+ readers = new ArrayList<>();
+ // initialize readers from scratch
+ for (BoundedSource<OutputT> source : localSources) {
+ readers.add(source.createReader(serializedOptions.getPipelineOptions()));
+ }
+
+ if (readers.size() == 1) {
+ // the easy case, we just read from one reader
+ BoundedSource.BoundedReader<OutputT> reader = readers.get(0);
+
+ boolean dataAvailable = reader.start();
+ if (dataAvailable) {
+ emitElement(ctx, reader);
+ }
+
+ while (isRunning) {
+ dataAvailable = reader.advance();
+
+ if (dataAvailable) {
+ emitElement(ctx, reader);
+ } else {
+ break;
+ }
+ }
+ } else {
+ // a bit more complicated, we are responsible for several readers
+ // loop through them and sleep if none of them had any data
+
+ int currentReader = 0;
+
+ // start each reader and emit data if immediately available
+ for (BoundedSource.BoundedReader<OutputT> reader : readers) {
+ boolean dataAvailable = reader.start();
+ if (dataAvailable) {
+ emitElement(ctx, reader);
+ }
+ }
+
+ // a flag telling us whether any of the readers had data
+ // if no reader had data, sleep for bit
+ boolean hadData = false;
+ while (isRunning && !readers.isEmpty()) {
+ BoundedSource.BoundedReader<OutputT> reader = readers.get(currentReader);
+ boolean dataAvailable = reader.advance();
+
+ if (dataAvailable) {
+ emitElement(ctx, reader);
+ hadData = true;
+ } else {
+ readers.remove(currentReader);
+ currentReader--;
+ if (readers.isEmpty()) {
+ break;
+ }
+ }
+
+ currentReader = (currentReader + 1) % readers.size();
+ if (currentReader == 0 && !hadData) {
+ Thread.sleep(50);
+ } else if (currentReader == 0) {
+ hadData = false;
+ }
+ }
+
+ }
+
+ // emit final Long.MAX_VALUE watermark, just to be sure
+ ctx.emitWatermark(new Watermark(Long.MAX_VALUE));
+ }
+
+ /**
+ * Emit the current element from the given Reader. The reader is guaranteed to have data.
+ */
+ private void emitElement(
+ SourceContext<WindowedValue<OutputT>> ctx,
+ BoundedSource.BoundedReader<OutputT> reader) {
+ // make sure that reader state update and element emission are atomic
+ // with respect to snapshots
+ synchronized (ctx.getCheckpointLock()) {
+
+ OutputT item = reader.getCurrent();
+ Instant timestamp = reader.getCurrentTimestamp();
+
+ WindowedValue<OutputT> windowedValue =
+ WindowedValue.of(item, timestamp, GlobalWindow.INSTANCE, PaneInfo.NO_FIRING);
+ ctx.collectWithTimestamp(windowedValue, timestamp.getMillis());
+ }
+ }
+
+ @Override
+ public void close() throws Exception {
+ super.close();
+ if (readers != null) {
+ for (BoundedSource.BoundedReader<OutputT> reader: readers) {
+ reader.close();
+ }
+ }
+ }
+
+ @Override
+ public void cancel() {
+ isRunning = false;
+ }
+
+ @Override
+ public void stop() {
+ this.isRunning = false;
+ }
+
+ /**
+ * Visible so that we can check this in tests. Must not be used for anything else.
+ */
+ @VisibleForTesting
+ public List<? extends BoundedSource<OutputT>> getSplitSources() {
+ return splitSources;
+ }
+}
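
A brief standalone sketch (the parallelism and split count are arbitrary) of the round-robin assignment used in run(): split i is read by the subtask whose index equals i modulo the number of subtasks.

    public class SplitAssignmentSketch {
      public static void main(String[] args) {
        int numSubtasks = 3;   // parallelism of the Flink source
        int numSplits = 7;     // pretend split() produced 7 shards

        for (int subtaskIndex = 0; subtaskIndex < numSubtasks; subtaskIndex++) {
          StringBuilder assigned = new StringBuilder();
          for (int i = 0; i < numSplits; i++) {
            if (i % numSubtasks == subtaskIndex) {   // the same test used in run()
              assigned.append(i).append(' ');
            }
          }
          System.out.println("subtask " + subtaskIndex + " reads splits: " + assigned);
        }
      }
    }
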
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/io/UnboundedSocketSource.java
----------------------------------------------------------------------
diff --git a/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/io/UnboundedSocketSource.java b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/io/UnboundedSocketSource.java
new file mode 100644
index 0000000..910a33f
--- /dev/null
+++ b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/io/UnboundedSocketSource.java
@@ -0,0 +1,249 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.runners.flink.translation.wrappers.streaming.io;
+
+import static com.google.common.base.Preconditions.checkArgument;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.net.InetSocketAddress;
+import java.net.Socket;
+import java.util.Collections;
+import java.util.List;
+import java.util.NoSuchElementException;
+import javax.annotation.Nullable;
+import org.apache.beam.sdk.coders.Coder;
+import org.apache.beam.sdk.coders.StringUtf8Coder;
+import org.apache.beam.sdk.io.UnboundedSource;
+import org.apache.beam.sdk.options.PipelineOptions;
+import org.joda.time.Instant;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * An example unbounded Beam source that reads input from a socket.
+ * This is used mainly for testing and debugging.
+ */
+public class UnboundedSocketSource<CheckpointMarkT extends UnboundedSource.CheckpointMark>
+ extends UnboundedSource<String, CheckpointMarkT> {
+
+ private static final Coder<String> DEFAULT_SOCKET_CODER = StringUtf8Coder.of();
+
+ private static final long serialVersionUID = 1L;
+
+ private static final int DEFAULT_CONNECTION_RETRY_SLEEP = 500;
+
+ private static final int CONNECTION_TIMEOUT_TIME = 0;
+
+ private final String hostname;
+ private final int port;
+ private final char delimiter;
+ private final long maxNumRetries;
+ private final long delayBetweenRetries;
+
+ public UnboundedSocketSource(String hostname, int port, char delimiter, long maxNumRetries) {
+ this(hostname, port, delimiter, maxNumRetries, DEFAULT_CONNECTION_RETRY_SLEEP);
+ }
+
+ public UnboundedSocketSource(String hostname,
+ int port,
+ char delimiter,
+ long maxNumRetries,
+ long delayBetweenRetries) {
+ this.hostname = hostname;
+ this.port = port;
+ this.delimiter = delimiter;
+ this.maxNumRetries = maxNumRetries;
+ this.delayBetweenRetries = delayBetweenRetries;
+ }
+
+ public String getHostname() {
+ return this.hostname;
+ }
+
+ public int getPort() {
+ return this.port;
+ }
+
+ public char getDelimiter() {
+ return this.delimiter;
+ }
+
+ public long getMaxNumRetries() {
+ return this.maxNumRetries;
+ }
+
+ public long getDelayBetweenRetries() {
+ return this.delayBetweenRetries;
+ }
+
+ @Override
+ public List<? extends UnboundedSource<String, CheckpointMarkT>> split(
+ int desiredNumSplits,
+ PipelineOptions options) throws Exception {
+ return Collections.<UnboundedSource<String, CheckpointMarkT>>singletonList(this);
+ }
+
+ @Override
+ public UnboundedReader<String> createReader(PipelineOptions options,
+ @Nullable CheckpointMarkT checkpointMark) {
+ return new UnboundedSocketReader(this);
+ }
+
+ @Nullable
+ @Override
+ public Coder getCheckpointMarkCoder() {
+ // Flink and Dataflow have different checkpointing mechanisms.
+ // In our case we do not need a coder.
+ return null;
+ }
+
+ @Override
+ public void validate() {
+ checkArgument(port > 0 && port < 65536, "port is out of range");
+ checkArgument(maxNumRetries >= -1, "maxNumRetries must be zero or larger (num retries), "
+ + "or -1 (infinite retries)");
+ checkArgument(delayBetweenRetries >= 0, "delayBetweenRetries must be zero or positive");
+ }
+
+ @Override
+ public Coder getDefaultOutputCoder() {
+ return DEFAULT_SOCKET_CODER;
+ }
+
+ /**
+ * Unbounded socket reader.
+ */
+ public static class UnboundedSocketReader extends UnboundedSource.UnboundedReader<String> {
+
+ private static final Logger LOG = LoggerFactory.getLogger(UnboundedSocketReader.class);
+
+ private final UnboundedSocketSource source;
+
+ private Socket socket;
+ private BufferedReader reader;
+
+ private boolean isRunning;
+
+ private String currentRecord;
+
+ public UnboundedSocketReader(UnboundedSocketSource source) {
+ this.source = source;
+ }
+
+ private void openConnection() throws IOException {
+ this.socket = new Socket();
+ this.socket.connect(new InetSocketAddress(this.source.getHostname(), this.source.getPort()),
+ CONNECTION_TIMEOUT_TIME);
+ this.reader = new BufferedReader(new InputStreamReader(this.socket.getInputStream()));
+ this.isRunning = true;
+ }
+
+ @Override
+ public boolean start() throws IOException {
+ int attempt = 0;
+ while (!isRunning) {
+ try {
+ openConnection();
+ LOG.info("Connected to server socket " + this.source.getHostname() + ':'
+ + this.source.getPort());
+
+ return advance();
+ } catch (IOException e) {
+ LOG.info("Lost connection to server socket " + this.source.getHostname() + ':'
+ + this.source.getPort() + ". Retrying in "
+ + this.source.getDelayBetweenRetries() + " msecs...");
+
+ if (this.source.getMaxNumRetries() == -1 || attempt++ < this.source.getMaxNumRetries()) {
+ try {
+ Thread.sleep(this.source.getDelayBetweenRetries());
+ } catch (InterruptedException e1) {
+ e1.printStackTrace();
+ }
+ } else {
+ this.isRunning = false;
+ break;
+ }
+ }
+ }
+ LOG.error("Unable to connect to host " + this.source.getHostname()
+ + " : " + this.source.getPort());
+ return false;
+ }
+
+ @Override
+ public boolean advance() throws IOException {
+ final StringBuilder buffer = new StringBuilder();
+ int data;
+ while (isRunning && (data = reader.read()) != -1) {
+ // check if the string is complete
+ if (data != this.source.getDelimiter()) {
+ buffer.append((char) data);
+ } else {
+ if (buffer.length() > 0 && buffer.charAt(buffer.length() - 1) == '\r') {
+ buffer.setLength(buffer.length() - 1);
+ }
+ this.currentRecord = buffer.toString();
+ buffer.setLength(0);
+ return true;
+ }
+ }
+ return false;
+ }
+
+ @Override
+ public byte[] getCurrentRecordId() throws NoSuchElementException {
+ return new byte[0];
+ }
+
+ @Override
+ public String getCurrent() throws NoSuchElementException {
+ return this.currentRecord;
+ }
+
+ @Override
+ public Instant getCurrentTimestamp() throws NoSuchElementException {
+ return Instant.now();
+ }
+
+ @Override
+ public void close() throws IOException {
+ this.reader.close();
+ this.socket.close();
+ this.isRunning = false;
+ LOG.info("Closed connection to server socket at " + this.source.getHostname() + ":"
+ + this.source.getPort() + ".");
+ }
+
+ @Override
+ public Instant getWatermark() {
+ return Instant.now();
+ }
+
+ @Override
+ public CheckpointMark getCheckpointMark() {
+ return null;
+ }
+
+ @Override
+ public UnboundedSource<String, ?> getCurrentSource() {
+ return this.source;
+ }
+ }
+}
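
A small configuration sketch (hostname and port are placeholders): the source reads '\n'-delimited records and retries the connection a bounded number of times, and validate() enforces the argument ranges checked above.

    import org.apache.beam.runners.flink.translation.wrappers.streaming.io.UnboundedSocketSource;
    import org.apache.beam.sdk.io.UnboundedSource;

    public class SocketSourceSketch {
      public static void main(String[] args) {
        // '\n'-delimited records, up to 3 reconnection attempts, default 500 ms back-off.
        UnboundedSocketSource<UnboundedSource.CheckpointMark> source =
            new UnboundedSocketSource<>("localhost", 9999, '\n', 3);

        // Throws IllegalArgumentException if the port is out of range, maxNumRetries < -1,
        // or delayBetweenRetries is negative.
        source.validate();
      }
    }
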
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/io/UnboundedSourceWrapper.java
----------------------------------------------------------------------
diff --git a/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/io/UnboundedSourceWrapper.java b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/io/UnboundedSourceWrapper.java
new file mode 100644
index 0000000..bb9b58a
--- /dev/null
+++ b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/io/UnboundedSourceWrapper.java
@@ -0,0 +1,476 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.runners.flink.translation.wrappers.streaming.io;
+
+import com.google.common.annotations.VisibleForTesting;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.LinkedHashMap;
+import java.util.List;
+import org.apache.beam.runners.flink.translation.types.CoderTypeInformation;
+import org.apache.beam.runners.flink.translation.utils.SerializedPipelineOptions;
+import org.apache.beam.sdk.coders.Coder;
+import org.apache.beam.sdk.coders.KvCoder;
+import org.apache.beam.sdk.coders.SerializableCoder;
+import org.apache.beam.sdk.io.UnboundedSource;
+import org.apache.beam.sdk.options.PipelineOptions;
+import org.apache.beam.sdk.transforms.windowing.GlobalWindow;
+import org.apache.beam.sdk.transforms.windowing.PaneInfo;
+import org.apache.beam.sdk.util.WindowedValue;
+import org.apache.beam.sdk.values.KV;
+import org.apache.beam.sdk.values.TypeDescriptor;
+import org.apache.flink.api.common.ExecutionConfig;
+import org.apache.flink.api.common.functions.StoppableFunction;
+import org.apache.flink.api.common.state.ListState;
+import org.apache.flink.api.common.state.ListStateDescriptor;
+import org.apache.flink.api.common.state.OperatorStateStore;
+import org.apache.flink.configuration.Configuration;
+import org.apache.flink.runtime.state.CheckpointListener;
+import org.apache.flink.runtime.state.DefaultOperatorStateBackend;
+import org.apache.flink.runtime.state.FunctionInitializationContext;
+import org.apache.flink.runtime.state.FunctionSnapshotContext;
+import org.apache.flink.streaming.api.checkpoint.CheckpointedFunction;
+import org.apache.flink.streaming.api.functions.source.RichParallelSourceFunction;
+import org.apache.flink.streaming.api.operators.StreamingRuntimeContext;
+import org.apache.flink.streaming.api.watermark.Watermark;
+import org.apache.flink.streaming.runtime.tasks.ProcessingTimeCallback;
+import org.joda.time.Instant;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Wrapper for executing {@link UnboundedSource UnboundedSources} as a Flink Source.
+ */
+public class UnboundedSourceWrapper<
+ OutputT, CheckpointMarkT extends UnboundedSource.CheckpointMark>
+ extends RichParallelSourceFunction<WindowedValue<OutputT>>
+ implements ProcessingTimeCallback, StoppableFunction,
+ CheckpointListener, CheckpointedFunction {
+
+ private static final Logger LOG = LoggerFactory.getLogger(UnboundedSourceWrapper.class);
+
+ /**
+ * Keep the options so that we can initialize the localReaders.
+ */
+ private final SerializedPipelineOptions serializedOptions;
+
+ /**
+ * For snapshot and restore.
+ */
+ private final KvCoder<
+ ? extends UnboundedSource<OutputT, CheckpointMarkT>, CheckpointMarkT> checkpointCoder;
+
+ /**
+ * The split sources. We split them in the constructor to ensure that all parallel
+ * instances of the source agree on the same splits.
+ */
+ private final List<? extends UnboundedSource<OutputT, CheckpointMarkT>> splitSources;
+
+ /**
+ * The local split sources. Assigned at runtime when the wrapper is executed in parallel.
+ */
+ private transient List<UnboundedSource<OutputT, CheckpointMarkT>> localSplitSources;
+
+ /**
+ * The local split readers. Assigned at runtime when the wrapper is executed in parallel.
+ * Make it a field so that we can access it in {@link #onProcessingTime(long)} for
+ * emitting watermarks.
+ */
+ private transient List<UnboundedSource.UnboundedReader<OutputT>> localReaders;
+
+ /**
+ * Flag to indicate whether the source is running.
+ * Initialize here and not in run() to prevent races where we cancel a job before run() is
+ * ever called or run() is called after cancel().
+ */
+ private volatile boolean isRunning = true;
+
+ /**
+ * Make it a field so that we can access it in {@link #onProcessingTime(long)} for registering new
+ * triggers.
+ */
+ private transient StreamingRuntimeContext runtimeContext;
+
+ /**
+ * Make it a field so that we can access it in {@link #onProcessingTime(long)} for emitting
+ * watermarks.
+ */
+ private transient SourceContext<WindowedValue<OutputT>> context;
+
+ /**
+ * Pending checkpoints which have not been acknowledged yet.
+ */
+ private transient LinkedHashMap<Long, List<CheckpointMarkT>> pendingCheckpoints;
+ /**
+ * Keep a maximum of 32 checkpoints for {@code CheckpointMark.finalizeCheckpoint()}.
+ */
+ private static final int MAX_NUMBER_PENDING_CHECKPOINTS = 32;
+
+ private transient ListState<KV<? extends
+ UnboundedSource<OutputT, CheckpointMarkT>, CheckpointMarkT>> stateForCheckpoint;
+
+ /**
+ * False when checkpointCoder is null or when no state was restored on the first start.
+ */
+ private transient boolean isRestored = false;
+
+ @SuppressWarnings("unchecked")
+ public UnboundedSourceWrapper(
+ PipelineOptions pipelineOptions,
+ UnboundedSource<OutputT, CheckpointMarkT> source,
+ int parallelism) throws Exception {
+ this.serializedOptions = new SerializedPipelineOptions(pipelineOptions);
+
+ if (source.requiresDeduping()) {
+ LOG.warn("Source {} requires deduping but Flink runner doesn't support this yet.", source);
+ }
+
+ Coder<CheckpointMarkT> checkpointMarkCoder = source.getCheckpointMarkCoder();
+ if (checkpointMarkCoder == null) {
+ LOG.info("No CheckpointMarkCoder specified for this source. Won't create snapshots.");
+ checkpointCoder = null;
+ } else {
+
+ Coder<? extends UnboundedSource<OutputT, CheckpointMarkT>> sourceCoder =
+ (Coder) SerializableCoder.of(new TypeDescriptor<UnboundedSource>() {
+ });
+
+ checkpointCoder = KvCoder.of(sourceCoder, checkpointMarkCoder);
+ }
+
+ // get the splits early. we assume that the generated splits are stable,
+ // this is necessary so that the mapping of state to source is correct
+ // when restoring
+ splitSources = source.split(parallelism, pipelineOptions);
+ }
+
+
+ /**
+ * Initialize and restore state before starting execution of the source.
+ */
+ @Override
+ public void open(Configuration parameters) throws Exception {
+ runtimeContext = (StreamingRuntimeContext) getRuntimeContext();
+
+ // figure out which split sources we're responsible for
+ int subtaskIndex = runtimeContext.getIndexOfThisSubtask();
+ int numSubtasks = runtimeContext.getNumberOfParallelSubtasks();
+
+ localSplitSources = new ArrayList<>();
+ localReaders = new ArrayList<>();
+
+ pendingCheckpoints = new LinkedHashMap<>();
+
+ if (isRestored) {
+ // restore the splitSources from the checkpoint to ensure consistent ordering
+ for (KV<? extends UnboundedSource<OutputT, CheckpointMarkT>, CheckpointMarkT> restored:
+ stateForCheckpoint.get()) {
+ localSplitSources.add(restored.getKey());
+ localReaders.add(restored.getKey().createReader(
+ serializedOptions.getPipelineOptions(), restored.getValue()));
+ }
+ } else {
+ // initialize localReaders and localSources from scratch
+ for (int i = 0; i < splitSources.size(); i++) {
+ if (i % numSubtasks == subtaskIndex) {
+ UnboundedSource<OutputT, CheckpointMarkT> source =
+ splitSources.get(i);
+ UnboundedSource.UnboundedReader<OutputT> reader =
+ source.createReader(serializedOptions.getPipelineOptions(), null);
+ localSplitSources.add(source);
+ localReaders.add(reader);
+ }
+ }
+ }
+
+ LOG.info("Unbounded Flink Source {}/{} is reading from sources: {}",
+ subtaskIndex,
+ numSubtasks,
+ localSplitSources);
+ }
+
+ @Override
+ public void run(SourceContext<WindowedValue<OutputT>> ctx) throws Exception {
+
+ context = ctx;
+
+ if (localReaders.size() == 0) {
+ // do nothing, but still look busy ...
+ // also, output a Long.MAX_VALUE watermark since we know that we're not
+ // going to emit anything
+ // we can't return here since Flink requires that all operators stay up,
+ // otherwise checkpointing would not work correctly anymore
+ ctx.emitWatermark(new Watermark(Long.MAX_VALUE));
+
+ // wait until this is canceled
+ final Object waitLock = new Object();
+ while (isRunning) {
+ try {
+ // Flink will interrupt us at some point
+ //noinspection SynchronizationOnLocalVariableOrMethodParameter
+ synchronized (waitLock) {
+ // don't wait indefinitely, in case something goes horribly wrong
+ waitLock.wait(1000);
+ }
+ } catch (InterruptedException e) {
+ if (!isRunning) {
+ // restore the interrupted state, and fall through the loop
+ Thread.currentThread().interrupt();
+ }
+ }
+ }
+ } else if (localReaders.size() == 1) {
+ // the easy case, we just read from one reader
+ UnboundedSource.UnboundedReader<OutputT> reader = localReaders.get(0);
+
+ boolean dataAvailable = reader.start();
+ if (dataAvailable) {
+ emitElement(ctx, reader);
+ }
+
+ setNextWatermarkTimer(this.runtimeContext);
+
+ while (isRunning) {
+ dataAvailable = reader.advance();
+
+ if (dataAvailable) {
+ emitElement(ctx, reader);
+ } else {
+ Thread.sleep(50);
+ }
+ }
+ } else {
+ // a bit more complicated, we are responsible for several localReaders
+ // loop through them and sleep if none of them had any data
+
+ int numReaders = localReaders.size();
+ int currentReader = 0;
+
+ // start each reader and emit data if immediately available
+ for (UnboundedSource.UnboundedReader<OutputT> reader : localReaders) {
+ boolean dataAvailable = reader.start();
+ if (dataAvailable) {
+ emitElement(ctx, reader);
+ }
+ }
+
+ // a flag telling us whether any of the localReaders had data
+ // if no reader had data, sleep for bit
+ boolean hadData = false;
+ while (isRunning) {
+ UnboundedSource.UnboundedReader<OutputT> reader = localReaders.get(currentReader);
+ boolean dataAvailable = reader.advance();
+
+ if (dataAvailable) {
+ emitElement(ctx, reader);
+ hadData = true;
+ }
+
+ currentReader = (currentReader + 1) % numReaders;
+ if (currentReader == 0 && !hadData) {
+ Thread.sleep(50);
+ } else if (currentReader == 0) {
+ hadData = false;
+ }
+ }
+
+ }
+ }
+
+ /**
+ * Emit the current element from the given Reader. The reader is guaranteed to have data.
+ */
+ private void emitElement(
+ SourceContext<WindowedValue<OutputT>> ctx,
+ UnboundedSource.UnboundedReader<OutputT> reader) {
+ // make sure that reader state update and element emission are atomic
+ // with respect to snapshots
+ synchronized (ctx.getCheckpointLock()) {
+
+ OutputT item = reader.getCurrent();
+ Instant timestamp = reader.getCurrentTimestamp();
+
+ WindowedValue<OutputT> windowedValue =
+ WindowedValue.of(item, timestamp, GlobalWindow.INSTANCE, PaneInfo.NO_FIRING);
+ ctx.collectWithTimestamp(windowedValue, timestamp.getMillis());
+ }
+ }
+
+ @Override
+ public void close() throws Exception {
+ super.close();
+ if (localReaders != null) {
+ for (UnboundedSource.UnboundedReader<OutputT> reader: localReaders) {
+ reader.close();
+ }
+ }
+ }
+
+ @Override
+ public void cancel() {
+ isRunning = false;
+ }
+
+ @Override
+ public void stop() {
+ isRunning = false;
+ }
+
+ // ------------------------------------------------------------------------
+ // Checkpoint and restore
+ // ------------------------------------------------------------------------
+
+ @Override
+ public void snapshotState(FunctionSnapshotContext functionSnapshotContext) throws Exception {
+ if (!isRunning) {
+ LOG.debug("snapshotState() called on closed source");
+ } else {
+
+ if (checkpointCoder == null) {
+ // no checkpoint coder available in this source
+ return;
+ }
+
+ stateForCheckpoint.clear();
+
+ long checkpointId = functionSnapshotContext.getCheckpointId();
+
+ // we checkpoint the sources along with the CheckpointMarkT to ensure
+ // than we have a correct mapping of checkpoints to sources when
+ // restoring
+ List<CheckpointMarkT> checkpointMarks = new ArrayList<>(localSplitSources.size());
+
+ for (int i = 0; i < localSplitSources.size(); i++) {
+ UnboundedSource<OutputT, CheckpointMarkT> source = localSplitSources.get(i);
+ UnboundedSource.UnboundedReader<OutputT> reader = localReaders.get(i);
+
+ @SuppressWarnings("unchecked")
+ CheckpointMarkT mark = (CheckpointMarkT) reader.getCheckpointMark();
+ checkpointMarks.add(mark);
+ KV<UnboundedSource<OutputT, CheckpointMarkT>, CheckpointMarkT> kv =
+ KV.of(source, mark);
+ stateForCheckpoint.add(kv);
+ }
+
+ // cleanup old pending checkpoints and add new checkpoint
+ int diff = pendingCheckpoints.size() - MAX_NUMBER_PENDING_CHECKPOINTS;
+ if (diff >= 0) {
+ for (Iterator<Long> iterator = pendingCheckpoints.keySet().iterator();
+ diff >= 0;
+ diff--) {
+ iterator.next();
+ iterator.remove();
+ }
+ }
+ pendingCheckpoints.put(checkpointId, checkpointMarks);
+
+ }
+ }
+
+ @Override
+ public void initializeState(FunctionInitializationContext context) throws Exception {
+ if (checkpointCoder == null) {
+ // no checkpoint coder available in this source
+ return;
+ }
+
+ OperatorStateStore stateStore = context.getOperatorStateStore();
+ CoderTypeInformation<
+ KV<? extends UnboundedSource<OutputT, CheckpointMarkT>, CheckpointMarkT>>
+ typeInformation = (CoderTypeInformation) new CoderTypeInformation<>(checkpointCoder);
+ stateForCheckpoint = stateStore.getOperatorState(
+ new ListStateDescriptor<>(DefaultOperatorStateBackend.DEFAULT_OPERATOR_STATE_NAME,
+ typeInformation.createSerializer(new ExecutionConfig())));
+
+ if (context.isRestored()) {
+ isRestored = true;
+ LOG.info("Having restore state in the UnbounedSourceWrapper.");
+ } else {
+ LOG.info("No restore state for UnbounedSourceWrapper.");
+ }
+ }
+
+ @Override
+ public void onProcessingTime(long timestamp) throws Exception {
+ if (this.isRunning) {
+ synchronized (context.getCheckpointLock()) {
+ // find minimum watermark over all localReaders
+ long watermarkMillis = Long.MAX_VALUE;
+ for (UnboundedSource.UnboundedReader<OutputT> reader: localReaders) {
+ Instant watermark = reader.getWatermark();
+ if (watermark != null) {
+ watermarkMillis = Math.min(watermark.getMillis(), watermarkMillis);
+ }
+ }
+ context.emitWatermark(new Watermark(watermarkMillis));
+ }
+ setNextWatermarkTimer(this.runtimeContext);
+ }
+ }
+
+ private void setNextWatermarkTimer(StreamingRuntimeContext runtime) {
+ if (this.isRunning) {
+ long watermarkInterval = runtime.getExecutionConfig().getAutoWatermarkInterval();
+ long timeToNextWatermark = getTimeToNextWatermark(watermarkInterval);
+ runtime.getProcessingTimeService().registerTimer(timeToNextWatermark, this);
+ }
+ }
+
+ private long getTimeToNextWatermark(long watermarkInterval) {
+ return System.currentTimeMillis() + watermarkInterval;
+ }
+
+ /**
+ * Visible so that we can check this in tests. Must not be used for anything else.
+ */
+ @VisibleForTesting
+ public List<? extends UnboundedSource<OutputT, CheckpointMarkT>> getSplitSources() {
+ return splitSources;
+ }
+
+ /**
+ * Visible so that we can check this in tests. Must not be used for anything else.
+ */
+ @VisibleForTesting
+ public List<? extends UnboundedSource<OutputT, CheckpointMarkT>> getLocalSplitSources() {
+ return localSplitSources;
+ }
+
+ @Override
+ public void notifyCheckpointComplete(long checkpointId) throws Exception {
+
+ List<CheckpointMarkT> checkpointMarks = pendingCheckpoints.get(checkpointId);
+
+ if (checkpointMarks != null) {
+
+ // remove old checkpoints including the current one
+ Iterator<Long> iterator = pendingCheckpoints.keySet().iterator();
+ long currentId;
+ do {
+ currentId = iterator.next();
+ iterator.remove();
+ } while (currentId != checkpointId);
+
+ // confirm all marks
+ for (CheckpointMarkT mark : checkpointMarks) {
+ mark.finalizeCheckpoint();
+ }
+
+ }
+ }
+}
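
A brief construction sketch (the parallelism of 2 and the use of CountingSource are illustrative; running the wrapper additionally requires a Flink runtime context, which is omitted here). It shows the eager splitting done in the constructor, which keeps the split-to-checkpoint-state mapping stable across restores:

    import org.apache.beam.runners.flink.translation.wrappers.streaming.io.UnboundedSourceWrapper;
    import org.apache.beam.sdk.io.CountingSource;
    import org.apache.beam.sdk.io.CountingSource.CounterMark;
    import org.apache.beam.sdk.options.PipelineOptions;
    import org.apache.beam.sdk.options.PipelineOptionsFactory;

    public class UnboundedWrapperSketch {
      public static void main(String[] args) throws Exception {
        PipelineOptions options = PipelineOptionsFactory.create();

        // Splits the source into 2 shards up front; each subtask later picks its shards
        // by index, and each checkpoint pairs a shard with its CheckpointMark.
        UnboundedSourceWrapper<Long, CounterMark> wrapper =
            new UnboundedSourceWrapper<>(options, CountingSource.unbounded(), 2);

        System.out.println("split sources: " + wrapper.getSplitSources().size());
      }
    }
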
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/io/package-info.java
----------------------------------------------------------------------
diff --git a/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/io/package-info.java b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/io/package-info.java
new file mode 100644
index 0000000..b431ce7
--- /dev/null
+++ b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/io/package-info.java
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Internal implementation of the Beam runner for Apache Flink.
+ */
+package org.apache.beam.runners.flink.translation.wrappers.streaming.io;
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/package-info.java
----------------------------------------------------------------------
diff --git a/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/package-info.java b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/package-info.java
new file mode 100644
index 0000000..0674871
--- /dev/null
+++ b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/package-info.java
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Internal implementation of the Beam runner for Apache Flink.
+ */
+package org.apache.beam.runners.flink.translation.wrappers.streaming;
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/state/FlinkBroadcastStateInternals.java
----------------------------------------------------------------------
diff --git a/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/state/FlinkBroadcastStateInternals.java b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/state/FlinkBroadcastStateInternals.java
new file mode 100644
index 0000000..3203446
--- /dev/null
+++ b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/state/FlinkBroadcastStateInternals.java
@@ -0,0 +1,865 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.runners.flink.translation.wrappers.streaming.state;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import org.apache.beam.runners.core.StateInternals;
+import org.apache.beam.runners.core.StateNamespace;
+import org.apache.beam.runners.core.StateTag;
+import org.apache.beam.runners.flink.translation.types.CoderTypeInformation;
+import org.apache.beam.sdk.coders.Coder;
+import org.apache.beam.sdk.coders.ListCoder;
+import org.apache.beam.sdk.coders.MapCoder;
+import org.apache.beam.sdk.coders.StringUtf8Coder;
+import org.apache.beam.sdk.transforms.Combine;
+import org.apache.beam.sdk.transforms.CombineWithContext;
+import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
+import org.apache.beam.sdk.transforms.windowing.OutputTimeFn;
+import org.apache.beam.sdk.util.CombineContextFactory;
+import org.apache.beam.sdk.util.state.BagState;
+import org.apache.beam.sdk.util.state.CombiningState;
+import org.apache.beam.sdk.util.state.MapState;
+import org.apache.beam.sdk.util.state.ReadableState;
+import org.apache.beam.sdk.util.state.SetState;
+import org.apache.beam.sdk.util.state.State;
+import org.apache.beam.sdk.util.state.StateContext;
+import org.apache.beam.sdk.util.state.StateContexts;
+import org.apache.beam.sdk.util.state.ValueState;
+import org.apache.beam.sdk.util.state.WatermarkHoldState;
+import org.apache.flink.api.common.ExecutionConfig;
+import org.apache.flink.api.common.state.ListState;
+import org.apache.flink.api.common.state.ListStateDescriptor;
+import org.apache.flink.runtime.state.DefaultOperatorStateBackend;
+import org.apache.flink.runtime.state.OperatorStateBackend;
+
+/**
+ * {@link StateInternals} that uses a Flink {@link DefaultOperatorStateBackend}
+ * to manage the broadcast state.
+ * The state is the same on all parallel instances of the operator,
+ * so only the state of operator 0 needs to be stored in the OperatorStateBackend.
+ *
+ * <p>Note: The index of the key is ignored; this is mainly used for side inputs.
+ */
+public class FlinkBroadcastStateInternals<K> implements StateInternals<K> {
+
+ private int indexInSubtaskGroup;
+ private final DefaultOperatorStateBackend stateBackend;
+ // stateName -> <namespace, state>
+ private Map<String, Map<String, ?>> stateForNonZeroOperator;
+
+ public FlinkBroadcastStateInternals(int indexInSubtaskGroup, OperatorStateBackend stateBackend) {
+ // TODO: Flink does not yet expose this through a public API
+ this.stateBackend = (DefaultOperatorStateBackend) stateBackend;
+ this.indexInSubtaskGroup = indexInSubtaskGroup;
+ if (indexInSubtaskGroup != 0) {
+ stateForNonZeroOperator = new HashMap<>();
+ }
+ }
+
+ @Override
+ public K getKey() {
+ return null;
+ }
+
+ @Override
+ public <T extends State> T state(
+ final StateNamespace namespace,
+ StateTag<? super K, T> address) {
+
+ return state(namespace, address, StateContexts.nullContext());
+ }
+
+ @Override
+ public <T extends State> T state(
+ final StateNamespace namespace,
+ StateTag<? super K, T> address,
+ final StateContext<?> context) {
+
+ return address.bind(new StateTag.StateBinder<K>() {
+
+ @Override
+ public <T> ValueState<T> bindValue(
+ StateTag<? super K, ValueState<T>> address,
+ Coder<T> coder) {
+
+ return new FlinkBroadcastValueState<>(stateBackend, address, namespace, coder);
+ }
+
+ @Override
+ public <T> BagState<T> bindBag(
+ StateTag<? super K, BagState<T>> address,
+ Coder<T> elemCoder) {
+
+ return new FlinkBroadcastBagState<>(stateBackend, address, namespace, elemCoder);
+ }
+
+ @Override
+ public <T> SetState<T> bindSet(
+ StateTag<? super K, SetState<T>> address,
+ Coder<T> elemCoder) {
+ throw new UnsupportedOperationException(
+ String.format("%s is not supported", SetState.class.getSimpleName()));
+ }
+
+ @Override
+ public <KeyT, ValueT> MapState<KeyT, ValueT> bindMap(
+ StateTag<? super K, MapState<KeyT, ValueT>> spec,
+ Coder<KeyT> mapKeyCoder, Coder<ValueT> mapValueCoder) {
+ throw new UnsupportedOperationException(
+ String.format("%s is not supported", MapState.class.getSimpleName()));
+ }
+
+ @Override
+ public <InputT, AccumT, OutputT>
+ CombiningState<InputT, AccumT, OutputT>
+ bindCombiningValue(
+ StateTag<? super K, CombiningState<InputT, AccumT, OutputT>> address,
+ Coder<AccumT> accumCoder,
+ Combine.CombineFn<InputT, AccumT, OutputT> combineFn) {
+
+ return new FlinkCombiningState<>(
+ stateBackend, address, combineFn, namespace, accumCoder);
+ }
+
+ @Override
+ public <InputT, AccumT, OutputT>
+ CombiningState<InputT, AccumT, OutputT> bindKeyedCombiningValue(
+ StateTag<? super K, CombiningState<InputT, AccumT, OutputT>> address,
+ Coder<AccumT> accumCoder,
+ final Combine.KeyedCombineFn<? super K, InputT, AccumT, OutputT> combineFn) {
+ return new FlinkKeyedCombiningState<>(
+ stateBackend,
+ address,
+ combineFn,
+ namespace,
+ accumCoder,
+ FlinkBroadcastStateInternals.this);
+ }
+
+ @Override
+ public <InputT, AccumT, OutputT>
+ CombiningState<InputT, AccumT, OutputT> bindKeyedCombiningValueWithContext(
+ StateTag<? super K, CombiningState<InputT, AccumT, OutputT>> address,
+ Coder<AccumT> accumCoder,
+ CombineWithContext.KeyedCombineFnWithContext<
+ ? super K, InputT, AccumT, OutputT> combineFn) {
+ return new FlinkCombiningStateWithContext<>(
+ stateBackend,
+ address,
+ combineFn,
+ namespace,
+ accumCoder,
+ FlinkBroadcastStateInternals.this,
+ CombineContextFactory.createFromStateContext(context));
+ }
+
+ @Override
+ public <W extends BoundedWindow> WatermarkHoldState<W> bindWatermark(
+ StateTag<? super K, WatermarkHoldState<W>> address,
+ OutputTimeFn<? super W> outputTimeFn) {
+ throw new UnsupportedOperationException(
+ String.format("%s is not supported", WatermarkHoldState.class.getSimpleName()));
+ }
+ });
+ }
+
+ /**
+ * 1. We only checkpoint state from the operator with subtask index 0, because we
+ * assume that the state is the same on all parallel instances of the operator.
+ *
+ * <p>2. A map is used so that multiple namespaces can be stored per state name.
+ */
+ private abstract class AbstractBroadcastState<T> {
+
+ private String name;
+ private final StateNamespace namespace;
+ private final ListStateDescriptor<Map<String, T>> flinkStateDescriptor;
+ private final DefaultOperatorStateBackend flinkStateBackend;
+
+ AbstractBroadcastState(
+ DefaultOperatorStateBackend flinkStateBackend,
+ String name,
+ StateNamespace namespace,
+ Coder<T> coder) {
+ this.name = name;
+
+ this.namespace = namespace;
+ this.flinkStateBackend = flinkStateBackend;
+
+ CoderTypeInformation<Map<String, T>> typeInfo =
+ new CoderTypeInformation<>(MapCoder.of(StringUtf8Coder.of(), coder));
+
+ flinkStateDescriptor = new ListStateDescriptor<>(name,
+ typeInfo.createSerializer(new ExecutionConfig()));
+ }
+
+ /**
+ * Get the map (namespace -> T), restoring it from the broadcast state of subtask 0 if needed.
+ */
+ Map<String, T> getMap() throws Exception {
+ if (indexInSubtaskGroup == 0) {
+ return getMapFromBroadcastState();
+ } else {
+ Map<String, T> result = (Map<String, T>) stateForNonZeroOperator.get(name);
+ // may need to be restored from the broadcast state of operator 0
+ if (result == null) {
+ result = getMapFromBroadcastState();
+ if (result != null) {
+ stateForNonZeroOperator.put(name, result);
+ // the broadcast copy is no longer needed, so clear it
+ flinkStateBackend.getBroadcastOperatorState(
+ flinkStateDescriptor).clear();
+ }
+ }
+ return result;
+ }
+ }
+
+ Map<String, T> getMapFromBroadcastState() throws Exception {
+ ListState<Map<String, T>> state = flinkStateBackend.getBroadcastOperatorState(
+ flinkStateDescriptor);
+ Iterable<Map<String, T>> iterable = state.get();
+ Map<String, T> ret = null;
+ if (iterable != null) {
+ // just use index 0
+ Iterator<Map<String, T>> iterator = iterable.iterator();
+ if (iterator.hasNext()) {
+ ret = iterator.next();
+ }
+ }
+ return ret;
+ }
+
+ /**
+ * Update the map (namespace -> T); only subtask 0 writes it back to the broadcast state.
+ */
+ void updateMap(Map<String, T> map) throws Exception {
+ if (indexInSubtaskGroup == 0) {
+ ListState<Map<String, T>> state = flinkStateBackend.getBroadcastOperatorState(
+ flinkStateDescriptor);
+ state.clear();
+ if (map.size() > 0) {
+ state.add(map);
+ }
+ } else {
+ if (map.size() == 0) {
+ stateForNonZeroOperator.remove(name);
+ // updateMap is always called after getMap, and getMap already clears the
+ // map held in BroadcastOperatorState, so no extra clearing is needed here.
+ } else {
+ stateForNonZeroOperator.put(name, map);
+ }
+ }
+ }
+
+ void writeInternal(T input) {
+ try {
+ Map<String, T> map = getMap();
+ if (map == null) {
+ map = new HashMap<>();
+ }
+ map.put(namespace.stringKey(), input);
+ updateMap(map);
+ } catch (Exception e) {
+ throw new RuntimeException("Error updating state.", e);
+ }
+ }
+
+ T readInternal() {
+ try {
+ Map<String, T> map = getMap();
+ if (map == null) {
+ return null;
+ } else {
+ return map.get(namespace.stringKey());
+ }
+ } catch (Exception e) {
+ throw new RuntimeException("Error reading state.", e);
+ }
+ }
+
+ void clearInternal() {
+ try {
+ Map<String, T> map = getMap();
+ if (map != null) {
+ map.remove(namespace.stringKey());
+ updateMap(map);
+ }
+ } catch (Exception e) {
+ throw new RuntimeException("Error clearing state.", e);
+ }
+ }
+
+ }
+
+ private class FlinkBroadcastValueState<K, T>
+ extends AbstractBroadcastState<T> implements ValueState<T> {
+
+ private final StateNamespace namespace;
+ private final StateTag<? super K, ValueState<T>> address;
+
+ FlinkBroadcastValueState(
+ DefaultOperatorStateBackend flinkStateBackend,
+ StateTag<? super K, ValueState<T>> address,
+ StateNamespace namespace,
+ Coder<T> coder) {
+ super(flinkStateBackend, address.getId(), namespace, coder);
+
+ this.namespace = namespace;
+ this.address = address;
+
+ }
+
+ @Override
+ public void write(T input) {
+ writeInternal(input);
+ }
+
+ @Override
+ public ValueState<T> readLater() {
+ return this;
+ }
+
+ @Override
+ public T read() {
+ return readInternal();
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) {
+ return true;
+ }
+ if (o == null || getClass() != o.getClass()) {
+ return false;
+ }
+
+ FlinkBroadcastValueState<?, ?> that = (FlinkBroadcastValueState<?, ?>) o;
+
+ return namespace.equals(that.namespace) && address.equals(that.address);
+
+ }
+
+ @Override
+ public int hashCode() {
+ int result = namespace.hashCode();
+ result = 31 * result + address.hashCode();
+ return result;
+ }
+
+ @Override
+ public void clear() {
+ clearInternal();
+ }
+ }
+
+ private class FlinkBroadcastBagState<K, T> extends AbstractBroadcastState<List<T>>
+ implements BagState<T> {
+
+ private final StateNamespace namespace;
+ private final StateTag<? super K, BagState<T>> address;
+
+ FlinkBroadcastBagState(
+ DefaultOperatorStateBackend flinkStateBackend,
+ StateTag<? super K, BagState<T>> address,
+ StateNamespace namespace,
+ Coder<T> coder) {
+ super(flinkStateBackend, address.getId(), namespace, ListCoder.of(coder));
+
+ this.namespace = namespace;
+ this.address = address;
+ }
+
+ @Override
+ public void add(T input) {
+ List<T> list = readInternal();
+ if (list == null) {
+ list = new ArrayList<>();
+ }
+ list.add(input);
+ writeInternal(list);
+ }
+
+ @Override
+ public BagState<T> readLater() {
+ return this;
+ }
+
+ @Override
+ public Iterable<T> read() {
+ List<T> result = readInternal();
+ return result != null ? result : Collections.<T>emptyList();
+ }
+
+ @Override
+ public ReadableState<Boolean> isEmpty() {
+ return new ReadableState<Boolean>() {
+ @Override
+ public Boolean read() {
+ try {
+ List<T> result = readInternal();
+ return result == null;
+ } catch (Exception e) {
+ throw new RuntimeException("Error reading state.", e);
+ }
+
+ }
+
+ @Override
+ public ReadableState<Boolean> readLater() {
+ return this;
+ }
+ };
+ }
+
+ @Override
+ public void clear() {
+ clearInternal();
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) {
+ return true;
+ }
+ if (o == null || getClass() != o.getClass()) {
+ return false;
+ }
+
+ FlinkBroadcastBagState<?, ?> that = (FlinkBroadcastBagState<?, ?>) o;
+
+ return namespace.equals(that.namespace) && address.equals(that.address);
+
+ }
+
+ @Override
+ public int hashCode() {
+ int result = namespace.hashCode();
+ result = 31 * result + address.hashCode();
+ return result;
+ }
+ }
+
+ private class FlinkCombiningState<K, InputT, AccumT, OutputT>
+ extends AbstractBroadcastState<AccumT>
+ implements CombiningState<InputT, AccumT, OutputT> {
+
+ private final StateNamespace namespace;
+ private final StateTag<? super K, CombiningState<InputT, AccumT, OutputT>> address;
+ private final Combine.CombineFn<InputT, AccumT, OutputT> combineFn;
+
+ FlinkCombiningState(
+ DefaultOperatorStateBackend flinkStateBackend,
+ StateTag<? super K, CombiningState<InputT, AccumT, OutputT>> address,
+ Combine.CombineFn<InputT, AccumT, OutputT> combineFn,
+ StateNamespace namespace,
+ Coder<AccumT> accumCoder) {
+ super(flinkStateBackend, address.getId(), namespace, accumCoder);
+
+ this.namespace = namespace;
+ this.address = address;
+ this.combineFn = combineFn;
+ }
+
+ @Override
+ public CombiningState<InputT, AccumT, OutputT> readLater() {
+ return this;
+ }
+
+ @Override
+ public void add(InputT value) {
+ AccumT current = readInternal();
+ if (current == null) {
+ current = combineFn.createAccumulator();
+ }
+ current = combineFn.addInput(current, value);
+ writeInternal(current);
+ }
+
+ @Override
+ public void addAccum(AccumT accum) {
+ AccumT current = readInternal();
+
+ if (current == null) {
+ writeInternal(accum);
+ } else {
+ current = combineFn.mergeAccumulators(Arrays.asList(current, accum));
+ writeInternal(current);
+ }
+ }
+
+ @Override
+ public AccumT getAccum() {
+ return readInternal();
+ }
+
+ @Override
+ public AccumT mergeAccumulators(Iterable<AccumT> accumulators) {
+ return combineFn.mergeAccumulators(accumulators);
+ }
+
+ @Override
+ public OutputT read() {
+ AccumT accum = readInternal();
+ if (accum != null) {
+ return combineFn.extractOutput(accum);
+ } else {
+ return combineFn.extractOutput(combineFn.createAccumulator());
+ }
+ }
+
+ @Override
+ public ReadableState<Boolean> isEmpty() {
+ return new ReadableState<Boolean>() {
+ @Override
+ public Boolean read() {
+ try {
+ return readInternal() == null;
+ } catch (Exception e) {
+ throw new RuntimeException("Error reading state.", e);
+ }
+
+ }
+
+ @Override
+ public ReadableState<Boolean> readLater() {
+ return this;
+ }
+ };
+ }
+
+ @Override
+ public void clear() {
+ clearInternal();
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) {
+ return true;
+ }
+ if (o == null || getClass() != o.getClass()) {
+ return false;
+ }
+
+ FlinkCombiningState<?, ?, ?, ?> that =
+ (FlinkCombiningState<?, ?, ?, ?>) o;
+
+ return namespace.equals(that.namespace) && address.equals(that.address);
+
+ }
+
+ @Override
+ public int hashCode() {
+ int result = namespace.hashCode();
+ result = 31 * result + address.hashCode();
+ return result;
+ }
+ }
+
+ private class FlinkKeyedCombiningState<K, InputT, AccumT, OutputT>
+ extends AbstractBroadcastState<AccumT>
+ implements CombiningState<InputT, AccumT, OutputT> {
+
+ private final StateNamespace namespace;
+ private final StateTag<? super K, CombiningState<InputT, AccumT, OutputT>> address;
+ private final Combine.KeyedCombineFn<? super K, InputT, AccumT, OutputT> combineFn;
+ private final FlinkBroadcastStateInternals<K> flinkStateInternals;
+
+ FlinkKeyedCombiningState(
+ DefaultOperatorStateBackend flinkStateBackend,
+ StateTag<? super K, CombiningState<InputT, AccumT, OutputT>> address,
+ Combine.KeyedCombineFn<? super K, InputT, AccumT, OutputT> combineFn,
+ StateNamespace namespace,
+ Coder<AccumT> accumCoder,
+ FlinkBroadcastStateInternals<K> flinkStateInternals) {
+ super(flinkStateBackend, address.getId(), namespace, accumCoder);
+
+ this.namespace = namespace;
+ this.address = address;
+ this.combineFn = combineFn;
+ this.flinkStateInternals = flinkStateInternals;
+
+ }
+
+ @Override
+ public CombiningState<InputT, AccumT, OutputT> readLater() {
+ return this;
+ }
+
+ @Override
+ public void add(InputT value) {
+ try {
+ AccumT current = readInternal();
+ if (current == null) {
+ current = combineFn.createAccumulator(flinkStateInternals.getKey());
+ }
+ current = combineFn.addInput(flinkStateInternals.getKey(), current, value);
+ writeInternal(current);
+ } catch (Exception e) {
+ throw new RuntimeException("Error adding to state." , e);
+ }
+ }
+
+ @Override
+ public void addAccum(AccumT accum) {
+ try {
+ AccumT current = readInternal();
+ if (current == null) {
+ writeInternal(accum);
+ } else {
+ current = combineFn.mergeAccumulators(
+ flinkStateInternals.getKey(),
+ Arrays.asList(current, accum));
+ writeInternal(current);
+ }
+ } catch (Exception e) {
+ throw new RuntimeException("Error adding to state.", e);
+ }
+ }
+
+ @Override
+ public AccumT getAccum() {
+ try {
+ return readInternal();
+ } catch (Exception e) {
+ throw new RuntimeException("Error reading state.", e);
+ }
+ }
+
+ @Override
+ public AccumT mergeAccumulators(Iterable<AccumT> accumulators) {
+ return combineFn.mergeAccumulators(flinkStateInternals.getKey(), accumulators);
+ }
+
+ @Override
+ public OutputT read() {
+ try {
+ AccumT accum = readInternal();
+ if (accum != null) {
+ return combineFn.extractOutput(flinkStateInternals.getKey(), accum);
+ } else {
+ return combineFn.extractOutput(
+ flinkStateInternals.getKey(),
+ combineFn.createAccumulator(flinkStateInternals.getKey()));
+ }
+ } catch (Exception e) {
+ throw new RuntimeException("Error reading state.", e);
+ }
+ }
+
+ @Override
+ public ReadableState<Boolean> isEmpty() {
+ return new ReadableState<Boolean>() {
+ @Override
+ public Boolean read() {
+ try {
+ return readInternal() == null;
+ } catch (Exception e) {
+ throw new RuntimeException("Error reading state.", e);
+ }
+
+ }
+
+ @Override
+ public ReadableState<Boolean> readLater() {
+ return this;
+ }
+ };
+ }
+
+ @Override
+ public void clear() {
+ clearInternal();
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) {
+ return true;
+ }
+ if (o == null || getClass() != o.getClass()) {
+ return false;
+ }
+
+ FlinkKeyedCombiningState<?, ?, ?, ?> that =
+ (FlinkKeyedCombiningState<?, ?, ?, ?>) o;
+
+ return namespace.equals(that.namespace) && address.equals(that.address);
+
+ }
+
+ @Override
+ public int hashCode() {
+ int result = namespace.hashCode();
+ result = 31 * result + address.hashCode();
+ return result;
+ }
+ }
+
+ private class FlinkCombiningStateWithContext<K, InputT, AccumT, OutputT>
+ extends AbstractBroadcastState<AccumT>
+ implements CombiningState<InputT, AccumT, OutputT> {
+
+ private final StateNamespace namespace;
+ private final StateTag<? super K, CombiningState<InputT, AccumT, OutputT>> address;
+ private final CombineWithContext.KeyedCombineFnWithContext<
+ ? super K, InputT, AccumT, OutputT> combineFn;
+ private final FlinkBroadcastStateInternals<K> flinkStateInternals;
+ private final CombineWithContext.Context context;
+
+ FlinkCombiningStateWithContext(
+ DefaultOperatorStateBackend flinkStateBackend,
+ StateTag<? super K, CombiningState<InputT, AccumT, OutputT>> address,
+ CombineWithContext.KeyedCombineFnWithContext<
+ ? super K, InputT, AccumT, OutputT> combineFn,
+ StateNamespace namespace,
+ Coder<AccumT> accumCoder,
+ FlinkBroadcastStateInternals<K> flinkStateInternals,
+ CombineWithContext.Context context) {
+ super(flinkStateBackend, address.getId(), namespace, accumCoder);
+
+ this.namespace = namespace;
+ this.address = address;
+ this.combineFn = combineFn;
+ this.flinkStateInternals = flinkStateInternals;
+ this.context = context;
+
+ }
+
+ @Override
+ public CombiningState<InputT, AccumT, OutputT> readLater() {
+ return this;
+ }
+
+ @Override
+ public void add(InputT value) {
+ try {
+ AccumT current = readInternal();
+ if (current == null) {
+ current = combineFn.createAccumulator(flinkStateInternals.getKey(), context);
+ }
+ current = combineFn.addInput(flinkStateInternals.getKey(), current, value, context);
+ writeInternal(current);
+ } catch (Exception e) {
+ throw new RuntimeException("Error adding to state." , e);
+ }
+ }
+
+ @Override
+ public void addAccum(AccumT accum) {
+ try {
+
+ AccumT current = readInternal();
+ if (current == null) {
+ writeInternal(accum);
+ } else {
+ current = combineFn.mergeAccumulators(
+ flinkStateInternals.getKey(),
+ Arrays.asList(current, accum),
+ context);
+ writeInternal(current);
+ }
+ } catch (Exception e) {
+ throw new RuntimeException("Error adding to state.", e);
+ }
+ }
+
+ @Override
+ public AccumT getAccum() {
+ try {
+ return readInternal();
+ } catch (Exception e) {
+ throw new RuntimeException("Error reading state.", e);
+ }
+ }
+
+ @Override
+ public AccumT mergeAccumulators(Iterable<AccumT> accumulators) {
+ return combineFn.mergeAccumulators(flinkStateInternals.getKey(), accumulators, context);
+ }
+
+ @Override
+ public OutputT read() {
+ try {
+ AccumT accum = readInternal();
+ return combineFn.extractOutput(flinkStateInternals.getKey(), accum, context);
+ } catch (Exception e) {
+ throw new RuntimeException("Error reading state.", e);
+ }
+ }
+
+ @Override
+ public ReadableState<Boolean> isEmpty() {
+ return new ReadableState<Boolean>() {
+ @Override
+ public Boolean read() {
+ try {
+ return readInternal() == null;
+ } catch (Exception e) {
+ throw new RuntimeException("Error reading state.", e);
+ }
+
+ }
+
+ @Override
+ public ReadableState<Boolean> readLater() {
+ return this;
+ }
+ };
+ }
+
+ @Override
+ public void clear() {
+ clearInternal();
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) {
+ return true;
+ }
+ if (o == null || getClass() != o.getClass()) {
+ return false;
+ }
+
+ FlinkCombiningStateWithContext<?, ?, ?, ?> that =
+ (FlinkCombiningStateWithContext<?, ?, ?, ?>) o;
+
+ return namespace.equals(that.namespace) && address.equals(that.address);
+
+ }
+
+ @Override
+ public int hashCode() {
+ int result = namespace.hashCode();
+ result = 31 * result + address.hashCode();
+ return result;
+ }
+ }
+
+}
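As a rough usage sketch of the class above: a state tag is bound against a namespace, and reads and writes go through Flink's broadcast operator state, so every parallel subtask observes the same value. The helper below is hypothetical (the StateTags/StateNamespaces factories from runners-core and an OperatorStateBackend supplied by the enclosing operator are assumed; none of these names are part of this patch):

import org.apache.beam.runners.core.StateNamespaces;
import org.apache.beam.runners.core.StateTags;
import org.apache.beam.sdk.coders.VarIntCoder;
import org.apache.beam.sdk.util.state.ValueState;
import org.apache.flink.runtime.state.OperatorStateBackend;

class BroadcastStateUsageSketch {
  // 'subtaskIndex' and 'backend' would be provided by the enclosing Flink operator.
  static void bumpCounter(int subtaskIndex, OperatorStateBackend backend) {
    FlinkBroadcastStateInternals<String> internals =
        new FlinkBroadcastStateInternals<>(subtaskIndex, backend);
    // Bind a ValueState in the global namespace; the value lives in broadcast state,
    // so all parallel instances see the same counter.
    ValueState<Integer> counter =
        internals.state(StateNamespaces.global(), StateTags.value("counter", VarIntCoder.of()));
    Integer current = counter.read();
    counter.write(current == null ? 1 : current + 1);
  }
}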
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/state/FlinkKeyGroupStateInternals.java
----------------------------------------------------------------------
diff --git a/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/state/FlinkKeyGroupStateInternals.java b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/state/FlinkKeyGroupStateInternals.java
new file mode 100644
index 0000000..24b340e
--- /dev/null
+++ b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/state/FlinkKeyGroupStateInternals.java
@@ -0,0 +1,487 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.runners.flink.translation.wrappers.streaming.state;
+
+import static org.apache.flink.util.Preconditions.checkArgument;
+
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import org.apache.beam.runners.core.StateInternals;
+import org.apache.beam.runners.core.StateNamespace;
+import org.apache.beam.runners.core.StateTag;
+import org.apache.beam.sdk.coders.Coder;
+import org.apache.beam.sdk.coders.Coder.Context;
+import org.apache.beam.sdk.coders.CoderException;
+import org.apache.beam.sdk.coders.ListCoder;
+import org.apache.beam.sdk.coders.StringUtf8Coder;
+import org.apache.beam.sdk.transforms.Combine;
+import org.apache.beam.sdk.transforms.CombineWithContext;
+import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
+import org.apache.beam.sdk.transforms.windowing.OutputTimeFn;
+import org.apache.beam.sdk.util.CoderUtils;
+import org.apache.beam.sdk.util.state.BagState;
+import org.apache.beam.sdk.util.state.CombiningState;
+import org.apache.beam.sdk.util.state.MapState;
+import org.apache.beam.sdk.util.state.ReadableState;
+import org.apache.beam.sdk.util.state.SetState;
+import org.apache.beam.sdk.util.state.State;
+import org.apache.beam.sdk.util.state.StateContext;
+import org.apache.beam.sdk.util.state.StateContexts;
+import org.apache.beam.sdk.util.state.ValueState;
+import org.apache.beam.sdk.util.state.WatermarkHoldState;
+import org.apache.flink.api.java.tuple.Tuple2;
+import org.apache.flink.runtime.state.KeyGroupsList;
+import org.apache.flink.runtime.state.KeyedStateBackend;
+import org.apache.flink.streaming.api.operators.HeapInternalTimerService;
+import org.apache.flink.util.InstantiationUtil;
+import org.apache.flink.util.Preconditions;
+
+/**
+ * {@link StateInternals} that uses {@link KeyGroupCheckpointedOperator}
+ * to checkpoint state.
+ *
+ * <p>Note: The index of the key is ignored, and only BagState is implemented.
+ *
+ * <p>The handling of the local key-group range is modeled on {@link HeapInternalTimerService}.
+ */
+public class FlinkKeyGroupStateInternals<K> implements StateInternals<K> {
+
+ private final Coder<K> keyCoder;
+ private final KeyGroupsList localKeyGroupRange;
+ private KeyedStateBackend keyedStateBackend;
+ private final int localKeyGroupRangeStartIdx;
+
+ // stateName -> (valueCoder, namespace -> value), one table per local key group
+ private final Map<String, Tuple2<Coder<?>, Map<String, ?>>>[] stateTables;
+
+ public FlinkKeyGroupStateInternals(
+ Coder<K> keyCoder,
+ KeyedStateBackend keyedStateBackend) {
+ this.keyCoder = keyCoder;
+ this.keyedStateBackend = keyedStateBackend;
+ this.localKeyGroupRange = keyedStateBackend.getKeyGroupRange();
+ // find the starting index of the local key-group range
+ int startIdx = Integer.MAX_VALUE;
+ for (Integer keyGroupIdx : localKeyGroupRange) {
+ startIdx = Math.min(keyGroupIdx, startIdx);
+ }
+ this.localKeyGroupRangeStartIdx = startIdx;
+ stateTables = (Map<String, Tuple2<Coder<?>, Map<String, ?>>>[])
+ new Map[localKeyGroupRange.getNumberOfKeyGroups()];
+ for (int i = 0; i < stateTables.length; i++) {
+ stateTables[i] = new HashMap<>();
+ }
+ }
+
+ @Override
+ public K getKey() {
+ ByteBuffer keyBytes = (ByteBuffer) keyedStateBackend.getCurrentKey();
+ try {
+ return CoderUtils.decodeFromByteArray(keyCoder, keyBytes.array());
+ } catch (CoderException e) {
+ throw new RuntimeException("Error decoding key.", e);
+ }
+ }
+
+ @Override
+ public <T extends State> T state(
+ final StateNamespace namespace,
+ StateTag<? super K, T> address) {
+
+ return state(namespace, address, StateContexts.nullContext());
+ }
+
+ @Override
+ public <T extends State> T state(
+ final StateNamespace namespace,
+ StateTag<? super K, T> address,
+ final StateContext<?> context) {
+
+ return address.bind(new StateTag.StateBinder<K>() {
+
+ @Override
+ public <T> ValueState<T> bindValue(
+ StateTag<? super K, ValueState<T>> address,
+ Coder<T> coder) {
+ throw new UnsupportedOperationException(
+ String.format("%s is not supported", ValueState.class.getSimpleName()));
+ }
+
+ @Override
+ public <T> BagState<T> bindBag(
+ StateTag<? super K, BagState<T>> address,
+ Coder<T> elemCoder) {
+
+ return new FlinkKeyGroupBagState<>(address, namespace, elemCoder);
+ }
+
+ @Override
+ public <T> SetState<T> bindSet(
+ StateTag<? super K, SetState<T>> address,
+ Coder<T> elemCoder) {
+ throw new UnsupportedOperationException(
+ String.format("%s is not supported", SetState.class.getSimpleName()));
+ }
+
+ @Override
+ public <KeyT, ValueT> MapState<KeyT, ValueT> bindMap(
+ StateTag<? super K, MapState<KeyT, ValueT>> spec,
+ Coder<KeyT> mapKeyCoder, Coder<ValueT> mapValueCoder) {
+ throw new UnsupportedOperationException(
+ String.format("%s is not supported", MapState.class.getSimpleName()));
+ }
+
+ @Override
+ public <InputT, AccumT, OutputT>
+ CombiningState<InputT, AccumT, OutputT>
+ bindCombiningValue(
+ StateTag<? super K, CombiningState<InputT, AccumT, OutputT>> address,
+ Coder<AccumT> accumCoder,
+ Combine.CombineFn<InputT, AccumT, OutputT> combineFn) {
+ throw new UnsupportedOperationException("bindCombiningValue is not supported.");
+ }
+
+ @Override
+ public <InputT, AccumT, OutputT>
+ CombiningState<InputT, AccumT, OutputT> bindKeyedCombiningValue(
+ StateTag<? super K, CombiningState<InputT, AccumT, OutputT>> address,
+ Coder<AccumT> accumCoder,
+ final Combine.KeyedCombineFn<? super K, InputT, AccumT, OutputT> combineFn) {
+ throw new UnsupportedOperationException("bindKeyedCombiningValue is not supported.");
+
+ }
+
+ @Override
+ public <InputT, AccumT, OutputT>
+ CombiningState<InputT, AccumT, OutputT> bindKeyedCombiningValueWithContext(
+ StateTag<? super K, CombiningState<InputT, AccumT, OutputT>> address,
+ Coder<AccumT> accumCoder,
+ CombineWithContext.KeyedCombineFnWithContext<
+ ? super K, InputT, AccumT, OutputT> combineFn) {
+ throw new UnsupportedOperationException(
+ "bindKeyedCombiningValueWithContext is not supported.");
+ }
+
+ @Override
+ public <W extends BoundedWindow> WatermarkHoldState<W> bindWatermark(
+ StateTag<? super K, WatermarkHoldState<W>> address,
+ OutputTimeFn<? super W> outputTimeFn) {
+ throw new UnsupportedOperationException(
+ String.format("%s is not supported", CombiningState.class.getSimpleName()));
+ }
+ });
+ }
+
+ /**
+ * Modeled on {@link Combine.CombineFn}.
+ *
+ * <p>One accumulator is kept per key group: addInput() is called as each element
+ * arrives, and extractOutput() combines the accumulators of all locally assigned
+ * key groups when the state is read.
+ */
+ interface KeyGroupCombiner<InputT, AccumT, OutputT> {
+
+ /**
+ * Returns a new, mutable accumulator value, representing the accumulation
+ * of zero input values.
+ */
+ AccumT createAccumulator();
+
+ /**
+ * Adds the given input value to the given accumulator, returning the
+ * new accumulator value.
+ */
+ AccumT addInput(AccumT accumulator, InputT input);
+
+ /**
+ * Returns the output value that is the result of all accumulators from KeyGroups
+ * that are assigned to this operator.
+ */
+ OutputT extractOutput(Iterable<AccumT> accumulators);
+ }
+
+ private abstract class AbstractKeyGroupState<InputT, AccumT, OutputT> {
+
+ private String stateName;
+ private String namespace;
+ private Coder<AccumT> coder;
+ private KeyGroupCombiner<InputT, AccumT, OutputT> keyGroupCombiner;
+
+ AbstractKeyGroupState(
+ String stateName,
+ String namespace,
+ Coder<AccumT> coder,
+ KeyGroupCombiner<InputT, AccumT, OutputT> keyGroupCombiner) {
+ this.stateName = stateName;
+ this.namespace = namespace;
+ this.coder = coder;
+ this.keyGroupCombiner = keyGroupCombiner;
+ }
+
+ /**
+ * Determine the key group of the current key and add the input to its accumulator.
+ */
+ void addInput(InputT input) {
+ int keyGroupIdx = keyedStateBackend.getCurrentKeyGroupIndex();
+ int localIdx = getIndexForKeyGroup(keyGroupIdx);
+ Map<String, Tuple2<Coder<?>, Map<String, ?>>> stateTable = stateTables[localIdx];
+ Tuple2<Coder<?>, Map<String, ?>> tuple2 = stateTable.get(stateName);
+ if (tuple2 == null) {
+ tuple2 = new Tuple2<>();
+ tuple2.f0 = coder;
+ tuple2.f1 = new HashMap<>();
+ stateTable.put(stateName, tuple2);
+ }
+ Map<String, AccumT> map = (Map<String, AccumT>) tuple2.f1;
+ AccumT accumulator = map.get(namespace);
+ if (accumulator == null) {
+ accumulator = keyGroupCombiner.createAccumulator();
+ }
+ accumulator = keyGroupCombiner.addInput(accumulator, input);
+ map.put(namespace, accumulator);
+ }
+
+ /**
+ * Get all accumulators and invoke extractOutput().
+ */
+ OutputT extractOutput() {
+ List<AccumT> accumulators = new ArrayList<>(stateTables.length);
+ for (Map<String, Tuple2<Coder<?>, Map<String, ?>>> stateTable : stateTables) {
+ Tuple2<Coder<?>, Map<String, ?>> tuple2 = stateTable.get(stateName);
+ if (tuple2 != null) {
+ AccumT accumulator = (AccumT) tuple2.f1.get(namespace);
+ if (accumulator != null) {
+ accumulators.add(accumulator);
+ }
+ }
+ }
+ return keyGroupCombiner.extractOutput(accumulators);
+ }
+
+ /**
+ * Short-circuits: returns false as soon as any accumulator is found.
+ */
+ boolean isEmptyInternal() {
+ for (Map<String, Tuple2<Coder<?>, Map<String, ?>>> stateTable : stateTables) {
+ Tuple2<Coder<?>, Map<String, ?>> tuple2 = stateTable.get(stateName);
+ if (tuple2 != null) {
+ AccumT accumulator = (AccumT) tuple2.f1.get(namespace);
+ if (accumulator != null) {
+ return false;
+ }
+ }
+ }
+ return true;
+ }
+
+ /**
+ * Clear the accumulators for this namespace and remove now-empty state entries.
+ */
+ void clearInternal() {
+ for (Map<String, Tuple2<Coder<?>, Map<String, ?>>> stateTable : stateTables) {
+ Tuple2<Coder<?>, Map<String, ?>> tuple2 = stateTable.get(stateName);
+ if (tuple2 != null) {
+ tuple2.f1.remove(namespace);
+ if (tuple2.f1.size() == 0) {
+ stateTable.remove(stateName);
+ }
+ }
+ }
+ }
+
+ }
+
+ private int getIndexForKeyGroup(int keyGroupIdx) {
+ checkArgument(localKeyGroupRange.contains(keyGroupIdx),
+ "Key Group " + keyGroupIdx + " does not belong to the local range.");
+ return keyGroupIdx - this.localKeyGroupRangeStartIdx;
+ }
+
+ private class KeyGroupBagCombiner<T> implements KeyGroupCombiner<T, List<T>, Iterable<T>> {
+
+ @Override
+ public List<T> createAccumulator() {
+ return new ArrayList<>();
+ }
+
+ @Override
+ public List<T> addInput(List<T> accumulator, T input) {
+ accumulator.add(input);
+ return accumulator;
+ }
+
+ @Override
+ public Iterable<T> extractOutput(Iterable<List<T>> accumulators) {
+ List<T> result = new ArrayList<>();
+ // maybe can return an unmodifiable view.
+ for (List<T> list : accumulators) {
+ result.addAll(list);
+ }
+ return result;
+ }
+ }
+
+ private class FlinkKeyGroupBagState<T> extends AbstractKeyGroupState<T, List<T>, Iterable<T>>
+ implements BagState<T> {
+
+ private final StateNamespace namespace;
+ private final StateTag<? super K, BagState<T>> address;
+
+ FlinkKeyGroupBagState(
+ StateTag<? super K, BagState<T>> address,
+ StateNamespace namespace,
+ Coder<T> coder) {
+ super(address.getId(), namespace.stringKey(), ListCoder.of(coder),
+ new KeyGroupBagCombiner<T>());
+ this.namespace = namespace;
+ this.address = address;
+ }
+
+ @Override
+ public void add(T input) {
+ addInput(input);
+ }
+
+ @Override
+ public BagState<T> readLater() {
+ return this;
+ }
+
+ @Override
+ public Iterable<T> read() {
+ Iterable<T> result = extractOutput();
+ return result != null ? result : Collections.<T>emptyList();
+ }
+
+ @Override
+ public ReadableState<Boolean> isEmpty() {
+ return new ReadableState<Boolean>() {
+ @Override
+ public Boolean read() {
+ try {
+ return isEmptyInternal();
+ } catch (Exception e) {
+ throw new RuntimeException("Error reading state.", e);
+ }
+
+ }
+
+ @Override
+ public ReadableState<Boolean> readLater() {
+ return this;
+ }
+ };
+ }
+
+ @Override
+ public void clear() {
+ clearInternal();
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) {
+ return true;
+ }
+ if (o == null || getClass() != o.getClass()) {
+ return false;
+ }
+
+ FlinkKeyGroupBagState<?> that = (FlinkKeyGroupBagState<?>) o;
+
+ return namespace.equals(that.namespace) && address.equals(that.address);
+
+ }
+
+ @Override
+ public int hashCode() {
+ int result = namespace.hashCode();
+ result = 31 * result + address.hashCode();
+ return result;
+ }
+ }
+
+ /**
+ * Snapshots the state {@code (stateName -> (valueCoder && (namespace -> value)))} for a given
+ * {@code keyGroupIdx}.
+ *
+ * @param keyGroupIdx the id of the key-group to be put in the snapshot.
+ * @param out the stream to write to.
+ */
+ public void snapshotKeyGroupState(int keyGroupIdx, DataOutputStream out) throws Exception {
+ int localIdx = getIndexForKeyGroup(keyGroupIdx);
+ Map<String, Tuple2<Coder<?>, Map<String, ?>>> stateTable = stateTables[localIdx];
+ Preconditions.checkState(stateTable.size() <= Short.MAX_VALUE,
+ "Too many States: " + stateTable.size() + ". Currently at most "
+ + Short.MAX_VALUE + " states are supported");
+ out.writeShort(stateTable.size());
+ for (Map.Entry<String, Tuple2<Coder<?>, Map<String, ?>>> entry : stateTable.entrySet()) {
+ out.writeUTF(entry.getKey());
+ Coder coder = entry.getValue().f0;
+ InstantiationUtil.serializeObject(out, coder);
+ Map<String, ?> map = entry.getValue().f1;
+ out.writeInt(map.size());
+ for (Map.Entry<String, ?> entry1 : map.entrySet()) {
+ StringUtf8Coder.of().encode(entry1.getKey(), out, Context.NESTED);
+ coder.encode(entry1.getValue(), out, Context.NESTED);
+ }
+ }
+ }
+
+ /**
+ * Restore the state {@code (stateName -> (valueCoder && (namespace -> value)))}
+ * for a given {@code keyGroupIdx}.
+ *
+ * @param keyGroupIdx the id of the key-group whose state is to be restored.
+ * @param in the stream to read from.
+ * @param userCodeClassLoader the class loader that will be used to deserialize
+ * the valueCoder.
+ */
+ public void restoreKeyGroupState(int keyGroupIdx, DataInputStream in,
+ ClassLoader userCodeClassLoader) throws Exception {
+ int localIdx = getIndexForKeyGroup(keyGroupIdx);
+ Map<String, Tuple2<Coder<?>, Map<String, ?>>> stateTable = stateTables[localIdx];
+ int numStates = in.readShort();
+ for (int i = 0; i < numStates; ++i) {
+ String stateName = in.readUTF();
+ Coder coder = InstantiationUtil.deserializeObject(in, userCodeClassLoader);
+ Tuple2<Coder<?>, Map<String, ?>> tuple2 = stateTable.get(stateName);
+ if (tuple2 == null) {
+ tuple2 = new Tuple2<>();
+ tuple2.f0 = coder;
+ tuple2.f1 = new HashMap<>();
+ stateTable.put(stateName, tuple2);
+ }
+ Map<String, Object> map = (Map<String, Object>) tuple2.f1;
+ int mapSize = in.readInt();
+ for (int j = 0; j < mapSize; j++) {
+ String namespace = StringUtf8Coder.of().decode(in, Context.NESTED);
+ Object value = coder.decode(in, Context.NESTED);
+ map.put(namespace, value);
+ }
+ }
+ }
+
+}
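The KeyGroupCombiner contract above keeps one accumulator per locally assigned key group and merges them on read. A minimal sketch of an implementation (a summing combiner, written here for illustration and assumed to live in the same package, since the interface is package-private):

class SumKeyGroupCombiner
    implements FlinkKeyGroupStateInternals.KeyGroupCombiner<Long, Long, Long> {

  @Override
  public Long createAccumulator() {
    return 0L; // accumulation of zero inputs
  }

  @Override
  public Long addInput(Long accumulator, Long input) {
    return accumulator + input; // per-key-group partial sum
  }

  @Override
  public Long extractOutput(Iterable<Long> accumulators) {
    // Combine the partial sums of all key groups assigned to this operator.
    long total = 0L;
    for (Long partial : accumulators) {
      total += partial;
    }
    return total;
  }
}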
[23/50] [abbrv] beam git commit: This closes #2588
Posted by dh...@apache.org.
This closes #2588
Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/8a00f225
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/8a00f225
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/8a00f225
Branch: refs/heads/DSL_SQL
Commit: 8a00f225454bcc742e5b38a499237bd5de455fb7
Parents: 470808c 28b692d
Author: Jean-Baptiste Onofré <jb...@apache.org>
Authored: Wed Apr 19 13:27:51 2017 +0200
Committer: Jean-Baptiste Onofré <jb...@apache.org>
Committed: Wed Apr 19 13:27:51 2017 +0200
----------------------------------------------------------------------
pom.xml | 7 +++++++
runners/flink/runner/pom.xml | 1 -
runners/spark/pom.xml | 1 -
sdks/java/core/pom.xml | 1 -
4 files changed, 7 insertions(+), 3 deletions(-)
----------------------------------------------------------------------
[13/50] [abbrv] beam git commit: Fix tests to properly fake out
BigQueryService, and add tests for dynamic-table functionality.
Posted by dh...@apache.org.
http://git-wip-us.apache.org/repos/asf/beam/blob/b486137d/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/FakeJobService.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/FakeJobService.java b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/FakeJobService.java
index 3c67c3d..a2454fb 100644
--- a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/FakeJobService.java
+++ b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/FakeJobService.java
@@ -1,12 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package org.apache.beam.sdk.io.gcp.bigquery;
import static com.google.common.base.Preconditions.checkArgument;
-import static com.google.common.base.Preconditions.checkState;
import com.google.api.client.json.JsonFactory;
import com.google.api.client.util.BackOff;
import com.google.api.client.util.BackOffUtils;
import com.google.api.client.util.Sleeper;
+import com.google.api.services.bigquery.model.ErrorProto;
import com.google.api.services.bigquery.model.Job;
import com.google.api.services.bigquery.model.JobConfiguration;
import com.google.api.services.bigquery.model.JobConfigurationExtract;
@@ -29,9 +47,18 @@ import java.io.ByteArrayInputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.Serializable;
+import java.nio.channels.Channels;
+import java.nio.channels.WritableByteChannel;
import java.nio.charset.StandardCharsets;
import java.util.List;
+import java.util.Map;
+import java.util.concurrent.ThreadLocalRandom;
+import org.apache.avro.Schema;
+import org.apache.avro.file.DataFileWriter;
+import org.apache.avro.generic.GenericDatumWriter;
+import org.apache.avro.generic.GenericRecord;
+import org.apache.avro.generic.GenericRecordBuilder;
import org.apache.beam.sdk.coders.Coder;
import org.apache.beam.sdk.coders.Coder.Context;
import org.apache.beam.sdk.coders.TableRowJsonCoder;
@@ -40,10 +67,13 @@ import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.WriteDisposition;
import org.apache.beam.sdk.io.gcp.bigquery.BigQueryServices.JobService;
import org.apache.beam.sdk.util.FluentBackoff;
+import org.apache.beam.sdk.util.IOChannelUtils;
+import org.apache.beam.sdk.util.MimeTypes;
import org.apache.beam.sdk.util.Transport;
import org.joda.time.Duration;
/**
+ * A fake implementation of BigQuery's job service.
*/
class FakeJobService implements JobService, Serializable {
static final JsonFactory JSON_FACTORY = Transport.getJsonFactory();
@@ -66,6 +96,8 @@ class FakeJobService implements JobService, Serializable {
private static final com.google.common.collect.Table<String, String, JobInfo> allJobs =
HashBasedTable.create();
+ private static final com.google.common.collect.Table<String, String, List<String>>
+ filesForLoadJobs = HashBasedTable.create();
private static final com.google.common.collect.Table<String, String, JobStatistics>
dryRunQueryResults = HashBasedTable.create();
@@ -82,6 +114,18 @@ class FakeJobService implements JobService, Serializable {
job.setConfiguration(new JobConfiguration().setLoad(loadConfig));
job.setKind(" bigquery#job");
job.setStatus(new JobStatus().setState("PENDING"));
+
+ // Copy the files to a new location for import, as the temporary files will be deleted by
+ // the caller.
+ if (loadConfig.getSourceUris().size() > 0) {
+ List<String> loadFiles = Lists.newArrayList();
+ for (String filename : loadConfig.getSourceUris()) {
+ loadFiles.add(filename + ThreadLocalRandom.current().nextInt());
+ }
+ IOChannelUtils.getFactory(loadFiles.get(0)).copy(loadConfig.getSourceUris(), loadFiles);
+ filesForLoadJobs.put(jobRef.getProjectId(), jobRef.getJobId(), loadFiles);
+ }
+
allJobs.put(jobRef.getProjectId(), jobRef.getJobId(), new JobInfo(job));
}
}
@@ -91,8 +135,6 @@ class FakeJobService implements JobService, Serializable {
throws InterruptedException, IOException {
checkArgument(extractConfig.getDestinationFormat().equals("AVRO"),
"Only extract to AVRO is supported");
- checkArgument(extractConfig.getDestinationUris().size() == 1,
- "Must specify exactly one destination URI.");
synchronized (allJobs) {
Job job = new Job();
job.setJobReference(jobRef);
@@ -106,6 +148,14 @@ class FakeJobService implements JobService, Serializable {
@Override
public void startQueryJob(JobReference jobRef, JobConfigurationQuery query)
throws IOException, InterruptedException {
+ synchronized (allJobs) {
+ Job job = new Job();
+ job.setJobReference(jobRef);
+ job.setConfiguration(new JobConfiguration().setQuery(query));
+ job.setKind(" bigquery#job");
+ job.setStatus(new JobStatus().setState("PENDING"));
+ allJobs.put(jobRef.getProjectId(), jobRef.getJobId(), new JobInfo(job));
+ }
}
@Override
@@ -127,8 +177,8 @@ class FakeJobService implements JobService, Serializable {
BackOff backoff =
FluentBackoff.DEFAULT
.withMaxRetries(maxAttempts)
- .withInitialBackoff(Duration.millis(50))
- .withMaxBackoff(Duration.standardMinutes(1))
+ .withInitialBackoff(Duration.millis(10))
+ .withMaxBackoff(Duration.standardSeconds(1))
.backoff();
Sleeper sleeper = Sleeper.DEFAULT;
try {
@@ -136,7 +186,8 @@ class FakeJobService implements JobService, Serializable {
Job job = getJob(jobRef);
if (job != null) {
JobStatus status = job.getStatus();
- if (status != null && status.getState() != null && status.getState().equals("DONE")) {
+ if (status != null && status.getState() != null
+ && (status.getState().equals("DONE") || status.getState().equals("FAILED"))) {
return job;
}
}
@@ -173,12 +224,15 @@ class FakeJobService implements JobService, Serializable {
if (job == null) {
return null;
}
- ++job.getJobCount;
- if (job.getJobCount == GET_JOBS_TRANSITION_INTERVAL + 1) {
- job.job.getStatus().setState("RUNNING");
- } else if (job.getJobCount == 2 * GET_JOBS_TRANSITION_INTERVAL + 1) {
- runJob(job.job);
- job.job.getStatus().setState("DONE");
+ try {
+ ++job.getJobCount;
+ if (job.getJobCount == GET_JOBS_TRANSITION_INTERVAL + 1) {
+ job.job.getStatus().setState("RUNNING");
+ } else if (job.getJobCount == 2 * GET_JOBS_TRANSITION_INTERVAL + 1) {
+ job.job.setStatus(runJob(job.job));
+ }
+ } catch (Exception e) {
+ job.job.getStatus().setState("FAILED").setErrorResult(new ErrorProto());
}
return JSON_FACTORY.fromString(JSON_FACTORY.toString(job.job), Job.class);
}
@@ -187,41 +241,50 @@ class FakeJobService implements JobService, Serializable {
}
}
- private void runJob(Job job) throws InterruptedException, IOException {
+ private JobStatus runJob(Job job) throws InterruptedException, IOException {
if (job.getConfiguration().getLoad() != null) {
- runLoadJob(job.getConfiguration().getLoad());
+ return runLoadJob(job.getJobReference(), job.getConfiguration().getLoad());
} else if (job.getConfiguration().getCopy() != null) {
- runCopyJob(job.getConfiguration().getCopy());
+ return runCopyJob(job.getConfiguration().getCopy());
} else if (job.getConfiguration().getExtract() != null) {
- runExtractJob(job, job.getConfiguration().getExtract());
+ return runExtractJob(job, job.getConfiguration().getExtract());
+ } else if (job.getConfiguration().getQuery() != null) {
+ return runQueryJob(job.getConfiguration().getQuery());
}
+ return new JobStatus().setState("DONE");
}
- private void validateDispositions(Table table, CreateDisposition createDisposition,
- WriteDisposition writeDisposition)
+ private boolean validateDispositions(Table table, CreateDisposition createDisposition,
+ WriteDisposition writeDisposition)
throws InterruptedException, IOException {
if (table == null) {
- checkState(createDisposition != CreateDisposition.CREATE_NEVER,
- "CreateDisposition == CREATE_NEVER but the table doesn't exist.");
+ if (createDisposition == CreateDisposition.CREATE_NEVER) {
+ return false;
+ }
} else if (writeDisposition == WriteDisposition.WRITE_TRUNCATE) {
datasetService.deleteTable(table.getTableReference());
} else if (writeDisposition == WriteDisposition.WRITE_EMPTY) {
List<TableRow> allRows = datasetService.getAllRows(table.getTableReference().getProjectId(),
table.getTableReference().getDatasetId(), table.getTableReference().getTableId());
- checkState(allRows.isEmpty(), "Write disposition was set to WRITE_EMPTY,"
- + " but the table was not empty.");
+ if (!allRows.isEmpty()) {
+ return false;
+ }
}
+ return true;
}
- private void runLoadJob(JobConfigurationLoad load)
+
+ private JobStatus runLoadJob(JobReference jobRef, JobConfigurationLoad load)
throws InterruptedException, IOException {
TableReference destination = load.getDestinationTable();
TableSchema schema = load.getSchema();
- List<String> sourceFiles = load.getSourceUris();
+ List<String> sourceFiles = filesForLoadJobs.get(jobRef.getProjectId(), jobRef.getJobId());
WriteDisposition writeDisposition = WriteDisposition.valueOf(load.getWriteDisposition());
CreateDisposition createDisposition = CreateDisposition.valueOf(load.getCreateDisposition());
checkArgument(load.getSourceFormat().equals("NEWLINE_DELIMITED_JSON"));
Table existingTable = datasetService.getTable(destination);
- validateDispositions(existingTable, createDisposition, writeDisposition);
+ if (!validateDispositions(existingTable, createDisposition, writeDisposition)) {
+ return new JobStatus().setState("FAILED").setErrorResult(new ErrorProto());
+ }
datasetService.createTable(new Table().setTableReference(destination).setSchema(schema));
@@ -230,31 +293,52 @@ class FakeJobService implements JobService, Serializable {
rows.addAll(readRows(filename));
}
datasetService.insertAll(destination, rows, null);
+ return new JobStatus().setState("DONE");
}
- private void runCopyJob(JobConfigurationTableCopy copy)
+ private JobStatus runCopyJob(JobConfigurationTableCopy copy)
throws InterruptedException, IOException {
List<TableReference> sources = copy.getSourceTables();
TableReference destination = copy.getDestinationTable();
WriteDisposition writeDisposition = WriteDisposition.valueOf(copy.getWriteDisposition());
CreateDisposition createDisposition = CreateDisposition.valueOf(copy.getCreateDisposition());
Table existingTable = datasetService.getTable(destination);
- validateDispositions(existingTable, createDisposition, writeDisposition);
+ if (!validateDispositions(existingTable, createDisposition, writeDisposition)) {
+ return new JobStatus().setState("FAILED").setErrorResult(new ErrorProto());
+ }
List<TableRow> allRows = Lists.newArrayList();
for (TableReference source : sources) {
allRows.addAll(datasetService.getAllRows(
source.getProjectId(), source.getDatasetId(), source.getTableId()));
}
+ datasetService.createTable(new Table().setTableReference(destination));
datasetService.insertAll(destination, allRows, null);
+ return new JobStatus().setState("DONE");
}
- private void runExtractJob(Job job, JobConfigurationExtract extract) {
+ private JobStatus runExtractJob(Job job, JobConfigurationExtract extract)
+ throws InterruptedException, IOException {
TableReference sourceTable = extract.getSourceTable();
- extract.getDestinationUris().get(0);
- List<Long> destinationFileCounts = Lists.newArrayList(0L);
+
+ List<TableRow> rows = datasetService.getAllRows(
+ sourceTable.getProjectId(), sourceTable.getDatasetId(), sourceTable.getTableId());
+ TableSchema schema = datasetService.getTable(sourceTable).getSchema();
+ List<Long> destinationFileCounts = Lists.newArrayList();
+ for (String destination : extract.getDestinationUris()) {
+ destinationFileCounts.add(writeRows(sourceTable.getTableId(), rows, schema, destination));
+ }
job.setStatistics(new JobStatistics().setExtract(
new JobStatistics4().setDestinationUriFileCounts(destinationFileCounts)));
+ return new JobStatus().setState("DONE");
+ }
+
+ private JobStatus runQueryJob(JobConfigurationQuery query)
+ throws IOException, InterruptedException {
+ List<TableRow> rows = FakeBigQueryServices.rowsFromEncodedQuery(query.getQuery());
+ datasetService.createTable(new Table().setTableReference(query.getDestinationTable()));
+ datasetService.insertAll(query.getDestinationTable(), rows, null);
+ return new JobStatus().setState("DONE");
}
private List<TableRow> readRows(String filename) throws IOException {
@@ -270,4 +354,42 @@ class FakeJobService implements JobService, Serializable {
}
return tableRows;
}
+
+ private long writeRows(String tableId, List<TableRow> rows, TableSchema schema,
+ String destinationPattern) throws IOException {
+ Schema avroSchema = BigQueryAvroUtils.toGenericAvroSchema(tableId, schema.getFields());
+ List<TableRow> rowsToWrite = Lists.newArrayList();
+ int shard = 0;
+ for (int i = 0; i < rows.size(); ++i) {
+ rowsToWrite.add(rows.get(i));
+ if (rowsToWrite.size() == 5) {
+ writeRowsHelper(rowsToWrite, avroSchema, destinationPattern, shard++);
+ rowsToWrite.clear();
+ }
+ }
+ if (!rowsToWrite.isEmpty()) {
+ writeRowsHelper(rowsToWrite, avroSchema, destinationPattern, shard++);
+ }
+ return shard;
+ }
+
+ private void writeRowsHelper(List<TableRow> rows, Schema avroSchema,
+ String destinationPattern, int shard) throws IOException {
+ String filename = destinationPattern.replace("*", String.format("%012d", shard));
+ try (WritableByteChannel channel = IOChannelUtils.create(filename, MimeTypes.BINARY);
+ DataFileWriter<GenericRecord> tableRowWriter =
+ new DataFileWriter<>(new GenericDatumWriter<GenericRecord>(avroSchema))
+ .create(avroSchema, Channels.newOutputStream(channel))) {
+ for (Map<String, Object> record : rows) {
+ GenericRecordBuilder genericRecordBuilder = new GenericRecordBuilder(avroSchema);
+ for (Map.Entry<String, Object> field : record.entrySet()) {
+ genericRecordBuilder.set(field.getKey(), field.getValue());
+ }
+ tableRowWriter.append(genericRecordBuilder.build());
+ }
+ } catch (IOException e) {
+ throw new IllegalStateException(
+ String.format("Could not create destination for extract job %s", filename), e);
+ }
+ }
}
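The extract path above writes table rows as Avro, flushing in batches of five rows and substituting the shard index into the '*' placeholder of the destination pattern. A self-contained sketch of that naming and batching arithmetic (the bucket path is illustrative only):

public class ShardNamingSketch {
  public static void main(String[] args) {
    String destinationPattern = "gs://bucket/extract/part-*.avro"; // hypothetical path
    int totalRows = 12;
    int batchSize = 5; // writeRows() flushes every 5 rows
    int shards = (totalRows + batchSize - 1) / batchSize; // ceil(12 / 5) = 3 files
    for (int shard = 0; shard < shards; shard++) {
      // Mirrors writeRowsHelper(): '*' becomes a zero-padded 12-digit shard index.
      System.out.println(destinationPattern.replace("*", String.format("%012d", shard)));
    }
  }
}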
http://git-wip-us.apache.org/repos/asf/beam/blob/b486137d/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/TableContainer.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/TableContainer.java b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/TableContainer.java
index b2fc170..d52723b 100644
--- a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/TableContainer.java
+++ b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/TableContainer.java
@@ -1,3 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
package org.apache.beam.sdk.io.gcp.bigquery;
import com.google.api.services.bigquery.model.Table;
@@ -7,23 +24,31 @@ import java.util.ArrayList;
import java.util.List;
/**
- * Created by relax on 3/30/17.
+ * Encapsulates a BigQuery table and its contents.
*/
class TableContainer {
Table table;
List<TableRow> rows;
List<String> ids;
-
+ Long sizeBytes;
TableContainer(Table table) {
this.table = table;
this.rows = new ArrayList<>();
this.ids = new ArrayList<>();
+ this.sizeBytes = 0L;
}
- TableContainer addRow(TableRow row, String id) {
+ long addRow(TableRow row, String id) {
rows.add(row);
ids.add(id);
- return this;
+ long rowSize = row.toString().length();
+ Long tableSize = table.getNumBytes();
+ if (tableSize == null) {
+ table.setNumBytes(rowSize);
+ } else {
+ table.setNumBytes(tableSize + rowSize);
+ }
+ return rowSize;
}
Table getTable() {
[16/50] [abbrv] beam git commit: Refactor streaming write branch into
separate reusable components.
Posted by dh...@apache.org.
Refactor streaming write branch into separate reusable components.
Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/58ed5c7e
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/58ed5c7e
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/58ed5c7e
Branch: refs/heads/DSL_SQL
Commit: 58ed5c7ecd247f9c5e5a15deff40ffa8c800af25
Parents: 67a5f82
Author: Reuven Lax <re...@google.com>
Authored: Tue Mar 28 19:34:56 2017 -0700
Committer: Eugene Kirpichov <ki...@google.com>
Committed: Tue Apr 18 21:12:50 2017 -0700
----------------------------------------------------------------------
.../beam/sdk/io/gcp/bigquery/BigQueryIO.java | 69 ++++++------
.../beam/sdk/io/gcp/bigquery/CreateTables.java | 100 +++++++++++++++++
.../io/gcp/bigquery/GenerateShardedTable.java | 48 ++++++++
.../beam/sdk/io/gcp/bigquery/PrepareWrite.java | 65 ++++++-----
.../sdk/io/gcp/bigquery/StreamWithDeDup.java | 90 ---------------
.../sdk/io/gcp/bigquery/StreamingInserts.java | 110 +++++++++++++++++++
.../sdk/io/gcp/bigquery/StreamingWriteFn.java | 82 +-------------
.../sdk/io/gcp/bigquery/TableDestination.java | 48 +++++++-
.../io/gcp/bigquery/TableDestinationCoder.java | 64 +++++++++++
.../sdk/io/gcp/bigquery/TagWithUniqueIds.java | 71 ++++++++++++
.../gcp/bigquery/TagWithUniqueIdsAndTable.java | 101 -----------------
.../sdk/io/gcp/bigquery/BigQueryIOTest.java | 18 +--
12 files changed, 521 insertions(+), 345 deletions(-)
----------------------------------------------------------------------
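
The central signature change in this refactor is that table routing now returns a TableDestination instead of a raw table-spec string. A hedged sketch of the new routing-function shape, modeled on the test changes further down (the element type and table-naming scheme are illustrative only):

    package org.apache.beam.sdk.io.gcp.bigquery;  // sketch placed alongside the classes below

    import com.google.api.services.bigquery.model.TableRow;
    import org.apache.beam.sdk.transforms.SerializableFunction;
    import org.apache.beam.sdk.values.ValueInSingleWindow;

    class PerWindowTableRoutingSketch {
      static SerializableFunction<ValueInSingleWindow<TableRow>, TableDestination> tableFunction() {
        return new SerializableFunction<ValueInSingleWindow<TableRow>, TableDestination>() {
          @Override
          public TableDestination apply(ValueInSingleWindow<TableRow> input) {
            // Route each element to a table derived from its window; empty table description.
            long windowEnd = input.getWindow().maxTimestamp().getMillis();
            return new TableDestination("project-id:dataset-id.table-" + windowEnd, "");
          }
        };
      }
    }
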
http://git-wip-us.apache.org/repos/asf/beam/blob/58ed5c7e/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.java
index af0d561..af19b83 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.java
@@ -40,6 +40,7 @@ import java.util.regex.Pattern;
import javax.annotation.Nullable;
import org.apache.beam.sdk.coders.Coder;
+import org.apache.beam.sdk.coders.KvCoder;
import org.apache.beam.sdk.coders.TableRowJsonCoder;
import org.apache.beam.sdk.coders.VoidCoder;
import org.apache.beam.sdk.io.BoundedSource;
@@ -60,6 +61,7 @@ import org.apache.beam.sdk.options.ValueProvider.NestedValueProvider;
import org.apache.beam.sdk.options.ValueProvider.StaticValueProvider;
import org.apache.beam.sdk.runners.PipelineRunner;
import org.apache.beam.sdk.transforms.PTransform;
+import org.apache.beam.sdk.transforms.ParDo;
import org.apache.beam.sdk.transforms.SerializableFunction;
import org.apache.beam.sdk.transforms.display.DisplayData;
import org.apache.beam.sdk.util.GcsUtil.GcsUtilFactory;
@@ -67,6 +69,7 @@ import org.apache.beam.sdk.util.IOChannelFactory;
import org.apache.beam.sdk.util.IOChannelUtils;
import org.apache.beam.sdk.util.Transport;
import org.apache.beam.sdk.util.gcsfs.GcsPath;
+import org.apache.beam.sdk.values.KV;
import org.apache.beam.sdk.values.PBegin;
import org.apache.beam.sdk.values.PCollection;
import org.apache.beam.sdk.values.PCollection.IsBounded;
@@ -681,8 +684,8 @@ public class BigQueryIO {
static final int LOAD_JOB_POLL_MAX_RETRIES = Integer.MAX_VALUE;
@Nullable abstract ValueProvider<String> getJsonTableRef();
- @Nullable abstract SerializableFunction<ValueInSingleWindow<T>, TableReference>
- getTableRefFunction();
+ @Nullable abstract SerializableFunction<ValueInSingleWindow<T>, TableDestination>
+ getTableFunction();
@Nullable abstract SerializableFunction<T, TableRow> getFormatFunction();
/** Table schema. The schema is required only if the table does not exist. */
@Nullable abstract ValueProvider<String> getJsonSchema();
@@ -783,7 +786,7 @@ public class BigQueryIO {
private void ensureToNotCalledYet() {
checkState(
getJsonTableRef() == null && getTable() == null
- && getTableRefFunction() == null, "to() already called");
+ && getTableFunction() == null, "to() already called");
}
/**
@@ -802,13 +805,16 @@ public class BigQueryIO {
/** Same as {@link #to(String)}, but with a {@link ValueProvider}. */
public Write<T> to(ValueProvider<String> tableSpec) {
ensureToNotCalledYet();
+ String tableDescription = getTableDescription();
+ if (tableDescription == null) {
+ tableDescription = "";
+ }
return toBuilder()
.setJsonTableRef(
NestedValueProvider.of(
NestedValueProvider.of(tableSpec, new TableSpecToTableRef()),
new TableRefToJson()))
- .setTableRefFunction(new TranslateTableSpecFunction<T>(
- new ConstantTableSpecFunction<T>(tableSpec)))
+ .setTableFunction(new ConstantTableFunction<T>(tableSpec, tableDescription))
.build();
}
@@ -819,6 +825,8 @@ public class BigQueryIO {
public Write<T> to(
SerializableFunction<ValueInSingleWindow<T>, String> tableSpecFunction) {
return toTableReference(new TranslateTableSpecFunction<T>(tableSpecFunction));
+ ensureToNotCalledYet();
+ return toBuilder().setTableFunction(tableFunction).build();
}
/**
@@ -828,7 +836,7 @@ public class BigQueryIO {
private Write<T> toTableReference(
SerializableFunction<ValueInSingleWindow<T>, TableReference> tableRefFunction) {
ensureToNotCalledYet();
- return toBuilder().setTableRefFunction(tableRefFunction).build();
+ return toBuilder().setTableFunction(tableFunction).build();
}
/**
@@ -838,32 +846,19 @@ public class BigQueryIO {
return toBuilder().setFormatFunction(formatFunction).build();
}
- private static class TranslateTableSpecFunction<T> implements
- SerializableFunction<ValueInSingleWindow<T>, TableReference> {
- private SerializableFunction<ValueInSingleWindow<T>, String> tableSpecFunction;
-
- TranslateTableSpecFunction(
- SerializableFunction<ValueInSingleWindow<T>, String> tableSpecFunction) {
- this.tableSpecFunction = tableSpecFunction;
- }
+ static class ConstantTableFunction<T> implements
+ SerializableFunction<ValueInSingleWindow<T>, TableDestination> {
+ private final ValueProvider<String> tableSpec;
+ private final String tableDescription;
- @Override
- public TableReference apply(ValueInSingleWindow<T> value) {
- return BigQueryHelpers.parseTableSpec(tableSpecFunction.apply(value));
- }
- }
-
- static class ConstantTableSpecFunction<T> implements
- SerializableFunction<ValueInSingleWindow<T>, String> {
- private ValueProvider<String> tableSpec;
-
- ConstantTableSpecFunction(ValueProvider<String> tableSpec) {
+ ConstantTableFunction(ValueProvider<String> tableSpec, String tableDescription) {
this.tableSpec = tableSpec;
+ this.tableDescription = tableDescription;
}
@Override
- public String apply(ValueInSingleWindow<T> value) {
- return tableSpec.get();
+ public TableDestination apply(ValueInSingleWindow<T> value) {
+ return new TableDestination(tableSpec.get(), tableDescription);
}
}
@@ -919,7 +914,7 @@ public class BigQueryIO {
BigQueryOptions options = input.getPipeline().getOptions().as(BigQueryOptions.class);
// Exactly one of the table and table reference can be configured.
- checkState(getTableRefFunction() != null,
+ checkState(getTableFunction() != null,
"must set the table reference of a BigQueryIO.Write transform");
checkArgument(getFormatFunction() != null,
@@ -978,10 +973,16 @@ public class BigQueryIO {
@Override
public WriteResult expand(PCollection<T> input) {
+ PCollection<KV<TableDestination, TableRow>> rowsWithDestination =
+ input.apply("PrepareWrite", ParDo.of(
+ new PrepareWrite<T>(getTableFunction(), getFormatFunction())))
+ .setCoder(KvCoder.of(TableDestinationCoder.of(), TableRowJsonCoder.of()));
+
+
// When writing an Unbounded PCollection, or when a tablespec function is defined, we use
- // StreamWithDeDup and BigQuery's streaming import API.
+ // StreamingInserts and BigQuery's streaming import API.
if (input.isBounded() == IsBounded.UNBOUNDED) {
- return input.apply(new StreamWithDeDup<T>(this));
+ return rowsWithDestination.apply(new StreamingInserts(this));
} else {
return input.apply(new BatchLoadBigQuery<T>(this));
}
@@ -1002,8 +1003,8 @@ public class BigQueryIO {
.addIfNotNull(DisplayData.item("schema", getJsonSchema())
.withLabel("Table Schema"));
- if (getTableRefFunction() != null) {
- builder.add(DisplayData.item("tableFn", getTableRefFunction().getClass())
+ if (getTableFunction() != null) {
+ builder.add(DisplayData.item("tableFn", getTableFunction().getClass())
.withLabel("Table Reference Function"));
}
@@ -1025,7 +1026,7 @@ public class BigQueryIO {
}
/**
- * Returns the table to write, or {@code null} if writing with {@code tableRefFunction}.
+ * Returns the table to write, or {@code null} if writing with {@code tableFunction}.
*
* <p>If the table's project is not specified, use the executing project.
*/
@@ -1066,7 +1067,7 @@ public class BigQueryIO {
*/
@VisibleForTesting
static void clearCreatedTables() {
- StreamingWriteFn.clearCreatedTables();
+ CreateTables.clearCreatedTables();
}
/////////////////////////////////////////////////////////////////////////////
http://git-wip-us.apache.org/repos/asf/beam/blob/58ed5c7e/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/CreateTables.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/CreateTables.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/CreateTables.java
new file mode 100644
index 0000000..e216553
--- /dev/null
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/CreateTables.java
@@ -0,0 +1,100 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements. See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership. The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+package org.apache.beam.sdk.io.gcp.bigquery;
+
+import com.google.api.services.bigquery.model.Table;
+import com.google.api.services.bigquery.model.TableReference;
+import com.google.api.services.bigquery.model.TableRow;
+import com.google.api.services.bigquery.model.TableSchema;
+import java.io.IOException;
+import java.util.Collections;
+import java.util.Set;
+import java.util.concurrent.ConcurrentHashMap;
+import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.CreateDisposition;
+import org.apache.beam.sdk.io.gcp.bigquery.BigQueryServices.DatasetService;
+import org.apache.beam.sdk.options.BigQueryOptions;
+import org.apache.beam.sdk.transforms.DoFn;
+import org.apache.beam.sdk.transforms.SerializableFunction;
+import org.apache.beam.sdk.values.KV;
+
+
+/**
+ * Creates any tables needed before performing streaming writes to the tables. This is a
+ * side-effect {@link DoFn}, and returns the original collection unchanged.
+ */
+public class CreateTables extends DoFn<KV<TableDestination, TableRow>,
+ KV<TableDestination, TableRow>> {
+ private final CreateDisposition createDisposition;
+ private final BigQueryServices bqServices;
+ private final SerializableFunction<TableDestination, TableSchema> schemaFunction;
+
+
+ /** The set of tables created so far, so we don't try the creation each time.
+ * TODO: We should put a bound on memory usage of this. Use a Guava cache instead.
+ */
+ private static Set<String> createdTables =
+ Collections.newSetFromMap(new ConcurrentHashMap<String, Boolean>());
+
+ public CreateTables(CreateDisposition createDisposition, BigQueryServices bqServices,
+ SerializableFunction<TableDestination, TableSchema> schemaFunction) {
+ this.createDisposition = createDisposition;
+ this.bqServices = bqServices;
+ this.schemaFunction = schemaFunction;
+ }
+
+ @ProcessElement
+ public void processElement(ProcessContext context) throws InterruptedException, IOException {
+ BigQueryOptions options = context.getPipelineOptions().as(BigQueryOptions.class);
+ possibleCreateTable(options, context.element().getKey());
+ context.output(context.element());
+ }
+
+ private void possibleCreateTable(BigQueryOptions options, TableDestination tableDestination)
+ throws InterruptedException, IOException {
+ String tableSpec = tableDestination.getTableSpec();
+ TableReference tableReference = tableDestination.getTableReference();
+ String tableDescription = tableDestination.getTableDescription();
+ if (createDisposition != CreateDisposition.CREATE_NEVER
+ && !createdTables.contains(tableSpec)) {
+ synchronized (createdTables) {
+ // Another thread may have succeeded in creating the table in the meanwhile, so
+ // check again. This check isn't needed for correctness, but we add it to prevent
+ // every thread from attempting a create and overwhelming our BigQuery quota.
+ DatasetService datasetService = bqServices.getDatasetService(options);
+ if (!createdTables.contains(tableSpec)) {
+ TableSchema tableSchema = schemaFunction.apply(tableDestination);
+ if (datasetService.getTable(tableReference) == null) {
+ datasetService.createTable(
+ new Table()
+ .setTableReference(tableReference)
+ .setSchema(tableSchema)
+ .setDescription(tableDescription));
+ }
+ createdTables.add(tableSpec);
+ }
+ }
+ }
+ }
+
+ static void clearCreatedTables() {
+ synchronized (createdTables) {
+ createdTables.clear();
+ }
+ }
+}
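
A sketch of how CreateTables is meant to be wired in, mirroring its use in StreamingInserts below. The constant schema function is a stand-in, since per-table schemas are not supported at this point in the series; the field layout and names are placeholders:

    package org.apache.beam.sdk.io.gcp.bigquery;  // sketch placed alongside the classes above

    import com.google.api.services.bigquery.model.TableFieldSchema;
    import com.google.api.services.bigquery.model.TableRow;
    import com.google.api.services.bigquery.model.TableSchema;
    import com.google.common.collect.ImmutableList;
    import org.apache.beam.sdk.coders.KvCoder;
    import org.apache.beam.sdk.coders.TableRowJsonCoder;
    import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.CreateDisposition;
    import org.apache.beam.sdk.transforms.ParDo;
    import org.apache.beam.sdk.transforms.SerializableFunction;
    import org.apache.beam.sdk.values.KV;
    import org.apache.beam.sdk.values.PCollection;

    class CreateTablesUsageSketch {
      static PCollection<KV<TableDestination, TableRow>> ensureTables(
          PCollection<KV<TableDestination, TableRow>> rows, BigQueryServices bqServices) {
        // One constant schema for every destination; a single STRING field as an example.
        SerializableFunction<TableDestination, TableSchema> schemaFunction =
            new SerializableFunction<TableDestination, TableSchema>() {
              @Override
              public TableSchema apply(TableDestination destination) {
                return new TableSchema().setFields(ImmutableList.<TableFieldSchema>of(
                    new TableFieldSchema().setName("name").setType("STRING")));
              }
            };
        return rows
            .apply("CreateTables", ParDo.of(
                new CreateTables(CreateDisposition.CREATE_IF_NEEDED, bqServices, schemaFunction)))
            .setCoder(KvCoder.of(TableDestinationCoder.of(), TableRowJsonCoder.of()));
      }
    }
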
http://git-wip-us.apache.org/repos/asf/beam/blob/58ed5c7e/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/GenerateShardedTable.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/GenerateShardedTable.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/GenerateShardedTable.java
new file mode 100644
index 0000000..da3a70a
--- /dev/null
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/GenerateShardedTable.java
@@ -0,0 +1,48 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements. See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership. The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+package org.apache.beam.sdk.io.gcp.bigquery;
+
+import com.google.api.services.bigquery.model.TableRow;
+import java.io.IOException;
+import java.util.concurrent.ThreadLocalRandom;
+import org.apache.beam.sdk.transforms.DoFn;
+import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
+import org.apache.beam.sdk.values.KV;
+
+/**
+ * Given a write to a specific table, assigns it to one of the
+ * {@code numShards} random keys generated for that table.
+ */
+class GenerateShardedTable extends DoFn<KV<TableDestination, TableRow>,
+ KV<ShardedKey<String>, TableRow>> {
+ private final int numShards;
+
+ GenerateShardedTable(int numShards) {
+ this.numShards = numShards;
+ }
+
+ @ProcessElement
+ public void processElement(ProcessContext context, BoundedWindow window) throws IOException {
+ ThreadLocalRandom randomGenerator = ThreadLocalRandom.current();
+ // We output on keys 0 to numShards - 1 to ensure that there's enough batching for
+ // BigQuery.
+ String tableSpec = context.element().getKey().getTableSpec();
+ context.output(KV.of(ShardedKey.of(tableSpec, randomGenerator.nextInt(0, numShards)),
+ context.element().getValue()));
+ }
+}
http://git-wip-us.apache.org/repos/asf/beam/blob/58ed5c7e/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/PrepareWrite.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/PrepareWrite.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/PrepareWrite.java
index 0c08e18..7712417 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/PrepareWrite.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/PrepareWrite.java
@@ -1,3 +1,20 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements. See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership. The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
package org.apache.beam.sdk.io.gcp.bigquery;
import com.google.api.services.bigquery.model.TableReference;
@@ -6,8 +23,6 @@ import com.google.common.base.Strings;
import java.io.IOException;
import org.apache.beam.sdk.options.BigQueryOptions;
import org.apache.beam.sdk.transforms.DoFn;
-import org.apache.beam.sdk.transforms.PTransform;
-import org.apache.beam.sdk.transforms.ParDo;
import org.apache.beam.sdk.transforms.SerializableFunction;
import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
import org.apache.beam.sdk.values.KV;
@@ -15,44 +30,38 @@ import org.apache.beam.sdk.values.PCollection;
import org.apache.beam.sdk.values.ValueInSingleWindow;
/**
- * Prepare an input {@link PCollection<T>} for writing to BigQuery. Use the table-reference
+ * Prepare an input {@link PCollection} for writing to BigQuery. Use the table-reference
* function to determine which tables each element is written to, and format the element into a
* {@link TableRow} using the user-supplied format function.
*/
-public class PrepareWrite<T> extends PTransform<PCollection<T>, PCollection<KV<String, TableRow>>> {
- private static final String NAME = "PrepareWrite";
- private SerializableFunction<ValueInSingleWindow<T>, TableReference> tableRefFunction;
+public class PrepareWrite<T> extends DoFn<T, KV<TableDestination, TableRow>> {
+ private SerializableFunction<ValueInSingleWindow<T>, TableDestination> tableFunction;
private SerializableFunction<T, TableRow> formatFunction;
- public PrepareWrite(SerializableFunction<ValueInSingleWindow<T>, TableReference> tableRefFunction,
+ public PrepareWrite(SerializableFunction<ValueInSingleWindow<T>, TableDestination> tableFunction,
SerializableFunction<T, TableRow> formatFunction) {
- super(NAME);
- this.tableRefFunction = tableRefFunction;
+ this.tableFunction = tableFunction;
this.formatFunction = formatFunction;
}
- @Override
- public PCollection<KV<String, TableRow>> expand(PCollection<T> input) {
- PCollection<KV<String, TableRow>> elementsByTable =
- input.apply(ParDo.of(new DoFn<T, KV<String, TableRow>>() {
- @ProcessElement
- public void processElement(ProcessContext context, BoundedWindow window) throws IOException {
- String tableSpec = tableSpecFromWindowedValue(
- context.getPipelineOptions().as(BigQueryOptions.class),
- ValueInSingleWindow.of(context.element(), context.timestamp(), window, context.pane()));
- TableRow tableRow = formatFunction.apply(context.element());
- context.output(KV.of(tableSpec, tableRow));
- }
- }));
- return elementsByTable;
+ @ProcessElement
+ public void processElement(ProcessContext context, BoundedWindow window) throws IOException {
+ TableDestination tableDestination = tableSpecFromWindowedValue(
+ context.getPipelineOptions().as(BigQueryOptions.class),
+ ValueInSingleWindow.of(context.element(), context.timestamp(), window, context.pane()));
+ TableRow tableRow = formatFunction.apply(context.element());
+ context.output(KV.of(tableDestination, tableRow));
}
- private String tableSpecFromWindowedValue(BigQueryOptions options,
+ private TableDestination tableSpecFromWindowedValue(BigQueryOptions options,
ValueInSingleWindow<T> value) {
- TableReference table = tableRefFunction.apply(value);
- if (Strings.isNullOrEmpty(table.getProjectId())) {
- table.setProjectId(options.getProject());
+ TableDestination tableDestination = tableFunction.apply(value);
+ TableReference tableReference = tableDestination.getTableReference();
+ if (Strings.isNullOrEmpty(tableReference.getProjectId())) {
+ tableReference.setProjectId(options.getProject());
+ tableDestination = new TableDestination(tableReference,
+ tableDestination.getTableDescription());
}
- return BigQueryHelpers.toTableSpec(table);
+ return tableDestination;
}
}
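
A minimal sketch of the default-project behavior in tableSpecFromWindowedValue above: when the routed TableReference carries no project id, the destination is rebuilt with the pipeline's project (the project id below is a placeholder for what BigQueryOptions would supply):

    package org.apache.beam.sdk.io.gcp.bigquery;  // sketch placed alongside the classes above

    import com.google.api.services.bigquery.model.TableReference;
    import com.google.common.base.Strings;

    class DefaultProjectSketch {
      public static void main(String[] args) {
        TableDestination destination = new TableDestination("dataset-id.table-id", "");
        TableReference ref = destination.getTableReference();  // projectId is null here
        if (Strings.isNullOrEmpty(ref.getProjectId())) {
          ref.setProjectId("my-project");  // a real pipeline reads this from BigQueryOptions
          destination = new TableDestination(ref, destination.getTableDescription());
        }
        System.out.println(destination.getTableSpec());  // my-project:dataset-id.table-id
      }
    }
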
http://git-wip-us.apache.org/repos/asf/beam/blob/58ed5c7e/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/StreamWithDeDup.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/StreamWithDeDup.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/StreamWithDeDup.java
deleted file mode 100644
index 506a564..0000000
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/StreamWithDeDup.java
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.beam.sdk.io.gcp.bigquery;
-
-import com.google.api.services.bigquery.model.TableSchema;
-import org.apache.beam.sdk.coders.Coder;
-import org.apache.beam.sdk.coders.KvCoder;
-import org.apache.beam.sdk.coders.StringUtf8Coder;
-import org.apache.beam.sdk.coders.VoidCoder;
-import org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.JsonSchemaToTableSchema;
-import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write;
-import org.apache.beam.sdk.options.BigQueryOptions;
-import org.apache.beam.sdk.options.ValueProvider.NestedValueProvider;
-import org.apache.beam.sdk.transforms.PTransform;
-import org.apache.beam.sdk.transforms.ParDo;
-import org.apache.beam.sdk.util.Reshuffle;
-import org.apache.beam.sdk.values.KV;
-import org.apache.beam.sdk.values.PCollection;
-
-/**
-* PTransform that performs streaming BigQuery write. To increase consistency,
-* it leverages BigQuery best effort de-dup mechanism.
- */
-class StreamWithDeDup<T> extends PTransform<PCollection<T>, WriteResult> {
- private final Write<T> write;
-
- /** Constructor. */
- StreamWithDeDup(Write<T> write) {
- this.write = write;
- }
-
- @Override
- protected Coder<Void> getDefaultOutputCoder() {
- return VoidCoder.of();
- }
-
- @Override
- public WriteResult expand(PCollection<T> input) {
- // A naive implementation would be to simply stream data directly to BigQuery.
- // However, this could occasionally lead to duplicated data, e.g., when
- // a VM that runs this code is restarted and the code is re-run.
-
- // The above risk is mitigated in this implementation by relying on
- // BigQuery built-in best effort de-dup mechanism.
-
- // To use this mechanism, each input TableRow is tagged with a generated
- // unique id, which is then passed to BigQuery and used to ignore duplicates.
-
- PCollection<KV<ShardedKey<String>, TableRowInfo>> tagged =
- input.apply(ParDo.of(new TagWithUniqueIdsAndTable<T>(
- input.getPipeline().getOptions().as(BigQueryOptions.class), write)));
-
- // To prevent having the same TableRow processed more than once with regenerated
- // different unique ids, this implementation relies on "checkpointing", which is
- // achieved as a side effect of having StreamingWriteFn immediately follow a GBK,
- // performed by Reshuffle.
- NestedValueProvider<TableSchema, String> schema =
- write.getJsonSchema() == null
- ? null
- : NestedValueProvider.of(write.getJsonSchema(), new JsonSchemaToTableSchema());
- tagged
- .setCoder(KvCoder.of(ShardedKeyCoder.of(StringUtf8Coder.of()), TableRowInfoCoder.of()))
- .apply(Reshuffle.<ShardedKey<String>, TableRowInfo>of())
- .apply(
- ParDo.of(
- new StreamingWriteFn(
- schema,
- write.getCreateDisposition(),
- write.getTableDescription(),
- write.getBigQueryServices())));
-
- return WriteResult.in(input.getPipeline());
- }
-}
http://git-wip-us.apache.org/repos/asf/beam/blob/58ed5c7e/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/StreamingInserts.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/StreamingInserts.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/StreamingInserts.java
new file mode 100644
index 0000000..37afbdf
--- /dev/null
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/StreamingInserts.java
@@ -0,0 +1,110 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.beam.sdk.io.gcp.bigquery;
+
+import com.google.api.services.bigquery.model.TableRow;
+import com.google.api.services.bigquery.model.TableSchema;
+import javax.annotation.Nullable;
+import org.apache.beam.sdk.coders.Coder;
+import org.apache.beam.sdk.coders.KvCoder;
+import org.apache.beam.sdk.coders.StringUtf8Coder;
+import org.apache.beam.sdk.coders.TableRowJsonCoder;
+import org.apache.beam.sdk.coders.VoidCoder;
+import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write;
+import org.apache.beam.sdk.transforms.PTransform;
+import org.apache.beam.sdk.transforms.ParDo;
+import org.apache.beam.sdk.transforms.SerializableFunction;
+import org.apache.beam.sdk.util.Reshuffle;
+import org.apache.beam.sdk.values.KV;
+import org.apache.beam.sdk.values.PCollection;
+
+/**
+ * PTransform that performs a streaming BigQuery write. To increase consistency,
+ * it leverages BigQuery's best-effort de-duplication mechanism.
+ */
+
+class StreamingInserts
+ extends PTransform<PCollection<KV<TableDestination, TableRow>>, WriteResult> {
+ private final Write<?> write;
+
+ private static class ConstantSchemaFunction implements
+ SerializableFunction<TableDestination, TableSchema> {
+ private final @Nullable String jsonSchema;
+
+ ConstantSchemaFunction(TableSchema schema) {
+ this.jsonSchema = BigQueryHelpers.toJsonString(schema);
+ }
+
+ @Override
+ @Nullable
+ public TableSchema apply(TableDestination table) {
+ return BigQueryHelpers.fromJsonString(jsonSchema, TableSchema.class);
+ }
+ }
+
+ /** Constructor. */
+ StreamingInserts(Write<?> write) {
+ this.write = write;
+ }
+
+ @Override
+ protected Coder<Void> getDefaultOutputCoder() {
+ return VoidCoder.of();
+ }
+
+ @Override
+ public WriteResult expand(PCollection<KV<TableDestination, TableRow>> input) {
+ // Since BigQueryIO.java does not yet have support for per-table schemas, inject a constant
+ // schema function here. If no schema is specified, this function will return null.
+ SerializableFunction<TableDestination, TableSchema> schemaFunction =
+ new ConstantSchemaFunction(write.getSchema());
+
+ // A naive implementation would be to simply stream data directly to BigQuery.
+ // However, this could occasionally lead to duplicated data, e.g., when
+ // a VM that runs this code is restarted and the code is re-run.
+
+ // The above risk is mitigated in this implementation by relying on
+ // BigQuery built-in best effort de-dup mechanism.
+
+ // To use this mechanism, each input TableRow is tagged with a generated
+ // unique id, which is then passed to BigQuery and used to ignore duplicates.
+ PCollection<KV<ShardedKey<String>, TableRowInfo>> tagged = input
+ .apply("CreateTables", ParDo.of(new CreateTables(write.getCreateDisposition(),
+ write.getBigQueryServices(), schemaFunction)))
+ // We create 50 keys per BigQuery table to generate output on. This is few enough that we
+ // get good batching into BigQuery's insert calls, and enough that we can max out the
+ // streaming insert quota.
+ .apply("ShardTableWrites", ParDo.of(new GenerateShardedTable(50)))
+ .setCoder(KvCoder.of(ShardedKeyCoder.of(StringUtf8Coder.of()), TableRowJsonCoder.of()))
+ .apply("TagWithUniqueIds", ParDo.of(new TagWithUniqueIds()));
+
+ // To prevent having the same TableRow processed more than once with regenerated
+ // different unique ids, this implementation relies on "checkpointing", which is
+ // achieved as a side effect of having StreamingWriteFn immediately follow a GBK,
+ // performed by Reshuffle.
+ tagged
+ .setCoder(KvCoder.of(ShardedKeyCoder.of(StringUtf8Coder.of()), TableRowInfoCoder.of()))
+ .apply(Reshuffle.<ShardedKey<String>, TableRowInfo>of())
+ .apply("StreamingWrite",
+ ParDo.of(
+ new StreamingWriteFn(write.getBigQueryServices())));
+
+ return WriteResult.in(input.getPipeline());
+ }
+}
http://git-wip-us.apache.org/repos/asf/beam/blob/58ed5c7e/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/StreamingWriteFn.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/StreamingWriteFn.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/StreamingWriteFn.java
index 1d93fa3..83ed3d2 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/StreamingWriteFn.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/StreamingWriteFn.java
@@ -18,28 +18,16 @@
package org.apache.beam.sdk.io.gcp.bigquery;
-import static com.google.common.base.Preconditions.checkNotNull;
-import com.google.api.services.bigquery.model.Table;
import com.google.api.services.bigquery.model.TableReference;
import com.google.api.services.bigquery.model.TableRow;
-import com.google.api.services.bigquery.model.TableSchema;
import com.google.common.annotations.VisibleForTesting;
import java.io.IOException;
-import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
-import java.util.Set;
-import java.util.concurrent.ConcurrentHashMap;
-import javax.annotation.Nullable;
-import org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.TableSchemaToJsonSchema;
-import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write;
-import org.apache.beam.sdk.io.gcp.bigquery.BigQueryServices.DatasetService;
import org.apache.beam.sdk.metrics.Counter;
import org.apache.beam.sdk.metrics.Metrics;
import org.apache.beam.sdk.options.BigQueryOptions;
-import org.apache.beam.sdk.options.ValueProvider;
-import org.apache.beam.sdk.options.ValueProvider.NestedValueProvider;
import org.apache.beam.sdk.transforms.DoFn;
import org.apache.beam.sdk.transforms.display.DisplayData;
import org.apache.beam.sdk.util.SystemDoFnInternal;
@@ -52,48 +40,19 @@ import org.apache.beam.sdk.values.KV;
@VisibleForTesting
class StreamingWriteFn
extends DoFn<KV<ShardedKey<String>, TableRowInfo>, Void> {
- /** TableSchema in JSON. Use String to make the class Serializable. */
- @Nullable
- private final ValueProvider<String> jsonTableSchema;
-
- @Nullable private final String tableDescription;
-
private final BigQueryServices bqServices;
/** JsonTableRows to accumulate BigQuery rows in order to batch writes. */
private transient Map<String, List<TableRow>> tableRows;
- private final Write.CreateDisposition createDisposition;
-
/** The list of unique ids for each BigQuery table row. */
private transient Map<String, List<String>> uniqueIdsForTableRows;
- /** The list of tables created so far, so we don't try the creation
- each time. */
- private static Set<String> createdTables =
- Collections.newSetFromMap(new ConcurrentHashMap<String, Boolean>());
-
/** Tracks bytes written, exposed as "ByteCount" Counter. */
private Counter byteCounter = Metrics.counter(StreamingWriteFn.class, "ByteCount");
- /** Constructor. */
- StreamingWriteFn(@Nullable ValueProvider<TableSchema> schema,
- Write.CreateDisposition createDisposition,
- @Nullable String tableDescription, BigQueryServices bqServices) {
- this.jsonTableSchema = schema == null ? null :
- NestedValueProvider.of(schema, new TableSchemaToJsonSchema());
- this.createDisposition = createDisposition;
- this.bqServices = checkNotNull(bqServices, "bqServices");
- this.tableDescription = tableDescription;
- }
-
- /**
- * Clear the cached map of created tables. Used for testing.
- */
- static void clearCreatedTables() {
- synchronized (createdTables) {
- createdTables.clear();
- }
+ StreamingWriteFn(BigQueryServices bqServices) {
+ this.bqServices = bqServices;
}
/** Prepares a target BigQuery table. */
@@ -119,9 +78,8 @@ class StreamingWriteFn
@FinishBundle
public void finishBundle(Context context) throws Exception {
BigQueryOptions options = context.getPipelineOptions().as(BigQueryOptions.class);
-
for (Map.Entry<String, List<TableRow>> entry : tableRows.entrySet()) {
- TableReference tableReference = getOrCreateTable(options, entry.getKey());
+ TableReference tableReference = BigQueryHelpers.parseTableSpec(entry.getKey());
flushRows(tableReference, entry.getValue(),
uniqueIdsForTableRows.get(entry.getKey()), options);
}
@@ -132,39 +90,6 @@ class StreamingWriteFn
@Override
public void populateDisplayData(DisplayData.Builder builder) {
super.populateDisplayData(builder);
-
- builder
- .addIfNotNull(DisplayData.item("schema", jsonTableSchema)
- .withLabel("Table Schema"))
- .addIfNotNull(DisplayData.item("tableDescription", tableDescription)
- .withLabel("Table Description"));
- }
-
- public TableReference getOrCreateTable(BigQueryOptions options, String tableSpec)
- throws InterruptedException, IOException {
- TableReference tableReference = BigQueryHelpers.parseTableSpec(tableSpec);
- if (createDisposition != createDisposition.CREATE_NEVER
- && !createdTables.contains(tableSpec)) {
- synchronized (createdTables) {
- // Another thread may have succeeded in creating the table in the meanwhile, so
- // check again. This check isn't needed for correctness, but we add it to prevent
- // every thread from attempting a create and overwhelming our BigQuery quota.
- DatasetService datasetService = bqServices.getDatasetService(options);
- if (!createdTables.contains(tableSpec)) {
- if (datasetService.getTable(tableReference) == null) {
- TableSchema tableSchema = BigQueryIO.JSON_FACTORY.fromString(
- jsonTableSchema.get(), TableSchema.class);
- datasetService.createTable(
- new Table()
- .setTableReference(tableReference)
- .setSchema(tableSchema)
- .setDescription(tableDescription));
- }
- createdTables.add(tableSpec);
- }
- }
- }
- return tableReference;
}
/**
@@ -173,6 +98,7 @@ class StreamingWriteFn
private void flushRows(TableReference tableReference,
List<TableRow> tableRows, List<String> uniqueIds, BigQueryOptions options)
throws InterruptedException {
+ System.out.println("FlUSHING ROWS " + tableRows.size());
if (!tableRows.isEmpty()) {
try {
long totalBytes = bqServices.getDatasetService(options).insertAll(
http://git-wip-us.apache.org/repos/asf/beam/blob/58ed5c7e/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/TableDestination.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/TableDestination.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/TableDestination.java
index 3cbbf3b..631afeb 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/TableDestination.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/TableDestination.java
@@ -1,7 +1,53 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements. See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership. The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
package org.apache.beam.sdk.io.gcp.bigquery;
+import com.google.api.services.bigquery.model.TableReference;
+
/**
- * Created by relax on 3/28/17.
+ * Encapsulates a BigQuery table destination.
*/
public class TableDestination {
+ private final String tableSpec;
+ private final String tableDescription;
+
+
+ public TableDestination(String tableSpec, String tableDescription) {
+ this.tableSpec = tableSpec;
+ this.tableDescription = tableDescription;
+ }
+
+ public TableDestination(TableReference tableReference, String tableDescription) {
+ this.tableSpec = BigQueryHelpers.toTableSpec(tableReference);
+ this.tableDescription = tableDescription;
+ }
+
+ public String getTableSpec() {
+ return tableSpec;
+ }
+
+
+ public TableReference getTableReference() {
+ return BigQueryHelpers.parseTableSpec(tableSpec);
+ }
+
+ public String getTableDescription() {
+ return tableDescription;
+ }
}
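
Both constructors above normalize to the same table spec, so routing code can hand over either a spec string or a TableReference. A small sketch with placeholder names:

    package org.apache.beam.sdk.io.gcp.bigquery;  // sketch placed alongside the class above

    import com.google.api.services.bigquery.model.TableReference;

    class TableDestinationSketch {
      public static void main(String[] args) {
        TableDestination fromSpec =
            new TableDestination("project-id:dataset-id.table-id", "example description");
        TableDestination fromReference =
            new TableDestination(
                new TableReference()
                    .setProjectId("project-id")
                    .setDatasetId("dataset-id")
                    .setTableId("table-id"),
                "example description");
        // Both print "project-id:dataset-id.table-id".
        System.out.println(fromSpec.getTableSpec());
        System.out.println(fromReference.getTableSpec());
      }
    }
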
http://git-wip-us.apache.org/repos/asf/beam/blob/58ed5c7e/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/TableDestinationCoder.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/TableDestinationCoder.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/TableDestinationCoder.java
new file mode 100644
index 0000000..fa24700
--- /dev/null
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/TableDestinationCoder.java
@@ -0,0 +1,64 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements. See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership. The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+package org.apache.beam.sdk.io.gcp.bigquery;
+
+import com.fasterxml.jackson.annotation.JsonCreator;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import org.apache.beam.sdk.coders.AtomicCoder;
+import org.apache.beam.sdk.coders.CoderException;
+import org.apache.beam.sdk.coders.StringUtf8Coder;
+
+/**
+ * A coder for {@link TableDestination} objects.
+ */
+public class TableDestinationCoder extends AtomicCoder<TableDestination> {
+ private static final TableDestinationCoder INSTANCE = new TableDestinationCoder();
+
+
+ @JsonCreator
+ public static TableDestinationCoder of() {
+ return INSTANCE;
+ }
+
+ @Override
+ public void encode(TableDestination value, OutputStream outStream, Context context)
+ throws IOException {
+ if (value == null) {
+ throw new CoderException("cannot encode a null value");
+ }
+ stringCoder.encode(value.getTableSpec(), outStream, context.nested());
+ stringCoder.encode(value.getTableDescription(), outStream, context);
+ }
+
+ @Override
+ public TableDestination decode(InputStream inStream, Context context) throws IOException {
+ return new TableDestination(
+ stringCoder.decode(inStream, context.nested()),
+ stringCoder.decode(inStream, context));
+ }
+
+ @Override
+ public void verifyDeterministic() throws NonDeterministicException {
+ return;
+ }
+
+ private static final StringUtf8Coder stringCoder = StringUtf8Coder.of();
+}
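
A quick round-trip check for the coder above, using the nested context as KvCoder would when TableDestination is the key; the spec and description values are arbitrary:

    package org.apache.beam.sdk.io.gcp.bigquery;  // sketch placed alongside the class above

    import java.io.ByteArrayInputStream;
    import java.io.ByteArrayOutputStream;
    import org.apache.beam.sdk.coders.Coder;

    class TableDestinationCoderSketch {
      public static void main(String[] args) throws Exception {
        TableDestination original =
            new TableDestination("project-id:dataset-id.table-id", "example description");

        ByteArrayOutputStream out = new ByteArrayOutputStream();
        TableDestinationCoder.of().encode(original, out, Coder.Context.NESTED);

        TableDestination decoded = TableDestinationCoder.of().decode(
            new ByteArrayInputStream(out.toByteArray()), Coder.Context.NESTED);

        // Compare field by field; TableDestination does not define equals() at this point.
        System.out.println(decoded.getTableSpec().equals(original.getTableSpec()));
        System.out.println(decoded.getTableDescription().equals(original.getTableDescription()));
      }
    }
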
http://git-wip-us.apache.org/repos/asf/beam/blob/58ed5c7e/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/TagWithUniqueIds.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/TagWithUniqueIds.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/TagWithUniqueIds.java
new file mode 100644
index 0000000..6f0186e
--- /dev/null
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/TagWithUniqueIds.java
@@ -0,0 +1,71 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.beam.sdk.io.gcp.bigquery;
+
+import com.google.api.services.bigquery.model.TableReference;
+import com.google.api.services.bigquery.model.TableRow;
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.base.Strings;
+import java.io.IOException;
+import java.util.UUID;
+import java.util.concurrent.ThreadLocalRandom;
+import org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.TableRefToTableSpec;
+import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write;
+import org.apache.beam.sdk.options.BigQueryOptions;
+import org.apache.beam.sdk.options.ValueProvider;
+import org.apache.beam.sdk.options.ValueProvider.NestedValueProvider;
+import org.apache.beam.sdk.transforms.DoFn;
+import org.apache.beam.sdk.transforms.display.DisplayData;
+import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
+import org.apache.beam.sdk.values.KV;
+import org.apache.beam.sdk.values.ValueInSingleWindow;
+
+/**
+ * Fn that tags each table row with a unique id.
+ * To avoid calling UUID.randomUUID() for each element, which can be costly,
+ * a randomUUID is generated only once per bucket of data. The actual unique
+ * id is created by concatenating this randomUUID with a sequential number.
+ */
+@VisibleForTesting
+class TagWithUniqueIds
+ extends DoFn<KV<ShardedKey<String>, TableRow>, KV<ShardedKey<String>, TableRowInfo>> {
+
+ private transient String randomUUID;
+ private transient long sequenceNo = 0L;
+
+ @StartBundle
+ public void startBundle(Context context) {
+ randomUUID = UUID.randomUUID().toString();
+ }
+
+ /** Tag the input with a unique id. */
+ @ProcessElement
+ public void processElement(ProcessContext context, BoundedWindow window) throws IOException {
+ String uniqueId = randomUUID + sequenceNo++;
+ // The incoming sharded key is passed through unchanged; only the value is
+ // re-wrapped with its unique id.
+ context.output(KV.of(context.element().getKey(),
+ new TableRowInfo(context.element().getValue(), uniqueId)));
+ }
+
+ @Override
+ public void populateDisplayData(DisplayData.Builder builder) {
+ super.populateDisplayData(builder);
+ }
+}
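
The id scheme above in isolation: one random UUID per bundle, extended with a per-element sequence number, so ids stay unique while the cost of UUID generation is paid once per bundle rather than once per row. A tiny standalone sketch (bundle boundaries are simulated by the outer loop):

    import java.util.UUID;

    class UniqueIdSchemeSketch {
      public static void main(String[] args) {
        for (int bundle = 0; bundle < 2; ++bundle) {         // two simulated bundles
          String randomUUID = UUID.randomUUID().toString();  // what startBundle does
          long sequenceNo = 0L;
          for (int element = 0; element < 3; ++element) {    // three elements per bundle
            String uniqueId = randomUUID + sequenceNo++;     // same concatenation as above
            System.out.println(uniqueId);
          }
        }
      }
    }
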
http://git-wip-us.apache.org/repos/asf/beam/blob/58ed5c7e/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/TagWithUniqueIdsAndTable.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/TagWithUniqueIdsAndTable.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/TagWithUniqueIdsAndTable.java
deleted file mode 100644
index 4e50f7c..0000000
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/TagWithUniqueIdsAndTable.java
+++ /dev/null
@@ -1,101 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.beam.sdk.io.gcp.bigquery;
-
-import com.google.api.services.bigquery.model.TableReference;
-import com.google.common.annotations.VisibleForTesting;
-import com.google.common.base.Strings;
-import java.io.IOException;
-import java.util.UUID;
-import java.util.concurrent.ThreadLocalRandom;
-import org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.TableRefToTableSpec;
-import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write;
-import org.apache.beam.sdk.options.BigQueryOptions;
-import org.apache.beam.sdk.options.ValueProvider;
-import org.apache.beam.sdk.options.ValueProvider.NestedValueProvider;
-import org.apache.beam.sdk.transforms.DoFn;
-import org.apache.beam.sdk.transforms.display.DisplayData;
-import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
-import org.apache.beam.sdk.values.KV;
-import org.apache.beam.sdk.values.ValueInSingleWindow;
-
-/**
- * Fn that tags each table row with a unique id and destination table.
- * To avoid calling UUID.randomUUID() for each element, which can be costly,
- * a randomUUID is generated only once per bucket of data. The actual unique
- * id is created by concatenating this randomUUID with a sequential number.
- */
-@VisibleForTesting
-class TagWithUniqueIdsAndTable<T>
- extends DoFn<T, KV<ShardedKey<String>, TableRowInfo>> {
- /** TableSpec to write to in the case of a single static destination. */
- private ValueProvider<String> tableSpec = null;
-
- private final Write<T, ?> write;
-
- private transient String randomUUID;
- private transient long sequenceNo = 0L;
-
- TagWithUniqueIdsAndTable(BigQueryOptions options,
- Write<T, ?> write) {
- ValueProvider<TableReference> table = write.getTableWithDefaultProject(
- options.as(BigQueryOptions.class));
- if (table != null) {
- this.tableSpec = NestedValueProvider.of(table, new TableRefToTableSpec());
- }
- this.write = write;
- }
-
-
- @StartBundle
- public void startBundle(Context context) {
- randomUUID = UUID.randomUUID().toString();
- }
-
- /** Tag the input with a unique id. */
- @ProcessElement
- public void processElement(ProcessContext context, BoundedWindow window) throws IOException {
- String uniqueId = randomUUID + sequenceNo++;
- ThreadLocalRandom randomGenerator = ThreadLocalRandom.current();
- String tableSpec = tableSpecFromWindowedValue(
- context.getPipelineOptions().as(BigQueryOptions.class),
- ValueInSingleWindow.of(context.element(), context.timestamp(), window, context.pane()));
- // We output on keys 0-50 to ensure that there's enough batching for
- // BigQuery.
- context.output(KV.of(ShardedKey.of(tableSpec, randomGenerator.nextInt(0, 50)),
- new TableRowInfo(write.getFormatFunction().apply(context.element()), uniqueId)));
- }
-
- @Override
- public void populateDisplayData(DisplayData.Builder builder) {
- super.populateDisplayData(builder);
-
- builder.addIfNotNull(DisplayData.item("table", tableSpec));
- builder.add(DisplayData.item("tableFn", write.getTableRefFunction().getClass())
- .withLabel("Table Reference Function"));
- }
-
- @VisibleForTesting
- ValueProvider<String> getTableSpec() {
- return tableSpec;
- }
-
-
-
-}
http://git-wip-us.apache.org/repos/asf/beam/blob/58ed5c7e/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOTest.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOTest.java b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOTest.java
index 499aa74..d953edd 100644
--- a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOTest.java
+++ b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOTest.java
@@ -518,7 +518,6 @@ public class BigQueryIOTest implements Serializable {
/** A fake dataset service that can be serialized, for use in testReadFromTable. */
private static class FakeDatasetService implements DatasetService, Serializable {
-
@Override
public Table getTable(TableReference tableRef)
throws InterruptedException, IOException {
@@ -630,6 +629,7 @@ public class BigQueryIOTest implements Serializable {
TableContainer tableContainer = getTableContainer(
ref.getProjectId(), ref.getDatasetId(), ref.getTableId());
for (int i = 0; i < rowList.size(); ++i) {
+ System.out.println("adding row " + rowList.get(i));
tableContainer.addRow(rowList.get(i), insertIdList.get(i));
dataSize += rowList.get(i).toString().length();
}
@@ -1121,15 +1121,15 @@ public class BigQueryIOTest implements Serializable {
}
);
- SerializableFunction<ValueInSingleWindow<Integer>, String> tableFunction =
- new SerializableFunction<ValueInSingleWindow<Integer>, String>() {
+ SerializableFunction<ValueInSingleWindow<Integer>, TableDestination> tableFunction =
+ new SerializableFunction<ValueInSingleWindow<Integer>, TableDestination>() {
@Override
- public String apply(ValueInSingleWindow<Integer> input) {
+ public TableDestination apply(ValueInSingleWindow<Integer> input) {
PartitionedGlobalWindow window = (PartitionedGlobalWindow) input.getWindow();
// Check that we can access the element as well here.
checkArgument(window.value.equals(Integer.toString(input.getValue() % 5)),
"Incorrect element");
- return "project-id:dataset-id.table-id-" + window.value;
+ return new TableDestination("project-id:dataset-id.table-id-" + window.value, "");
}
};
@@ -1559,14 +1559,6 @@ public class BigQueryIOTest implements Serializable {
}
@Test
- public void testStreamingWriteFnCreateNever() throws Exception {
- StreamingWriteFn fn = new StreamingWriteFn(
- null, CreateDisposition.CREATE_NEVER, null, new FakeBigQueryServices());
- assertEquals(BigQueryHelpers.parseTableSpec("dataset.table"),
- fn.getOrCreateTable(null, "dataset.table"));
- }
-
- @Test
public void testCreateNeverWithStreaming() throws Exception {
BigQueryOptions options = TestPipeline.testingPipelineOptions().as(BigQueryOptions.class);
options.setProject("project");
[02/50] [abbrv] beam git commit: This closes #2572
Posted by dh...@apache.org.
This closes #2572
Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/a9bcc8b1
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/a9bcc8b1
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/a9bcc8b1
Branch: refs/heads/DSL_SQL
Commit: a9bcc8b154190bc41a0225838409e2b9cd2e9c93
Parents: d988150 09e0f77
Author: Dan Halperin <dh...@google.com>
Authored: Tue Apr 18 17:58:04 2017 -0700
Committer: Dan Halperin <dh...@google.com>
Committed: Tue Apr 18 17:58:04 2017 -0700
----------------------------------------------------------------------
sdks/java/core/pom.xml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------
[14/50] [abbrv] beam git commit: Fix tests to properly fake out
BigQueryService, and add tests for dynamic-table functionality.
Posted by dh...@apache.org.
Fix tests to properly fake out BigQueryService, and add tests for dynamic-table functionality.
Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/b486137d
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/b486137d
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/b486137d
Branch: refs/heads/DSL_SQL
Commit: b486137d2190db9212a92176f703e6ed7858fe59
Parents: 760a945
Author: Reuven Lax <re...@google.com>
Authored: Fri Mar 31 14:16:48 2017 -0700
Committer: Eugene Kirpichov <ki...@google.com>
Committed: Tue Apr 18 21:12:50 2017 -0700
----------------------------------------------------------------------
.../beam/sdk/io/gcp/bigquery/BatchLoads.java | 7 +-
.../beam/sdk/io/gcp/bigquery/BigQueryIO.java | 15 +-
.../beam/sdk/io/gcp/bigquery/ShardedKey.java | 2 +-
.../sdk/io/gcp/bigquery/StreamingInserts.java | 5 +-
.../sdk/io/gcp/bigquery/StreamingWriteFn.java | 1 -
.../sdk/io/gcp/bigquery/TableDestination.java | 3 +-
.../sdk/io/gcp/bigquery/TableRowWriter.java | 3 +-
.../sdk/io/gcp/bigquery/TagWithUniqueIds.java | 9 -
.../io/gcp/bigquery/WriteBundlesToFiles.java | 12 +-
.../sdk/io/gcp/bigquery/WritePartition.java | 13 +-
.../sdk/io/gcp/bigquery/BigQueryIOTest.java | 613 ++++++++++---------
.../io/gcp/bigquery/FakeBigQueryServices.java | 114 +++-
.../sdk/io/gcp/bigquery/FakeDatasetService.java | 138 +++--
.../sdk/io/gcp/bigquery/FakeJobService.java | 182 +++++-
.../sdk/io/gcp/bigquery/TableContainer.java | 33 +-
15 files changed, 703 insertions(+), 447 deletions(-)
----------------------------------------------------------------------
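For context on the commit message above: the change replaces Mockito mocks of the BigQuery services with hand-written, stateful fakes (FakeDatasetService, FakeJobService) that tests seed with data up front and inspect afterwards. A minimal sketch of that fake-over-mock pattern follows; the interface and class names are illustrative only and are not part of the Beam codebase.

// Sketch only: a hand-written fake implements the real service interface and
// keeps in-memory state, so tests exercise real call sequences instead of
// per-call stubbed answers. Names below are illustrative.
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

interface RowService {                               // stand-in for a remote service API
  void insert(String table, String row);
  List<String> read(String table);
}

class InMemoryRowService implements RowService {     // the fake used by tests
  private final Map<String, List<String>> tables = new HashMap<>();

  @Override
  public void insert(String table, String row) {
    List<String> rows = tables.get(table);
    if (rows == null) {
      rows = new ArrayList<>();
      tables.put(table, rows);
    }
    rows.add(row);
  }

  @Override
  public List<String> read(String table) {
    List<String> rows = tables.get(table);
    return rows == null ? new ArrayList<String>() : rows;
  }
}

A test seeds the fake, runs the code under test against it, and asserts on the fake's state afterwards, mirroring how the tests below call insertAll on FakeDatasetService before reading and check its contents after writing.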
http://git-wip-us.apache.org/repos/asf/beam/blob/b486137d/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BatchLoads.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BatchLoads.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BatchLoads.java
index 5e80fae..06fdfce 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BatchLoads.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BatchLoads.java
@@ -58,9 +58,8 @@ import org.apache.beam.sdk.values.TupleTagList;
/**
* PTransform that uses BigQuery batch-load jobs to write a PCollection to BigQuery.
*/
-class BatchLoads<T> extends
- PTransform<PCollection<KV<TableDestination, TableRow>>, WriteResult> {
- BigQueryIO.Write<T> write;
+class BatchLoads extends PTransform<PCollection<KV<TableDestination, TableRow>>, WriteResult> {
+ BigQueryIO.Write<?> write;
private static class ConstantSchemaFunction implements
SerializableFunction<TableDestination, TableSchema> {
@@ -79,7 +78,7 @@ class BatchLoads<T> extends
}
}
- BatchLoads(BigQueryIO.Write<T> write) {
+ BatchLoads(BigQueryIO.Write<?> write) {
this.write = write;
}
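The hunk above drops the unused type parameter from BatchLoads and holds the write spec as Write<?>. As a small stand-alone illustration of that pattern (names are made up, not Beam classes): when a class never touches the element type itself, a wildcard-typed field avoids carrying a type parameter it does not use.

// Illustrative only: the holder class needs no <T> of its own.
class Config<T> { }

class Loader {
  private final Config<?> config;   // wildcard reference, mirroring Write<?> above

  Loader(Config<?> config) {
    this.config = config;
  }
}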
http://git-wip-us.apache.org/repos/asf/beam/blob/b486137d/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.java
index f1baaf7..54a25c7 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.java
@@ -64,7 +64,6 @@ import org.apache.beam.sdk.transforms.PTransform;
import org.apache.beam.sdk.transforms.ParDo;
import org.apache.beam.sdk.transforms.SerializableFunction;
import org.apache.beam.sdk.transforms.display.DisplayData;
-import org.apache.beam.sdk.util.GcsUtil.GcsUtilFactory;
import org.apache.beam.sdk.util.IOChannelFactory;
import org.apache.beam.sdk.util.IOChannelUtils;
import org.apache.beam.sdk.util.Transport;
@@ -536,7 +535,7 @@ public class BigQueryIO {
}
}
if (extractFiles != null && !extractFiles.isEmpty()) {
- new GcsUtilFactory().create(options).remove(extractFiles);
+ IOChannelUtils.getFactory(extractFiles.iterator().next()).remove(extractFiles);
}
}
};
@@ -701,8 +700,8 @@ public class BigQueryIO {
@AutoValue.Builder
abstract static class Builder<T> {
abstract Builder<T> setJsonTableRef(ValueProvider<String> jsonTableRef);
- abstract Builder<T> setTableRefFunction(
- SerializableFunction<ValueInSingleWindow<T>, TableReference> tableRefFunction);
+ abstract Builder<T> setTableFunction(
+ SerializableFunction<ValueInSingleWindow<T>, TableDestination> tableFunction);
abstract Builder<T> setFormatFunction(
SerializableFunction<T, TableRow> formatFunction);
abstract Builder<T> setJsonSchema(ValueProvider<String> jsonSchema);
@@ -823,8 +822,7 @@ public class BigQueryIO {
* {@link ValueInSingleWindow}, so can be determined by the value or by the window.
*/
public Write<T> to(
- SerializableFunction<ValueInSingleWindow<T>, String> tableSpecFunction) {
- return toTableReference(new TranslateTableSpecFunction<T>(tableSpecFunction));
+ SerializableFunction<ValueInSingleWindow<T>, TableDestination> tableFunction) {
ensureToNotCalledYet();
return toBuilder().setTableFunction(tableFunction).build();
}
@@ -834,7 +832,7 @@ public class BigQueryIO {
* {@link TableReference} instead of a string table specification.
*/
private Write<T> toTableReference(
- SerializableFunction<ValueInSingleWindow<T>, TableReference> tableRefFunction) {
+ SerializableFunction<ValueInSingleWindow<T>, TableDestination> tableFunction) {
ensureToNotCalledYet();
return toBuilder().setTableFunction(tableFunction).build();
}
@@ -984,8 +982,7 @@ public class BigQueryIO {
if (input.isBounded() == IsBounded.UNBOUNDED) {
return rowsWithDestination.apply(new StreamingInserts(this));
} else {
-
- return rowsWithDestination.apply(new BatchLoads<T>(this));
+ return rowsWithDestination.apply(new BatchLoads(this));
}
}
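The new to(tableFunction) overload above lets the destination table be computed per element and per window. A hedged sketch of such a function follows; it assumes the Beam GCP IO module on the classpath, and the project, dataset, and sharding rule are made up for illustration.

import org.apache.beam.sdk.io.gcp.bigquery.TableDestination;
import org.apache.beam.sdk.transforms.SerializableFunction;
import org.apache.beam.sdk.values.ValueInSingleWindow;

class ShardedDestinations {
  // Illustrative only: route each element to one of ten tables based on its value.
  static final SerializableFunction<ValueInSingleWindow<Long>, TableDestination> BY_SHARD =
      new SerializableFunction<ValueInSingleWindow<Long>, TableDestination>() {
        @Override
        public TableDestination apply(ValueInSingleWindow<Long> input) {
          long shard = input.getValue() % 10;                // assumed sharding rule
          return new TableDestination(
              "my-project:my_dataset.events_" + shard,       // assumed table spec
              "events shard " + shard);                      // table description
        }
      };
}

The same overload also permits window-based routing, as the PartitionedGlobalWindow test earlier in this digest demonstrates.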
http://git-wip-us.apache.org/repos/asf/beam/blob/b486137d/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/ShardedKey.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/ShardedKey.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/ShardedKey.java
index ab57446..09b4fbf 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/ShardedKey.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/ShardedKey.java
@@ -56,7 +56,7 @@ class ShardedKey<K> implements Serializable {
return false;
}
ShardedKey<K> other = (ShardedKey<K>) o;
- return (key == other.key) && (shardNumber == other.shardNumber);
+ return Objects.equals(key, other.key) && Objects.equals(shardNumber, other.shardNumber);
}
@Override
http://git-wip-us.apache.org/repos/asf/beam/blob/b486137d/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/StreamingInserts.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/StreamingInserts.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/StreamingInserts.java
index 37afbdf..ced1d66 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/StreamingInserts.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/StreamingInserts.java
@@ -38,9 +38,8 @@ import org.apache.beam.sdk.values.PCollection;
* PTransform that performs streaming BigQuery write. To increase consistency,
* it leverages BigQuery best effort de-dup mechanism.
*/
-
-class StreamingInserts
- extends PTransform<PCollection<KV<TableDestination, TableRow>>, WriteResult> {
+class StreamingInserts extends PTransform<PCollection<KV<TableDestination, TableRow>>,
+ WriteResult> {
private final Write<?> write;
private static class ConstantSchemaFunction implements
http://git-wip-us.apache.org/repos/asf/beam/blob/b486137d/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/StreamingWriteFn.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/StreamingWriteFn.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/StreamingWriteFn.java
index 83ed3d2..22b2078 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/StreamingWriteFn.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/StreamingWriteFn.java
@@ -98,7 +98,6 @@ class StreamingWriteFn
private void flushRows(TableReference tableReference,
List<TableRow> tableRows, List<String> uniqueIds, BigQueryOptions options)
throws InterruptedException {
- System.out.println("FlUSHING ROWS " + tableRows.size());
if (!tableRows.isEmpty()) {
try {
long totalBytes = bqServices.getDatasetService(options).insertAll(
http://git-wip-us.apache.org/repos/asf/beam/blob/b486137d/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/TableDestination.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/TableDestination.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/TableDestination.java
index e8538e0..36e1401 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/TableDestination.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/TableDestination.java
@@ -64,7 +64,8 @@ public class TableDestination implements Serializable {
return false;
}
TableDestination other = (TableDestination) o;
- return (tableSpec == other.tableSpec) && (tableDescription == other.tableDescription);
+ return Objects.equals(this.tableSpec, other.tableSpec)
+ && Objects.equals(this.tableDescription, other.tableDescription);
}
@Override
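Both equals implementations above (ShardedKey and TableDestination) move from == to Objects.equals, because == compares object references rather than contents for fields such as String table specs. A stand-alone illustration with made-up field names:

import java.util.Objects;

final class Destination {
  private final String tableSpec;        // illustrative fields mirroring the pattern above
  private final String description;

  Destination(String tableSpec, String description) {
    this.tableSpec = tableSpec;
    this.description = description;
  }

  @Override
  public boolean equals(Object o) {
    if (!(o instanceof Destination)) {
      return false;
    }
    Destination other = (Destination) o;
    // Objects.equals is null-safe value equality; "==" would only match when
    // both fields point at the very same object instance.
    return Objects.equals(tableSpec, other.tableSpec)
        && Objects.equals(description, other.description);
  }

  @Override
  public int hashCode() {
    // Keep hashCode consistent with the value-based equals.
    return Objects.hash(tableSpec, description);
  }
}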
http://git-wip-us.apache.org/repos/asf/beam/blob/b486137d/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/TableRowWriter.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/TableRowWriter.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/TableRowWriter.java
index a1f6153..ee8f466 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/TableRowWriter.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/TableRowWriter.java
@@ -29,7 +29,6 @@ import org.apache.beam.sdk.coders.Coder.Context;
import org.apache.beam.sdk.coders.TableRowJsonCoder;
import org.apache.beam.sdk.util.IOChannelUtils;
import org.apache.beam.sdk.util.MimeTypes;
-import org.apache.beam.sdk.values.KV;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -57,7 +56,7 @@ class TableRowWriter {
}
}
TableRowWriter(String basename) {
- this.tempFilePrefix = basename;
+ this.tempFilePrefix = basename;
}
public final void open(String uId) throws Exception {
http://git-wip-us.apache.org/repos/asf/beam/blob/b486137d/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/TagWithUniqueIds.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/TagWithUniqueIds.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/TagWithUniqueIds.java
index 6f0186e..7379784 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/TagWithUniqueIds.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/TagWithUniqueIds.java
@@ -18,23 +18,14 @@
package org.apache.beam.sdk.io.gcp.bigquery;
-import com.google.api.services.bigquery.model.TableReference;
import com.google.api.services.bigquery.model.TableRow;
import com.google.common.annotations.VisibleForTesting;
-import com.google.common.base.Strings;
import java.io.IOException;
import java.util.UUID;
-import java.util.concurrent.ThreadLocalRandom;
-import org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.TableRefToTableSpec;
-import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write;
-import org.apache.beam.sdk.options.BigQueryOptions;
-import org.apache.beam.sdk.options.ValueProvider;
-import org.apache.beam.sdk.options.ValueProvider.NestedValueProvider;
import org.apache.beam.sdk.transforms.DoFn;
import org.apache.beam.sdk.transforms.display.DisplayData;
import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
import org.apache.beam.sdk.values.KV;
-import org.apache.beam.sdk.values.ValueInSingleWindow;
/**
* Fn that tags each table row with a unique id and destination table.
http://git-wip-us.apache.org/repos/asf/beam/blob/b486137d/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/WriteBundlesToFiles.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/WriteBundlesToFiles.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/WriteBundlesToFiles.java
index b8069f6..869e68a 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/WriteBundlesToFiles.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/WriteBundlesToFiles.java
@@ -19,19 +19,16 @@
package org.apache.beam.sdk.io.gcp.bigquery;
import com.google.api.services.bigquery.model.TableRow;
-
+import com.google.common.collect.Maps;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.Serializable;
import java.util.Map;
import java.util.UUID;
-
-import com.google.common.collect.Maps;
import org.apache.beam.sdk.coders.AtomicCoder;
import org.apache.beam.sdk.coders.CoderException;
import org.apache.beam.sdk.coders.StringUtf8Coder;
-import org.apache.beam.sdk.coders.TableRowJsonCoder;
import org.apache.beam.sdk.coders.VarLongCoder;
import org.apache.beam.sdk.transforms.DoFn;
import org.apache.beam.sdk.transforms.display.DisplayData;
@@ -50,6 +47,10 @@ class WriteBundlesToFiles extends DoFn<KV<TableDestination, TableRow>, WriteBund
private transient Map<TableDestination, TableRowWriter> writers;
private final String tempFilePrefix;
+ /**
+ * The result of the {@link WriteBundlesToFiles} transform. Corresponds to a single output file,
+ * and encapsulates the table it is destined to as well as the file byte size.
+ */
public static class Result implements Serializable {
public String filename;
public Long fileByteSize;
@@ -62,6 +63,9 @@ class WriteBundlesToFiles extends DoFn<KV<TableDestination, TableRow>, WriteBund
}
}
+ /**
+ * A coder for the {@link Result} class.
+ * A coder for the {@link Result} class.
+ */
public static class ResultCoder extends AtomicCoder<Result> {
private static final ResultCoder INSTANCE = new ResultCoder();
http://git-wip-us.apache.org/repos/asf/beam/blob/b486137d/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/WritePartition.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/WritePartition.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/WritePartition.java
index c48955b..9c48b82 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/WritePartition.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/WritePartition.java
@@ -89,8 +89,8 @@ class WritePartition extends DoFn<String, KV<ShardedKey<TableDestination>, List<
partitions.add(Lists.<String>newArrayList());
currResultsMap.put(tableDestination, partitions);
}
- int currNumFiles = currNumFilesMap.getOrDefault(tableDestination, 0);
- long currSizeBytes = currSizeBytesMap.getOrDefault(tableDestination, 0L);
+ int currNumFiles = getOrDefault(currNumFilesMap, tableDestination, 0);
+ long currSizeBytes = getOrDefault(currSizeBytesMap, tableDestination, 0L);
if (currNumFiles + 1 > Write.MAX_NUM_FILES
|| currSizeBytes + fileResult.fileByteSize > Write.MAX_SIZE_BYTES) {
// Add a new partition for this table.
@@ -117,4 +117,13 @@ class WritePartition extends DoFn<String, KV<ShardedKey<TableDestination>, List<
}
}
}
+
+ private <T> T getOrDefault(Map<TableDestination, T> map, TableDestination tableDestination,
+ T defaultValue) {
+ if (map.containsKey(tableDestination)) {
+ return map.get(tableDestination);
+ } else {
+ return defaultValue;
+ }
+ }
}
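The private getOrDefault helper added above replaces Map.getOrDefault, presumably to stay off Java 8-only library methods; the same lookup-with-fallback can be written once, generically. A minimal sketch (class and variable names are illustrative):

import java.util.HashMap;
import java.util.Map;

final class Maps7 {
  // Java 7-friendly equivalent of Map.getOrDefault, mirroring the helper above.
  // Checking containsKey first means an explicitly stored null is returned as-is
  // rather than being replaced by the default.
  static <K, V> V getOrDefault(Map<K, V> map, K key, V defaultValue) {
    return map.containsKey(key) ? map.get(key) : defaultValue;
  }

  public static void main(String[] args) {
    Map<String, Integer> filesPerTable = new HashMap<>();
    filesPerTable.put("table_a", 3);
    System.out.println(getOrDefault(filesPerTable, "table_a", 0));   // prints 3
    System.out.println(getOrDefault(filesPerTable, "table_b", 0));   // prints 0
  }
}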
http://git-wip-us.apache.org/repos/asf/beam/blob/b486137d/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOTest.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOTest.java b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOTest.java
index d1ef8e2..f10be13 100644
--- a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOTest.java
+++ b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOTest.java
@@ -18,6 +18,8 @@
package org.apache.beam.sdk.io.gcp.bigquery;
import static com.google.common.base.Preconditions.checkArgument;
+import static com.google.common.base.Preconditions.checkNotNull;
+import static com.google.common.base.Preconditions.checkState;
import static org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.toJsonString;
import static org.apache.beam.sdk.transforms.display.DisplayDataMatchers.hasDisplayItem;
import static org.hamcrest.Matchers.containsInAnyOrder;
@@ -26,17 +28,9 @@ import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertThat;
import static org.junit.Assert.assertTrue;
-import static org.mockito.Matchers.any;
-import static org.mockito.Matchers.anyString;
-import static org.mockito.Matchers.eq;
-import static org.mockito.Mockito.doNothing;
-import static org.mockito.Mockito.doThrow;
-import static org.mockito.Mockito.when;
-
-import com.google.api.client.json.GenericJson;
+
import com.google.api.client.util.Data;
import com.google.api.services.bigquery.model.Job;
-import com.google.api.services.bigquery.model.JobReference;
import com.google.api.services.bigquery.model.JobStatistics;
import com.google.api.services.bigquery.model.JobStatistics2;
import com.google.api.services.bigquery.model.JobStatistics4;
@@ -48,7 +42,7 @@ import com.google.api.services.bigquery.model.TableRow;
import com.google.api.services.bigquery.model.TableSchema;
import com.google.common.collect.HashBasedTable;
import com.google.common.collect.ImmutableList;
-import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
@@ -58,9 +52,12 @@ import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.Serializable;
+import java.math.BigDecimal;
import java.nio.channels.Channels;
import java.nio.channels.WritableByteChannel;
+import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
+import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Collection;
@@ -69,14 +66,10 @@ import java.util.List;
import java.util.Map;
import java.util.Set;
-import org.apache.avro.Schema;
-import org.apache.avro.file.DataFileWriter;
-import org.apache.avro.generic.GenericDatumWriter;
-import org.apache.avro.generic.GenericRecord;
-import org.apache.avro.generic.GenericRecordBuilder;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.coders.AtomicCoder;
import org.apache.beam.sdk.coders.Coder;
+import org.apache.beam.sdk.coders.Coder.Context;
import org.apache.beam.sdk.coders.CoderException;
import org.apache.beam.sdk.coders.KvCoder;
import org.apache.beam.sdk.coders.StringUtf8Coder;
@@ -88,7 +81,6 @@ import org.apache.beam.sdk.io.CountingSource;
import org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.JsonSchemaToTableSchema;
import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.CreateDisposition;
import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.WriteDisposition;
-import org.apache.beam.sdk.io.gcp.bigquery.BigQueryServices.DatasetService;
import org.apache.beam.sdk.io.gcp.bigquery.PassThroughThenCleanup.CleanupOperation;
import org.apache.beam.sdk.io.gcp.bigquery.WriteBundlesToFiles.Result;
import org.apache.beam.sdk.options.BigQueryOptions;
@@ -122,7 +114,6 @@ import org.apache.beam.sdk.transforms.windowing.Window;
import org.apache.beam.sdk.transforms.windowing.WindowFn;
import org.apache.beam.sdk.transforms.windowing.WindowMappingFn;
import org.apache.beam.sdk.util.CoderUtils;
-import org.apache.beam.sdk.util.IOChannelFactory;
import org.apache.beam.sdk.util.IOChannelUtils;
import org.apache.beam.sdk.util.MimeTypes;
import org.apache.beam.sdk.util.PCollectionViews;
@@ -140,6 +131,7 @@ import org.hamcrest.Matchers;
import org.joda.time.Instant;
import org.junit.Assert;
import org.junit.Before;
+import org.junit.BeforeClass;
import org.junit.Ignore;
import org.junit.Rule;
import org.junit.Test;
@@ -147,10 +139,6 @@ import org.junit.rules.ExpectedException;
import org.junit.rules.TemporaryFolder;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;
-import org.mockito.ArgumentCaptor;
-import org.mockito.Mock;
-import org.mockito.Mockito;
-import org.mockito.MockitoAnnotations;
/**
* Tests for BigQueryIO.
@@ -158,6 +146,8 @@ import org.mockito.MockitoAnnotations;
@RunWith(JUnit4.class)
public class BigQueryIOTest implements Serializable {
+ private static Path tempFolder;
+
// Table information must be static, as each ParDo will get a separate instance of
// FakeDatasetServices, and they must all modify the same storage.
static com.google.common.collect.Table<String, String, Map<String, TableContainer>>
@@ -169,8 +159,6 @@ public class BigQueryIOTest implements Serializable {
@Rule public transient ExpectedLogs loggedWriteRename = ExpectedLogs.none(WriteRename.class);
@Rule public transient ExpectedLogs loggedWriteTables = ExpectedLogs.none(WriteTables.class);
@Rule public transient TemporaryFolder testFolder = new TemporaryFolder();
- @Mock private transient IOChannelFactory mockIOChannelFactory;
- @Mock(extraInterfaces = Serializable.class) private transient DatasetService mockDatasetService;
private void checkReadTableObject(
BigQueryIO.Read read, String project, String dataset, String table) {
@@ -227,9 +215,13 @@ public class BigQueryIOTest implements Serializable {
assertEquals(validate, write.getValidate());
}
+ @BeforeClass
+ public static void setupClass() throws IOException {
+ tempFolder = Files.createTempDirectory("BigQueryIOTest");
+ }
+
@Before
public void setUp() throws IOException {
- MockitoAnnotations.initMocks(this);
tables = HashBasedTable.create();
BigQueryIO.clearCreatedTables();
}
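The hunk above swaps Mockito initialization for a class-level temporary directory; later tests then carve out one subdirectory per test case with Files.createTempDirectory(tempFolder, ...). A small sketch of that layout outside JUnit (names are illustrative):

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;

class TempDirLayout {
  private static Path baseDir;

  static void setupClass() throws IOException {          // run once, e.g. from @BeforeClass
    baseDir = Files.createTempDirectory("BigQueryIOTest");
  }

  static Path newCaseDir(String testName) throws IOException {
    // Each test case gets its own isolated directory under the class-level one.
    return Files.createTempDirectory(baseDir, testName);
  }

  public static void main(String[] args) throws IOException {
    setupClass();
    System.out.println(newCaseDir("testValidateReadSetsDefaultProject"));
  }
}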
@@ -289,29 +281,53 @@ public class BigQueryIOTest implements Serializable {
String tableId = "sometable";
BigQueryOptions bqOptions = TestPipeline.testingPipelineOptions().as(BigQueryOptions.class);
bqOptions.setProject(projectId);
- bqOptions.setTempLocation("gs://testbucket/testdir");
+
+ Path baseDir = Files.createTempDirectory(tempFolder, "testValidateReadSetsDefaultProject");
+ bqOptions.setTempLocation(baseDir.toString());
FakeDatasetService fakeDatasetService = new FakeDatasetService();
fakeDatasetService.createDataset(projectId, datasetId, "", "");
TableReference tableReference =
new TableReference().setProjectId(projectId).setDatasetId(datasetId).setTableId(tableId);
- fakeDatasetService.createTable(new Table().setTableReference(tableReference));
+ fakeDatasetService.createTable(new Table()
+ .setTableReference(tableReference)
+ .setSchema(new TableSchema()
+ .setFields(
+ ImmutableList.of(
+ new TableFieldSchema().setName("name").setType("STRING"),
+ new TableFieldSchema().setName("number").setType("INTEGER")))));
FakeBigQueryServices fakeBqServices = new FakeBigQueryServices()
.withJobService(new FakeJobService())
.withDatasetService(fakeDatasetService);
+ List<TableRow> expected = ImmutableList.of(
+ new TableRow().set("name", "a").set("number", 1L),
+ new TableRow().set("name", "b").set("number", 2L),
+ new TableRow().set("name", "c").set("number", 3L),
+ new TableRow().set("name", "d").set("number", 4L),
+ new TableRow().set("name", "e").set("number", 5L),
+ new TableRow().set("name", "f").set("number", 6L));
+ fakeDatasetService.insertAll(tableReference, expected, null);
+
Pipeline p = TestPipeline.create(bqOptions);
TableReference tableRef = new TableReference();
tableRef.setDatasetId(datasetId);
tableRef.setTableId(tableId);
- thrown.expect(RuntimeException.class);
- // Message will be one of following depending on the execution environment.
- thrown.expectMessage(Matchers.containsString("Unsupported"));
- p.apply(BigQueryIO.read().from(tableRef)
- .withTestServices(fakeBqServices));
+ PCollection<KV<String, Long>> output =
+ p.apply(BigQueryIO.read().from(tableRef).withTestServices(fakeBqServices))
+ .apply(ParDo.of(new DoFn<TableRow, KV<String, Long>>() {
+ @ProcessElement
+ public void processElement(ProcessContext c) throws Exception {
+ c.output(KV.of((String) c.element().get("name"),
+ Long.valueOf((String) c.element().get("number"))));
+ }
+ }));
+ PAssert.that(output).containsInAnyOrder(ImmutableList.of(KV.of("a", 1L), KV.of("b", 2L),
+ KV.of("c", 3L), KV.of("d", 4L), KV.of("e", 5L), KV.of("f", 6L)));
+ p.run();
}
@Test
@@ -400,54 +416,32 @@ public class BigQueryIOTest implements Serializable {
FakeDatasetService fakeDatasetService = new FakeDatasetService();
fakeDatasetService.createDataset("non-executing-project", "somedataset", "", "");
fakeDatasetService.createTable(sometable);
- SerializableFunction<Void, Schema> schemaGenerator =
- new SerializableFunction<Void, Schema>() {
- @Override
- public Schema apply(Void input) {
- return BigQueryAvroUtils.toGenericAvroSchema(
- "sometable",
- ImmutableList.of(
- new TableFieldSchema().setName("name").setType("STRING"),
- new TableFieldSchema().setName("number").setType("INTEGER")));
- }
- };
- Collection<Map<String, Object>> records =
- ImmutableList.<Map<String, Object>>builder()
- .add(ImmutableMap.<String, Object>builder().put("name", "a").put("number", 1L).build())
- .add(ImmutableMap.<String, Object>builder().put("name", "b").put("number", 2L).build())
- .add(ImmutableMap.<String, Object>builder().put("name", "c").put("number", 3L).build())
- .build();
- SerializableFunction<GenericJson, Void> onStartJob =
- new WriteExtractFiles(schemaGenerator, records);
+ List<TableRow> records = Lists.newArrayList(
+ new TableRow().set("name", "a").set("number", 1L),
+ new TableRow().set("name", "b").set("number", 2L),
+ new TableRow().set("name", "c").set("number", 3L));
+ fakeDatasetService.insertAll(sometable.getTableReference(), records, null);
FakeBigQueryServices fakeBqServices = new FakeBigQueryServices()
.withJobService(new FakeJobService())
- // .startJobReturns(onStartJob, "done")
- // .pollJobReturns(job)
- // .getJobReturns((Job) null)
- // .verifyExecutingProject(bqOptions.getProject()))
- .withDatasetService(fakeDatasetService)
- .readerReturns(
- toJsonString(new TableRow().set("name", "a").set("number", 1)),
- toJsonString(new TableRow().set("name", "b").set("number", 2)),
- toJsonString(new TableRow().set("name", "c").set("number", 3)));
+ .withDatasetService(fakeDatasetService);
Pipeline p = TestPipeline.create(bqOptions);
- PCollection<String> output = p
+ PCollection<KV<String, Long>> output = p
.apply(BigQueryIO.read().from("non-executing-project:somedataset.sometable")
.withTestServices(fakeBqServices)
.withoutValidation())
- .apply(ParDo.of(new DoFn<TableRow, String>() {
+ .apply(ParDo.of(new DoFn<TableRow, KV<String, Long>>() {
@ProcessElement
public void processElement(ProcessContext c) throws Exception {
- c.output((String) c.element().get("name"));
+ c.output(KV.of((String) c.element().get("name"),
+ Long.valueOf((String) c.element().get("number"))));
}
}));
PAssert.that(output)
- .containsInAnyOrder(ImmutableList.of("a", "b", "c"));
-
+ .containsInAnyOrder(ImmutableList.of(KV.of("a", 1L), KV.of("b", 2L), KV.of("c", 3L)));
p.run();
}
@@ -457,13 +451,12 @@ public class BigQueryIOTest implements Serializable {
bqOptions.setProject("defaultproject");
bqOptions.setTempLocation(testFolder.newFolder("BigQueryIOTest").getAbsolutePath());
+ FakeDatasetService datasetService = new FakeDatasetService();
FakeBigQueryServices fakeBqServices = new FakeBigQueryServices()
.withJobService(new FakeJobService())
- // .startJobReturns("done", "done", "done")
- // .pollJobReturns(Status.FAILED, Status.FAILED, Status.SUCCEEDED))
- .withDatasetService(mockDatasetService);
+ .withDatasetService(datasetService);
- mockDatasetService.createDataset("defaultproject", "dataset-id", "", "");
+ datasetService.createDataset("defaultproject", "dataset-id", "", "");
Pipeline p = TestPipeline.create(bqOptions);
p.apply(Create.of(
@@ -715,11 +708,11 @@ public class BigQueryIOTest implements Serializable {
bqOptions.setProject("defaultproject");
bqOptions.setTempLocation(testFolder.newFolder("BigQueryIOTest").getAbsolutePath());
+ FakeDatasetService datasetService = new FakeDatasetService();
FakeBigQueryServices fakeBqServices = new FakeBigQueryServices()
- .withJobService(new FakeJobService());
- // .startJobReturns("done", "done")
- // .pollJobReturns(Status.FAILED, Status.UNKNOWN));
-
+ .withJobService(new FakeJobService())
+ .withDatasetService(datasetService);
+ datasetService.createDataset("project-id", "dataset-id", "", "");
Pipeline p = TestPipeline.create(bqOptions);
p.apply(Create.of(
new TableRow().set("name", "a").set("number", 1),
@@ -732,7 +725,7 @@ public class BigQueryIOTest implements Serializable {
.withoutValidation());
thrown.expect(RuntimeException.class);
- thrown.expectMessage("UNKNOWN status of load job");
+ thrown.expectMessage("Failed to create load job");
try {
p.run();
} finally {
@@ -747,10 +740,10 @@ public class BigQueryIOTest implements Serializable {
bqOptions.setProject("defaultproject");
bqOptions.setTempLocation(testFolder.newFolder("BigQueryIOTest").getAbsolutePath());
+ FakeDatasetService datasetService = new FakeDatasetService();
FakeBigQueryServices fakeBqServices = new FakeBigQueryServices()
- .withJobService(new FakeJobService());
- // .startJobReturns("done", "done", "done")
- // .pollJobReturns(Status.FAILED, Status.FAILED, Status.FAILED));
+ .withJobService(new FakeJobService())
+ .withDatasetService(datasetService);
Pipeline p = TestPipeline.create(bqOptions);
p.apply(Create.of(
@@ -817,7 +810,7 @@ public class BigQueryIOTest implements Serializable {
BigQueryIO.Read read = BigQueryIO.read()
.from("project:dataset.tableId")
.withTestServices(new FakeBigQueryServices()
- .withDatasetService(mockDatasetService)
+ .withDatasetService(new FakeDatasetService())
.withJobService(new FakeJobService()))
.withoutValidation();
@@ -833,7 +826,7 @@ public class BigQueryIOTest implements Serializable {
BigQueryIO.Read read = BigQueryIO.read()
.fromQuery("foobar")
.withTestServices(new FakeBigQueryServices()
- .withDatasetService(mockDatasetService)
+ .withDatasetService(new FakeDatasetService())
.withJobService(new FakeJobService()))
.withoutValidation();
@@ -874,7 +867,7 @@ public class BigQueryIOTest implements Serializable {
.to("project:dataset.table")
.withSchema(new TableSchema().set("col1", "type1").set("col2", "type2"))
.withTestServices(new FakeBigQueryServices()
- .withDatasetService(mockDatasetService)
+ .withDatasetService(new FakeDatasetService())
.withJobService(new FakeJobService()))
.withoutValidation();
@@ -1040,9 +1033,7 @@ public class BigQueryIOTest implements Serializable {
FakeBigQueryServices fakeBqServices = new FakeBigQueryServices()
.withJobService(new FakeJobService())
- .withDatasetService(mockDatasetService);
- when(mockDatasetService.getDataset(projectId, datasetId)).thenThrow(
- new RuntimeException("Unable to confirm BigQuery dataset presence"));
+ .withDatasetService(new FakeDatasetService());
Pipeline p = TestPipeline.create(options);
@@ -1206,26 +1197,31 @@ public class BigQueryIOTest implements Serializable {
@Test
public void testBigQueryTableSourceThroughJsonAPI() throws Exception {
+ FakeDatasetService datasetService = new FakeDatasetService();
FakeBigQueryServices fakeBqServices = new FakeBigQueryServices()
.withJobService(new FakeJobService())
- .readerReturns(
- toJsonString(new TableRow().set("name", "a").set("number", "1")),
- toJsonString(new TableRow().set("name", "b").set("number", "2")),
- toJsonString(new TableRow().set("name", "c").set("number", "3")));
+ .withDatasetService(datasetService);
+ List<TableRow> expected = ImmutableList.of(
+ new TableRow().set("name", "a").set("number", "1"),
+ new TableRow().set("name", "b").set("number", "2"),
+ new TableRow().set("name", "c").set("number", "3"),
+ new TableRow().set("name", "d").set("number", "4"),
+ new TableRow().set("name", "e").set("number", "5"),
+ new TableRow().set("name", "f").set("number", "6"));
+
+ TableReference table = BigQueryHelpers.parseTableSpec("project:data_set.table_name");
+ datasetService.createDataset(table.getProjectId(), table.getDatasetId(), "", "");
+ datasetService.createTable(new Table().setTableReference(table));
+ datasetService.insertAll(table, expected, null);
+
+ Path baseDir = Files.createTempDirectory(tempFolder, "testBigQueryTableSourceThroughJsonAPI");
String jobIdToken = "testJobIdToken";
- TableReference table = BigQueryHelpers.parseTableSpec("project.data_set.table_name");
- String extractDestinationDir = "mock://tempLocation";
BoundedSource<TableRow> bqSource = BigQueryTableSource.create(
StaticValueProvider.of(jobIdToken), StaticValueProvider.of(table),
- extractDestinationDir, fakeBqServices,
+ baseDir.toString(), fakeBqServices,
StaticValueProvider.of("project"));
- List<TableRow> expected = ImmutableList.of(
- new TableRow().set("name", "a").set("number", "1"),
- new TableRow().set("name", "b").set("number", "2"),
- new TableRow().set("name", "c").set("number", "3"));
-
PipelineOptions options = PipelineOptionsFactory.create();
Assert.assertThat(
SourceTestUtils.readFromSource(bqSource, options),
@@ -1244,43 +1240,48 @@ public class BigQueryIOTest implements Serializable {
extractJob.setStatus(new JobStatus())
.setStatistics(jobStats);
+ FakeDatasetService fakeDatasetService = new FakeDatasetService();
FakeBigQueryServices fakeBqServices = new FakeBigQueryServices()
.withJobService(new FakeJobService())
- .withDatasetService(mockDatasetService)
- .readerReturns(
- toJsonString(new TableRow().set("name", "a").set("number", "1")),
- toJsonString(new TableRow().set("name", "b").set("number", "2")),
- toJsonString(new TableRow().set("name", "c").set("number", "3")));
+ .withDatasetService(fakeDatasetService);
+
+ List<TableRow> expected = ImmutableList.of(
+ new TableRow().set("name", "a").set("number", 1L),
+ new TableRow().set("name", "b").set("number", 2L),
+ new TableRow().set("name", "c").set("number", 3L),
+ new TableRow().set("name", "d").set("number", 4L),
+ new TableRow().set("name", "e").set("number", 5L),
+ new TableRow().set("name", "f").set("number", 6L));
- String jobIdToken = "testJobIdToken";
TableReference table = BigQueryHelpers.parseTableSpec("project:data_set.table_name");
- String extractDestinationDir = "mock://tempLocation";
+ fakeDatasetService.createDataset("project", "data_set", "", "");
+ fakeDatasetService.createTable(new Table().setTableReference(table)
+ .setSchema(new TableSchema()
+ .setFields(
+ ImmutableList.of(
+ new TableFieldSchema().setName("name").setType("STRING"),
+ new TableFieldSchema().setName("number").setType("INTEGER")))));
+ fakeDatasetService.insertAll(table, expected, null);
+
+ Path baseDir = Files.createTempDirectory(tempFolder, "testBigQueryTableSourceInitSplit");
+
+ String jobIdToken = "testJobIdToken";
+ String extractDestinationDir = baseDir.toString();
BoundedSource<TableRow> bqSource = BigQueryTableSource.create(
StaticValueProvider.of(jobIdToken), StaticValueProvider.of(table),
extractDestinationDir, fakeBqServices, StaticValueProvider.of("project"));
- List<TableRow> expected = ImmutableList.of(
- new TableRow().set("name", "a").set("number", "1"),
- new TableRow().set("name", "b").set("number", "2"),
- new TableRow().set("name", "c").set("number", "3"));
PipelineOptions options = PipelineOptionsFactory.create();
- options.setTempLocation("mock://tempLocation");
-
- IOChannelUtils.setIOFactoryInternal("mock", mockIOChannelFactory, true /* override */);
- when(mockIOChannelFactory.resolve(anyString(), anyString()))
- .thenReturn("mock://tempLocation/output");
- when(mockDatasetService.getTable(any(TableReference.class)))
- .thenReturn(new Table().setSchema(new TableSchema()));
+ options.setTempLocation(baseDir.toString());
- Assert.assertThat(
- SourceTestUtils.readFromSource(bqSource, options),
- CoreMatchers.is(expected));
+ List<TableRow> read = SourceTestUtils.readFromSource(bqSource, options);
+ assertThat(read, containsInAnyOrder(Iterables.toArray(expected, TableRow.class)));
SourceTestUtils.assertSplitAtFractionBehavior(
bqSource, 2, 0.3, ExpectedSplitOutcome.MUST_BE_CONSISTENT_IF_SUCCEEDS, options);
List<? extends BoundedSource<TableRow>> sources = bqSource.split(100, options);
- assertEquals(1, sources.size());
+ assertEquals(2, sources.size());
BoundedSource<TableRow> actual = sources.get(0);
assertThat(actual, CoreMatchers.instanceOf(TransformingSource.class));
}
@@ -1306,80 +1307,63 @@ public class BigQueryIOTest implements Serializable {
.setStatistics(extractJobStats);
FakeJobService fakeJobService = new FakeJobService();
+ FakeDatasetService fakeDatasetService = new FakeDatasetService();
FakeBigQueryServices fakeBqServices = new FakeBigQueryServices()
.withJobService(fakeJobService)
- .withDatasetService(mockDatasetService)
- .readerReturns(
- toJsonString(new TableRow().set("name", "a").set("number", "1")),
- toJsonString(new TableRow().set("name", "b").set("number", "2")),
- toJsonString(new TableRow().set("name", "c").set("number", "3")));
+ .withDatasetService(fakeDatasetService);
+
+ List<TableRow> expected = ImmutableList.of(
+ new TableRow().set("name", "a").set("number", 1L),
+ new TableRow().set("name", "b").set("number", 2L),
+ new TableRow().set("name", "c").set("number", 3L),
+ new TableRow().set("name", "d").set("number", 4L),
+ new TableRow().set("name", "e").set("number", 5L),
+ new TableRow().set("name", "f").set("number", 6L));
- String jobIdToken = "testJobIdToken";
- String extractDestinationDir = "mock://tempLocation";
TableReference destinationTable = BigQueryHelpers.parseTableSpec("project:data_set.table_name");
+ fakeDatasetService.createDataset("project", "data_set", "", "");
+ fakeDatasetService.createTable(new Table()
+ .setTableReference(destinationTable)
+ .setSchema(new TableSchema()
+ .setFields(
+ ImmutableList.of(
+ new TableFieldSchema().setName("name").setType("STRING"),
+ new TableFieldSchema().setName("number").setType("INTEGER")))));
+ Path baseDir = Files.createTempDirectory(tempFolder, "testBigQueryQuerySourceInitSplit");
+
+ String jobIdToken = "testJobIdToken";
+ String query = FakeBigQueryServices.encodeQuery(expected);
+ String extractDestinationDir = baseDir.toString();
BoundedSource<TableRow> bqSource = BigQueryQuerySource.create(
- StaticValueProvider.of(jobIdToken), StaticValueProvider.of("query"),
+ StaticValueProvider.of(jobIdToken), StaticValueProvider.of(query),
StaticValueProvider.of(destinationTable),
true /* flattenResults */, true /* useLegacySql */,
extractDestinationDir, fakeBqServices);
- List<TableRow> expected = ImmutableList.of(
- new TableRow().set("name", "a").set("number", "1"),
- new TableRow().set("name", "b").set("number", "2"),
- new TableRow().set("name", "c").set("number", "3"));
-
PipelineOptions options = PipelineOptionsFactory.create();
options.setTempLocation(extractDestinationDir);
TableReference queryTable = new TableReference()
- .setProjectId("testproject")
- .setDatasetId("testDataset")
- .setTableId("testTable");
- // when(mockJobService.dryRunQuery(anyString(), Mockito.<JobConfigurationQuery>any()))
- // .thenReturn(new JobStatistics().setQuery(
- // new JobStatistics2()
- // .setTotalBytesProcessed(100L)
- // .setReferencedTables(ImmutableList.of(queryTable))));
- fakeJobService.expectDryRunQuery("testproject", "query",
+ .setProjectId("project")
+ .setDatasetId("data_set")
+ .setTableId("table_name");
+
+ fakeJobService.expectDryRunQuery("project", query,
new JobStatistics().setQuery(
new JobStatistics2()
.setTotalBytesProcessed(100L)
.setReferencedTables(ImmutableList.of(queryTable))));
- // when(mockDatasetService.getTable(eq(queryTable)))
- // .thenReturn(new Table().setSchema(new TableSchema()));
- // when(mockDatasetService.getTable(eq(destinationTable)))
- // .thenReturn(new Table().setSchema(new TableSchema()));
- IOChannelUtils.setIOFactoryInternal("mock", mockIOChannelFactory, true /* override */);
- when(mockIOChannelFactory.resolve(anyString(), anyString()))
- .thenReturn("mock://tempLocation/output");
- //when(mockJobService.pollJob(Mockito.<JobReference>any(), Mockito.anyInt()))
- // .thenReturn(extractJob);
-
- Assert.assertThat(
- SourceTestUtils.readFromSource(bqSource, options),
- CoreMatchers.is(expected));
+ List<TableRow> read = SourceTestUtils.readFromSource(bqSource, options);
+ assertThat(read, containsInAnyOrder(Iterables.toArray(expected, TableRow.class)));
SourceTestUtils.assertSplitAtFractionBehavior(
bqSource, 2, 0.3, ExpectedSplitOutcome.MUST_BE_CONSISTENT_IF_SUCCEEDS, options);
+
List<? extends BoundedSource<TableRow>> sources = bqSource.split(100, options);
- assertEquals(1, sources.size());
+ assertEquals(2, sources.size());
BoundedSource<TableRow> actual = sources.get(0);
assertThat(actual, CoreMatchers.instanceOf(TransformingSource.class));
-
- /*
- Mockito.verify(mockJobService)
- .startQueryJob(
- Mockito.<JobReference>any(), Mockito.<JobConfigurationQuery>any());
- Mockito.verify(mockJobService)
- .startExtractJob(Mockito.<JobReference>any(), Mockito.<JobConfigurationExtract>any());
- Mockito.verify(mockDatasetService)
- .createDataset(anyString(), anyString(), anyString(), anyString());
- ArgumentCaptor<JobConfigurationQuery> queryConfigArg =
- ArgumentCaptor.forClass(JobConfigurationQuery.class);
- Mockito.verify(mockJobService).dryRunQuery(anyString(), queryConfigArg.capture());
- assertEquals(true, queryConfigArg.getValue().getFlattenResults());
- assertEquals(true, queryConfigArg.getValue().getUseLegacySql());*/
}
@Test
@@ -1402,68 +1386,60 @@ public class BigQueryIOTest implements Serializable {
extractJob.setStatus(new JobStatus())
.setStatistics(extractJobStats);
+ FakeDatasetService datasetService = new FakeDatasetService();
+ FakeJobService jobService = new FakeJobService();
FakeBigQueryServices fakeBqServices = new FakeBigQueryServices()
- .withJobService(new FakeJobService())
- .withDatasetService(mockDatasetService)
- .readerReturns(
- toJsonString(new TableRow().set("name", "a").set("number", "1")),
- toJsonString(new TableRow().set("name", "b").set("number", "2")),
- toJsonString(new TableRow().set("name", "c").set("number", "3")));
+ .withJobService(jobService)
+ .withDatasetService(datasetService);
- String jobIdToken = "testJobIdToken";
- String extractDestinationDir = "mock://tempLocation";
TableReference destinationTable = BigQueryHelpers.parseTableSpec("project:data_set.table_name");
+ List<TableRow> expected = ImmutableList.of(
+ new TableRow().set("name", "a").set("number", 1L),
+ new TableRow().set("name", "b").set("number", 2L),
+ new TableRow().set("name", "c").set("number", 3L),
+ new TableRow().set("name", "d").set("number", 4L),
+ new TableRow().set("name", "e").set("number", 5L),
+ new TableRow().set("name", "f").set("number", 6L));
+ datasetService.createDataset(destinationTable.getProjectId(), destinationTable.getDatasetId(),
+ "", "");
+ Table table = new Table()
+ .setTableReference(destinationTable)
+ .setSchema(new TableSchema()
+ .setFields(
+ ImmutableList.of(
+ new TableFieldSchema().setName("name").setType("STRING"),
+ new TableFieldSchema().setName("number").setType("INTEGER"))));
+ datasetService.createTable(table);
+
+ String query = FakeBigQueryServices.encodeQuery(expected);
+ jobService.expectDryRunQuery("project", query,
+ new JobStatistics().setQuery(
+ new JobStatistics2()
+ .setTotalBytesProcessed(100L)
+ .setReferencedTables(ImmutableList.of(table.getTableReference()))));
+
+ Path baseDir = Files.createTempDirectory(tempFolder, "testBigQueryNoTableQuerySourceInitSplit");
+ String jobIdToken = "testJobIdToken";
BoundedSource<TableRow> bqSource = BigQueryQuerySource.create(
- StaticValueProvider.of(jobIdToken), StaticValueProvider.of("query"),
+ StaticValueProvider.of(jobIdToken),
+ StaticValueProvider.of(query),
StaticValueProvider.of(destinationTable),
- true /* flattenResults */, true /* useLegacySql */,
- extractDestinationDir, fakeBqServices);
+ true /* flattenResults */, true /* useLegacySql */, baseDir.toString(), fakeBqServices);
- List<TableRow> expected = ImmutableList.of(
- new TableRow().set("name", "a").set("number", "1"),
- new TableRow().set("name", "b").set("number", "2"),
- new TableRow().set("name", "c").set("number", "3"));
- PipelineOptions options = PipelineOptionsFactory.create();
- options.setTempLocation(extractDestinationDir);
-
- /*
- when(mockJobService.dryRunQuery(anyString(), Mockito.<JobConfigurationQuery>any()))
- .thenReturn(new JobStatistics().setQuery(
- new JobStatistics2()
- .setTotalBytesProcessed(100L)));
- when(mockDatasetService.getTable(eq(destinationTable)))
- .thenReturn(new Table().setSchema(new TableSchema()));
- IOChannelUtils.setIOFactoryInternal("mock", mockIOChannelFactory, true);
- when(mockIOChannelFactory.resolve(anyString(), anyString()))
- .thenReturn("mock://tempLocation/output");
- when(mockJobService.pollJob(Mockito.<JobReference>any(), Mockito.anyInt()))
- .thenReturn(extractJob);*/
- Assert.assertThat(
- SourceTestUtils.readFromSource(bqSource, options),
- CoreMatchers.is(expected));
+ PipelineOptions options = PipelineOptionsFactory.create();
+ options.setTempLocation(baseDir.toString());
+ List<TableRow> read = convertBigDecimaslToLong(
+ SourceTestUtils.readFromSource(bqSource, options));
+ assertThat(read, containsInAnyOrder(Iterables.toArray(expected, TableRow.class)));
SourceTestUtils.assertSplitAtFractionBehavior(
bqSource, 2, 0.3, ExpectedSplitOutcome.MUST_BE_CONSISTENT_IF_SUCCEEDS, options);
List<? extends BoundedSource<TableRow>> sources = bqSource.split(100, options);
- assertEquals(1, sources.size());
+ assertEquals(2, sources.size());
BoundedSource<TableRow> actual = sources.get(0);
assertThat(actual, CoreMatchers.instanceOf(TransformingSource.class));
-
- /*
- Mockito.verify(Service)
- .startQueryJob(
- Mockito.<JobReference>any(), Mockito.<JobConfigurationQuery>any());
- Mockito.verify(mockJobService)
- .startExtractJob(Mockito.<JobReference>any(), Mockito.<JobConfigurationExtract>any());
- Mockito.verify(mockDatasetService)
- .createDataset(anyString(), anyString(), anyString(), anyString());
- ArgumentCaptor<JobConfigurationQuery> queryConfigArg =
- ArgumentCaptor.forClass(JobConfigurationQuery.class);
- Mockito.verify(mockJobService).dryRunQuery(anyString(), queryConfigArg.capture());
- assertEquals(true, queryConfigArg.getValue().getFlattenResults());
- assertEquals(true, queryConfigArg.getValue().getUseLegacySql());*/
}
@Test
@@ -1604,12 +1580,27 @@ public class BigQueryIOTest implements Serializable {
throws Exception {
p.enableAbandonedNodeEnforcement(false);
+ // In the case where a static destination is specified (i.e. not through a dynamic table
+ // function) and there is no input data, WritePartition will generate an empty table. This
+ // code is to test that path.
+ TableReference singletonReference = new TableReference()
+ .setProjectId("projectid")
+ .setDatasetId("dataset")
+ .setTableId("table");
+ String singletonDescription = "singleton";
+ boolean isSingleton = numTables == 1 && numFilesPerTable == 0;
+
List<ShardedKey<TableDestination>> expectedPartitions = Lists.newArrayList();
- for (int i = 0; i < numTables; ++i) {
- for (int j = 1; j <= expectedNumPartitionsPerTable; ++j) {
- String tableName = String.format("project-id:dataset-id.tables%05d", i);
- TableDestination destination = new TableDestination(tableName, tableName);
- expectedPartitions.add(ShardedKey.of(destination, j));
+ if (isSingleton) {
+ expectedPartitions.add(ShardedKey.of(
+ new TableDestination(singletonReference, singletonDescription), 1));
+ } else {
+ for (int i = 0; i < numTables; ++i) {
+ for (int j = 1; j <= expectedNumPartitionsPerTable; ++j) {
+ String tableName = String.format("project-id:dataset-id.tables%05d", i);
+ TableDestination destination = new TableDestination(tableName, tableName);
+ expectedPartitions.add(ShardedKey.of(destination, j));
+ }
}
}
@@ -1642,11 +1633,7 @@ public class BigQueryIOTest implements Serializable {
WriteBundlesToFiles.ResultCoder.of());
ValueProvider<String> singletonTable = null;
- if (numFilesPerTable == 0 && numTables == 1) {
- TableReference singletonReference = new TableReference()
- .setProjectId("projectid")
- .setDatasetId("dataset")
- .setTableId("table");
+ if (isSingleton) {
singletonTable = StaticValueProvider.of(BigQueryHelpers.toJsonString(singletonReference));
}
WritePartition writePartition =
@@ -1680,12 +1667,10 @@ public class BigQueryIOTest implements Serializable {
tableFilesResult.addAll(partition.getValue());
}
- assertEquals(expectedPartitions.size(), partitionsResult.size());
+ assertThat(partitionsResult,
+ containsInAnyOrder(Iterables.toArray(expectedPartitions, ShardedKey.class)));
- // assertThat(partitionsResult,
- // containsInAnyOrder(Iterables.toArray(expectedPartitions, ShardedKey.class)));
-
- if (numFilesPerTable == 0 && numTables == 1) {
+ if (isSingleton) {
assertEquals(1, filesPerTableResult.size());
List<String> singletonFiles = filesPerTableResult.values().iterator().next();
assertTrue(Files.exists(Paths.get(singletonFiles.get(0))));
@@ -1700,15 +1685,11 @@ public class BigQueryIOTest implements Serializable {
public void testWriteTables() throws Exception {
p.enableAbandonedNodeEnforcement(false);
+ FakeDatasetService datasetService = new FakeDatasetService();
FakeBigQueryServices fakeBqServices = new FakeBigQueryServices()
.withJobService(new FakeJobService())
- // .startJobReturns("done", "done", "done", "done", "done", "done", "done", "done",
- // "done", "done")
- // .pollJobReturns(Status.FAILED, Status.SUCCEEDED, Status.SUCCEEDED, Status.SUCCEEDED,
- // Status.SUCCEEDED, Status.SUCCEEDED, Status.SUCCEEDED, Status.SUCCEEDED,
- // Status.SUCCEEDED, Status.SUCCEEDED))
- .withDatasetService(mockDatasetService);
-
+ .withDatasetService(datasetService);
+ datasetService.createDataset("project-id", "dataset-id", "", "");
long numTables = 3;
long numPartitions = 3;
long numFilesPerPartition = 10;
@@ -1716,6 +1697,8 @@ public class BigQueryIOTest implements Serializable {
String tempFilePrefix = "tempFilePrefix";
Map<TableDestination, List<String>> expectedTempTables = Maps.newHashMap();
+ Path baseDir = Files.createTempDirectory(tempFolder, "testWriteTables");
+
List<KV<ShardedKey<TableDestination>, Iterable<List<String>>>> partitions =
Lists.newArrayList();
for (int i = 0; i < numTables; ++i) {
@@ -1726,7 +1709,16 @@ public class BigQueryIOTest implements Serializable {
jobIdToken + "_0x%08x_%05d", tableDestination.hashCode(), j);
List<String> filesPerPartition = Lists.newArrayList();
for (int k = 0; k < numFilesPerPartition; ++k) {
- filesPerPartition.add(String.format("files0x%08x_%05d", tableDestination.hashCode(), k));
+ String filename = Paths.get(baseDir.toString(),
+ String.format("files0x%08x_%05d", tempTableId.hashCode(), k)).toString();
+ try (WritableByteChannel channel = IOChannelUtils.create(filename, MimeTypes.TEXT)) {
+ try (OutputStream output = Channels.newOutputStream(channel)) {
+ TableRow tableRow = new TableRow().set("name", tableName);
+ TableRowJsonCoder.of().encode(tableRow, output, Context.OUTER);
+ output.write("\n".getBytes(StandardCharsets.UTF_8));
+ }
+ }
+ filesPerPartition.add(filename);
}
partitions.add(KV.of(ShardedKey.of(tableDestination, j),
(Iterable<List<String>>) Collections.singleton(filesPerPartition)));
@@ -1814,25 +1806,45 @@ public class BigQueryIOTest implements Serializable {
public void testWriteRename() throws Exception {
p.enableAbandonedNodeEnforcement(false);
+ FakeDatasetService datasetService = new FakeDatasetService();
FakeBigQueryServices fakeBqServices = new FakeBigQueryServices()
.withJobService(new FakeJobService())
- // .startJobReturns("done", "done")
- // .pollJobReturns(Status.FAILED, Status.SUCCEEDED))
- .withDatasetService(mockDatasetService);
+ .withDatasetService(datasetService);
+ datasetService.createDataset("project-id", "dataset-id", "", "");
- int numFinalTables = 3;
- int numTempTables = 3;
+ final int numFinalTables = 3;
+ final int numTempTablesPerFinalTable = 3;
+ final int numRecordsPerTempTable = 10;
+
+ Map<TableDestination, List<TableRow>> expectedRowsPerTable = Maps.newHashMap();
String jobIdToken = "jobIdToken";
- String jsonTable = "{}";
Map<TableDestination, Iterable<String>> tempTables = Maps.newHashMap();
for (int i = 0; i < numFinalTables; ++i) {
String tableName = "project-id:dataset-id.table_" + i;
- TableDestination tableDestination = new TableDestination(tableName, tableName);
+ TableDestination tableDestination = new TableDestination(
+ tableName, "table_" + i + "_desc");
List<String> tables = Lists.newArrayList();
tempTables.put(tableDestination, tables);
- for (int j = 0; i < numTempTables; ++i) {
- tables.add(String.format(
- "{\"project-id:dataset-id.tableId\":\"%s_%05d_%05d\"}", jobIdToken, i, j));
+
+ List<TableRow> expectedRows = expectedRowsPerTable.get(tableDestination);
+ if (expectedRows == null) {
+ expectedRows = Lists.newArrayList();
+ expectedRowsPerTable.put(tableDestination, expectedRows);
+ }
+ for (int j = 0; i < numTempTablesPerFinalTable; ++i) {
+ TableReference tempTable = new TableReference()
+ .setProjectId("project-id")
+ .setDatasetId("dataset-id")
+ .setTableId(String.format("%s_%05d_%05d", jobIdToken, i, j));
+ datasetService.createTable(new Table().setTableReference(tempTable));
+
+ List<TableRow> rows = Lists.newArrayList();
+ for (int k = 0; k < numRecordsPerTempTable; ++k) {
+ rows.add(new TableRow().set("number", j * numTempTablesPerFinalTable + k));
+ }
+ datasetService.insertAll(tempTable, rows, null);
+ expectedRows.addAll(rows);
+ tables.add(BigQueryHelpers.toJsonString(tempTable));
}
}
@@ -1857,37 +1869,52 @@ public class BigQueryIOTest implements Serializable {
tester.setSideInput(tempTablesView, GlobalWindow.INSTANCE, tempTables);
tester.setSideInput(jobIdTokenView, GlobalWindow.INSTANCE, jobIdToken);
tester.processElement(null);
+
+ for (Map.Entry<TableDestination, Iterable<String>> entry : tempTables.entrySet()) {
+ TableDestination tableDestination = entry.getKey();
+ TableReference tableReference = tableDestination.getTableReference();
+ Table table = checkNotNull(datasetService.getTable(tableReference));
+ assertEquals(tableReference.getTableId() + "_desc", tableDestination.getTableDescription());
+
+ List<TableRow> expectedRows = expectedRowsPerTable.get(tableDestination);
+ assertThat(datasetService.getAllRows(tableReference.getProjectId(),
+ tableReference.getDatasetId(), tableReference.getTableId()),
+ containsInAnyOrder(Iterables.toArray(expectedRows, TableRow.class)));
+
+ // Temp tables should be deleted.
+ for (String tempTableJson : entry.getValue()) {
+ TableReference tempTable = BigQueryHelpers.fromJsonString(
+ tempTableJson, TableReference.class);
+ assertEquals(null, datasetService.getTable(tempTable));
+ }
+ }
}
@Test
public void testRemoveTemporaryTables() throws Exception {
- String projectId = "someproject";
- String datasetId = "somedataset";
- List<String> tables = Lists.newArrayList("table1", "table2", "table3");
+ FakeDatasetService datasetService = new FakeDatasetService();
+ String projectId = "project";
+ String datasetId = "dataset";
+ datasetService.createDataset(projectId, datasetId, "", "");
List<TableReference> tableRefs = Lists.newArrayList(
- BigQueryHelpers.parseTableSpec(String.format("%s:%s.%s", projectId, datasetId,
- tables.get(0))),
- BigQueryHelpers.parseTableSpec(String.format("%s:%s.%s", projectId, datasetId,
- tables.get(1))),
- BigQueryHelpers.parseTableSpec(String.format("%s:%s.%s", projectId, datasetId,
- tables.get(2))));
+ BigQueryHelpers.parseTableSpec(String.format("%s:%s.%s", projectId, datasetId, "table1")),
+ BigQueryHelpers.parseTableSpec(String.format("%s:%s.%s", projectId, datasetId, "table2")),
+ BigQueryHelpers.parseTableSpec(String.format("%s:%s.%s", projectId, datasetId, "table3")));
+ for (TableReference tableRef : tableRefs) {
+ datasetService.createTable(new Table().setTableReference(tableRef));
+ }
- doThrow(new IOException("Unable to delete table"))
- .when(mockDatasetService).deleteTable(tableRefs.get(0));
- doNothing().when(mockDatasetService).deleteTable(tableRefs.get(1));
- doNothing().when(mockDatasetService).deleteTable(tableRefs.get(2));
+ // Add one more table to delete that does not actually exist.
+ tableRefs.add(
+ BigQueryHelpers.parseTableSpec(String.format("%s:%s.%s", projectId, datasetId, "table4")));
- WriteRename.removeTemporaryTables(mockDatasetService, tableRefs);
+ WriteRename.removeTemporaryTables(datasetService, tableRefs);
for (TableReference ref : tableRefs) {
loggedWriteRename.verifyDebug("Deleting table " + toJsonString(ref));
+ checkState(datasetService.getTable(ref) == null,
+ "Table " + ref + " was not deleted!");
}
- loggedWriteRename.verifyWarn("Failed to delete the table "
- + toJsonString(tableRefs.get(0)));
- loggedWriteRename.verifyNotLogged("Failed to delete the table "
- + toJsonString(tableRefs.get(1)));
- loggedWriteRename.verifyNotLogged("Failed to delete the table "
- + toJsonString(tableRefs.get(2)));
}
/** Test options. **/
@@ -1957,43 +1984,6 @@ public class BigQueryIOTest implements Serializable {
}}).length);
}
- private class WriteExtractFiles implements SerializableFunction<GenericJson, Void> {
- private final SerializableFunction<Void, Schema> schemaGenerator;
- private final Collection<Map<String, Object>> records;
-
- private WriteExtractFiles(
- SerializableFunction<Void, Schema> schemaGenerator,
- Collection<Map<String, Object>> records) {
- this.schemaGenerator = schemaGenerator;
- this.records = records;
- }
-
- @Override
- public Void apply(GenericJson input) {
- List<String> destinations = (List<String>) input.get("destinationUris");
- for (String destination : destinations) {
- String newDest = destination.replace("*", "000000000000");
- Schema schema = schemaGenerator.apply(null);
- try (WritableByteChannel channel = IOChannelUtils.create(newDest, MimeTypes.BINARY);
- DataFileWriter<GenericRecord> tableRowWriter =
- new DataFileWriter<>(new GenericDatumWriter<GenericRecord>(schema))
- .create(schema, Channels.newOutputStream(channel))) {
- for (Map<String, Object> record : records) {
- GenericRecordBuilder genericRecordBuilder = new GenericRecordBuilder(schema);
- for (Map.Entry<String, Object> field : record.entrySet()) {
- genericRecordBuilder.set(field.getKey(), field.getValue());
- }
- tableRowWriter.append(genericRecordBuilder.build());
- }
- } catch (IOException e) {
- throw new IllegalStateException(
- String.format("Could not create destination for extract job %s", destination), e);
- }
- }
- return null;
- }
- }
-
@Test
public void testShardedKeyCoderIsSerializableWithWellKnownCoderType() {
CoderProperties.coderSerializable(ShardedKeyCoder.of(GlobalWindow.Coder.INSTANCE));
@@ -2013,4 +2003,19 @@ public class BigQueryIOTest implements Serializable {
TableRowInfoCoder.of()),
IntervalWindow.getCoder()));
}
+
+ List<TableRow> convertBigDecimalsToLong(List<TableRow> toConvert) {
+ // The numbers come back as BigDecimal objects after JSON serialization. Change them back to
+ // longs so that we can assert the output.
+ List<TableRow> converted = Lists.newArrayList();
+ for (TableRow entry : toConvert) {
+ TableRow convertedEntry = entry.clone();
+ Object num = convertedEntry.get("number");
+ if (num instanceof BigDecimal) {
+ convertedEntry.set("number", ((BigDecimal) num).longValue());
+ }
+ converted.add(convertedEntry);
+ }
+ return converted;
+ }
}
http://git-wip-us.apache.org/repos/asf/beam/blob/b486137d/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/FakeBigQueryServices.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/FakeBigQueryServices.java b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/FakeBigQueryServices.java
index ed3ab37..6dfd9d7 100644
--- a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/FakeBigQueryServices.java
+++ b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/FakeBigQueryServices.java
@@ -1,39 +1,59 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
package org.apache.beam.sdk.io.gcp.bigquery;
-import static org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.fromJsonString;
import static org.junit.Assert.assertEquals;
+import com.google.api.client.util.Base64;
import com.google.api.services.bigquery.model.JobConfigurationQuery;
import com.google.api.services.bigquery.model.TableReference;
import com.google.api.services.bigquery.model.TableRow;
+import com.google.common.collect.Lists;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
import java.io.IOException;
+import java.util.List;
import java.util.NoSuchElementException;
+
+import org.apache.beam.sdk.coders.Coder.Context;
+import org.apache.beam.sdk.coders.ListCoder;
+import org.apache.beam.sdk.coders.TableRowJsonCoder;
import org.apache.beam.sdk.options.BigQueryOptions;
/**
- * Created by relax on 3/30/17.
+ * A fake implementation of BigQuery services.
*/
class FakeBigQueryServices implements BigQueryServices {
- private String[] jsonTableRowReturns = new String[0];
private JobService jobService;
- private DatasetService datasetService;
+ private FakeDatasetService datasetService;
- public FakeBigQueryServices withJobService(JobService jobService) {
+ FakeBigQueryServices withJobService(JobService jobService) {
this.jobService = jobService;
return this;
}
- public FakeBigQueryServices withDatasetService(DatasetService datasetService) {
+ FakeBigQueryServices withDatasetService(FakeDatasetService datasetService) {
this.datasetService = datasetService;
return this;
}
- public FakeBigQueryServices readerReturns(String... jsonTableRowReturns) {
- this.jsonTableRowReturns = jsonTableRowReturns;
- return this;
- }
-
@Override
public JobService getJobService(BigQueryOptions bqOptions) {
return jobService;
@@ -45,26 +65,58 @@ class FakeBigQueryServices implements BigQueryServices {
}
@Override
- public BigQueryJsonReader getReaderFromTable(
- BigQueryOptions bqOptions, TableReference tableRef) {
- return new FakeBigQueryReader(jsonTableRowReturns);
+ public BigQueryJsonReader getReaderFromTable(BigQueryOptions bqOptions, TableReference tableRef) {
+ try {
+ List<TableRow> rows = datasetService.getAllRows(
+ tableRef.getProjectId(), tableRef.getDatasetId(), tableRef.getTableId());
+ return new FakeBigQueryReader(rows);
+ } catch (Exception e) {
+ return null;
+ }
}
@Override
public BigQueryJsonReader getReaderFromQuery(
BigQueryOptions bqOptions, String projectId, JobConfigurationQuery queryConfig) {
- return new FakeBigQueryReader(jsonTableRowReturns);
+ try {
+ List<TableRow> rows = rowsFromEncodedQuery(queryConfig.getQuery());
+ return new FakeBigQueryReader(rows);
+ } catch (IOException e) {
+ return null;
+ }
+ }
+
+ static List<TableRow> rowsFromEncodedQuery(String query) throws IOException {
+ ListCoder<TableRow> listCoder = ListCoder.of(TableRowJsonCoder.of());
+ ByteArrayInputStream input = new ByteArrayInputStream(Base64.decodeBase64(query));
+ List<TableRow> rows = listCoder.decode(input, Context.OUTER);
+ for (TableRow row : rows) {
+ convertNumbers(row);
+ }
+ return rows;
+ }
+
+ static String encodeQuery(List<TableRow> rows) throws IOException {
+ ListCoder<TableRow> listCoder = ListCoder.of(TableRowJsonCoder.of());
+ ByteArrayOutputStream output = new ByteArrayOutputStream();
+ listCoder.encode(rows, output, Context.OUTER);
+ return Base64.encodeBase64String(output.toByteArray());
}
private static class FakeBigQueryReader implements BigQueryJsonReader {
private static final int UNSTARTED = -1;
private static final int CLOSED = Integer.MAX_VALUE;
- private String[] jsonTableRowReturns;
+ private List<byte[]> serializedTableRowReturns;
private int currIndex;
- FakeBigQueryReader(String[] jsonTableRowReturns) {
- this.jsonTableRowReturns = jsonTableRowReturns;
+ FakeBigQueryReader(List<TableRow> tableRowReturns) throws IOException {
+ this.serializedTableRowReturns = Lists.newArrayListWithExpectedSize(tableRowReturns.size());
+ for (TableRow tableRow : tableRowReturns) {
+ ByteArrayOutputStream output = new ByteArrayOutputStream();
+ TableRowJsonCoder.of().encode(tableRow, output, Context.OUTER);
+ serializedTableRowReturns.add(output.toByteArray());
+ }
this.currIndex = UNSTARTED;
}
@@ -72,20 +124,27 @@ class FakeBigQueryServices implements BigQueryServices {
public boolean start() throws IOException {
assertEquals(UNSTARTED, currIndex);
currIndex = 0;
- return currIndex < jsonTableRowReturns.length;
+ return currIndex < serializedTableRowReturns.size();
}
@Override
public boolean advance() throws IOException {
- return ++currIndex < jsonTableRowReturns.length;
+ return ++currIndex < serializedTableRowReturns.size();
}
@Override
public TableRow getCurrent() throws NoSuchElementException {
- if (currIndex >= jsonTableRowReturns.length) {
+ if (currIndex >= serializedTableRowReturns.size()) {
throw new NoSuchElementException();
}
- return fromJsonString(jsonTableRowReturns[currIndex], TableRow.class);
+
+ ByteArrayInputStream input = new ByteArrayInputStream(
+ serializedTableRowReturns.get(currIndex));
+ try {
+ return convertNumbers(TableRowJsonCoder.of().decode(input, Context.OUTER));
+ } catch (IOException e) {
+ return null;
+ }
}
@Override
@@ -93,4 +152,15 @@ class FakeBigQueryServices implements BigQueryServices {
currIndex = CLOSED;
}
}
+
+
+ // Longs tend to get converted back to Integers due to JSON serialization. Convert them back.
+ static TableRow convertNumbers(TableRow tableRow) {
+ for (TableRow.Entry entry : tableRow.entrySet()) {
+ if (entry.getValue() instanceof Integer) {
+ entry.setValue(new Long((Integer) entry.getValue()));
+ }
+ }
+ return tableRow;
+ }
}
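A quick sketch of how a test might use the Base64 query helpers above (encodeQuery / rowsFromEncodedQuery) to feed rows through the fake query path. This is only an illustration under a few assumptions: the sketch lives in the same org.apache.beam.sdk.io.gcp.bigquery test package (the helpers are package-private), JUnit is on the classpath, and the class name QueryEncodingSketch is made up for the example.
package org.apache.beam.sdk.io.gcp.bigquery;
import static org.junit.Assert.assertEquals;
import com.google.api.services.bigquery.model.TableRow;
import com.google.common.collect.Lists;
import java.util.List;
import org.junit.Test;
/** Illustrative sketch exercising the fake query encoding round trip. */
public class QueryEncodingSketch {
  @Test
  public void queryEncodingRoundTrips() throws Exception {
    // Rows that the fake query reader should hand back to the pipeline under test.
    List<TableRow> rows = Lists.newArrayList(
        new TableRow().set("number", 1L),
        new TableRow().set("number", 2L));
    // encodeQuery packs the rows into an opaque Base64 string; a test passes this
    // string as the query of a JobConfigurationQuery handled by FakeBigQueryServices.
    String encodedQuery = FakeBigQueryServices.encodeQuery(rows);
    // rowsFromEncodedQuery decodes the same string back into TableRows.
    List<TableRow> decoded = FakeBigQueryServices.rowsFromEncodedQuery(encodedQuery);
    assertEquals(rows.size(), decoded.size());
    // Compare as strings because JSON decoding may change the numeric boxing type.
    assertEquals("1", String.valueOf(decoded.get(0).get("number")));
    assertEquals("2", String.valueOf(decoded.get(1).get("number")));
  }
}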
http://git-wip-us.apache.org/repos/asf/beam/blob/b486137d/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/FakeDatasetService.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/FakeDatasetService.java b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/FakeDatasetService.java
index 9b2cf63..5103adb 100644
--- a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/FakeDatasetService.java
+++ b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/FakeDatasetService.java
@@ -1,9 +1,29 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package org.apache.beam.sdk.io.gcp.bigquery;
-import static com.google.common.base.Preconditions.checkNotNull;
import static org.junit.Assert.assertEquals;
+import com.google.api.client.googleapis.json.GoogleJsonResponseException;
+import com.google.api.client.http.HttpHeaders;
import com.google.api.services.bigquery.model.Dataset;
+import com.google.api.services.bigquery.model.DatasetReference;
import com.google.api.services.bigquery.model.Table;
import com.google.api.services.bigquery.model.TableReference;
import com.google.api.services.bigquery.model.TableRow;
@@ -24,13 +44,13 @@ class FakeDatasetService implements DatasetService, Serializable {
throws InterruptedException, IOException {
synchronized (BigQueryIOTest.tables) {
Map<String, TableContainer> dataset =
- checkNotNull(
- BigQueryIOTest.tables.get(tableRef.getProjectId(), tableRef.getDatasetId()),
- "Tried to get a dataset %s:%s from %s, but no such dataset was set",
- tableRef.getProjectId(),
- tableRef.getDatasetId(),
- tableRef.getTableId(),
- FakeDatasetService.class.getSimpleName());
+ BigQueryIOTest.tables.get(tableRef.getProjectId(), tableRef.getDatasetId());
+ if (dataset == null) {
+ throwNotFound(
+ "Tried to get a dataset %s:%s from, but no such dataset was set",
+ tableRef.getProjectId(),
+ tableRef.getDatasetId());
+ }
TableContainer tableContainer = dataset.get(tableRef.getTableId());
return tableContainer == null ? null : tableContainer.getTable();
}
@@ -44,27 +64,40 @@ class FakeDatasetService implements DatasetService, Serializable {
}
private TableContainer getTableContainer(String projectId, String datasetId, String tableId)
- throws InterruptedException, IOException {
- synchronized (BigQueryIOTest.tables) {
- Map<String, TableContainer> dataset =
- checkNotNull(
- BigQueryIOTest.tables.get(projectId, datasetId),
- "Tried to get a dataset %s:%s from %s, but no such dataset was set",
- projectId,
- datasetId,
- FakeDatasetService.class.getSimpleName());
- return checkNotNull(dataset.get(tableId),
- "Tried to get a table %s:%s.%s from %s, but no such table was set",
- projectId,
- datasetId,
- tableId,
- FakeDatasetService.class.getSimpleName());
- }
+ throws InterruptedException, IOException {
+ synchronized (BigQueryIOTest.tables) {
+ Map<String, TableContainer> dataset = BigQueryIOTest.tables.get(projectId, datasetId);
+ if (dataset == null) {
+ throwNotFound(
+ "Tried to get a dataset %s:%s, but no such dataset was set",
+ projectId,
+ datasetId);
+ }
+ TableContainer tableContainer = dataset.get(tableId);
+ if (tableContainer == null) {
+ throwNotFound(
+ "Tried to get a table %s:%s.%s, but no such table was set",
+ projectId,
+ datasetId,
+ tableId);
+ }
+ return tableContainer;
+ }
}
@Override
public void deleteTable(TableReference tableRef) throws IOException, InterruptedException {
- throw new UnsupportedOperationException("Unsupported");
+ synchronized (BigQueryIOTest.tables) {
+ Map<String, TableContainer> dataset =
+ BigQueryIOTest.tables.get(tableRef.getProjectId(), tableRef.getDatasetId());
+ if (dataset == null) {
+ throwNotFound(
+ "Tried to get a dataset %s:%s, but no such table was set",
+ tableRef.getProjectId(),
+ tableRef.getDatasetId());
+ }
+ dataset.remove(tableRef.getTableId());
+ }
}
@@ -73,13 +106,13 @@ class FakeDatasetService implements DatasetService, Serializable {
TableReference tableReference = table.getTableReference();
synchronized (BigQueryIOTest.tables) {
Map<String, TableContainer> dataset =
- checkNotNull(
- BigQueryIOTest.tables.get(tableReference.getProjectId(),
- tableReference.getDatasetId()),
- "Tried to get a dataset %s:%s from %s, but no such table was set",
- tableReference.getProjectId(),
- tableReference.getDatasetId(),
- FakeDatasetService.class.getSimpleName());
+ BigQueryIOTest.tables.get(tableReference.getProjectId(), tableReference.getDatasetId());
+ if (dataset == null) {
+ throwNotFound(
+ "Tried to get a dataset %s:%s, but no such table was set",
+ tableReference.getProjectId(),
+ tableReference.getDatasetId());
+ }
TableContainer tableContainer = dataset.get(tableReference.getTableId());
if (tableContainer == null) {
tableContainer = new TableContainer(table);
@@ -98,7 +131,16 @@ class FakeDatasetService implements DatasetService, Serializable {
@Override
public Dataset getDataset(
String projectId, String datasetId) throws IOException, InterruptedException {
- throw new UnsupportedOperationException("Unsupported");
+ synchronized (BigQueryIOTest.tables) {
+ Map<String, TableContainer> dataset = BigQueryIOTest.tables.get(projectId, datasetId);
+ if (dataset == null) {
+ throwNotFound("Tried to get a dataset %s:%s, but no such table was set",
+ projectId, datasetId);
+ }
+ return new Dataset().setDatasetReference(new DatasetReference()
+ .setDatasetId(datasetId)
+ .setProjectId(projectId));
+ }
}
@Override
@@ -117,7 +159,9 @@ class FakeDatasetService implements DatasetService, Serializable {
@Override
public void deleteDataset(String projectId, String datasetId)
throws IOException, InterruptedException {
- throw new UnsupportedOperationException("Unsupported");
+ synchronized (BigQueryIOTest.tables) {
+ BigQueryIOTest.tables.remove(projectId, datasetId);
+ }
}
@Override
@@ -138,8 +182,7 @@ class FakeDatasetService implements DatasetService, Serializable {
TableContainer tableContainer = getTableContainer(
ref.getProjectId(), ref.getDatasetId(), ref.getTableId());
for (int i = 0; i < rowList.size(); ++i) {
- tableContainer.addRow(rowList.get(i), insertIdList.get(i));
- dataSize += rowList.get(i).toString().length();
+ dataSize += tableContainer.addRow(rowList.get(i), insertIdList.get(i));
}
return dataSize;
}
@@ -150,23 +193,16 @@ class FakeDatasetService implements DatasetService, Serializable {
@Nullable String tableDescription)
throws IOException, InterruptedException {
synchronized (BigQueryIOTest.tables) {
- Map<String, TableContainer> dataset =
- checkNotNull(
- BigQueryIOTest.tables.get(tableReference.getProjectId(),
- tableReference.getDatasetId()),
- "Tried to get a dataset %s:%s from %s, but no such dataset was set",
- tableReference.getProjectId(),
- tableReference.getDatasetId(),
- tableReference.getTableId(),
- FakeDatasetService.class.getSimpleName());
- TableContainer tableContainer = checkNotNull(dataset.get(tableReference.getTableId()),
- "Tried to patch a table %s:%s.%s from %s, but no such table was set",
- tableReference.getProjectId(),
- tableReference.getDatasetId(),
- tableReference.getTableId(),
- FakeDatasetService.class.getSimpleName());
+ TableContainer tableContainer = getTableContainer(tableReference.getProjectId(),
+ tableReference.getDatasetId(), tableReference.getTableId());
tableContainer.getTable().setDescription(tableDescription);
return tableContainer.getTable();
}
}
+
+ void throwNotFound(String format, Object... args) throws IOException {
+ throw new IOException(
+ new GoogleJsonResponseException.Builder(404,
+ String.format(format, args), new HttpHeaders()).build());
+ }
}
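Because throwNotFound above surfaces a missing dataset or table as an IOException wrapping a GoogleJsonResponseException with status 404, test code can distinguish "not found" from other failures. A minimal sketch of that check, under the assumption that callers only care about the 404 case; the class and method names here are illustrative.
package org.apache.beam.sdk.io.gcp.bigquery;
import com.google.api.client.googleapis.json.GoogleJsonResponseException;
import java.io.IOException;
/** Illustrative helper for recognizing the simulated 404 from FakeDatasetService. */
class NotFoundSketch {
  /** Returns true if the exception wraps an HTTP 404 response, false otherwise. */
  static boolean isNotFound(IOException e) {
    Throwable cause = e.getCause();
    if (cause instanceof GoogleJsonResponseException) {
      // throwNotFound builds the wrapped GoogleJsonResponseException with code 404.
      return ((GoogleJsonResponseException) cause).getStatusCode() == 404;
    }
    return false;
  }
}
A test could, for example, call datasetService.getDataset("project", "missing-dataset") in a try block and treat isNotFound(e) as the expected outcome.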
[43/50] [abbrv] beam git commit: This closes #2544
Posted by dh...@apache.org.
This closes #2544
Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/714fdd29
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/714fdd29
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/714fdd29
Branch: refs/heads/DSL_SQL
Commit: 714fdd2923ed379eba1de9aaae5d76cb02d69b20
Parents: 8319369 97c6678
Author: chamikara@google.com <ch...@google.com>
Authored: Wed Apr 19 09:56:39 2017 -0700
Committer: chamikara@google.com <ch...@google.com>
Committed: Wed Apr 19 09:56:39 2017 -0700
----------------------------------------------------------------------
sdks/python/apache_beam/io/fileio.py | 90 -------------------------------
1 file changed, 90 deletions(-)
----------------------------------------------------------------------
[24/50] [abbrv] beam git commit: [BEAM-1994] Remove Flink examples
package
Posted by dh...@apache.org.
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/src/test/java/org/apache/beam/runners/flink/streaming/UnboundedSourceWrapperTest.java
----------------------------------------------------------------------
diff --git a/runners/flink/src/test/java/org/apache/beam/runners/flink/streaming/UnboundedSourceWrapperTest.java b/runners/flink/src/test/java/org/apache/beam/runners/flink/streaming/UnboundedSourceWrapperTest.java
new file mode 100644
index 0000000..90f95d6
--- /dev/null
+++ b/runners/flink/src/test/java/org/apache/beam/runners/flink/streaming/UnboundedSourceWrapperTest.java
@@ -0,0 +1,464 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.runners.flink.streaming;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+import org.apache.beam.runners.flink.translation.wrappers.streaming.io.UnboundedSourceWrapper;
+import org.apache.beam.sdk.coders.Coder;
+import org.apache.beam.sdk.io.UnboundedSource;
+import org.apache.beam.sdk.options.PipelineOptions;
+import org.apache.beam.sdk.options.PipelineOptionsFactory;
+import org.apache.beam.sdk.util.WindowedValue;
+import org.apache.beam.sdk.values.KV;
+import org.apache.flink.api.common.ExecutionConfig;
+import org.apache.flink.api.common.accumulators.Accumulator;
+import org.apache.flink.api.common.state.ListState;
+import org.apache.flink.api.common.state.ListStateDescriptor;
+import org.apache.flink.api.common.state.OperatorStateStore;
+import org.apache.flink.configuration.Configuration;
+import org.apache.flink.runtime.execution.Environment;
+import org.apache.flink.runtime.operators.testutils.DummyEnvironment;
+import org.apache.flink.runtime.state.StateInitializationContext;
+import org.apache.flink.runtime.state.StateSnapshotContextSynchronousImpl;
+import org.apache.flink.streaming.api.TimeCharacteristic;
+import org.apache.flink.streaming.api.graph.StreamConfig;
+import org.apache.flink.streaming.api.operators.Output;
+import org.apache.flink.streaming.api.operators.StreamSource;
+import org.apache.flink.streaming.api.watermark.Watermark;
+import org.apache.flink.streaming.runtime.streamrecord.LatencyMarker;
+import org.apache.flink.streaming.runtime.streamrecord.StreamRecord;
+import org.apache.flink.streaming.runtime.tasks.StreamTask;
+import org.apache.flink.streaming.runtime.tasks.TestProcessingTimeService;
+import org.apache.flink.util.InstantiationUtil;
+import org.junit.Test;
+import org.junit.experimental.runners.Enclosed;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+import org.mockito.Matchers;
+
+/**
+ * Tests for {@link UnboundedSourceWrapper}.
+ */
+@RunWith(Enclosed.class)
+public class UnboundedSourceWrapperTest {
+
+ /**
+ * Parameterized tests.
+ */
+ @RunWith(Parameterized.class)
+ public static class UnboundedSourceWrapperTestWithParams {
+ private final int numTasks;
+ private final int numSplits;
+
+ public UnboundedSourceWrapperTestWithParams(int numTasks, int numSplits) {
+ this.numTasks = numTasks;
+ this.numSplits = numSplits;
+ }
+
+ @Parameterized.Parameters
+ public static Collection<Object[]> data() {
+ /*
+ * Parameters for initializing the tests:
+ * {numTasks, numSplits}
+ * The test currently assumes powers of two for some assertions.
+ */
+ return Arrays.asList(new Object[][]{
+ {1, 1}, {1, 2}, {1, 4},
+ {2, 1}, {2, 2}, {2, 4},
+ {4, 1}, {4, 2}, {4, 4}
+ });
+ }
+
+ /**
+ * Creates a {@link UnboundedSourceWrapper} that has one or multiple readers per source.
+ * If numSplits > numTasks, one source will manage multiple readers.
+ */
+ @Test
+ public void testReaders() throws Exception {
+ final int numElements = 20;
+ final Object checkpointLock = new Object();
+ PipelineOptions options = PipelineOptionsFactory.create();
+
+ // this source will emit exactly NUM_ELEMENTS across all parallel readers,
+ // afterwards it will stall. We check whether we also receive NUM_ELEMENTS
+ // elements later.
+ TestCountingSource source = new TestCountingSource(numElements);
+ UnboundedSourceWrapper<KV<Integer, Integer>, TestCountingSource.CounterMark> flinkWrapper =
+ new UnboundedSourceWrapper<>(options, source, numSplits);
+
+ assertEquals(numSplits, flinkWrapper.getSplitSources().size());
+
+ StreamSource<WindowedValue<
+ KV<Integer, Integer>>,
+ UnboundedSourceWrapper<
+ KV<Integer, Integer>,
+ TestCountingSource.CounterMark>> sourceOperator = new StreamSource<>(flinkWrapper);
+
+ setupSourceOperator(sourceOperator, numTasks);
+
+ try {
+ sourceOperator.open();
+ sourceOperator.run(checkpointLock,
+ new Output<StreamRecord<WindowedValue<KV<Integer, Integer>>>>() {
+ private int count = 0;
+
+ @Override
+ public void emitWatermark(Watermark watermark) {
+ }
+
+ @Override
+ public void emitLatencyMarker(LatencyMarker latencyMarker) {
+ }
+
+ @Override
+ public void collect(
+ StreamRecord<WindowedValue<KV<Integer, Integer>>> windowedValueStreamRecord) {
+
+ count++;
+ if (count >= numElements) {
+ throw new SuccessException();
+ }
+ }
+
+ @Override
+ public void close() {
+
+ }
+ });
+ } catch (SuccessException e) {
+
+ assertEquals(Math.max(1, numSplits / numTasks), flinkWrapper.getLocalSplitSources().size());
+
+ // success
+ return;
+ }
+ fail("Read terminated without producing expected number of outputs");
+ }
+
+ /**
+ * Verify that snapshot/restore work as expected. We bring up a source and cancel
+ * after seeing a certain number of elements. Then we snapshot that source,
+ * bring up a completely new source that we restore from the snapshot and verify
+ * that we see all expected elements in the end.
+ */
+ @Test
+ public void testRestore() throws Exception {
+ final int numElements = 20;
+ final Object checkpointLock = new Object();
+ PipelineOptions options = PipelineOptionsFactory.create();
+
+ // this source will emit exactly NUM_ELEMENTS across all parallel readers,
+ // afterwards it will stall. We check whether we also receive NUM_ELEMENTS
+ // elements later.
+ TestCountingSource source = new TestCountingSource(numElements);
+ UnboundedSourceWrapper<KV<Integer, Integer>, TestCountingSource.CounterMark> flinkWrapper =
+ new UnboundedSourceWrapper<>(options, source, numSplits);
+
+ assertEquals(numSplits, flinkWrapper.getSplitSources().size());
+
+ StreamSource<
+ WindowedValue<KV<Integer, Integer>>,
+ UnboundedSourceWrapper<
+ KV<Integer, Integer>,
+ TestCountingSource.CounterMark>> sourceOperator = new StreamSource<>(flinkWrapper);
+
+
+ OperatorStateStore backend = mock(OperatorStateStore.class);
+
+ TestingListState<KV<UnboundedSource, TestCountingSource.CounterMark>>
+ listState = new TestingListState<>();
+
+ when(backend.getOperatorState(Matchers.any(ListStateDescriptor.class)))
+ .thenReturn(listState);
+
+ StateInitializationContext initializationContext = mock(StateInitializationContext.class);
+
+ when(initializationContext.getOperatorStateStore()).thenReturn(backend);
+ when(initializationContext.isRestored()).thenReturn(false, true);
+
+ flinkWrapper.initializeState(initializationContext);
+
+ setupSourceOperator(sourceOperator, numTasks);
+
+ final Set<KV<Integer, Integer>> emittedElements = new HashSet<>();
+
+ boolean readFirstBatchOfElements = false;
+
+ try {
+ sourceOperator.open();
+ sourceOperator.run(checkpointLock,
+ new Output<StreamRecord<WindowedValue<KV<Integer, Integer>>>>() {
+ private int count = 0;
+
+ @Override
+ public void emitWatermark(Watermark watermark) {
+ }
+
+ @Override
+ public void emitLatencyMarker(LatencyMarker latencyMarker) {
+ }
+
+ @Override
+ public void collect(
+ StreamRecord<WindowedValue<KV<Integer, Integer>>> windowedValueStreamRecord) {
+
+ emittedElements.add(windowedValueStreamRecord.getValue().getValue());
+ count++;
+ if (count >= numElements / 2) {
+ throw new SuccessException();
+ }
+ }
+
+ @Override
+ public void close() {
+
+ }
+ });
+ } catch (SuccessException e) {
+ // success
+ readFirstBatchOfElements = true;
+ }
+
+ assertTrue("Did not successfully read first batch of elements.", readFirstBatchOfElements);
+
+ // draw a snapshot
+ flinkWrapper.snapshotState(new StateSnapshotContextSynchronousImpl(0, 0));
+
+ // test snapshot offsets
+ assertEquals(flinkWrapper.getLocalSplitSources().size(),
+ listState.getList().size());
+ int totalEmit = 0;
+ for (KV<UnboundedSource, TestCountingSource.CounterMark> kv : listState.get()) {
+ totalEmit += kv.getValue().current + 1;
+ }
+ assertEquals(numElements / 2, totalEmit);
+
+ // test that finalizeCheckpoint on CheckpointMark is called
+ final ArrayList<Integer> finalizeList = new ArrayList<>();
+ TestCountingSource.setFinalizeTracker(finalizeList);
+ flinkWrapper.notifyCheckpointComplete(0);
+ assertEquals(flinkWrapper.getLocalSplitSources().size(), finalizeList.size());
+
+ // create a completely new source but restore from the snapshot
+ TestCountingSource restoredSource = new TestCountingSource(numElements);
+ UnboundedSourceWrapper<
+ KV<Integer, Integer>, TestCountingSource.CounterMark> restoredFlinkWrapper =
+ new UnboundedSourceWrapper<>(options, restoredSource, numSplits);
+
+ assertEquals(numSplits, restoredFlinkWrapper.getSplitSources().size());
+
+ StreamSource<
+ WindowedValue<KV<Integer, Integer>>,
+ UnboundedSourceWrapper<
+ KV<Integer, Integer>,
+ TestCountingSource.CounterMark>> restoredSourceOperator =
+ new StreamSource<>(restoredFlinkWrapper);
+
+ setupSourceOperator(restoredSourceOperator, numTasks);
+
+ // restore snapshot
+ restoredFlinkWrapper.initializeState(initializationContext);
+
+ boolean readSecondBatchOfElements = false;
+
+ // run again and verify that we see the other elements
+ try {
+ restoredSourceOperator.open();
+ restoredSourceOperator.run(checkpointLock,
+ new Output<StreamRecord<WindowedValue<KV<Integer, Integer>>>>() {
+ private int count = 0;
+
+ @Override
+ public void emitWatermark(Watermark watermark) {
+ }
+
+ @Override
+ public void emitLatencyMarker(LatencyMarker latencyMarker) {
+ }
+
+ @Override
+ public void collect(
+ StreamRecord<WindowedValue<KV<Integer, Integer>>> windowedValueStreamRecord) {
+ emittedElements.add(windowedValueStreamRecord.getValue().getValue());
+ count++;
+ if (count >= numElements / 2) {
+ throw new SuccessException();
+ }
+ }
+
+ @Override
+ public void close() {
+
+ }
+ });
+ } catch (SuccessException e) {
+ // success
+ readSecondBatchOfElements = true;
+ }
+
+ assertEquals(Math.max(1, numSplits / numTasks), flinkWrapper.getLocalSplitSources().size());
+
+ assertTrue("Did not successfully read second batch of elements.", readSecondBatchOfElements);
+
+ // verify that we saw all NUM_ELEMENTS elements
+ assertTrue(emittedElements.size() == numElements);
+ }
+
+ @Test
+ public void testNullCheckpoint() throws Exception {
+ final int numElements = 20;
+ PipelineOptions options = PipelineOptionsFactory.create();
+
+ TestCountingSource source = new TestCountingSource(numElements) {
+ @Override
+ public Coder<CounterMark> getCheckpointMarkCoder() {
+ return null;
+ }
+ };
+ UnboundedSourceWrapper<KV<Integer, Integer>, TestCountingSource.CounterMark> flinkWrapper =
+ new UnboundedSourceWrapper<>(options, source, numSplits);
+
+ OperatorStateStore backend = mock(OperatorStateStore.class);
+
+ TestingListState<KV<UnboundedSource, TestCountingSource.CounterMark>>
+ listState = new TestingListState<>();
+
+ when(backend.getOperatorState(Matchers.any(ListStateDescriptor.class)))
+ .thenReturn(listState);
+
+ StateInitializationContext initializationContext = mock(StateInitializationContext.class);
+
+ when(initializationContext.getOperatorStateStore()).thenReturn(backend);
+ when(initializationContext.isRestored()).thenReturn(false, true);
+
+ flinkWrapper.initializeState(initializationContext);
+
+ StreamSource sourceOperator = new StreamSource<>(flinkWrapper);
+ setupSourceOperator(sourceOperator, numTasks);
+ sourceOperator.open();
+
+ flinkWrapper.snapshotState(new StateSnapshotContextSynchronousImpl(0, 0));
+
+ assertEquals(0, listState.getList().size());
+
+ UnboundedSourceWrapper<
+ KV<Integer, Integer>, TestCountingSource.CounterMark> restoredFlinkWrapper =
+ new UnboundedSourceWrapper<>(options, new TestCountingSource(numElements),
+ numSplits);
+
+ StreamSource restoredSourceOperator = new StreamSource<>(flinkWrapper);
+ setupSourceOperator(restoredSourceOperator, numTasks);
+ sourceOperator.open();
+
+ restoredFlinkWrapper.initializeState(initializationContext);
+
+ assertEquals(Math.max(1, numSplits / numTasks), flinkWrapper.getLocalSplitSources().size());
+
+ }
+
+ @SuppressWarnings("unchecked")
+ private static <T> void setupSourceOperator(StreamSource<T, ?> operator, int numSubTasks) {
+ ExecutionConfig executionConfig = new ExecutionConfig();
+ StreamConfig cfg = new StreamConfig(new Configuration());
+
+ cfg.setTimeCharacteristic(TimeCharacteristic.EventTime);
+
+ Environment env = new DummyEnvironment("MockTwoInputTask", numSubTasks, 0);
+
+ StreamTask<?, ?> mockTask = mock(StreamTask.class);
+ when(mockTask.getName()).thenReturn("Mock Task");
+ when(mockTask.getCheckpointLock()).thenReturn(new Object());
+ when(mockTask.getConfiguration()).thenReturn(cfg);
+ when(mockTask.getEnvironment()).thenReturn(env);
+ when(mockTask.getExecutionConfig()).thenReturn(executionConfig);
+ when(mockTask.getAccumulatorMap())
+ .thenReturn(Collections.<String, Accumulator<?, ?>>emptyMap());
+ TestProcessingTimeService testProcessingTimeService = new TestProcessingTimeService();
+ when(mockTask.getProcessingTimeService()).thenReturn(testProcessingTimeService);
+
+ operator.setup(mockTask, cfg, (Output<StreamRecord<T>>) mock(Output.class));
+ }
+
+ /**
+ * A special {@link RuntimeException} that we throw to signal that the test was successful.
+ */
+ private static class SuccessException extends RuntimeException {
+ }
+ }
+
+ /**
+ * Non-parameterized tests.
+ */
+ public static class BasicTest {
+
+ /**
+ * Check serialization of a {@link UnboundedSourceWrapper}.
+ */
+ @Test
+ public void testSerialization() throws Exception {
+ final int parallelism = 1;
+ final int numElements = 20;
+ PipelineOptions options = PipelineOptionsFactory.create();
+
+ TestCountingSource source = new TestCountingSource(numElements);
+ UnboundedSourceWrapper<KV<Integer, Integer>, TestCountingSource.CounterMark> flinkWrapper =
+ new UnboundedSourceWrapper<>(options, source, parallelism);
+
+ InstantiationUtil.serializeObject(flinkWrapper);
+ }
+
+ }
+
+ private static final class TestingListState<T> implements ListState<T> {
+
+ private final List<T> list = new ArrayList<>();
+
+ @Override
+ public void clear() {
+ list.clear();
+ }
+
+ @Override
+ public Iterable<T> get() throws Exception {
+ return list;
+ }
+
+ @Override
+ public void add(T value) throws Exception {
+ list.add(value);
+ }
+
+ public List<T> getList() {
+ return list;
+ }
+
+ }
+
+}
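To make the split-distribution assertions in the tests above concrete: for the power-of-two parameter grid, the expected number of locally managed splits for the subtask under test is Math.max(1, numSplits / numTasks), which is what testReaders and testRestore check against getLocalSplitSources(). A purely illustrative sketch of that arithmetic:
public class LocalSplitCountSketch {
  public static void main(String[] args) {
    // Mirrors the {numTasks, numSplits} grid used by the parameterized tests.
    int[][] params = {{1, 1}, {1, 2}, {1, 4}, {2, 1}, {2, 2}, {2, 4}, {4, 1}, {4, 2}, {4, 4}};
    for (int[] p : params) {
      int numTasks = p[0];
      int numSplits = p[1];
      // Expected value of flinkWrapper.getLocalSplitSources().size() for the subtask under test.
      int expectedLocalSplits = Math.max(1, numSplits / numTasks);
      System.out.printf("numTasks=%d numSplits=%d -> expected local splits=%d%n",
          numTasks, numSplits, expectedLocalSplits);
    }
  }
}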
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/src/test/java/org/apache/beam/runners/flink/streaming/package-info.java
----------------------------------------------------------------------
diff --git a/runners/flink/src/test/java/org/apache/beam/runners/flink/streaming/package-info.java b/runners/flink/src/test/java/org/apache/beam/runners/flink/streaming/package-info.java
new file mode 100644
index 0000000..08a1e03
--- /dev/null
+++ b/runners/flink/src/test/java/org/apache/beam/runners/flink/streaming/package-info.java
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Internal implementation of the Beam runner for Apache Flink.
+ */
+package org.apache.beam.runners.flink.streaming;
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/src/test/resources/log4j-test.properties
----------------------------------------------------------------------
diff --git a/runners/flink/src/test/resources/log4j-test.properties b/runners/flink/src/test/resources/log4j-test.properties
new file mode 100644
index 0000000..4c74d85
--- /dev/null
+++ b/runners/flink/src/test/resources/log4j-test.properties
@@ -0,0 +1,27 @@
+################################################################################
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+################################################################################
+
+# Set root logger level to OFF to not flood build logs
+# set manually to INFO for debugging purposes
+log4j.rootLogger=OFF, testlogger
+
+# A1 is set to be a ConsoleAppender.
+log4j.appender.testlogger=org.apache.log4j.ConsoleAppender
+log4j.appender.testlogger.target = System.err
+log4j.appender.testlogger.layout=org.apache.log4j.PatternLayout
+log4j.appender.testlogger.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n
[40/50] [abbrv] beam git commit: [BEAM-1994] Remove Flink examples
package
Posted by dh...@apache.org.
[BEAM-1994] Remove Flink examples package
Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/cdd2544b
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/cdd2544b
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/cdd2544b
Branch: refs/heads/DSL_SQL
Commit: cdd2544ba6dd6ac4aa80c65ecd8e01ab3cf664aa
Parents: 8a00f22
Author: Ismaël Mejía <ie...@apache.org>
Authored: Tue Apr 18 17:31:07 2017 +0200
Committer: Ismaël Mejía <ie...@apache.org>
Committed: Wed Apr 19 13:37:06 2017 +0200
----------------------------------------------------------------------
...PostCommit_Java_ValidatesRunner_Flink.groovy | 2 +-
runners/flink/examples/pom.xml | 130 ---
.../beam/runners/flink/examples/TFIDF.java | 455 --------
.../beam/runners/flink/examples/WordCount.java | 129 ---
.../runners/flink/examples/package-info.java | 22 -
.../flink/examples/streaming/AutoComplete.java | 400 -------
.../flink/examples/streaming/JoinExamples.java | 154 ---
.../examples/streaming/WindowedWordCount.java | 141 ---
.../flink/examples/streaming/package-info.java | 22 -
runners/flink/pom.xml | 275 ++++-
runners/flink/runner/pom.xml | 330 ------
.../flink/DefaultParallelismFactory.java | 39 -
.../flink/FlinkBatchPipelineTranslator.java | 139 ---
.../flink/FlinkBatchTransformTranslators.java | 723 ------------
.../flink/FlinkBatchTranslationContext.java | 153 ---
.../flink/FlinkDetachedRunnerResult.java | 75 --
.../FlinkPipelineExecutionEnvironment.java | 241 ----
.../runners/flink/FlinkPipelineOptions.java | 101 --
.../runners/flink/FlinkPipelineTranslator.java | 53 -
.../apache/beam/runners/flink/FlinkRunner.java | 232 ----
.../runners/flink/FlinkRunnerRegistrar.java | 62 --
.../beam/runners/flink/FlinkRunnerResult.java | 98 --
.../flink/FlinkStreamingPipelineTranslator.java | 276 -----
.../FlinkStreamingTransformTranslators.java | 1044 -----------------
.../flink/FlinkStreamingTranslationContext.java | 130 ---
.../flink/FlinkStreamingViewOverrides.java | 372 -------
.../flink/PipelineTranslationOptimizer.java | 72 --
.../beam/runners/flink/TestFlinkRunner.java | 84 --
.../beam/runners/flink/TranslationMode.java | 31 -
.../apache/beam/runners/flink/package-info.java | 22 -
.../functions/FlinkAggregatorFactory.java | 53 -
.../functions/FlinkAssignContext.java | 63 --
.../functions/FlinkAssignWindows.java | 49 -
.../functions/FlinkDoFnFunction.java | 161 ---
.../FlinkMergingNonShuffleReduceFunction.java | 228 ----
.../FlinkMergingPartialReduceFunction.java | 201 ----
.../functions/FlinkMergingReduceFunction.java | 199 ----
.../FlinkMultiOutputPruningFunction.java | 50 -
.../functions/FlinkNoOpStepContext.java | 73 --
.../functions/FlinkPartialReduceFunction.java | 172 ---
.../functions/FlinkReduceFunction.java | 173 ---
.../functions/FlinkSideInputReader.java | 80 --
.../functions/FlinkStatefulDoFnFunction.java | 198 ----
.../functions/SideInputInitializer.java | 73 --
.../translation/functions/package-info.java | 22 -
.../runners/flink/translation/package-info.java | 22 -
.../translation/types/CoderTypeInformation.java | 120 --
.../translation/types/CoderTypeSerializer.java | 132 ---
.../types/EncodedValueComparator.java | 195 ----
.../types/EncodedValueSerializer.java | 113 --
.../types/EncodedValueTypeInformation.java | 98 --
.../types/InspectableByteArrayOutputStream.java | 34 -
.../flink/translation/types/KvKeySelector.java | 50 -
.../flink/translation/types/package-info.java | 22 -
.../utils/SerializedPipelineOptions.java | 67 --
.../flink/translation/utils/package-info.java | 22 -
.../wrappers/DataInputViewWrapper.java | 58 -
.../wrappers/DataOutputViewWrapper.java | 51 -
.../SerializableFnAggregatorWrapper.java | 98 --
.../translation/wrappers/SourceInputFormat.java | 150 ---
.../translation/wrappers/SourceInputSplit.java | 52 -
.../translation/wrappers/package-info.java | 22 -
.../wrappers/streaming/DoFnOperator.java | 774 -------------
.../streaming/KvToByteBufferKeySelector.java | 56 -
.../streaming/SingletonKeyedWorkItem.java | 56 -
.../streaming/SingletonKeyedWorkItemCoder.java | 126 ---
.../streaming/SplittableDoFnOperator.java | 150 ---
.../wrappers/streaming/WindowDoFnOperator.java | 117 --
.../wrappers/streaming/WorkItemKeySelector.java | 56 -
.../streaming/io/BoundedSourceWrapper.java | 218 ----
.../streaming/io/UnboundedSocketSource.java | 249 -----
.../streaming/io/UnboundedSourceWrapper.java | 476 --------
.../wrappers/streaming/io/package-info.java | 22 -
.../wrappers/streaming/package-info.java | 22 -
.../state/FlinkBroadcastStateInternals.java | 865 --------------
.../state/FlinkKeyGroupStateInternals.java | 487 --------
.../state/FlinkSplitStateInternals.java | 260 -----
.../streaming/state/FlinkStateInternals.java | 1053 ------------------
.../state/KeyGroupCheckpointedOperator.java | 35 -
.../state/KeyGroupRestoringOperator.java | 32 -
.../wrappers/streaming/state/package-info.java | 22 -
.../runner/src/main/resources/log4j.properties | 23 -
.../flink/EncodedValueComparatorTest.java | 70 --
.../runners/flink/FlinkRunnerRegistrarTest.java | 48 -
.../beam/runners/flink/FlinkTestPipeline.java | 72 --
.../beam/runners/flink/PipelineOptionsTest.java | 184 ---
.../beam/runners/flink/ReadSourceITCase.java | 85 --
.../flink/ReadSourceStreamingITCase.java | 74 --
.../beam/runners/flink/WriteSinkITCase.java | 192 ----
.../flink/streaming/DoFnOperatorTest.java | 600 ----------
.../FlinkBroadcastStateInternalsTest.java | 245 ----
.../FlinkKeyGroupStateInternalsTest.java | 262 -----
.../streaming/FlinkSplitStateInternalsTest.java | 101 --
.../streaming/FlinkStateInternalsTest.java | 395 -------
.../flink/streaming/GroupByNullKeyTest.java | 124 ---
.../flink/streaming/TestCountingSource.java | 254 -----
.../streaming/TopWikipediaSessionsITCase.java | 133 ---
.../streaming/UnboundedSourceWrapperTest.java | 464 --------
.../runners/flink/streaming/package-info.java | 22 -
.../src/test/resources/log4j-test.properties | 27 -
.../flink/DefaultParallelismFactory.java | 39 +
.../flink/FlinkBatchPipelineTranslator.java | 139 +++
.../flink/FlinkBatchTransformTranslators.java | 723 ++++++++++++
.../flink/FlinkBatchTranslationContext.java | 153 +++
.../flink/FlinkDetachedRunnerResult.java | 75 ++
.../FlinkPipelineExecutionEnvironment.java | 241 ++++
.../runners/flink/FlinkPipelineOptions.java | 101 ++
.../runners/flink/FlinkPipelineTranslator.java | 53 +
.../apache/beam/runners/flink/FlinkRunner.java | 232 ++++
.../runners/flink/FlinkRunnerRegistrar.java | 62 ++
.../beam/runners/flink/FlinkRunnerResult.java | 98 ++
.../flink/FlinkStreamingPipelineTranslator.java | 276 +++++
.../FlinkStreamingTransformTranslators.java | 1044 +++++++++++++++++
.../flink/FlinkStreamingTranslationContext.java | 130 +++
.../flink/FlinkStreamingViewOverrides.java | 372 +++++++
.../flink/PipelineTranslationOptimizer.java | 72 ++
.../beam/runners/flink/TestFlinkRunner.java | 84 ++
.../beam/runners/flink/TranslationMode.java | 31 +
.../apache/beam/runners/flink/package-info.java | 22 +
.../functions/FlinkAggregatorFactory.java | 53 +
.../functions/FlinkAssignContext.java | 63 ++
.../functions/FlinkAssignWindows.java | 49 +
.../functions/FlinkDoFnFunction.java | 161 +++
.../FlinkMergingNonShuffleReduceFunction.java | 228 ++++
.../FlinkMergingPartialReduceFunction.java | 201 ++++
.../functions/FlinkMergingReduceFunction.java | 199 ++++
.../FlinkMultiOutputPruningFunction.java | 50 +
.../functions/FlinkNoOpStepContext.java | 73 ++
.../functions/FlinkPartialReduceFunction.java | 172 +++
.../functions/FlinkReduceFunction.java | 173 +++
.../functions/FlinkSideInputReader.java | 80 ++
.../functions/FlinkStatefulDoFnFunction.java | 198 ++++
.../functions/SideInputInitializer.java | 73 ++
.../translation/functions/package-info.java | 22 +
.../runners/flink/translation/package-info.java | 22 +
.../translation/types/CoderTypeInformation.java | 120 ++
.../translation/types/CoderTypeSerializer.java | 132 +++
.../types/EncodedValueComparator.java | 195 ++++
.../types/EncodedValueSerializer.java | 113 ++
.../types/EncodedValueTypeInformation.java | 98 ++
.../types/InspectableByteArrayOutputStream.java | 34 +
.../flink/translation/types/KvKeySelector.java | 50 +
.../flink/translation/types/package-info.java | 22 +
.../utils/SerializedPipelineOptions.java | 67 ++
.../flink/translation/utils/package-info.java | 22 +
.../wrappers/DataInputViewWrapper.java | 58 +
.../wrappers/DataOutputViewWrapper.java | 51 +
.../SerializableFnAggregatorWrapper.java | 98 ++
.../translation/wrappers/SourceInputFormat.java | 150 +++
.../translation/wrappers/SourceInputSplit.java | 52 +
.../translation/wrappers/package-info.java | 22 +
.../wrappers/streaming/DoFnOperator.java | 774 +++++++++++++
.../streaming/KvToByteBufferKeySelector.java | 56 +
.../streaming/SingletonKeyedWorkItem.java | 56 +
.../streaming/SingletonKeyedWorkItemCoder.java | 126 +++
.../streaming/SplittableDoFnOperator.java | 150 +++
.../wrappers/streaming/WindowDoFnOperator.java | 117 ++
.../wrappers/streaming/WorkItemKeySelector.java | 56 +
.../streaming/io/BoundedSourceWrapper.java | 218 ++++
.../streaming/io/UnboundedSocketSource.java | 249 +++++
.../streaming/io/UnboundedSourceWrapper.java | 476 ++++++++
.../wrappers/streaming/io/package-info.java | 22 +
.../wrappers/streaming/package-info.java | 22 +
.../state/FlinkBroadcastStateInternals.java | 865 ++++++++++++++
.../state/FlinkKeyGroupStateInternals.java | 487 ++++++++
.../state/FlinkSplitStateInternals.java | 260 +++++
.../streaming/state/FlinkStateInternals.java | 1053 ++++++++++++++++++
.../state/KeyGroupCheckpointedOperator.java | 35 +
.../state/KeyGroupRestoringOperator.java | 32 +
.../wrappers/streaming/state/package-info.java | 22 +
.../flink/src/main/resources/log4j.properties | 23 +
.../flink/EncodedValueComparatorTest.java | 70 ++
.../runners/flink/FlinkRunnerRegistrarTest.java | 48 +
.../beam/runners/flink/FlinkTestPipeline.java | 72 ++
.../beam/runners/flink/PipelineOptionsTest.java | 184 +++
.../beam/runners/flink/ReadSourceITCase.java | 85 ++
.../flink/ReadSourceStreamingITCase.java | 74 ++
.../beam/runners/flink/WriteSinkITCase.java | 192 ++++
.../flink/streaming/DoFnOperatorTest.java | 600 ++++++++++
.../FlinkBroadcastStateInternalsTest.java | 245 ++++
.../FlinkKeyGroupStateInternalsTest.java | 262 +++++
.../streaming/FlinkSplitStateInternalsTest.java | 101 ++
.../streaming/FlinkStateInternalsTest.java | 395 +++++++
.../flink/streaming/GroupByNullKeyTest.java | 124 +++
.../flink/streaming/TestCountingSource.java | 254 +++++
.../streaming/TopWikipediaSessionsITCase.java | 133 +++
.../streaming/UnboundedSourceWrapperTest.java | 464 ++++++++
.../runners/flink/streaming/package-info.java | 22 +
.../src/test/resources/log4j-test.properties | 27 +
189 files changed, 15765 insertions(+), 17293 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/.test-infra/jenkins/job_beam_PostCommit_Java_ValidatesRunner_Flink.groovy
----------------------------------------------------------------------
diff --git a/.test-infra/jenkins/job_beam_PostCommit_Java_ValidatesRunner_Flink.groovy b/.test-infra/jenkins/job_beam_PostCommit_Java_ValidatesRunner_Flink.groovy
index 411106d..5b228bc 100644
--- a/.test-infra/jenkins/job_beam_PostCommit_Java_ValidatesRunner_Flink.groovy
+++ b/.test-infra/jenkins/job_beam_PostCommit_Java_ValidatesRunner_Flink.groovy
@@ -39,5 +39,5 @@ mavenJob('beam_PostCommit_Java_ValidatesRunner_Flink') {
'Run Flink ValidatesRunner')
// Maven goals for this job.
- goals('-B -e clean verify -am -pl runners/flink/runner -Plocal-validates-runner-tests -Pvalidates-runner-tests')
+ goals('-B -e clean verify -am -pl runners/flink -Plocal-validates-runner-tests -Pvalidates-runner-tests')
}
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/examples/pom.xml
----------------------------------------------------------------------
diff --git a/runners/flink/examples/pom.xml b/runners/flink/examples/pom.xml
deleted file mode 100644
index aaf76d9..0000000
--- a/runners/flink/examples/pom.xml
+++ /dev/null
@@ -1,130 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
-
- <modelVersion>4.0.0</modelVersion>
-
- <parent>
- <groupId>org.apache.beam</groupId>
- <artifactId>beam-runners-flink-parent</artifactId>
- <version>0.7.0-SNAPSHOT</version>
- <relativePath>../pom.xml</relativePath>
- </parent>
-
- <artifactId>beam-runners-flink_2.10-examples</artifactId>
-
- <name>Apache Beam :: Runners :: Flink :: Examples</name>
-
- <packaging>jar</packaging>
-
- <properties>
- <!-- Default parameters for mvn exec:java -->
- <flink.examples.input>kinglear.txt</flink.examples.input>
- <flink.examples.output>wordcounts.txt</flink.examples.output>
- <flink.examples.parallelism>-1</flink.examples.parallelism>
- </properties>
-
- <profiles>
- <profile>
- <id>disable-validates-runner-tests</id>
- <activation>
- <activeByDefault>true</activeByDefault>
- </activation>
- <build>
- <plugins>
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-surefire-plugin</artifactId>
- <executions>
- <execution>
- <id>validates-runner-tests</id>
- <configuration>
- <skip>true</skip>
- </configuration>
- </execution>
- </executions>
- </plugin>
- </plugins>
- </build>
- </profile>
- </profiles>
-
- <dependencies>
- <dependency>
- <groupId>org.apache.beam</groupId>
- <artifactId>beam-sdks-java-extensions-gcp-core</artifactId>
- </dependency>
-
- <dependency>
- <groupId>org.apache.beam</groupId>
- <artifactId>beam-runners-flink_2.10</artifactId>
- <version>${project.version}</version>
- </dependency>
-
- <dependency>
- <groupId>org.apache.flink</groupId>
- <artifactId>flink-connector-kafka-0.8_2.10</artifactId>
- <version>${flink.version}</version>
- </dependency>
-
- </dependencies>
-
- <build>
- <plugins>
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-compiler-plugin</artifactId>
- </plugin>
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-jar-plugin</artifactId>
- </plugin>
-
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-dependency-plugin</artifactId>
- <executions>
- <execution>
- <goals><goal>analyze-only</goal></goals>
- <configuration>
- <!-- disable for now until dependencies are cleaned up -->
- <failOnWarning>false</failOnWarning>
- </configuration>
- </execution>
- </executions>
- </plugin>
-
- <plugin>
- <groupId>org.codehaus.mojo</groupId>
- <artifactId>exec-maven-plugin</artifactId>
- <configuration>
- <executable>java</executable>
- <arguments>
- <argument>--runner=org.apache.beam.runners.flink.FlinkRunner</argument>
- <argument>--parallelism=${flink.examples.parallelism}</argument>
- <argument>--input=${flink.examples.input}</argument>
- <argument>--output=${flink.examples.output}</argument>
- </arguments>
- </configuration>
- </plugin>
-
- </plugins>
-
- </build>
-
-</project>
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/examples/src/main/java/org/apache/beam/runners/flink/examples/TFIDF.java
----------------------------------------------------------------------
diff --git a/runners/flink/examples/src/main/java/org/apache/beam/runners/flink/examples/TFIDF.java b/runners/flink/examples/src/main/java/org/apache/beam/runners/flink/examples/TFIDF.java
deleted file mode 100644
index 8e1df08..0000000
--- a/runners/flink/examples/src/main/java/org/apache/beam/runners/flink/examples/TFIDF.java
+++ /dev/null
@@ -1,455 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.beam.runners.flink.examples;
-
-import java.io.File;
-import java.io.IOException;
-import java.net.URI;
-import java.net.URISyntaxException;
-import java.util.HashSet;
-import java.util.Set;
-import org.apache.beam.runners.flink.FlinkPipelineOptions;
-import org.apache.beam.runners.flink.FlinkRunner;
-import org.apache.beam.sdk.Pipeline;
-import org.apache.beam.sdk.coders.Coder;
-import org.apache.beam.sdk.coders.KvCoder;
-import org.apache.beam.sdk.coders.StringDelegateCoder;
-import org.apache.beam.sdk.coders.StringUtf8Coder;
-import org.apache.beam.sdk.io.TextIO;
-import org.apache.beam.sdk.options.Default;
-import org.apache.beam.sdk.options.Description;
-import org.apache.beam.sdk.options.GcsOptions;
-import org.apache.beam.sdk.options.PipelineOptions;
-import org.apache.beam.sdk.options.PipelineOptionsFactory;
-import org.apache.beam.sdk.options.Validation;
-import org.apache.beam.sdk.transforms.Count;
-import org.apache.beam.sdk.transforms.Distinct;
-import org.apache.beam.sdk.transforms.DoFn;
-import org.apache.beam.sdk.transforms.Flatten;
-import org.apache.beam.sdk.transforms.Keys;
-import org.apache.beam.sdk.transforms.PTransform;
-import org.apache.beam.sdk.transforms.ParDo;
-import org.apache.beam.sdk.transforms.Values;
-import org.apache.beam.sdk.transforms.View;
-import org.apache.beam.sdk.transforms.WithKeys;
-import org.apache.beam.sdk.transforms.join.CoGbkResult;
-import org.apache.beam.sdk.transforms.join.CoGroupByKey;
-import org.apache.beam.sdk.transforms.join.KeyedPCollectionTuple;
-import org.apache.beam.sdk.util.GcsUtil;
-import org.apache.beam.sdk.util.gcsfs.GcsPath;
-import org.apache.beam.sdk.values.KV;
-import org.apache.beam.sdk.values.PBegin;
-import org.apache.beam.sdk.values.PCollection;
-import org.apache.beam.sdk.values.PCollectionList;
-import org.apache.beam.sdk.values.PCollectionView;
-import org.apache.beam.sdk.values.PDone;
-import org.apache.beam.sdk.values.TupleTag;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-/**
- * An example that computes a basic TF-IDF search table for a directory or GCS prefix.
- *
- * <p>Concepts: joining data; side inputs; logging
- *
- * <p>To execute this pipeline locally, specify general pipeline configuration:
- * <pre>{@code
- * --project=YOUR_PROJECT_ID
- * }</pre>
- * and a local output file or output prefix on GCS:
- * <pre>{@code
- * --output=[YOUR_LOCAL_FILE | gs://YOUR_OUTPUT_PREFIX]
- * }</pre>
- *
- * <p>To execute this pipeline using the Dataflow service, specify pipeline configuration:
- * <pre>{@code
- * --project=YOUR_PROJECT_ID
- * --stagingLocation=gs://YOUR_STAGING_DIRECTORY
- * --runner=BlockingDataflowRunner
- * and an output prefix on GCS:
- * --output=gs://YOUR_OUTPUT_PREFIX
- * }</pre>
- *
- * <p>The default input is {@code gs://dataflow-samples/shakespeare/} and can be overridden with
- * {@code --input}.
- */
-public class TFIDF {
- /**
- * Options supported by {@link TFIDF}.
- *
- * <p>Inherits standard configuration options.
- */
- private interface Options extends PipelineOptions, FlinkPipelineOptions {
- @Description("Path to the directory or GCS prefix containing files to read from")
- @Default.String("gs://dataflow-samples/shakespeare/")
- String getInput();
- void setInput(String value);
-
- @Description("Prefix of output URI to write to")
- @Validation.Required
- String getOutput();
- void setOutput(String value);
- }
-
- /**
- * Lists documents contained beneath the {@code options.input} prefix/directory.
- */
- public static Set<URI> listInputDocuments(Options options)
- throws URISyntaxException, IOException {
- URI baseUri = new URI(options.getInput());
-
- // List all documents in the directory or GCS prefix.
- URI absoluteUri;
- if (baseUri.getScheme() != null) {
- absoluteUri = baseUri;
- } else {
- absoluteUri = new URI(
- "file",
- baseUri.getAuthority(),
- baseUri.getPath(),
- baseUri.getQuery(),
- baseUri.getFragment());
- }
-
- Set<URI> uris = new HashSet<>();
- if (absoluteUri.getScheme().equals("file")) {
- File directory = new File(absoluteUri);
- String[] directoryListing = directory.list();
- if (directoryListing == null) {
- throw new IOException(
- "Directory " + absoluteUri + " is not a valid path or IO Error occurred.");
- }
- for (String entry : directoryListing) {
- File path = new File(directory, entry);
- uris.add(path.toURI());
- }
- } else if (absoluteUri.getScheme().equals("gs")) {
- GcsUtil gcsUtil = options.as(GcsOptions.class).getGcsUtil();
- URI gcsUriGlob = new URI(
- absoluteUri.getScheme(),
- absoluteUri.getAuthority(),
- absoluteUri.getPath() + "*",
- absoluteUri.getQuery(),
- absoluteUri.getFragment());
- for (GcsPath entry : gcsUtil.expand(GcsPath.fromUri(gcsUriGlob))) {
- uris.add(entry.toUri());
- }
- }
-
- return uris;
- }
-
- /**
- * Reads the documents at the provided URIs and returns all lines
- * from the documents, each tagged with the document it came from.
- */
- public static class ReadDocuments
- extends PTransform<PBegin, PCollection<KV<URI, String>>> {
- private static final long serialVersionUID = 0;
-
- // transient because PTransform is not really meant to be serialized.
- // see note on PTransform
- private final transient Iterable<URI> uris;
-
- public ReadDocuments(Iterable<URI> uris) {
- this.uris = uris;
- }
-
- @Override
- public Coder<?> getDefaultOutputCoder() {
- return KvCoder.of(StringDelegateCoder.of(URI.class), StringUtf8Coder.of());
- }
-
- @Override
- public PCollection<KV<URI, String>> expand(PBegin input) {
- Pipeline pipeline = input.getPipeline();
-
- // Create one TextIO.Read transform for each document
- // and add its output to a PCollectionList
- PCollectionList<KV<URI, String>> urisToLines =
- PCollectionList.empty(pipeline);
-
- // TextIO.Read supports:
- // - file: URIs and paths locally
- // - gs: URIs on the service
- for (final URI uri : uris) {
- String uriString;
- if (uri.getScheme().equals("file")) {
- uriString = new File(uri).getPath();
- } else {
- uriString = uri.toString();
- }
-
- PCollection<KV<URI, String>> oneUriToLines = pipeline
- .apply("TextIO.Read(" + uriString + ")", TextIO.Read.from(uriString))
- .apply("WithKeys(" + uriString + ")", WithKeys.<URI, String>of(uri));
-
- urisToLines = urisToLines.and(oneUriToLines);
- }
-
- return urisToLines.apply(Flatten.<KV<URI, String>>pCollections());
- }
- }
-
- /**
- * A transform containing a basic TF-IDF pipeline. The input consists of KV objects
- * where the key is the document's URI and the value is a piece
- * of the document's content. The output is a mapping from terms to
- * scores for each document URI.
- */
- public static class ComputeTfIdf
- extends PTransform<PCollection<KV<URI, String>>, PCollection<KV<String, KV<URI, Double>>>> {
- private static final long serialVersionUID = 0;
-
- public ComputeTfIdf() { }
-
- @Override
- public PCollection<KV<String, KV<URI, Double>>> expand(
- PCollection<KV<URI, String>> uriToContent) {
-
- // Compute the total number of documents, and
- // prepare this singleton PCollectionView for
- // use as a side input.
- final PCollectionView<Long> totalDocuments =
- uriToContent
- .apply("GetURIs", Keys.<URI>create())
- .apply("DistinctDocs", Distinct.<URI>create())
- .apply(Count.<URI>globally())
- .apply(View.<Long>asSingleton());
-
- // Create a collection of pairs mapping a URI to each
- // of the words in the document associated with that URI.
- PCollection<KV<URI, String>> uriToWords = uriToContent
- .apply("SplitWords", ParDo.of(new DoFn<KV<URI, String>, KV<URI, String>>() {
- private static final long serialVersionUID = 0;
-
- @ProcessElement
- public void processElement(ProcessContext c) {
- URI uri = c.element().getKey();
- String line = c.element().getValue();
- for (String word : line.split("\\W+")) {
- // Log INFO messages when the word “love” is found.
- if (word.toLowerCase().equals("love")) {
- LOG.info("Found {}", word.toLowerCase());
- }
-
- if (!word.isEmpty()) {
- c.output(KV.of(uri, word.toLowerCase()));
- }
- }
- }
- }));
-
- // Compute a mapping from each word to the total
- // number of documents in which it appears.
- PCollection<KV<String, Long>> wordToDocCount = uriToWords
- .apply("DistinctWords", Distinct.<KV<URI, String>>create())
- .apply(Values.<String>create())
- .apply("CountDocs", Count.<String>perElement());
-
- // Compute a mapping from each URI to the total
- // number of words in the document associated with that URI.
- PCollection<KV<URI, Long>> uriToWordTotal = uriToWords
- .apply("GetURIs2", Keys.<URI>create())
- .apply("CountWords", Count.<URI>perElement());
-
- // Count, for each (URI, word) pair, the number of
- // occurrences of that word in the document associated
- // with the URI.
- PCollection<KV<KV<URI, String>, Long>> uriAndWordToCount = uriToWords
- .apply("CountWordDocPairs", Count.<KV<URI, String>>perElement());
-
- // Adjust the above collection from a mapping of
- // (URI, word) pairs to counts into an isomorphic mapping
- // from URI to (word, count) pairs, to prepare for a join
- // by the URI key.
- PCollection<KV<URI, KV<String, Long>>> uriToWordAndCount = uriAndWordToCount
- .apply("ShiftKeys", ParDo.of(
- new DoFn<KV<KV<URI, String>, Long>, KV<URI, KV<String, Long>>>() {
- private static final long serialVersionUID = 0;
-
- @ProcessElement
- public void processElement(ProcessContext c) {
- URI uri = c.element().getKey().getKey();
- String word = c.element().getKey().getValue();
- Long occurrences = c.element().getValue();
- c.output(KV.of(uri, KV.of(word, occurrences)));
- }
- }));
-
- // Prepare to join the mapping of URI to (word, count) pairs with
- // the mapping of URI to total word counts, by associating
- // each of the input PCollection<KV<URI, ...>> with
- // a tuple tag. Each input must have the same key type, URI
- // in this case. The type parameter of the tuple tag matches
- // the types of the values for each collection.
- final TupleTag<Long> wordTotalsTag = new TupleTag<>();
- final TupleTag<KV<String, Long>> wordCountsTag = new TupleTag<>();
- KeyedPCollectionTuple<URI> coGbkInput = KeyedPCollectionTuple
- .of(wordTotalsTag, uriToWordTotal)
- .and(wordCountsTag, uriToWordAndCount);
-
- // Perform a CoGroupByKey (a sort of pre-join) on the prepared
- // inputs. This yields a mapping from URI to a CoGbkResult
- // (CoGroupByKey Result). The CoGbkResult is a mapping
- // from the above tuple tags to the values in each input
- // associated with a particular URI. In this case, each
- // KV<URI, CoGbkResult> groups a URI with the total number of
- // words in that document as well as all the (word, count)
- // pairs for particular words.
- PCollection<KV<URI, CoGbkResult>> uriToWordAndCountAndTotal = coGbkInput
- .apply("CoGroupByUri", CoGroupByKey.<URI>create());
-
- // Compute a mapping from each word to a (URI, term frequency)
- // pair for each URI. A word's term frequency for a document
- // is simply the number of times that word occurs in the document
- // divided by the total number of words in the document.
- PCollection<KV<String, KV<URI, Double>>> wordToUriAndTf = uriToWordAndCountAndTotal
- .apply("ComputeTermFrequencies", ParDo.of(
- new DoFn<KV<URI, CoGbkResult>, KV<String, KV<URI, Double>>>() {
- private static final long serialVersionUID = 0;
-
- @ProcessElement
- public void processElement(ProcessContext c) {
- URI uri = c.element().getKey();
- Long wordTotal = c.element().getValue().getOnly(wordTotalsTag);
-
- for (KV<String, Long> wordAndCount
- : c.element().getValue().getAll(wordCountsTag)) {
- String word = wordAndCount.getKey();
- Long wordCount = wordAndCount.getValue();
- Double termFrequency = wordCount.doubleValue() / wordTotal.doubleValue();
- c.output(KV.of(word, KV.of(uri, termFrequency)));
- }
- }
- }));
-
- // Compute a mapping from each word to its document frequency.
- // A word's document frequency in a corpus is the number of
- // documents in which the word appears divided by the total
- // number of documents in the corpus. Note how the total number of
- // documents is passed as a side input; the same value is
- // presented to each invocation of the DoFn.
- PCollection<KV<String, Double>> wordToDf = wordToDocCount
- .apply("ComputeDocFrequencies", ParDo
- .of(new DoFn<KV<String, Long>, KV<String, Double>>() {
- private static final long serialVersionUID = 0;
-
- @ProcessElement
- public void processElement(ProcessContext c) {
- String word = c.element().getKey();
- Long documentCount = c.element().getValue();
- Long documentTotal = c.sideInput(totalDocuments);
- Double documentFrequency = documentCount.doubleValue()
- / documentTotal.doubleValue();
-
- c.output(KV.of(word, documentFrequency));
- }
- }).withSideInputs(totalDocuments));
-
- // Join the term frequency and document frequency
- // collections, each keyed on the word.
- final TupleTag<KV<URI, Double>> tfTag = new TupleTag<>();
- final TupleTag<Double> dfTag = new TupleTag<>();
- PCollection<KV<String, CoGbkResult>> wordToUriAndTfAndDf = KeyedPCollectionTuple
- .of(tfTag, wordToUriAndTf)
- .and(dfTag, wordToDf)
- .apply(CoGroupByKey.<String>create());
-
- // Compute a mapping from each word to a (URI, TF-IDF) score
- // for each URI. There are a variety of definitions of TF-IDF
- // ("term frequency - inverse document frequency") score;
- // here we use a basic version that is the term frequency
- // multiplied by the log of the inverse document frequency.
-
- return wordToUriAndTfAndDf
- .apply("ComputeTfIdf", ParDo.of(
- new DoFn<KV<String, CoGbkResult>, KV<String, KV<URI, Double>>>() {
- private static final long serialVersionUID = 0;
-
- @ProcessElement
- public void processElement(ProcessContext c) {
- String word = c.element().getKey();
- Double df = c.element().getValue().getOnly(dfTag);
-
- for (KV<URI, Double> uriAndTf : c.element().getValue().getAll(tfTag)) {
- URI uri = uriAndTf.getKey();
- Double tf = uriAndTf.getValue();
- Double tfIdf = tf * Math.log(1 / df);
- c.output(KV.of(word, KV.of(uri, tfIdf)));
- }
- }
- }));
- }
-
- // Instantiate Logger.
- // It is suggested that the user specify the containing class
- // (in this case ComputeTfIdf) when obtaining the Logger.
- private static final Logger LOG = LoggerFactory.getLogger(ComputeTfIdf.class);
- }
-
- /**
- * A {@link PTransform} to write, in CSV format, a mapping from term and URI
- * to score.
- */
- public static class WriteTfIdf
- extends PTransform<PCollection<KV<String, KV<URI, Double>>>, PDone> {
- private static final long serialVersionUID = 0;
-
- private String output;
-
- public WriteTfIdf(String output) {
- this.output = output;
- }
-
- @Override
- public PDone expand(PCollection<KV<String, KV<URI, Double>>> wordToUriAndTfIdf) {
- return wordToUriAndTfIdf
- .apply("Format", ParDo.of(new DoFn<KV<String, KV<URI, Double>>, String>() {
- private static final long serialVersionUID = 0;
-
- @ProcessElement
- public void processElement(ProcessContext c) {
- c.output(String.format("%s,\t%s,\t%f",
- c.element().getKey(),
- c.element().getValue().getKey(),
- c.element().getValue().getValue()));
- }
- }))
- .apply(TextIO.Write
- .to(output)
- .withSuffix(".csv"));
- }
- }
-
- public static void main(String[] args) throws Exception {
- Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
-
- options.setRunner(FlinkRunner.class);
-
- Pipeline pipeline = Pipeline.create(options);
- pipeline.getCoderRegistry().registerCoder(URI.class, StringDelegateCoder.of(URI.class));
-
- pipeline
- .apply(new ReadDocuments(listInputDocuments(options)))
- .apply(new ComputeTfIdf())
- .apply(new WriteTfIdf(options.getOutput()));
-
- pipeline.run();
- }
-}
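The ComputeTfIdf transform above combines the two statistics as tfIdf = tf * Math.log(1 / df). As a rough standalone sketch of that arithmetic only (the class name and the sample counts below are made up for illustration and are not part of the pipeline code):

public class TfIdfArithmeticSketch {
  public static void main(String[] args) {
    // Term frequency: a word that occurs 3 times in a 100-word document.
    double tf = 3.0 / 100.0;
    // Document frequency: the word appears in 2 of 20 documents.
    double df = 2.0 / 20.0;
    // Same formula as ComputeTfIdf above: tf * ln(1 / df).
    double tfIdf = tf * Math.log(1 / df);
    // Prints approximately tf=0.030 df=0.100 tfIdf=0.0691
    System.out.printf("tf=%.3f df=%.3f tfIdf=%.4f%n", tf, df, tfIdf);
  }
}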
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/examples/src/main/java/org/apache/beam/runners/flink/examples/WordCount.java
----------------------------------------------------------------------
diff --git a/runners/flink/examples/src/main/java/org/apache/beam/runners/flink/examples/WordCount.java b/runners/flink/examples/src/main/java/org/apache/beam/runners/flink/examples/WordCount.java
deleted file mode 100644
index 6ae4cf8..0000000
--- a/runners/flink/examples/src/main/java/org/apache/beam/runners/flink/examples/WordCount.java
+++ /dev/null
@@ -1,129 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.beam.runners.flink.examples;
-
-import org.apache.beam.runners.flink.FlinkPipelineOptions;
-import org.apache.beam.runners.flink.FlinkRunner;
-import org.apache.beam.sdk.Pipeline;
-import org.apache.beam.sdk.io.TextIO;
-import org.apache.beam.sdk.options.Description;
-import org.apache.beam.sdk.options.PipelineOptions;
-import org.apache.beam.sdk.options.PipelineOptionsFactory;
-import org.apache.beam.sdk.options.Validation;
-import org.apache.beam.sdk.transforms.Aggregator;
-import org.apache.beam.sdk.transforms.Count;
-import org.apache.beam.sdk.transforms.DoFn;
-import org.apache.beam.sdk.transforms.MapElements;
-import org.apache.beam.sdk.transforms.PTransform;
-import org.apache.beam.sdk.transforms.ParDo;
-import org.apache.beam.sdk.transforms.SimpleFunction;
-import org.apache.beam.sdk.transforms.Sum;
-import org.apache.beam.sdk.values.KV;
-import org.apache.beam.sdk.values.PCollection;
-
-/**
- * Wordcount pipeline.
- */
-public class WordCount {
-
- /**
- * Function to extract words.
- */
- public static class ExtractWordsFn extends DoFn<String, String> {
- private final Aggregator<Long, Long> emptyLines =
- createAggregator("emptyLines", Sum.ofLongs());
-
- @ProcessElement
- public void processElement(ProcessContext c) {
- if (c.element().trim().isEmpty()) {
- emptyLines.addValue(1L);
- }
-
- // Split the line into words.
- String[] words = c.element().split("[^a-zA-Z']+");
-
- // Output each word encountered into the output PCollection.
- for (String word : words) {
- if (!word.isEmpty()) {
- c.output(word);
- }
- }
- }
- }
-
- /**
- * PTransform counting words.
- */
- public static class CountWords extends PTransform<PCollection<String>,
- PCollection<KV<String, Long>>> {
- @Override
- public PCollection<KV<String, Long>> expand(PCollection<String> lines) {
-
- // Convert lines of text into individual words.
- PCollection<String> words = lines.apply(
- ParDo.of(new ExtractWordsFn()));
-
- // Count the number of times each word occurs.
- PCollection<KV<String, Long>> wordCounts =
- words.apply(Count.<String>perElement());
-
- return wordCounts;
- }
- }
-
- /** A SimpleFunction that converts a Word and Count into a printable string. */
- public static class FormatAsTextFn extends SimpleFunction<KV<String, Long>, String> {
- @Override
- public String apply(KV<String, Long> input) {
- return input.getKey() + ": " + input.getValue();
- }
- }
-
- /**
- * Options supported by {@link WordCount}.
- *
- * <p>Inherits standard configuration options.
- */
- public interface Options extends PipelineOptions, FlinkPipelineOptions {
- @Description("Path of the file to read from")
- String getInput();
- void setInput(String value);
-
- @Description("Path of the file to write to")
- @Validation.Required
- String getOutput();
- void setOutput(String value);
- }
-
- public static void main(String[] args) {
-
- Options options = PipelineOptionsFactory.fromArgs(args).withValidation()
- .as(Options.class);
- options.setRunner(FlinkRunner.class);
-
- Pipeline p = Pipeline.create(options);
-
- p.apply("ReadLines", TextIO.Read.from(options.getInput()))
- .apply(new CountWords())
- .apply(MapElements.via(new FormatAsTextFn()))
- .apply("WriteCounts", TextIO.Write.to(options.getOutput()));
-
- p.run();
- }
-
-}
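ExtractWordsFn above splits each line on runs of characters that are not letters or apostrophes and drops empty tokens before emitting. A minimal standalone sketch of just that splitting step (plain Java; the class name and sample line are made up):

import java.util.Arrays;

public class ExtractWordsSketch {
  public static void main(String[] args) {
    String line = "O Romeo, Romeo! wherefore art thou Romeo?";
    // Same regex as ExtractWordsFn: split on anything that is not a letter or an apostrophe.
    String[] words = line.split("[^a-zA-Z']+");
    // A token can still be empty (e.g. when a line starts with punctuation),
    // which is why the DoFn checks word.isEmpty() before emitting.
    System.out.println(Arrays.asList(words));
    // Prints [O, Romeo, Romeo, wherefore, art, thou, Romeo]
  }
}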
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/examples/src/main/java/org/apache/beam/runners/flink/examples/package-info.java
----------------------------------------------------------------------
diff --git a/runners/flink/examples/src/main/java/org/apache/beam/runners/flink/examples/package-info.java b/runners/flink/examples/src/main/java/org/apache/beam/runners/flink/examples/package-info.java
deleted file mode 100644
index b0ecb56..0000000
--- a/runners/flink/examples/src/main/java/org/apache/beam/runners/flink/examples/package-info.java
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * Flink Beam runner examples.
- */
-package org.apache.beam.runners.flink.examples;
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/examples/src/main/java/org/apache/beam/runners/flink/examples/streaming/AutoComplete.java
----------------------------------------------------------------------
diff --git a/runners/flink/examples/src/main/java/org/apache/beam/runners/flink/examples/streaming/AutoComplete.java b/runners/flink/examples/src/main/java/org/apache/beam/runners/flink/examples/streaming/AutoComplete.java
deleted file mode 100644
index d07df29..0000000
--- a/runners/flink/examples/src/main/java/org/apache/beam/runners/flink/examples/streaming/AutoComplete.java
+++ /dev/null
@@ -1,400 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.beam.runners.flink.examples.streaming;
-
-import java.io.IOException;
-import java.util.List;
-import org.apache.beam.runners.flink.FlinkRunner;
-import org.apache.beam.runners.flink.translation.wrappers.streaming.io.UnboundedSocketSource;
-import org.apache.beam.sdk.Pipeline;
-import org.apache.beam.sdk.coders.AvroCoder;
-import org.apache.beam.sdk.coders.DefaultCoder;
-import org.apache.beam.sdk.io.Read;
-import org.apache.beam.sdk.io.TextIO;
-import org.apache.beam.sdk.options.Default;
-import org.apache.beam.sdk.options.Description;
-import org.apache.beam.sdk.options.PipelineOptionsFactory;
-import org.apache.beam.sdk.transforms.Aggregator;
-import org.apache.beam.sdk.transforms.Count;
-import org.apache.beam.sdk.transforms.DoFn;
-import org.apache.beam.sdk.transforms.Filter;
-import org.apache.beam.sdk.transforms.Flatten;
-import org.apache.beam.sdk.transforms.PTransform;
-import org.apache.beam.sdk.transforms.ParDo;
-import org.apache.beam.sdk.transforms.Partition;
-import org.apache.beam.sdk.transforms.Partition.PartitionFn;
-import org.apache.beam.sdk.transforms.SerializableFunction;
-import org.apache.beam.sdk.transforms.Sum;
-import org.apache.beam.sdk.transforms.Top;
-import org.apache.beam.sdk.transforms.windowing.AfterWatermark;
-import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
-import org.apache.beam.sdk.transforms.windowing.FixedWindows;
-import org.apache.beam.sdk.transforms.windowing.Window;
-import org.apache.beam.sdk.transforms.windowing.WindowFn;
-import org.apache.beam.sdk.values.KV;
-import org.apache.beam.sdk.values.PCollection;
-import org.apache.beam.sdk.values.PCollectionList;
-import org.joda.time.Duration;
-
-/**
- * To run the example, first open a socket on a terminal by executing the command:
- * <ul>
- * <li><code>nc -lk 9999</code>
- * </ul>
- * and then launch the example. Now whatever you type in the terminal is going to be
- * the input to the program.
- * */
-public class AutoComplete {
-
- /**
- * A PTransform that takes as input a list of tokens and returns
- * the most common tokens per prefix.
- */
- public static class ComputeTopCompletions
- extends PTransform<PCollection<String>, PCollection<KV<String, List<CompletionCandidate>>>> {
- private static final long serialVersionUID = 0;
-
- private final int candidatesPerPrefix;
- private final boolean recursive;
-
- protected ComputeTopCompletions(int candidatesPerPrefix, boolean recursive) {
- this.candidatesPerPrefix = candidatesPerPrefix;
- this.recursive = recursive;
- }
-
- public static ComputeTopCompletions top(int candidatesPerPrefix, boolean recursive) {
- return new ComputeTopCompletions(candidatesPerPrefix, recursive);
- }
-
- @Override
- public PCollection<KV<String, List<CompletionCandidate>>> expand(PCollection<String> input) {
- PCollection<CompletionCandidate> candidates = input
- // First count how often each token appears.
- .apply(Count.<String>perElement())
-
- // Map the KV outputs of Count into our own CompletionCandidate class.
- .apply("CreateCompletionCandidates", ParDo.of(
- new DoFn<KV<String, Long>, CompletionCandidate>() {
- private static final long serialVersionUID = 0;
-
- @ProcessElement
- public void processElement(ProcessContext c) {
- CompletionCandidate cand = new CompletionCandidate(c.element().getKey(),
- c.element().getValue());
- c.output(cand);
- }
- }));
-
- // Compute the top via either a flat or recursive algorithm.
- if (recursive) {
- return candidates
- .apply(new ComputeTopRecursive(candidatesPerPrefix, 1))
- .apply(Flatten.<KV<String, List<CompletionCandidate>>>pCollections());
- } else {
- return candidates
- .apply(new ComputeTopFlat(candidatesPerPrefix, 1));
- }
- }
- }
-
- /**
- * Lower latency, but more expensive.
- */
- private static class ComputeTopFlat
- extends PTransform<PCollection<CompletionCandidate>,
- PCollection<KV<String, List<CompletionCandidate>>>> {
- private static final long serialVersionUID = 0;
-
- private final int candidatesPerPrefix;
- private final int minPrefix;
-
- public ComputeTopFlat(int candidatesPerPrefix, int minPrefix) {
- this.candidatesPerPrefix = candidatesPerPrefix;
- this.minPrefix = minPrefix;
- }
-
- @Override
- public PCollection<KV<String, List<CompletionCandidate>>> expand(
- PCollection<CompletionCandidate> input) {
- return input
- // For each completion candidate, map it to all prefixes.
- .apply(ParDo.of(new AllPrefixes(minPrefix)))
-
- // Find and return the top candidates for each prefix.
- .apply(Top.<String, CompletionCandidate>largestPerKey(candidatesPerPrefix)
- .withHotKeyFanout(new HotKeyFanout()));
- }
-
- private static class HotKeyFanout implements SerializableFunction<String, Integer> {
- private static final long serialVersionUID = 0;
-
- @Override
- public Integer apply(String input) {
- return (int) Math.pow(4, 5 - input.length());
- }
- }
- }
-
- /**
- * Cheaper but higher latency.
- *
- * <p>Returns two PCollections: the first contains the top prefixes of size
- * greater than minPrefix, and the second contains the top prefixes of size
- * exactly minPrefix.
- */
- private static class ComputeTopRecursive
- extends PTransform<PCollection<CompletionCandidate>,
- PCollectionList<KV<String, List<CompletionCandidate>>>> {
- private static final long serialVersionUID = 0;
-
- private final int candidatesPerPrefix;
- private final int minPrefix;
-
- public ComputeTopRecursive(int candidatesPerPrefix, int minPrefix) {
- this.candidatesPerPrefix = candidatesPerPrefix;
- this.minPrefix = minPrefix;
- }
-
- private class KeySizePartitionFn implements PartitionFn<KV<String, List<CompletionCandidate>>> {
- private static final long serialVersionUID = 0;
-
- @Override
- public int partitionFor(KV<String, List<CompletionCandidate>> elem, int numPartitions) {
- return elem.getKey().length() > minPrefix ? 0 : 1;
- }
- }
-
- private static class FlattenTops
- extends DoFn<KV<String, List<CompletionCandidate>>, CompletionCandidate> {
- private static final long serialVersionUID = 0;
-
- @ProcessElement
- public void processElement(ProcessContext c) {
- for (CompletionCandidate cc : c.element().getValue()) {
- c.output(cc);
- }
- }
- }
-
- @Override
- public PCollectionList<KV<String, List<CompletionCandidate>>> expand(
- PCollection<CompletionCandidate> input) {
- if (minPrefix > 10) {
- // Base case, partitioning to return the output in the expected format.
- return input
- .apply(new ComputeTopFlat(candidatesPerPrefix, minPrefix))
- .apply(Partition.of(2, new KeySizePartitionFn()));
- } else {
- // If a candidate is in the top N for prefix a...b, it must also be in the top N
- // for the prefix a...bX that it starts with, which is typically a much smaller set to consider.
- // First, compute the top candidate for prefixes of size at least minPrefix + 1.
- PCollectionList<KV<String, List<CompletionCandidate>>> larger = input
- .apply(new ComputeTopRecursive(candidatesPerPrefix, minPrefix + 1));
- // Consider the top candidates for each prefix of length minPrefix + 1...
- PCollection<KV<String, List<CompletionCandidate>>> small =
- PCollectionList
- .of(larger.get(1).apply(ParDo.of(new FlattenTops())))
- // ...together with those (previously excluded) candidates of length
- // exactly minPrefix...
- .and(input.apply(Filter.by(new SerializableFunction<CompletionCandidate, Boolean>() {
- private static final long serialVersionUID = 0;
-
- @Override
- public Boolean apply(CompletionCandidate c) {
- return c.getValue().length() == minPrefix;
- }
- })))
- .apply("FlattenSmall", Flatten.<CompletionCandidate>pCollections())
- // ...set the key to be the minPrefix-length prefix...
- .apply(ParDo.of(new AllPrefixes(minPrefix, minPrefix)))
- // ...and (re)apply the Top operator to all of them together.
- .apply(Top.<String, CompletionCandidate>largestPerKey(candidatesPerPrefix));
-
- PCollection<KV<String, List<CompletionCandidate>>> flattenLarger = larger
- .apply("FlattenLarge", Flatten.<KV<String, List<CompletionCandidate>>>pCollections());
-
- return PCollectionList.of(flattenLarger).and(small);
- }
- }
- }
-
- /**
- * A DoFn that keys each candidate by all its prefixes.
- */
- private static class AllPrefixes
- extends DoFn<CompletionCandidate, KV<String, CompletionCandidate>> {
- private static final long serialVersionUID = 0;
-
- private final int minPrefix;
- private final int maxPrefix;
- public AllPrefixes(int minPrefix) {
- this(minPrefix, Integer.MAX_VALUE);
- }
- public AllPrefixes(int minPrefix, int maxPrefix) {
- this.minPrefix = minPrefix;
- this.maxPrefix = maxPrefix;
- }
- @ProcessElement
- public void processElement(ProcessContext c) {
- String word = c.element().value;
- for (int i = minPrefix; i <= Math.min(word.length(), maxPrefix); i++) {
- KV<String, CompletionCandidate> kv = KV.of(word.substring(0, i), c.element());
- c.output(kv);
- }
- }
- }
-
- /**
- * Class used to store token-count pairs.
- */
- @DefaultCoder(AvroCoder.class)
- static class CompletionCandidate implements Comparable<CompletionCandidate> {
- private long count;
- private String value;
-
- public CompletionCandidate(String value, long count) {
- this.value = value;
- this.count = count;
- }
-
- public String getValue() {
- return value;
- }
-
- // Empty constructor required for Avro decoding.
- @SuppressWarnings("unused")
- public CompletionCandidate() {}
-
- @Override
- public int compareTo(CompletionCandidate o) {
- if (this.count < o.count) {
- return -1;
- } else if (this.count == o.count) {
- return this.value.compareTo(o.value);
- } else {
- return 1;
- }
- }
-
- @Override
- public boolean equals(Object other) {
- if (other instanceof CompletionCandidate) {
- CompletionCandidate that = (CompletionCandidate) other;
- return this.count == that.count && this.value.equals(that.value);
- } else {
- return false;
- }
- }
-
- @Override
- public int hashCode() {
- return Long.valueOf(count).hashCode() ^ value.hashCode();
- }
-
- @Override
- public String toString() {
- return "CompletionCandidate[" + value + ", " + count + "]";
- }
- }
-
- static class ExtractWordsFn extends DoFn<String, String> {
- private final Aggregator<Long, Long> emptyLines =
- createAggregator("emptyLines", Sum.ofLongs());
-
- @ProcessElement
- public void processElement(ProcessContext c) {
- if (c.element().trim().isEmpty()) {
- emptyLines.addValue(1L);
- }
-
- // Split the line into words.
- String[] words = c.element().split("[^a-zA-Z']+");
-
- // Output each word encountered into the output PCollection.
- for (String word : words) {
- if (!word.isEmpty()) {
- c.output(word);
- }
- }
- }
- }
-
- /**
- * Takes as input the top candidates per prefix, and formats them as strings suitable for
- * writing to a per-task local file.
- */
- static class FormatForPerTaskLocalFile
- extends DoFn<KV<String, List<CompletionCandidate>>, String> {
-
- private static final long serialVersionUID = 0;
-
- @ProcessElement
- public void processElement(ProcessContext c, BoundedWindow window) {
- StringBuilder str = new StringBuilder();
- KV<String, List<CompletionCandidate>> elem = c.element();
-
- str.append(elem.getKey() + " @ " + window + " -> ");
- for (CompletionCandidate cand: elem.getValue()) {
- str.append(cand.toString() + " ");
- }
- System.out.println(str.toString());
- c.output(str.toString());
- }
- }
-
- /**
- * Options supported by this class.
- *
- * <p>Inherits standard configuration options.
- */
- private interface Options extends WindowedWordCount.StreamingWordCountOptions {
- @Description("Whether to use the recursive algorithm")
- @Default.Boolean(true)
- Boolean getRecursive();
- void setRecursive(Boolean value);
- }
-
- public static void main(String[] args) throws IOException {
- Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
- options.setStreaming(true);
- options.setCheckpointingInterval(1000L);
- options.setNumberOfExecutionRetries(5);
- options.setExecutionRetryDelay(3000L);
- options.setRunner(FlinkRunner.class);
-
-
- WindowFn<Object, ?> windowFn =
- FixedWindows.of(Duration.standardSeconds(options.getWindowSize()));
-
- // Create the pipeline.
- Pipeline p = Pipeline.create(options);
- PCollection<KV<String, List<CompletionCandidate>>> toWrite = p
- .apply("WordStream", Read.from(new UnboundedSocketSource<>("localhost", 9999, '\n', 3)))
- .apply(ParDo.of(new ExtractWordsFn()))
- .apply(Window.<String>into(windowFn)
- .triggering(AfterWatermark.pastEndOfWindow()).withAllowedLateness(Duration.ZERO)
- .discardingFiredPanes())
- .apply(ComputeTopCompletions.top(10, options.getRecursive()));
-
- toWrite
- .apply("FormatForPerTaskFile", ParDo.of(new FormatForPerTaskLocalFile()))
- .apply(TextIO.Write.to("./outputAutoComplete.txt"));
-
- p.run();
- }
-}
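The AllPrefixes DoFn above keys each candidate by every prefix whose length lies between minPrefix and maxPrefix. A minimal standalone sketch of that expansion (plain Java, no Beam types; the class name, word, and bounds are made-up sample values):

import java.util.ArrayList;
import java.util.List;

public class AllPrefixesSketch {
  // Mirrors the loop in AllPrefixes.processElement: emit every prefix whose
  // length is between minPrefix and min(word.length(), maxPrefix), inclusive.
  static List<String> prefixes(String word, int minPrefix, int maxPrefix) {
    List<String> out = new ArrayList<>();
    for (int i = minPrefix; i <= Math.min(word.length(), maxPrefix); i++) {
      out.add(word.substring(0, i));
    }
    return out;
  }

  public static void main(String[] args) {
    // Prints [f, fl, fli, flin, flink]
    System.out.println(prefixes("flink", 1, Integer.MAX_VALUE));
  }
}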
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/examples/src/main/java/org/apache/beam/runners/flink/examples/streaming/JoinExamples.java
----------------------------------------------------------------------
diff --git a/runners/flink/examples/src/main/java/org/apache/beam/runners/flink/examples/streaming/JoinExamples.java b/runners/flink/examples/src/main/java/org/apache/beam/runners/flink/examples/streaming/JoinExamples.java
deleted file mode 100644
index 8fefc9f..0000000
--- a/runners/flink/examples/src/main/java/org/apache/beam/runners/flink/examples/streaming/JoinExamples.java
+++ /dev/null
@@ -1,154 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.beam.runners.flink.examples.streaming;
-
-import org.apache.beam.runners.flink.FlinkRunner;
-import org.apache.beam.runners.flink.translation.wrappers.streaming.io.UnboundedSocketSource;
-import org.apache.beam.sdk.Pipeline;
-import org.apache.beam.sdk.io.Read;
-import org.apache.beam.sdk.io.TextIO;
-import org.apache.beam.sdk.options.PipelineOptionsFactory;
-import org.apache.beam.sdk.transforms.DoFn;
-import org.apache.beam.sdk.transforms.ParDo;
-import org.apache.beam.sdk.transforms.join.CoGbkResult;
-import org.apache.beam.sdk.transforms.join.CoGroupByKey;
-import org.apache.beam.sdk.transforms.join.KeyedPCollectionTuple;
-import org.apache.beam.sdk.transforms.windowing.AfterWatermark;
-import org.apache.beam.sdk.transforms.windowing.FixedWindows;
-import org.apache.beam.sdk.transforms.windowing.Window;
-import org.apache.beam.sdk.transforms.windowing.WindowFn;
-import org.apache.beam.sdk.values.KV;
-import org.apache.beam.sdk.values.PCollection;
-import org.apache.beam.sdk.values.TupleTag;
-import org.joda.time.Duration;
-
-/**
- * To run the example, first open two sockets on two terminals by executing the commands:
- * <ul>
- * <li><code>nc -lk 9999</code>, and
- * <li><code>nc -lk 9998</code>
- * </ul>
- * and then launch the example. Now whatever you type in the terminal is going to be
- * the input to the program.
- * */
-public class JoinExamples {
-
- static PCollection<String> joinEvents(PCollection<String> streamA,
- PCollection<String> streamB) throws Exception {
-
- final TupleTag<String> firstInfoTag = new TupleTag<>();
- final TupleTag<String> secondInfoTag = new TupleTag<>();
-
- // transform both input collections to tuple collections, where the keys are country
- // codes in both cases.
- PCollection<KV<String, String>> firstInfo = streamA.apply(
- ParDo.of(new ExtractEventDataFn()));
- PCollection<KV<String, String>> secondInfo = streamB.apply(
- ParDo.of(new ExtractEventDataFn()));
-
- // country code 'key' -> CGBKR (<event info>, <country name>)
- PCollection<KV<String, CoGbkResult>> kvpCollection = KeyedPCollectionTuple
- .of(firstInfoTag, firstInfo)
- .and(secondInfoTag, secondInfo)
- .apply(CoGroupByKey.<String>create());
-
- // Process the CoGbkResult elements generated by the CoGroupByKey transform.
- // country code 'key' -> string of <event info>, <country name>
- PCollection<KV<String, String>> finalResultCollection =
- kvpCollection.apply("Process", ParDo.of(
- new DoFn<KV<String, CoGbkResult>, KV<String, String>>() {
- private static final long serialVersionUID = 0;
-
- @ProcessElement
- public void processElement(ProcessContext c) {
- KV<String, CoGbkResult> e = c.element();
- String key = e.getKey();
-
- String defaultA = "NO_VALUE";
-
- // the following getOnly is a bit tricky because it expects to have
- // EXACTLY ONE value in the corresponding stream and for the corresponding key.
-
- String lineA = e.getValue().getOnly(firstInfoTag, defaultA);
- for (String lineB : c.element().getValue().getAll(secondInfoTag)) {
- // Generate a string that combines information from both collection values
- c.output(KV.of(key, "Value A: " + lineA + " - Value B: " + lineB));
- }
- }
- }));
-
- return finalResultCollection
- .apply("Format", ParDo.of(new DoFn<KV<String, String>, String>() {
- private static final long serialVersionUID = 0;
-
- @ProcessElement
- public void processElement(ProcessContext c) {
- String result = c.element().getKey() + " -> " + c.element().getValue();
- System.out.println(result);
- c.output(result);
- }
- }));
- }
-
- static class ExtractEventDataFn extends DoFn<String, KV<String, String>> {
- private static final long serialVersionUID = 0;
-
- @ProcessElement
- public void processElement(ProcessContext c) {
- String line = c.element().toLowerCase();
- String key = line.split("\\s")[0];
- c.output(KV.of(key, line));
- }
- }
-
- private interface Options extends WindowedWordCount.StreamingWordCountOptions {
-
- }
-
- public static void main(String[] args) throws Exception {
- Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
- options.setStreaming(true);
- options.setCheckpointingInterval(1000L);
- options.setNumberOfExecutionRetries(5);
- options.setExecutionRetryDelay(3000L);
- options.setRunner(FlinkRunner.class);
-
- WindowFn<Object, ?> windowFn = FixedWindows.of(
- Duration.standardSeconds(options.getWindowSize()));
-
- Pipeline p = Pipeline.create(options);
-
- // the following two 'applys' create multiple inputs to our pipeline, one for each
- // of our two input sources.
- PCollection<String> streamA = p
- .apply("FirstStream", Read.from(new UnboundedSocketSource<>("localhost", 9999, '\n', 3)))
- .apply(Window.<String>into(windowFn)
- .triggering(AfterWatermark.pastEndOfWindow()).withAllowedLateness(Duration.ZERO)
- .discardingFiredPanes());
- PCollection<String> streamB = p
- .apply("SecondStream", Read.from(new UnboundedSocketSource<>("localhost", 9998, '\n', 3)))
- .apply(Window.<String>into(windowFn)
- .triggering(AfterWatermark.pastEndOfWindow()).withAllowedLateness(Duration.ZERO)
- .discardingFiredPanes());
-
- PCollection<String> formattedResults = joinEvents(streamA, streamB);
- formattedResults.apply(TextIO.Write.to("./outputJoin.txt"));
- p.run();
- }
-
-}
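As a rough illustration of the keying and join performed above, here is a standalone sketch that uses in-memory maps instead of PCollections and CoGroupByKey; it keeps a single line per key per stream for simplicity, and the sample lines, class name, and helper are made up:

import java.util.Arrays;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

public class JoinSketch {
  public static void main(String[] args) {
    // ExtractEventDataFn keys each lower-cased line by its first whitespace-separated token.
    Map<String, String> firstInfo = keyByFirstToken(Arrays.asList("BE protest in brussels"));
    Map<String, String> secondInfo = keyByFirstToken(Arrays.asList("BE belgium", "FR france"));

    // For each key seen in the second stream, look up the first stream's line,
    // falling back to "NO_VALUE" just like getOnly(firstInfoTag, defaultA) above.
    for (Map.Entry<String, String> e : secondInfo.entrySet()) {
      String lineA = firstInfo.getOrDefault(e.getKey(), "NO_VALUE");
      System.out.println(e.getKey() + " -> Value A: " + lineA + " - Value B: " + e.getValue());
    }
    // Prints:
    //   be -> Value A: be protest in brussels - Value B: be belgium
    //   fr -> Value A: NO_VALUE - Value B: fr france
  }

  private static Map<String, String> keyByFirstToken(List<String> lines) {
    Map<String, String> out = new LinkedHashMap<>();
    for (String raw : lines) {
      String line = raw.toLowerCase();
      out.put(line.split("\\s")[0], line);
    }
    return out;
  }
}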
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/examples/src/main/java/org/apache/beam/runners/flink/examples/streaming/WindowedWordCount.java
----------------------------------------------------------------------
diff --git a/runners/flink/examples/src/main/java/org/apache/beam/runners/flink/examples/streaming/WindowedWordCount.java b/runners/flink/examples/src/main/java/org/apache/beam/runners/flink/examples/streaming/WindowedWordCount.java
deleted file mode 100644
index 792c214..0000000
--- a/runners/flink/examples/src/main/java/org/apache/beam/runners/flink/examples/streaming/WindowedWordCount.java
+++ /dev/null
@@ -1,141 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.beam.runners.flink.examples.streaming;
-
-import java.io.IOException;
-import org.apache.beam.runners.flink.FlinkRunner;
-import org.apache.beam.runners.flink.translation.wrappers.streaming.io.UnboundedSocketSource;
-import org.apache.beam.sdk.Pipeline;
-import org.apache.beam.sdk.io.Read;
-import org.apache.beam.sdk.io.TextIO;
-import org.apache.beam.sdk.options.Default;
-import org.apache.beam.sdk.options.Description;
-import org.apache.beam.sdk.options.PipelineOptionsFactory;
-import org.apache.beam.sdk.transforms.Aggregator;
-import org.apache.beam.sdk.transforms.Count;
-import org.apache.beam.sdk.transforms.DoFn;
-import org.apache.beam.sdk.transforms.ParDo;
-import org.apache.beam.sdk.transforms.Sum;
-import org.apache.beam.sdk.transforms.windowing.AfterWatermark;
-import org.apache.beam.sdk.transforms.windowing.SlidingWindows;
-import org.apache.beam.sdk.transforms.windowing.Window;
-import org.apache.beam.sdk.values.KV;
-import org.apache.beam.sdk.values.PCollection;
-import org.joda.time.Duration;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-/**
- * To run the example, first open a socket on a terminal by executing the command:
- * <ul>
- * <li><code>nc -lk 9999</code>
- * </ul>
- * and then launch the example. Now whatever you type in the terminal is going to be
- * the input to the program.
- * */
-public class WindowedWordCount {
-
- private static final Logger LOG = LoggerFactory.getLogger(WindowedWordCount.class);
-
- static final long WINDOW_SIZE = 10; // Default window duration in seconds
- static final long SLIDE_SIZE = 5; // Default window slide in seconds
-
- static class FormatAsStringFn extends DoFn<KV<String, Long>, String> {
- @ProcessElement
- public void processElement(ProcessContext c) {
- String row = c.element().getKey() + " - " + c.element().getValue() + " @ "
- + c.timestamp().toString();
- c.output(row);
- }
- }
-
- static class ExtractWordsFn extends DoFn<String, String> {
- private final Aggregator<Long, Long> emptyLines =
- createAggregator("emptyLines", Sum.ofLongs());
-
- @ProcessElement
- public void processElement(ProcessContext c) {
- if (c.element().trim().isEmpty()) {
- emptyLines.addValue(1L);
- }
-
- // Split the line into words.
- String[] words = c.element().split("[^a-zA-Z']+");
-
- // Output each word encountered into the output PCollection.
- for (String word : words) {
- if (!word.isEmpty()) {
- c.output(word);
- }
- }
- }
- }
-
- /**
- * Pipeline options.
- */
- public interface StreamingWordCountOptions
- extends org.apache.beam.runners.flink.examples.WordCount.Options {
- @Description("Sliding window duration, in seconds")
- @Default.Long(WINDOW_SIZE)
- Long getWindowSize();
-
- void setWindowSize(Long value);
-
- @Description("Window slide, in seconds")
- @Default.Long(SLIDE_SIZE)
- Long getSlide();
-
- void setSlide(Long value);
- }
-
- public static void main(String[] args) throws IOException {
- StreamingWordCountOptions options = PipelineOptionsFactory.fromArgs(args).withValidation()
- .as(StreamingWordCountOptions.class);
- options.setStreaming(true);
- options.setWindowSize(10L);
- options.setSlide(5L);
- options.setCheckpointingInterval(1000L);
- options.setNumberOfExecutionRetries(5);
- options.setExecutionRetryDelay(3000L);
- options.setRunner(FlinkRunner.class);
-
- LOG.info("Windpwed WordCount with Sliding Windows of " + options.getWindowSize()
- + " sec. and a slide of " + options.getSlide());
-
- Pipeline pipeline = Pipeline.create(options);
-
- PCollection<String> words = pipeline
- .apply("StreamingWordCount",
- Read.from(new UnboundedSocketSource<>("localhost", 9999, '\n', 3)))
- .apply(ParDo.of(new ExtractWordsFn()))
- .apply(Window.<String>into(SlidingWindows.of(
- Duration.standardSeconds(options.getWindowSize()))
- .every(Duration.standardSeconds(options.getSlide())))
- .triggering(AfterWatermark.pastEndOfWindow()).withAllowedLateness(Duration.ZERO)
- .discardingFiredPanes());
-
- PCollection<KV<String, Long>> wordCounts =
- words.apply(Count.<String>perElement());
-
- wordCounts.apply(ParDo.of(new FormatAsStringFn()))
- .apply(TextIO.Write.to("./outputWordCount.txt"));
-
- pipeline.run();
- }
-}
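The pipeline above uses SlidingWindows of getWindowSize() seconds sliding every getSlide() seconds, so with the values set in main (10s windows, 5s slide) each element falls into two overlapping windows. A rough standalone sketch of that assignment arithmetic (plain arithmetic on seconds, not Beam's WindowFn API; it assumes the slide evenly divides the window size and ignores window offsets, and the class name and timestamp are made up):

public class SlidingWindowSketch {
  // Returns the start (in seconds) of every window of the given size and
  // slide that contains the given timestamp, latest window first.
  static long[] windowStarts(long timestampSec, long sizeSec, long slideSec) {
    long lastStart = timestampSec - (timestampSec % slideSec);
    int windowsPerElement = (int) (sizeSec / slideSec);
    long[] starts = new long[windowsPerElement];
    for (int i = 0; i < windowsPerElement; i++) {
      starts[i] = lastStart - i * slideSec;
    }
    return starts;
  }

  public static void main(String[] args) {
    // An element at t = 12s with 10s windows sliding every 5s falls into
    // [10, 20) and [5, 15).
    for (long start : windowStarts(12, 10, 5)) {
      System.out.println("[" + start + ", " + (start + 10) + ")");
    }
  }
}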
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/examples/src/main/java/org/apache/beam/runners/flink/examples/streaming/package-info.java
----------------------------------------------------------------------
diff --git a/runners/flink/examples/src/main/java/org/apache/beam/runners/flink/examples/streaming/package-info.java b/runners/flink/examples/src/main/java/org/apache/beam/runners/flink/examples/streaming/package-info.java
deleted file mode 100644
index 58f41b6..0000000
--- a/runners/flink/examples/src/main/java/org/apache/beam/runners/flink/examples/streaming/package-info.java
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * Flink Beam runner streaming examples.
- */
-package org.apache.beam.runners.flink.examples.streaming;
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/pom.xml
----------------------------------------------------------------------
diff --git a/runners/flink/pom.xml b/runners/flink/pom.xml
index a5c5ea0..351035e 100644
--- a/runners/flink/pom.xml
+++ b/runners/flink/pom.xml
@@ -26,22 +26,97 @@
<relativePath>../pom.xml</relativePath>
</parent>
- <artifactId>beam-runners-flink-parent</artifactId>
+ <artifactId>beam-runners-flink</artifactId>
<name>Apache Beam :: Runners :: Flink</name>
-
- <packaging>pom</packaging>
-
- <modules>
- <module>runner</module>
- <module>examples</module>
- </modules>
+ <packaging>jar</packaging>
<properties>
- <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
- <project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
<flink.version>1.2.0</flink.version>
</properties>
+ <profiles>
+ <profile>
+ <id>local-validates-runner-tests</id>
+ <activation><activeByDefault>false</activeByDefault></activation>
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-surefire-plugin</artifactId>
+ <executions>
+
+ <!-- This configures the inherited validates-runner-tests
+ execution to execute with a local Flink instance. -->
+ <execution>
+ <id>validates-runner-tests</id>
+ <phase>integration-test</phase>
+ <goals>
+ <goal>test</goal>
+ </goals>
+ <configuration>
+ <groups>org.apache.beam.sdk.testing.ValidatesRunner</groups>
+ <excludedGroups>
+ org.apache.beam.sdk.testing.FlattenWithHeterogeneousCoders,
+ org.apache.beam.sdk.testing.UsesSplittableParDo,
+ org.apache.beam.sdk.testing.UsesAttemptedMetrics,
+ org.apache.beam.sdk.testing.UsesCommittedMetrics,
+ org.apache.beam.sdk.testing.UsesTestStream
+ </excludedGroups>
+ <parallel>none</parallel>
+ <failIfNoTests>true</failIfNoTests>
+ <dependenciesToScan>
+ <dependency>org.apache.beam:beam-sdks-java-core</dependency>
+ </dependenciesToScan>
+ <systemPropertyVariables>
+ <beamTestPipelineOptions>
+ [
+ "--runner=TestFlinkRunner",
+ "--streaming=false"
+ ]
+ </beamTestPipelineOptions>
+ </systemPropertyVariables>
+ </configuration>
+ </execution>
+
+ <!-- This second execution runs the tests in streaming mode -->
+ <execution>
+ <id>streaming-validates-runner-tests</id>
+ <phase>integration-test</phase>
+ <goals>
+ <goal>test</goal>
+ </goals>
+ <configuration>
+ <groups>org.apache.beam.sdk.testing.ValidatesRunner</groups>
+ <excludedGroups>
+ org.apache.beam.sdk.testing.FlattenWithHeterogeneousCoders,
+ org.apache.beam.sdk.testing.UsesSetState,
+ org.apache.beam.sdk.testing.UsesMapState,
+ org.apache.beam.sdk.testing.UsesAttemptedMetrics,
+ org.apache.beam.sdk.testing.UsesCommittedMetrics,
+ org.apache.beam.sdk.testing.UsesTestStream
+ </excludedGroups>
+ <parallel>none</parallel>
+ <failIfNoTests>true</failIfNoTests>
+ <dependenciesToScan>
+ <dependency>org.apache.beam:beam-sdks-java-core</dependency>
+ </dependenciesToScan>
+ <systemPropertyVariables>
+ <beamTestPipelineOptions>
+ [
+ "--runner=TestFlinkRunner",
+ "--streaming=true"
+ ]
+ </beamTestPipelineOptions>
+ </systemPropertyVariables>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
+ </plugins>
+ </build>
+ </profile>
+ </profiles>
+
<build>
<pluginManagement>
<plugins>
@@ -89,19 +164,103 @@
<!-- Flink dependencies -->
<dependency>
<groupId>org.apache.flink</groupId>
+ <artifactId>flink-clients_2.10</artifactId>
+ <version>${flink.version}</version>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.flink</groupId>
+ <artifactId>flink-core</artifactId>
+ <version>${flink.version}</version>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.flink</groupId>
+ <artifactId>flink-java</artifactId>
+ <version>${flink.version}</version>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.flink</groupId>
+ <artifactId>flink-runtime_2.10</artifactId>
+ <version>${flink.version}</version>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.flink</groupId>
<artifactId>flink-streaming-java_2.10</artifactId>
<version>${flink.version}</version>
</dependency>
+ <!-- For testing -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-core</artifactId>
<version>${flink.version}</version>
+ <type>test-jar</type>
+ <scope>test</scope>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.flink</groupId>
+ <artifactId>flink-runtime_2.10</artifactId>
+ <version>${flink.version}</version>
+ <type>test-jar</type>
+ <scope>test</scope>
</dependency>
+ <!-- Beam -->
<dependency>
<groupId>org.apache.beam</groupId>
<artifactId>beam-sdks-java-core</artifactId>
+ <exclusions>
+ <exclusion>
+ <groupId>org.slf4j</groupId>
+ <artifactId>slf4j-jdk14</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.beam</groupId>
+ <artifactId>beam-runners-core-java</artifactId>
+ <exclusions>
+ <exclusion>
+ <groupId>org.slf4j</groupId>
+ <artifactId>slf4j-jdk14</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.beam</groupId>
+ <artifactId>beam-runners-core-construction-java</artifactId>
+ <exclusions>
+ <exclusion>
+ <groupId>org.slf4j</groupId>
+ <artifactId>slf4j-jdk14</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+
+ <dependency>
+ <groupId>com.fasterxml.jackson.core</groupId>
+ <artifactId>jackson-annotations</artifactId>
+ </dependency>
+
+ <dependency>
+ <groupId>com.fasterxml.jackson.core</groupId>
+ <artifactId>jackson-databind</artifactId>
+ </dependency>
+
+ <dependency>
+ <groupId>com.google.guava</groupId>
+ <artifactId>guava</artifactId>
+ </dependency>
+
+ <dependency>
+ <groupId>com.google.code.findbugs</groupId>
+ <artifactId>jsr305</artifactId>
</dependency>
<dependency>
@@ -113,5 +272,101 @@
<groupId>joda-time</groupId>
<artifactId>joda-time</artifactId>
</dependency>
+
+ <!--
+ Force an upgrade on the version of Apache Commons from Flink to support DEFLATE compression.
+ -->
+ <dependency>
+ <groupId>org.apache.commons</groupId>
+ <artifactId>commons-compress</artifactId>
+ <scope>runtime</scope>
+ </dependency>
+
+ <!-- Test scoped -->
+ <dependency>
+ <groupId>org.apache.commons</groupId>
+ <artifactId>commons-lang3</artifactId>
+ <scope>test</scope>
+ </dependency>
+
+ <dependency>
+ <groupId>org.hamcrest</groupId>
+ <artifactId>hamcrest-all</artifactId>
+ <scope>test</scope>
+ </dependency>
+
+ <dependency>
+ <groupId>junit</groupId>
+ <artifactId>junit</artifactId>
+ <scope>test</scope>
+ </dependency>
+
+ <dependency>
+ <groupId>org.mockito</groupId>
+ <artifactId>mockito-all</artifactId>
+ <scope>test</scope>
+ </dependency>
+
+ <!-- Depend on test jar to scan for ValidatesRunner tests -->
+ <dependency>
+ <groupId>org.apache.beam</groupId>
+ <artifactId>beam-sdks-java-core</artifactId>
+ <classifier>tests</classifier>
+ <scope>test</scope>
+ <exclusions>
+ <exclusion>
+ <groupId>org.slf4j</groupId>
+ <artifactId>slf4j-jdk14</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+
+ <dependency>
+ <groupId>com.google.apis</groupId>
+ <artifactId>google-api-services-bigquery</artifactId>
+ <scope>test</scope>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.flink</groupId>
+ <artifactId>flink-streaming-java_2.10</artifactId>
+ <version>${flink.version}</version>
+ <scope>test</scope>
+ <type>test-jar</type>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.flink</groupId>
+ <artifactId>flink-test-utils_2.10</artifactId>
+ <version>${flink.version}</version>
+ <scope>test</scope>
+ <exclusions>
+ <exclusion>
+ <artifactId>apacheds-jdbm1</artifactId>
+ <groupId>org.apache.directory.jdbm</groupId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+
+ <!-- Optional Pipeline Registration -->
+ <dependency>
+ <groupId>com.google.auto.service</groupId>
+ <artifactId>auto-service</artifactId>
+ <optional>true</optional>
+ </dependency>
+
+ <!-- transitive test dependencies from beam-sdk-java-core -->
+ <dependency>
+ <groupId>com.fasterxml.jackson.dataformat</groupId>
+ <artifactId>jackson-dataformat-yaml</artifactId>
+ <scope>test</scope>
+ </dependency>
+
+ <dependency>
+ <groupId>org.apache.beam</groupId>
+ <artifactId>beam-sdks-common-fn-api</artifactId>
+ <type>test-jar</type>
+ <scope>test</scope>
+ </dependency>
</dependencies>
</project>
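For context on how the surefire configuration above is consumed: a ValidatesRunner-style test obtains its options through TestPipeline, which reads the beamTestPipelineOptions system property set in the profile (here --runner=TestFlinkRunner --streaming=true). A minimal hedged sketch; the test class and its content are illustrative, not part of this change:

import org.apache.beam.sdk.testing.TestPipeline;
import org.apache.beam.sdk.transforms.Create;
import org.junit.Rule;
import org.junit.Test;

public class ExampleValidatesRunnerTest {
  // TestPipeline builds its options from -DbeamTestPipelineOptions,
  // which the surefire configuration above points at TestFlinkRunner.
  @Rule public final TestPipeline p = TestPipeline.create();

  @Test
  public void runsOnTestFlinkRunner() {
    p.apply(Create.of(1, 2, 3));
    p.run().waitUntilFinish();
  }
}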
[31/50] [abbrv] beam git commit: [BEAM-1994] Remove Flink examples package
Posted by dh...@apache.org.
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/src/main/java/org/apache/beam/runners/flink/FlinkPipelineExecutionEnvironment.java
----------------------------------------------------------------------
diff --git a/runners/flink/src/main/java/org/apache/beam/runners/flink/FlinkPipelineExecutionEnvironment.java b/runners/flink/src/main/java/org/apache/beam/runners/flink/FlinkPipelineExecutionEnvironment.java
new file mode 100644
index 0000000..ba00036
--- /dev/null
+++ b/runners/flink/src/main/java/org/apache/beam/runners/flink/FlinkPipelineExecutionEnvironment.java
@@ -0,0 +1,241 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.runners.flink;
+
+import static com.google.common.base.Preconditions.checkNotNull;
+
+import java.util.List;
+import org.apache.beam.sdk.Pipeline;
+import org.apache.flink.api.common.JobExecutionResult;
+import org.apache.flink.api.java.CollectionEnvironment;
+import org.apache.flink.api.java.ExecutionEnvironment;
+import org.apache.flink.runtime.state.AbstractStateBackend;
+import org.apache.flink.streaming.api.TimeCharacteristic;
+import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * The class that instantiates and manages the execution of a given job.
+ * Depending on whether the job is a Streaming or Batch processing one, it creates
+ * the adequate execution environment ({@link ExecutionEnvironment}
+ * or {@link StreamExecutionEnvironment}), the necessary {@link FlinkPipelineTranslator}
+ * ({@link FlinkBatchPipelineTranslator} or {@link FlinkStreamingPipelineTranslator}) to
+ * transform the Beam job into a Flink one, and executes the (translated) job.
+ */
+class FlinkPipelineExecutionEnvironment {
+
+ private static final Logger LOG =
+ LoggerFactory.getLogger(FlinkPipelineExecutionEnvironment.class);
+
+ private final FlinkPipelineOptions options;
+
+ /**
+ * The Flink Batch execution environment. This is instantiated to either a
+ * {@link org.apache.flink.api.java.CollectionEnvironment},
+ * a {@link org.apache.flink.api.java.LocalEnvironment} or
+ * a {@link org.apache.flink.api.java.RemoteEnvironment}, depending on the configuration
+ * options.
+ */
+ private ExecutionEnvironment flinkBatchEnv;
+
+ /**
+ * The Flink Streaming execution environment. This is instantiated to either a
+ * {@link org.apache.flink.streaming.api.environment.LocalStreamEnvironment} or
+ * a {@link org.apache.flink.streaming.api.environment.RemoteStreamEnvironment}, depending
+ * on the configuration options, and more specifically, the url of the master.
+ */
+ private StreamExecutionEnvironment flinkStreamEnv;
+
+ /**
+ * Creates a {@link FlinkPipelineExecutionEnvironment} with the user-specified parameters in the
+ * provided {@link FlinkPipelineOptions}.
+ *
+ * @param options the user-defined pipeline options.
+ * */
+ FlinkPipelineExecutionEnvironment(FlinkPipelineOptions options) {
+ this.options = checkNotNull(options);
+ }
+
+ /**
+ * Depending on whether the job is a Streaming or a Batch one, this method creates
+ * the necessary execution environment and pipeline translator, and translates
+ * the {@link org.apache.beam.sdk.values.PCollection} program into
+ * a {@link org.apache.flink.api.java.DataSet}
+ * or {@link org.apache.flink.streaming.api.datastream.DataStream} one.
+ * */
+ public void translate(FlinkRunner flinkRunner, Pipeline pipeline) {
+ this.flinkBatchEnv = null;
+ this.flinkStreamEnv = null;
+
+ PipelineTranslationOptimizer optimizer =
+ new PipelineTranslationOptimizer(TranslationMode.BATCH, options);
+
+ optimizer.translate(pipeline);
+ TranslationMode translationMode = optimizer.getTranslationMode();
+
+ FlinkPipelineTranslator translator;
+ if (translationMode == TranslationMode.STREAMING) {
+ this.flinkStreamEnv = createStreamExecutionEnvironment();
+ translator = new FlinkStreamingPipelineTranslator(flinkRunner, flinkStreamEnv, options);
+ } else {
+ this.flinkBatchEnv = createBatchExecutionEnvironment();
+ translator = new FlinkBatchPipelineTranslator(flinkBatchEnv, options);
+ }
+
+ translator.translate(pipeline);
+ }
+
+ /**
+ * Launches the program execution.
+ * */
+ public JobExecutionResult executePipeline() throws Exception {
+ final String jobName = options.getJobName();
+
+ if (flinkBatchEnv != null) {
+ return flinkBatchEnv.execute(jobName);
+ } else if (flinkStreamEnv != null) {
+ return flinkStreamEnv.execute(jobName);
+ } else {
+ throw new IllegalStateException("The Pipeline has not yet been translated.");
+ }
+ }
+
+ /**
+ * If the submitted job is a batch processing job, this method creates the adequate
+ * Flink {@link org.apache.flink.api.java.ExecutionEnvironment} depending
+ * on the user-specified options.
+ */
+ private ExecutionEnvironment createBatchExecutionEnvironment() {
+
+ LOG.info("Creating the required Batch Execution Environment.");
+
+ String masterUrl = options.getFlinkMaster();
+ ExecutionEnvironment flinkBatchEnv;
+
+ // depending on the master, create the right environment.
+ if (masterUrl.equals("[local]")) {
+ flinkBatchEnv = ExecutionEnvironment.createLocalEnvironment();
+ } else if (masterUrl.equals("[collection]")) {
+ flinkBatchEnv = new CollectionEnvironment();
+ } else if (masterUrl.equals("[auto]")) {
+ flinkBatchEnv = ExecutionEnvironment.getExecutionEnvironment();
+ } else if (masterUrl.matches(".*:\\d*")) {
+ String[] parts = masterUrl.split(":");
+ List<String> stagingFiles = options.getFilesToStage();
+ flinkBatchEnv = ExecutionEnvironment.createRemoteEnvironment(parts[0],
+ Integer.parseInt(parts[1]),
+ stagingFiles.toArray(new String[stagingFiles.size()]));
+ } else {
+ LOG.warn("Unrecognized Flink Master URL {}. Defaulting to [auto].", masterUrl);
+ flinkBatchEnv = ExecutionEnvironment.getExecutionEnvironment();
+ }
+
+ // set the correct parallelism.
+ if (options.getParallelism() != -1 && !(flinkBatchEnv instanceof CollectionEnvironment)) {
+ flinkBatchEnv.setParallelism(options.getParallelism());
+ }
+
+ // set parallelism in the options (required by some execution code)
+ options.setParallelism(flinkBatchEnv.getParallelism());
+
+ if (options.getObjectReuse()) {
+ flinkBatchEnv.getConfig().enableObjectReuse();
+ } else {
+ flinkBatchEnv.getConfig().disableObjectReuse();
+ }
+
+ return flinkBatchEnv;
+ }
+
+ /**
+ * If the submitted job is a stream processing job, this method creates the adequate
+ * Flink {@link org.apache.flink.streaming.api.environment.StreamExecutionEnvironment} depending
+ * on the user-specified options.
+ */
+ private StreamExecutionEnvironment createStreamExecutionEnvironment() {
+
+ LOG.info("Creating the required Streaming Environment.");
+
+ String masterUrl = options.getFlinkMaster();
+ StreamExecutionEnvironment flinkStreamEnv = null;
+
+ // depending on the master, create the right environment.
+ if (masterUrl.equals("[local]")) {
+ flinkStreamEnv = StreamExecutionEnvironment.createLocalEnvironment();
+ } else if (masterUrl.equals("[auto]")) {
+ flinkStreamEnv = StreamExecutionEnvironment.getExecutionEnvironment();
+ } else if (masterUrl.matches(".*:\\d*")) {
+ String[] parts = masterUrl.split(":");
+ List<String> stagingFiles = options.getFilesToStage();
+ flinkStreamEnv = StreamExecutionEnvironment.createRemoteEnvironment(parts[0],
+ Integer.parseInt(parts[1]), stagingFiles.toArray(new String[stagingFiles.size()]));
+ } else {
+ LOG.warn("Unrecognized Flink Master URL {}. Defaulting to [auto].", masterUrl);
+ flinkStreamEnv = StreamExecutionEnvironment.getExecutionEnvironment();
+ }
+
+ // set the correct parallelism.
+ if (options.getParallelism() != -1) {
+ flinkStreamEnv.setParallelism(options.getParallelism());
+ }
+
+ // set parallelism in the options (required by some execution code)
+ options.setParallelism(flinkStreamEnv.getParallelism());
+
+ if (options.getObjectReuse()) {
+ flinkStreamEnv.getConfig().enableObjectReuse();
+ } else {
+ flinkStreamEnv.getConfig().disableObjectReuse();
+ }
+
+ // default to event time
+ flinkStreamEnv.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
+
+ // for the following 2 parameters, a value of -1 means that Flink will use
+ // the default values as specified in the configuration.
+ int numRetries = options.getNumberOfExecutionRetries();
+ if (numRetries != -1) {
+ flinkStreamEnv.setNumberOfExecutionRetries(numRetries);
+ }
+ long retryDelay = options.getExecutionRetryDelay();
+ if (retryDelay != -1) {
+ flinkStreamEnv.getConfig().setExecutionRetryDelay(retryDelay);
+ }
+
+ // A value of -1 corresponds to disabled checkpointing (see CheckpointConfig in Flink).
+ // If the value is not -1, then the validity checks are applied.
+ // By default, checkpointing is disabled.
+ long checkpointInterval = options.getCheckpointingInterval();
+ if (checkpointInterval != -1) {
+ if (checkpointInterval < 1) {
+ throw new IllegalArgumentException("The checkpoint interval must be positive");
+ }
+ flinkStreamEnv.enableCheckpointing(checkpointInterval);
+ }
+
+ // State backend
+ final AbstractStateBackend stateBackend = options.getStateBackend();
+ if (stateBackend != null) {
+ flinkStreamEnv.setStateBackend(stateBackend);
+ }
+
+ return flinkStreamEnv;
+ }
+
+}
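A minimal sketch of the intended calling sequence (translate first, then execute), as driven by FlinkRunner further below; the helper method is hypothetical and assumes code living in the same org.apache.beam.runners.flink package:

// Hypothetical helper; FlinkRunner.run() below is the real caller.
static JobExecutionResult translateAndRun(
    FlinkRunner runner, Pipeline pipeline, FlinkPipelineOptions options) throws Exception {
  FlinkPipelineExecutionEnvironment env = new FlinkPipelineExecutionEnvironment(options);
  // Picks a batch or streaming execution environment based on the optimizer's TranslationMode.
  env.translate(runner, pipeline);
  // Throws IllegalStateException if translate() has not been called first.
  return env.executePipeline();
}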
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/src/main/java/org/apache/beam/runners/flink/FlinkPipelineOptions.java
----------------------------------------------------------------------
diff --git a/runners/flink/src/main/java/org/apache/beam/runners/flink/FlinkPipelineOptions.java b/runners/flink/src/main/java/org/apache/beam/runners/flink/FlinkPipelineOptions.java
new file mode 100644
index 0000000..ef9afea
--- /dev/null
+++ b/runners/flink/src/main/java/org/apache/beam/runners/flink/FlinkPipelineOptions.java
@@ -0,0 +1,101 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.runners.flink;
+
+
+import com.fasterxml.jackson.annotation.JsonIgnore;
+import java.util.List;
+import org.apache.beam.sdk.options.ApplicationNameOptions;
+import org.apache.beam.sdk.options.Default;
+import org.apache.beam.sdk.options.Description;
+import org.apache.beam.sdk.options.PipelineOptions;
+import org.apache.beam.sdk.options.StreamingOptions;
+import org.apache.flink.runtime.state.AbstractStateBackend;
+
+/**
+ * Options which can be used to configure a Flink PipelineRunner.
+ */
+public interface FlinkPipelineOptions
+ extends PipelineOptions, ApplicationNameOptions, StreamingOptions {
+
+ /**
+ * List of local files to make available to workers.
+ *
+ * <p>Jars are placed on the worker's classpath.
+ *
+ * <p>The default value is the list of jars from the main program's classpath.
+ */
+ @Description("Jar-Files to send to all workers and put on the classpath. "
+ + "The default value is all files from the classpath.")
+ @JsonIgnore
+ List<String> getFilesToStage();
+ void setFilesToStage(List<String> value);
+
+ /**
+ * The url of the Flink JobManager on which to execute pipelines. This can either be
+ * the address of a cluster JobManager, in the form "host:port" or one of the special
+ * Strings "[local]", "[collection]" or "[auto]". "[local]" will start a local Flink
+ * Cluster in the JVM, "[collection]" will execute the pipeline on Java Collections while
+ * "[auto]" will let the system decide where to execute the pipeline based on the environment.
+ */
+ @Description("Address of the Flink Master where the Pipeline should be executed. Can"
+ + " either be of the form \"host:port\" or one of the special values [local], "
+ + "[collection] or [auto].")
+ String getFlinkMaster();
+ void setFlinkMaster(String value);
+
+ @Description("The degree of parallelism to be used when distributing operations onto workers.")
+ @Default.InstanceFactory(DefaultParallelismFactory.class)
+ Integer getParallelism();
+ void setParallelism(Integer value);
+
+ @Description("The interval between consecutive checkpoints (i.e. snapshots of the current"
+ + "pipeline state used for fault tolerance).")
+ @Default.Long(-1L)
+ Long getCheckpointingInterval();
+ void setCheckpointingInterval(Long interval);
+
+ @Description("Sets the number of times that failed tasks are re-executed. "
+ + "A value of zero effectively disables fault tolerance. A value of -1 indicates "
+ + "that the system default value (as defined in the configuration) should be used.")
+ @Default.Integer(-1)
+ Integer getNumberOfExecutionRetries();
+ void setNumberOfExecutionRetries(Integer retries);
+
+ @Description("Sets the delay between executions. A value of {@code -1} "
+ + "indicates that the default value should be used.")
+ @Default.Long(-1L)
+ Long getExecutionRetryDelay();
+ void setExecutionRetryDelay(Long delay);
+
+ @Description("Sets the behavior of reusing objects.")
+ @Default.Boolean(false)
+ Boolean getObjectReuse();
+ void setObjectReuse(Boolean reuse);
+
+ /**
+ * State backend to store Beam's state during computation.
+ * Note: Only applicable when executing in streaming mode.
+ */
+ @Description("Sets the state backend to use in streaming mode. "
+ + "Otherwise the default is read from the Flink config.")
+ @JsonIgnore
+ AbstractStateBackend getStateBackend();
+ void setStateBackend(AbstractStateBackend stateBackend);
+
+}
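A short hedged sketch of building these options programmatically; the class name and the chosen values are illustrative, and any option left unset keeps the defaults declared above:

import org.apache.beam.sdk.options.PipelineOptionsFactory;

class FlinkOptionsExample {
  static FlinkPipelineOptions localOptions() {
    FlinkPipelineOptions options = PipelineOptionsFactory.as(FlinkPipelineOptions.class);
    options.setFlinkMaster("[local]");         // or "host:port", "[collection]", "[auto]"
    options.setParallelism(4);
    options.setCheckpointingInterval(60000L);  // milliseconds; -1 leaves checkpointing disabled
    options.setObjectReuse(false);
    return options;
  }
}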
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/src/main/java/org/apache/beam/runners/flink/FlinkPipelineTranslator.java
----------------------------------------------------------------------
diff --git a/runners/flink/src/main/java/org/apache/beam/runners/flink/FlinkPipelineTranslator.java b/runners/flink/src/main/java/org/apache/beam/runners/flink/FlinkPipelineTranslator.java
new file mode 100644
index 0000000..65f416d
--- /dev/null
+++ b/runners/flink/src/main/java/org/apache/beam/runners/flink/FlinkPipelineTranslator.java
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.runners.flink;
+
+import org.apache.beam.sdk.Pipeline;
+
+/**
+ * The role of this class is to translate the Beam operators to
+ * their Flink counterparts. If we have a streaming job, this is instantiated as a
+ * {@link FlinkStreamingPipelineTranslator}. Otherwise, i.e. for a batch job,
+ * a {@link FlinkBatchPipelineTranslator} is created. Correspondingly, the
+ * {@link org.apache.beam.sdk.values.PCollection}-based user-provided job is translated into
+ * a {@link org.apache.flink.streaming.api.datastream.DataStream} (for streaming) or a
+ * {@link org.apache.flink.api.java.DataSet} (for batch) one.
+ */
+abstract class FlinkPipelineTranslator extends Pipeline.PipelineVisitor.Defaults {
+
+ /**
+ * Translates the pipeline by passing this class as a visitor.
+ * @param pipeline The pipeline to be translated
+ */
+ public void translate(Pipeline pipeline) {
+ pipeline.traverseTopologically(this);
+ }
+
+ /**
+ * Utility formatting method.
+ * @param n number of spaces to generate
+ * @return String with "|" followed by n spaces
+ */
+ protected static String genSpaces(int n) {
+ StringBuilder builder = new StringBuilder();
+ for (int i = 0; i < n; i++) {
+ builder.append("| ");
+ }
+ return builder.toString();
+ }
+}
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/src/main/java/org/apache/beam/runners/flink/FlinkRunner.java
----------------------------------------------------------------------
diff --git a/runners/flink/src/main/java/org/apache/beam/runners/flink/FlinkRunner.java b/runners/flink/src/main/java/org/apache/beam/runners/flink/FlinkRunner.java
new file mode 100644
index 0000000..096f030
--- /dev/null
+++ b/runners/flink/src/main/java/org/apache/beam/runners/flink/FlinkRunner.java
@@ -0,0 +1,232 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.runners.flink;
+
+import com.google.common.base.Joiner;
+import java.io.File;
+import java.net.URISyntaxException;
+import java.net.URL;
+import java.net.URLClassLoader;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.SortedSet;
+import java.util.TreeSet;
+import org.apache.beam.sdk.Pipeline;
+import org.apache.beam.sdk.PipelineResult;
+import org.apache.beam.sdk.options.PipelineOptions;
+import org.apache.beam.sdk.options.PipelineOptionsValidator;
+import org.apache.beam.sdk.runners.PipelineRunner;
+import org.apache.beam.sdk.runners.TransformHierarchy;
+import org.apache.beam.sdk.transforms.PTransform;
+import org.apache.beam.sdk.transforms.View;
+import org.apache.beam.sdk.values.PValue;
+import org.apache.flink.api.common.JobExecutionResult;
+import org.apache.flink.client.program.DetachedEnvironment;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * A {@link PipelineRunner} that executes the operations in the
+ * pipeline by first translating them to a Flink Plan and then executing them either locally
+ * or on a Flink cluster, depending on the configuration.
+ */
+public class FlinkRunner extends PipelineRunner<PipelineResult> {
+
+ private static final Logger LOG = LoggerFactory.getLogger(FlinkRunner.class);
+
+ /**
+ * Provided options.
+ */
+ private final FlinkPipelineOptions options;
+
+ /**
+ * Construct a runner from the provided options.
+ *
+ * @param options Properties which configure the runner.
+ * @return The newly created runner.
+ */
+ public static FlinkRunner fromOptions(PipelineOptions options) {
+ FlinkPipelineOptions flinkOptions =
+ PipelineOptionsValidator.validate(FlinkPipelineOptions.class, options);
+ ArrayList<String> missing = new ArrayList<>();
+
+ if (flinkOptions.getAppName() == null) {
+ missing.add("appName");
+ }
+ if (missing.size() > 0) {
+ throw new IllegalArgumentException(
+ "Missing required values: " + Joiner.on(',').join(missing));
+ }
+
+ if (flinkOptions.getFilesToStage() == null) {
+ flinkOptions.setFilesToStage(detectClassPathResourcesToStage(
+ FlinkRunner.class.getClassLoader()));
+ LOG.info("PipelineOptions.filesToStage was not specified. "
+ + "Defaulting to files from the classpath: will stage {} files. "
+ + "Enable logging at DEBUG level to see which files will be staged.",
+ flinkOptions.getFilesToStage().size());
+ LOG.debug("Classpath elements: {}", flinkOptions.getFilesToStage());
+ }
+
+ // Set Flink Master to [auto] if no option was specified.
+ if (flinkOptions.getFlinkMaster() == null) {
+ flinkOptions.setFlinkMaster("[auto]");
+ }
+
+ return new FlinkRunner(flinkOptions);
+ }
+
+ private FlinkRunner(FlinkPipelineOptions options) {
+ this.options = options;
+ this.ptransformViewsWithNonDeterministicKeyCoders = new HashSet<>();
+ }
+
+ @Override
+ public PipelineResult run(Pipeline pipeline) {
+ logWarningIfPCollectionViewHasNonDeterministicKeyCoder(pipeline);
+
+ LOG.info("Executing pipeline using FlinkRunner.");
+
+ FlinkPipelineExecutionEnvironment env = new FlinkPipelineExecutionEnvironment(options);
+
+ LOG.info("Translating pipeline to Flink program.");
+ env.translate(this, pipeline);
+
+ JobExecutionResult result;
+ try {
+ LOG.info("Starting execution of Flink program.");
+ result = env.executePipeline();
+ } catch (Exception e) {
+ LOG.error("Pipeline execution failed", e);
+ throw new RuntimeException("Pipeline execution failed", e);
+ }
+
+ if (result instanceof DetachedEnvironment.DetachedJobExecutionResult) {
+ LOG.info("Pipeline submitted in Detached mode");
+ return new FlinkDetachedRunnerResult();
+ } else {
+ LOG.info("Execution finished in {} msecs", result.getNetRuntime());
+ Map<String, Object> accumulators = result.getAllAccumulatorResults();
+ if (accumulators != null && !accumulators.isEmpty()) {
+ LOG.info("Final aggregator values:");
+
+ for (Map.Entry<String, Object> entry : result.getAllAccumulatorResults().entrySet()) {
+ LOG.info("{} : {}", entry.getKey(), entry.getValue());
+ }
+ }
+
+ return new FlinkRunnerResult(accumulators, result.getNetRuntime());
+ }
+ }
+
+ /**
+ * For testing.
+ */
+ public FlinkPipelineOptions getPipelineOptions() {
+ return options;
+ }
+
+ @Override
+ public String toString() {
+ return "FlinkRunner#" + hashCode();
+ }
+
+ /**
+ * Attempts to detect all the resources the class loader has access to. This does not recurse
+ * to class loader parents, stopping it from pulling in resources from the system class loader.
+ *
+ * @param classLoader The URLClassLoader to use to detect resources to stage.
+ * @return A list of absolute paths to the resources the class loader uses.
+ * @throws IllegalArgumentException If either the class loader is not a URLClassLoader or one
+ * of the resources the class loader exposes is not a file resource.
+ */
+ protected static List<String> detectClassPathResourcesToStage(
+ ClassLoader classLoader) {
+ if (!(classLoader instanceof URLClassLoader)) {
+ String message = String.format("Unable to use ClassLoader to detect classpath elements. "
+ + "Current ClassLoader is %s, only URLClassLoaders are supported.", classLoader);
+ LOG.error(message);
+ throw new IllegalArgumentException(message);
+ }
+
+ List<String> files = new ArrayList<>();
+ for (URL url : ((URLClassLoader) classLoader).getURLs()) {
+ try {
+ files.add(new File(url.toURI()).getAbsolutePath());
+ } catch (IllegalArgumentException | URISyntaxException e) {
+ String message = String.format("Unable to convert url (%s) to file.", url);
+ LOG.error(message);
+ throw new IllegalArgumentException(message, e);
+ }
+ }
+ return files;
+ }
+
+ /** A set of {@link View}s with non-deterministic key coders. */
+ Set<PTransform<?, ?>> ptransformViewsWithNonDeterministicKeyCoders;
+
+ /**
+ * Records that the {@link PTransform} requires a deterministic key coder.
+ */
+ void recordViewUsesNonDeterministicKeyCoder(PTransform<?, ?> ptransform) {
+ ptransformViewsWithNonDeterministicKeyCoders.add(ptransform);
+ }
+
+ /** Outputs a warning about PCollection views without deterministic key coders. */
+ private void logWarningIfPCollectionViewHasNonDeterministicKeyCoder(Pipeline pipeline) {
+ // We need to wait until this point to determine the names of the transforms, since only
+ // at this time do we know the transform hierarchy; otherwise we could
+ // have just recorded the full names during apply time.
+ if (!ptransformViewsWithNonDeterministicKeyCoders.isEmpty()) {
+ final SortedSet<String> ptransformViewNamesWithNonDeterministicKeyCoders = new TreeSet<>();
+ pipeline.traverseTopologically(new Pipeline.PipelineVisitor() {
+ @Override
+ public void visitValue(PValue value, TransformHierarchy.Node producer) {
+ }
+
+ @Override
+ public void visitPrimitiveTransform(TransformHierarchy.Node node) {
+ if (ptransformViewsWithNonDeterministicKeyCoders.contains(node.getTransform())) {
+ ptransformViewNamesWithNonDeterministicKeyCoders.add(node.getFullName());
+ }
+ }
+
+ @Override
+ public CompositeBehavior enterCompositeTransform(TransformHierarchy.Node node) {
+ if (ptransformViewsWithNonDeterministicKeyCoders.contains(node.getTransform())) {
+ ptransformViewNamesWithNonDeterministicKeyCoders.add(node.getFullName());
+ }
+ return CompositeBehavior.ENTER_TRANSFORM;
+ }
+
+ @Override
+ public void leaveCompositeTransform(TransformHierarchy.Node node) {
+ }
+ });
+
+ LOG.warn("Unable to use indexed implementation for View.AsMap and View.AsMultimap for {} "
+ + "because the key coder is not deterministic. Falling back to singleton implementation "
+ + "which may cause memory and/or performance problems. Future major versions of "
+ + "the Flink runner will require deterministic key coders.",
+ ptransformViewNamesWithNonDeterministicKeyCoders);
+ }
+ }
+}
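A minimal end-to-end sketch of selecting this runner explicitly; the pipeline content and class name are illustrative:

import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.transforms.Create;

public class FlinkRunnerExample {
  public static void main(String[] args) {
    FlinkPipelineOptions options = PipelineOptionsFactory.as(FlinkPipelineOptions.class);
    options.setRunner(FlinkRunner.class);  // setRunner is inherited from PipelineOptions
    options.setFlinkMaster("[local]");     // execute against an embedded local Flink cluster

    Pipeline p = Pipeline.create(options);
    p.apply(Create.of("a", "b", "c"));
    p.run();                               // translated and executed by FlinkRunner.run()
  }
}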
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/src/main/java/org/apache/beam/runners/flink/FlinkRunnerRegistrar.java
----------------------------------------------------------------------
diff --git a/runners/flink/src/main/java/org/apache/beam/runners/flink/FlinkRunnerRegistrar.java b/runners/flink/src/main/java/org/apache/beam/runners/flink/FlinkRunnerRegistrar.java
new file mode 100644
index 0000000..681459a
--- /dev/null
+++ b/runners/flink/src/main/java/org/apache/beam/runners/flink/FlinkRunnerRegistrar.java
@@ -0,0 +1,62 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.beam.runners.flink;
+
+import com.google.auto.service.AutoService;
+import com.google.common.collect.ImmutableList;
+import org.apache.beam.sdk.options.PipelineOptions;
+import org.apache.beam.sdk.options.PipelineOptionsRegistrar;
+import org.apache.beam.sdk.runners.PipelineRunner;
+import org.apache.beam.sdk.runners.PipelineRunnerRegistrar;
+
+
+/**
+ * AutoService registrar - will register FlinkRunner and FlinkPipelineOptions
+ * as possible pipeline runner services.
+ *
+ * <p>It ends up in META-INF/services and gets picked up by Beam.
+ *
+ */
+public class FlinkRunnerRegistrar {
+ private FlinkRunnerRegistrar() { }
+
+ /**
+ * Pipeline runner registrar.
+ */
+ @AutoService(PipelineRunnerRegistrar.class)
+ public static class Runner implements PipelineRunnerRegistrar {
+ @Override
+ public Iterable<Class<? extends PipelineRunner<?>>> getPipelineRunners() {
+ return ImmutableList.<Class<? extends PipelineRunner<?>>>of(
+ FlinkRunner.class,
+ TestFlinkRunner.class);
+ }
+ }
+
+ /**
+ * Pipeline options registrar.
+ */
+ @AutoService(PipelineOptionsRegistrar.class)
+ public static class Options implements PipelineOptionsRegistrar {
+ @Override
+ public Iterable<Class<? extends PipelineOptions>> getPipelineOptions() {
+ return ImmutableList.<Class<? extends PipelineOptions>>of(FlinkPipelineOptions.class);
+ }
+ }
+}
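Because of the AutoService registration above, the runner and its options can also be resolved by name from command-line arguments; a hedged sketch, with the argument values purely illustrative:

import org.apache.beam.sdk.options.PipelineOptionsFactory;

class RegistrarExample {
  // "--runner=FlinkRunner" resolves through the registrar without referencing the class directly.
  static FlinkPipelineOptions fromArgs(String[] args) {
    // e.g. args = {"--runner=FlinkRunner", "--flinkMaster=[auto]"}
    return PipelineOptionsFactory.fromArgs(args).withValidation().as(FlinkPipelineOptions.class);
  }
}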
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/src/main/java/org/apache/beam/runners/flink/FlinkRunnerResult.java
----------------------------------------------------------------------
diff --git a/runners/flink/src/main/java/org/apache/beam/runners/flink/FlinkRunnerResult.java b/runners/flink/src/main/java/org/apache/beam/runners/flink/FlinkRunnerResult.java
new file mode 100644
index 0000000..0682b56
--- /dev/null
+++ b/runners/flink/src/main/java/org/apache/beam/runners/flink/FlinkRunnerResult.java
@@ -0,0 +1,98 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.runners.flink;
+
+import java.io.IOException;
+import java.util.Collections;
+import java.util.Map;
+import org.apache.beam.sdk.AggregatorRetrievalException;
+import org.apache.beam.sdk.AggregatorValues;
+import org.apache.beam.sdk.PipelineResult;
+import org.apache.beam.sdk.metrics.MetricResults;
+import org.apache.beam.sdk.transforms.Aggregator;
+import org.joda.time.Duration;
+
+/**
+ * Result of executing a {@link org.apache.beam.sdk.Pipeline} with Flink. This
+ * has methods to query the job runtime and the final values of
+ * {@link org.apache.beam.sdk.transforms.Aggregator}s.
+ */
+public class FlinkRunnerResult implements PipelineResult {
+
+ private final Map<String, Object> aggregators;
+
+ private final long runtime;
+
+ FlinkRunnerResult(Map<String, Object> aggregators, long runtime) {
+ this.aggregators = (aggregators == null || aggregators.isEmpty())
+ ? Collections.<String, Object>emptyMap()
+ : Collections.unmodifiableMap(aggregators);
+ this.runtime = runtime;
+ }
+
+ @Override
+ public State getState() {
+ return State.DONE;
+ }
+
+ @Override
+ public <T> AggregatorValues<T> getAggregatorValues(final Aggregator<?, T> aggregator)
+ throws AggregatorRetrievalException {
+ // TODO provide a list of all accumulator step values
+ Object value = aggregators.get(aggregator.getName());
+ if (value != null) {
+ return new AggregatorValues<T>() {
+ @Override
+ public Map<String, T> getValuesAtSteps() {
+ return (Map<String, T>) aggregators;
+ }
+ };
+ } else {
+ throw new AggregatorRetrievalException("Accumulator results not found.",
+ new RuntimeException("Accumulator does not exist."));
+ }
+ }
+
+ @Override
+ public String toString() {
+ return "FlinkRunnerResult{"
+ + "aggregators=" + aggregators
+ + ", runtime=" + runtime
+ + '}';
+ }
+
+ @Override
+ public State cancel() throws IOException {
+ throw new UnsupportedOperationException("FlinkRunnerResult does not support cancel.");
+ }
+
+ @Override
+ public State waitUntilFinish() {
+ return State.DONE;
+ }
+
+ @Override
+ public State waitUntilFinish(Duration duration) {
+ return State.DONE;
+ }
+
+ @Override
+ public MetricResults metrics() {
+ throw new UnsupportedOperationException("The FlinkRunner does not currently support metrics.");
+ }
+}
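A hedged sketch of reading an aggregator value back from the result; the 'recordCount' aggregator is illustrative and would be created inside a DoFn of the executed pipeline:

import java.util.Map;
import org.apache.beam.sdk.AggregatorRetrievalException;
import org.apache.beam.sdk.AggregatorValues;
import org.apache.beam.sdk.transforms.Aggregator;

class AggregatorReadExample {
  // Assumes 'result' is the FlinkRunnerResult returned from a completed run.
  static Map<String, Long> finalCounts(
      FlinkRunnerResult result, Aggregator<Long, Long> recordCount)
      throws AggregatorRetrievalException {
    AggregatorValues<Long> values = result.getAggregatorValues(recordCount);
    return values.getValuesAtSteps();  // currently a single flat map of final accumulator values
  }
}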
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/src/main/java/org/apache/beam/runners/flink/FlinkStreamingPipelineTranslator.java
----------------------------------------------------------------------
diff --git a/runners/flink/src/main/java/org/apache/beam/runners/flink/FlinkStreamingPipelineTranslator.java b/runners/flink/src/main/java/org/apache/beam/runners/flink/FlinkStreamingPipelineTranslator.java
new file mode 100644
index 0000000..0459ef7
--- /dev/null
+++ b/runners/flink/src/main/java/org/apache/beam/runners/flink/FlinkStreamingPipelineTranslator.java
@@ -0,0 +1,276 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.runners.flink;
+
+import com.google.common.collect.ImmutableList;
+import java.util.List;
+import java.util.Map;
+import org.apache.beam.runners.core.SplittableParDo;
+import org.apache.beam.runners.core.construction.PTransformMatchers;
+import org.apache.beam.runners.core.construction.PTransformReplacements;
+import org.apache.beam.runners.core.construction.ReplacementOutputs;
+import org.apache.beam.runners.core.construction.SingleInputOutputOverrideFactory;
+import org.apache.beam.sdk.Pipeline;
+import org.apache.beam.sdk.options.PipelineOptions;
+import org.apache.beam.sdk.runners.PTransformOverride;
+import org.apache.beam.sdk.runners.PTransformOverrideFactory;
+import org.apache.beam.sdk.runners.TransformHierarchy;
+import org.apache.beam.sdk.transforms.AppliedPTransform;
+import org.apache.beam.sdk.transforms.Combine;
+import org.apache.beam.sdk.transforms.PTransform;
+import org.apache.beam.sdk.transforms.ParDo.MultiOutput;
+import org.apache.beam.sdk.transforms.View;
+import org.apache.beam.sdk.util.InstanceBuilder;
+import org.apache.beam.sdk.values.PCollection;
+import org.apache.beam.sdk.values.PCollectionTuple;
+import org.apache.beam.sdk.values.PValue;
+import org.apache.beam.sdk.values.TupleTag;
+import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * This is a {@link FlinkPipelineTranslator} for streaming jobs. Its role is to translate
+ * the user-provided {@link org.apache.beam.sdk.values.PCollection}-based job into a
+ * {@link org.apache.flink.streaming.api.datastream.DataStream} one.
+ *
+ */
+class FlinkStreamingPipelineTranslator extends FlinkPipelineTranslator {
+
+ private static final Logger LOG = LoggerFactory.getLogger(FlinkStreamingPipelineTranslator.class);
+
+ /** The necessary context in the case of a streaming job. */
+ private final FlinkStreamingTranslationContext streamingContext;
+
+ private int depth = 0;
+
+ private FlinkRunner flinkRunner;
+
+ public FlinkStreamingPipelineTranslator(
+ FlinkRunner flinkRunner,
+ StreamExecutionEnvironment env,
+ PipelineOptions options) {
+ this.streamingContext = new FlinkStreamingTranslationContext(env, options);
+ this.flinkRunner = flinkRunner;
+ }
+
+ @Override
+ public void translate(Pipeline pipeline) {
+ List<PTransformOverride> transformOverrides =
+ ImmutableList.<PTransformOverride>builder()
+ .add(
+ PTransformOverride.of(
+ PTransformMatchers.splittableParDoMulti(),
+ new SplittableParDoOverrideFactory()))
+ .add(
+ PTransformOverride.of(
+ PTransformMatchers.classEqualTo(View.AsIterable.class),
+ new ReflectiveOneToOneOverrideFactory(
+ FlinkStreamingViewOverrides.StreamingViewAsIterable.class, flinkRunner)))
+ .add(
+ PTransformOverride.of(
+ PTransformMatchers.classEqualTo(View.AsList.class),
+ new ReflectiveOneToOneOverrideFactory(
+ FlinkStreamingViewOverrides.StreamingViewAsList.class, flinkRunner)))
+ .add(
+ PTransformOverride.of(
+ PTransformMatchers.classEqualTo(View.AsMap.class),
+ new ReflectiveOneToOneOverrideFactory(
+ FlinkStreamingViewOverrides.StreamingViewAsMap.class, flinkRunner)))
+ .add(
+ PTransformOverride.of(
+ PTransformMatchers.classEqualTo(View.AsMultimap.class),
+ new ReflectiveOneToOneOverrideFactory(
+ FlinkStreamingViewOverrides.StreamingViewAsMultimap.class, flinkRunner)))
+ .add(
+ PTransformOverride.of(
+ PTransformMatchers.classEqualTo(View.AsSingleton.class),
+ new ReflectiveOneToOneOverrideFactory(
+ FlinkStreamingViewOverrides.StreamingViewAsSingleton.class, flinkRunner)))
+ // this has to be last since the ViewAsSingleton override
+ // can expand to a Combine.GloballyAsSingletonView
+ .add(
+ PTransformOverride.of(
+ PTransformMatchers.classEqualTo(Combine.GloballyAsSingletonView.class),
+ new ReflectiveOneToOneOverrideFactory(
+ FlinkStreamingViewOverrides.StreamingCombineGloballyAsSingletonView.class,
+ flinkRunner)))
+ .build();
+
+ pipeline.replaceAll(transformOverrides);
+ super.translate(pipeline);
+ }
+
+ // --------------------------------------------------------------------------------------------
+ // Pipeline Visitor Methods
+ // --------------------------------------------------------------------------------------------
+
+ @Override
+ public CompositeBehavior enterCompositeTransform(TransformHierarchy.Node node) {
+ LOG.info("{} enterCompositeTransform- {}", genSpaces(this.depth), node.getFullName());
+ this.depth++;
+
+ PTransform<?, ?> transform = node.getTransform();
+ if (transform != null) {
+ StreamTransformTranslator<?> translator =
+ FlinkStreamingTransformTranslators.getTranslator(transform);
+
+ if (translator != null && applyCanTranslate(transform, node, translator)) {
+ applyStreamingTransform(transform, node, translator);
+ LOG.info("{} translated- {}", genSpaces(this.depth), node.getFullName());
+ return CompositeBehavior.DO_NOT_ENTER_TRANSFORM;
+ }
+ }
+ return CompositeBehavior.ENTER_TRANSFORM;
+ }
+
+ @Override
+ public void leaveCompositeTransform(TransformHierarchy.Node node) {
+ this.depth--;
+ LOG.info("{} leaveCompositeTransform- {}", genSpaces(this.depth), node.getFullName());
+ }
+
+ @Override
+ public void visitPrimitiveTransform(TransformHierarchy.Node node) {
+ LOG.info("{} visitPrimitiveTransform- {}", genSpaces(this.depth), node.getFullName());
+ // get the transformation corresponding to the node we are
+ // currently visiting and translate it into its Flink alternative.
+
+ PTransform<?, ?> transform = node.getTransform();
+ StreamTransformTranslator<?> translator =
+ FlinkStreamingTransformTranslators.getTranslator(transform);
+
+ if (translator == null || !applyCanTranslate(transform, node, translator)) {
+ LOG.info(node.getTransform().getClass().toString());
+ throw new UnsupportedOperationException(
+ "The transform " + transform + " is currently not supported.");
+ }
+ applyStreamingTransform(transform, node, translator);
+ }
+
+ @Override
+ public void visitValue(PValue value, TransformHierarchy.Node producer) {
+ // do nothing here
+ }
+
+ private <T extends PTransform<?, ?>> void applyStreamingTransform(
+ PTransform<?, ?> transform,
+ TransformHierarchy.Node node,
+ StreamTransformTranslator<?> translator) {
+
+ @SuppressWarnings("unchecked")
+ T typedTransform = (T) transform;
+
+ @SuppressWarnings("unchecked")
+ StreamTransformTranslator<T> typedTranslator = (StreamTransformTranslator<T>) translator;
+
+ // create the applied PTransform on the streamingContext
+ streamingContext.setCurrentTransform(node.toAppliedPTransform());
+ typedTranslator.translateNode(typedTransform, streamingContext);
+ }
+
+ private <T extends PTransform<?, ?>> boolean applyCanTranslate(
+ PTransform<?, ?> transform,
+ TransformHierarchy.Node node,
+ StreamTransformTranslator<?> translator) {
+
+ @SuppressWarnings("unchecked")
+ T typedTransform = (T) transform;
+
+ @SuppressWarnings("unchecked")
+ StreamTransformTranslator<T> typedTranslator = (StreamTransformTranslator<T>) translator;
+
+ streamingContext.setCurrentTransform(node.toAppliedPTransform());
+
+ return typedTranslator.canTranslate(typedTransform, streamingContext);
+ }
+
+ /**
+ * The interface that every Flink translator of a Beam operator should implement.
+ * This interface is for <b>streaming</b> jobs. For examples of such translators see
+ * {@link FlinkStreamingTransformTranslators}.
+ */
+ abstract static class StreamTransformTranslator<T extends PTransform> {
+
+ /**
+ * Translate the given transform.
+ */
+ abstract void translateNode(T transform, FlinkStreamingTranslationContext context);
+
+ /**
+ * Returns true iff this translator can translate the given transform.
+ */
+ boolean canTranslate(T transform, FlinkStreamingTranslationContext context) {
+ return true;
+ }
+ }
+
+ private static class ReflectiveOneToOneOverrideFactory<
+ InputT, OutputT, TransformT extends PTransform<PCollection<InputT>, PCollection<OutputT>>>
+ extends SingleInputOutputOverrideFactory<
+ PCollection<InputT>, PCollection<OutputT>, TransformT> {
+ private final Class<PTransform<PCollection<InputT>, PCollection<OutputT>>> replacement;
+ private final FlinkRunner runner;
+
+ private ReflectiveOneToOneOverrideFactory(
+ Class<PTransform<PCollection<InputT>, PCollection<OutputT>>> replacement,
+ FlinkRunner runner) {
+ this.replacement = replacement;
+ this.runner = runner;
+ }
+
+ @Override
+ public PTransformReplacement<PCollection<InputT>, PCollection<OutputT>> getReplacementTransform(
+ AppliedPTransform<PCollection<InputT>, PCollection<OutputT>, TransformT> transform) {
+ return PTransformReplacement.of(
+ PTransformReplacements.getSingletonMainInput(transform),
+ InstanceBuilder.ofType(replacement)
+ .withArg(FlinkRunner.class, runner)
+ .withArg(
+ (Class<PTransform<PCollection<InputT>, PCollection<OutputT>>>)
+ transform.getTransform().getClass(),
+ transform.getTransform())
+ .build());
+ }
+ }
+
+ /**
+ * A {@link PTransformOverrideFactory} that overrides a <a
+ * href="https://s.apache.org/splittable-do-fn">Splittable DoFn</a> with {@link SplittableParDo}.
+ */
+ static class SplittableParDoOverrideFactory<InputT, OutputT>
+ implements PTransformOverrideFactory<
+ PCollection<InputT>, PCollectionTuple, MultiOutput<InputT, OutputT>> {
+ @Override
+ public PTransformReplacement<PCollection<InputT>, PCollectionTuple>
+ getReplacementTransform(
+ AppliedPTransform<
+ PCollection<InputT>, PCollectionTuple, MultiOutput<InputT, OutputT>>
+ transform) {
+ return PTransformReplacement.of(
+ PTransformReplacements.getSingletonMainInput(transform),
+ new SplittableParDo<>(transform.getTransform()));
+ }
+
+ @Override
+ public Map<PValue, ReplacementOutput> mapOutputs(
+ Map<TupleTag<?>, PValue> outputs, PCollectionTuple newOutput) {
+ return ReplacementOutputs.tagged(outputs, newOutput);
+ }
+ }
+}
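For orientation, a translator dispatched by the visitor above would extend the nested StreamTransformTranslator base class; a minimal hypothetical sketch, assuming code in the same package (real implementations live in FlinkStreamingTransformTranslators):

// Hypothetical translator; the transform type and the body are illustrative.
class MyTransformTranslator
    extends FlinkStreamingPipelineTranslator.StreamTransformTranslator<PTransform<?, ?>> {

  @Override
  void translateNode(PTransform<?, ?> transform, FlinkStreamingTranslationContext context) {
    // Look up the transform's inputs/outputs via the context and emit the
    // corresponding Flink DataStream operations for them.
  }

  @Override
  boolean canTranslate(PTransform<?, ?> transform, FlinkStreamingTranslationContext context) {
    return true;  // default behavior; override only when translation is conditional
  }
}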
[18/50] [abbrv] beam git commit: Refactor batch loads, and add support for windowed writes.
Posted by dh...@apache.org.
Refactor batch loads, and add support for windowed writes.
Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/760a9458
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/760a9458
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/760a9458
Branch: refs/heads/DSL_SQL
Commit: 760a94580d7561bb63a3eea67d8e5443c233a541
Parents: 8581caf
Author: Reuven Lax <re...@google.com>
Authored: Fri Mar 31 11:19:25 2017 -0700
Committer: Eugene Kirpichov <ki...@google.com>
Committed: Tue Apr 18 21:12:50 2017 -0700
----------------------------------------------------------------------
.../apache/beam/sdk/util/IOChannelUtils.java | 9 +
.../beam/sdk/io/gcp/bigquery/BatchLoads.java | 49 +-
.../beam/sdk/io/gcp/bigquery/ShardedKey.java | 24 +-
.../sdk/io/gcp/bigquery/TableDestination.java | 10 +-
.../io/gcp/bigquery/WriteBundlesToFiles.java | 54 +-
.../sdk/io/gcp/bigquery/WritePartition.java | 28 +-
.../beam/sdk/io/gcp/bigquery/WriteRename.java | 13 +-
.../beam/sdk/io/gcp/bigquery/WriteTables.java | 14 +-
.../sdk/io/gcp/bigquery/BigQueryIOTest.java | 838 +++++--------------
.../io/gcp/bigquery/FakeBigQueryServices.java | 96 +++
.../sdk/io/gcp/bigquery/FakeDatasetService.java | 172 ++++
.../sdk/io/gcp/bigquery/FakeJobService.java | 273 ++++++
.../sdk/io/gcp/bigquery/TableContainer.java | 36 +
13 files changed, 948 insertions(+), 668 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/beam/blob/760a9458/sdks/java/core/src/main/java/org/apache/beam/sdk/util/IOChannelUtils.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/util/IOChannelUtils.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/util/IOChannelUtils.java
index ea53527..9d3dd23 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/util/IOChannelUtils.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/util/IOChannelUtils.java
@@ -28,6 +28,7 @@ import com.google.common.collect.Sets;
import com.google.common.collect.TreeMultimap;
import java.io.FileNotFoundException;
import java.io.IOException;
+import java.nio.channels.ReadableByteChannel;
import java.nio.channels.WritableByteChannel;
import java.text.DecimalFormat;
import java.util.Arrays;
@@ -181,6 +182,14 @@ public class IOChannelUtils {
}
/**
+ * Creates a read channel for the given filename.
+ */
+ public static ReadableByteChannel open(String filename)
+ throws IOException {
+ return getFactory(filename).open(filename);
+ }
+
+ /**
* Creates a write channel for the given file components.
*
* <p>If numShards is specified, then a ShardingWritableByteChannel is
http://git-wip-us.apache.org/repos/asf/beam/blob/760a9458/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BatchLoads.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BatchLoads.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BatchLoads.java
index 8594211..5e80fae 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BatchLoads.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BatchLoads.java
@@ -26,6 +26,10 @@ import java.util.List;
import java.util.Map;
import javax.annotation.Nullable;
import org.apache.beam.sdk.Pipeline;
+import org.apache.beam.sdk.coders.Coder;
+import org.apache.beam.sdk.coders.KvCoder;
+import org.apache.beam.sdk.coders.ListCoder;
+import org.apache.beam.sdk.coders.StringUtf8Coder;
import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.CreateDisposition;
import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.WriteDisposition;
import org.apache.beam.sdk.options.BigQueryOptions;
@@ -61,16 +65,17 @@ class BatchLoads<T> extends
private static class ConstantSchemaFunction implements
SerializableFunction<TableDestination, TableSchema> {
private final @Nullable
- String jsonSchema;
+ ValueProvider<String> jsonSchema;
- ConstantSchemaFunction(TableSchema schema) {
- this.jsonSchema = BigQueryHelpers.toJsonString(schema);
+ ConstantSchemaFunction(ValueProvider<String> jsonSchema) {
+ this.jsonSchema = jsonSchema;
}
@Override
@Nullable
public TableSchema apply(TableDestination table) {
- return BigQueryHelpers.fromJsonString(jsonSchema, TableSchema.class);
+ return BigQueryHelpers.fromJsonString(
+ jsonSchema == null ? null : jsonSchema.get(), TableSchema.class);
}
}
@@ -114,7 +119,7 @@ class BatchLoads<T> extends
.apply(View.<String>asSingleton());
PCollection<KV<TableDestination, TableRow>> inputInGlobalWindow =
- input.apply(
+ input.apply("rewindowIntoGlobal",
Window.<KV<TableDestination, TableRow>>into(new GlobalWindows())
.triggering(DefaultTrigger.of())
.discardingFiredPanes());
@@ -122,12 +127,13 @@ class BatchLoads<T> extends
// PCollection of filename, file byte size, and table destination.
PCollection<WriteBundlesToFiles.Result> results = inputInGlobalWindow
.apply("WriteBundlesToFiles",
- ParDo.of(new WriteBundlesToFiles(tempFilePrefix)));
+ ParDo.of(new WriteBundlesToFiles(tempFilePrefix)))
+ .setCoder(WriteBundlesToFiles.ResultCoder.of());
- TupleTag<KV<KV<TableDestination, Integer>, List<String>>> multiPartitionsTag =
- new TupleTag<KV<KV<TableDestination, Integer>, List<String>>>("multiPartitionsTag") {};
- TupleTag<KV<KV<TableDestination, Integer>, List<String>>> singlePartitionTag =
- new TupleTag<KV<KV<TableDestination, Integer>, List<String>>>("singlePartitionTag") {};
+ TupleTag<KV<ShardedKey<TableDestination>, List<String>>> multiPartitionsTag =
+ new TupleTag<KV<ShardedKey<TableDestination>, List<String>>>("multiPartitionsTag") {};
+ TupleTag<KV<ShardedKey<TableDestination>, List<String>>> singlePartitionTag =
+ new TupleTag<KV<ShardedKey<TableDestination>, List<String>>>("singlePartitionTag") {};
// Turn the list of files and record counts in a PCollectionView that can be used as a
// side input.
@@ -136,9 +142,9 @@ class BatchLoads<T> extends
// This transform will look at the set of files written for each table, and if any table has
// too many files or bytes, will partition that table's files into multiple partitions for
// loading.
- PCollectionTuple partitions = singleton.apply(ParDo
- .of(new WritePartition(
- write.getTable(),
+ PCollectionTuple partitions = singleton.apply("WritePartition",
+ ParDo.of(new WritePartition(
+ write.getJsonTableRef(),
write.getTableDescription(),
resultsView,
multiPartitionsTag,
@@ -148,17 +154,22 @@ class BatchLoads<T> extends
// Since BigQueryIO.java does not yet have support for per-table schemas, inject a constant
// schema function here. If no schema is specified, this function will return null.
+ // TODO: Turn this into a side-input instead.
SerializableFunction<TableDestination, TableSchema> schemaFunction =
- new ConstantSchemaFunction(write.getSchema());
+ new ConstantSchemaFunction(write.getJsonSchema());
+ Coder<KV<ShardedKey<TableDestination>, List<String>>> partitionsCoder =
+ KvCoder.of(ShardedKeyCoder.of(TableDestinationCoder.of()),
+ ListCoder.of(StringUtf8Coder.of()));
// If WriteBundlesToFiles produced more than MAX_NUM_FILES files or MAX_SIZE_BYTES bytes, then
// the import needs to be split into multiple partitions, and those partitions will be
// specified in multiPartitionsTag.
PCollection<KV<TableDestination, String>> tempTables = partitions.get(multiPartitionsTag)
+ .setCoder(partitionsCoder)
// What's this GroupByKey for? Is this so we have a deterministic temp tables? If so, maybe
// Reshuffle is better here.
.apply("MultiPartitionsGroupByKey",
- GroupByKey.<KV<TableDestination, Integer>, List<String>>create())
+ GroupByKey.<ShardedKey<TableDestination>, List<String>>create())
.apply("MultiPartitionsWriteTables", ParDo.of(new WriteTables(
false,
write.getBigQueryServices(),
@@ -174,20 +185,20 @@ class BatchLoads<T> extends
PCollectionView<Map<TableDestination, Iterable<String>>> tempTablesView = tempTables
.apply("TempTablesView", View.<TableDestination, String>asMultimap());
- singleton.apply(ParDo
+ singleton.apply("WriteRename", ParDo
.of(new WriteRename(
write.getBigQueryServices(),
jobIdTokenView,
write.getWriteDisposition(),
write.getCreateDisposition(),
- tempTablesView,
- write.getTableDescription()))
+ tempTablesView))
.withSideInputs(tempTablesView, jobIdTokenView));
// Write single partition to final table
partitions.get(singlePartitionTag)
+ .setCoder(partitionsCoder)
.apply("SinglePartitionGroupByKey",
- GroupByKey.<KV<TableDestination, Integer>, List<String>>create())
+ GroupByKey.<ShardedKey<TableDestination>, List<String>>create())
.apply("SinglePartitionWriteTables", ParDo.of(new WriteTables(
true,
write.getBigQueryServices(),
http://git-wip-us.apache.org/repos/asf/beam/blob/760a9458/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/ShardedKey.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/ShardedKey.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/ShardedKey.java
index 8c968df..ab57446 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/ShardedKey.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/ShardedKey.java
@@ -18,10 +18,13 @@
package org.apache.beam.sdk.io.gcp.bigquery;
+import java.io.Serializable;
+import java.util.Objects;
+
/**
* A key and a shard number.
*/
-class ShardedKey<K> {
+class ShardedKey<K> implements Serializable {
private final K key;
private final int shardNumber;
@@ -41,4 +44,23 @@ class ShardedKey<K> {
public int getShardNumber() {
return shardNumber;
}
+
+ @Override
+ public String toString() {
+ return "key: " + key + " shard: " + shardNumber;
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (!(o instanceof ShardedKey)) {
+ return false;
+ }
+ ShardedKey<?> other = (ShardedKey<?>) o;
+ return Objects.equals(key, other.key) && (shardNumber == other.shardNumber);
+ }
+
+ @Override
+ public int hashCode() {
+ return Objects.hash(key, shardNumber);
+ }
}
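
With equals and hashCode defined, two ShardedKey instances built from the same key and shard number now compare equal by value, which is what allows them to serve as grouping keys. A standalone plain-Java illustration of that value semantics (ShardedKeySketch and its main method are only for demonstration and are not part of the commit):

import java.io.Serializable;
import java.util.Objects;

final class ShardedKeySketch<K> implements Serializable {
  private final K key;
  private final int shardNumber;

  ShardedKeySketch(K key, int shardNumber) {
    this.key = key;
    this.shardNumber = shardNumber;
  }

  @Override
  public boolean equals(Object o) {
    if (!(o instanceof ShardedKeySketch)) {
      return false;
    }
    ShardedKeySketch<?> other = (ShardedKeySketch<?>) o;
    // Objects.equals delegates to the key's own equals() and tolerates null keys.
    return Objects.equals(key, other.key) && shardNumber == other.shardNumber;
  }

  @Override
  public int hashCode() {
    return Objects.hash(key, shardNumber);
  }

  public static void main(String[] args) {
    ShardedKeySketch<String> a = new ShardedKeySketch<>("dataset.table", 1);
    ShardedKeySketch<String> b = new ShardedKeySketch<>("dataset.table", 1);
    System.out.println(a.equals(b));  // true: value equality, not reference equality
  }
}
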
http://git-wip-us.apache.org/repos/asf/beam/blob/760a9458/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/TableDestination.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/TableDestination.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/TableDestination.java
index 1c2b256..e8538e0 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/TableDestination.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/TableDestination.java
@@ -20,12 +20,13 @@ package org.apache.beam.sdk.io.gcp.bigquery;
import com.google.api.services.bigquery.model.TableReference;
+import java.io.Serializable;
import java.util.Objects;
/**
* Encapsulates a BigQuery table destination.
*/
-public class TableDestination {
+public class TableDestination implements Serializable {
private final String tableSpec;
private final String tableDescription;
@@ -53,12 +54,17 @@ public class TableDestination {
}
@Override
+ public String toString() {
+ return "tableSpec: " + tableSpec + " tableDescription: " + tableDescription;
+ }
+
+ @Override
public boolean equals(Object o) {
if (!(o instanceof TableDestination)) {
return false;
}
TableDestination other = (TableDestination) o;
- return tableSpec == other.tableSpec && tableDescription == other.tableDescription;
+ return Objects.equals(tableSpec, other.tableSpec)
+ && Objects.equals(tableDescription, other.tableDescription);
}
@Override
http://git-wip-us.apache.org/repos/asf/beam/blob/760a9458/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/WriteBundlesToFiles.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/WriteBundlesToFiles.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/WriteBundlesToFiles.java
index 4e6167b..b8069f6 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/WriteBundlesToFiles.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/WriteBundlesToFiles.java
@@ -20,10 +20,19 @@ package org.apache.beam.sdk.io.gcp.bigquery;
import com.google.api.services.bigquery.model.TableRow;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.io.Serializable;
import java.util.Map;
import java.util.UUID;
import com.google.common.collect.Maps;
+import org.apache.beam.sdk.coders.AtomicCoder;
+import org.apache.beam.sdk.coders.CoderException;
+import org.apache.beam.sdk.coders.StringUtf8Coder;
+import org.apache.beam.sdk.coders.TableRowJsonCoder;
+import org.apache.beam.sdk.coders.VarLongCoder;
import org.apache.beam.sdk.transforms.DoFn;
import org.apache.beam.sdk.transforms.display.DisplayData;
import org.apache.beam.sdk.values.KV;
@@ -41,7 +50,7 @@ class WriteBundlesToFiles extends DoFn<KV<TableDestination, TableRow>, WriteBund
private transient Map<TableDestination, TableRowWriter> writers;
private final String tempFilePrefix;
- public static class Result {
+ public static class Result implements Serializable {
public String filename;
public Long fileByteSize;
public TableDestination tableDestination;
@@ -52,15 +61,54 @@ class WriteBundlesToFiles extends DoFn<KV<TableDestination, TableRow>, WriteBund
this.tableDestination = tableDestination;
}
}
+
+ public static class ResultCoder extends AtomicCoder<Result> {
+ private static final ResultCoder INSTANCE = new ResultCoder();
+
+ public static ResultCoder of() {
+ return INSTANCE;
+ }
+
+ @Override
+ public void encode(Result value, OutputStream outStream, Context context)
+ throws IOException {
+ if (value == null) {
+ throw new CoderException("cannot encode a null value");
+ }
+ stringCoder.encode(value.filename, outStream, context.nested());
+ longCoder.encode(value.fileByteSize, outStream, context.nested());
+ tableDestinationCoder.encode(value.tableDestination, outStream, context.nested());
+ }
+
+ @Override
+ public Result decode(InputStream inStream, Context context)
+ throws IOException {
+ return new Result(stringCoder.decode(inStream, context.nested()),
+ longCoder.decode(inStream, context.nested()),
+ tableDestinationCoder.decode(inStream, context.nested()));
+ }
+
+ @Override
+ public void verifyDeterministic() throws NonDeterministicException {
+ }
+
+ StringUtf8Coder stringCoder = StringUtf8Coder.of();
+ VarLongCoder longCoder = VarLongCoder.of();
+ TableDestinationCoder tableDestinationCoder = TableDestinationCoder.of();
+ }
+
WriteBundlesToFiles(String tempFilePrefix) {
this.tempFilePrefix = tempFilePrefix;
+ }
+
+ @StartBundle
+ public void startBundle(Context c) {
this.writers = Maps.newHashMap();
}
@ProcessElement
public void processElement(ProcessContext c) throws Exception {
- // ??? can we assume Java8?
- TableRowWriter writer = writers.getOrDefault(c.element().getKey(), null);
+ TableRowWriter writer = writers.get(c.element().getKey());
if (writer == null) {
writer = new TableRowWriter(tempFilePrefix);
writer.open(UUID.randomUUID().toString());
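
The ResultCoder introduced above serializes a Result field by field by delegating each field to an existing coder. The same pattern, reduced to a hypothetical two-field value so its shape is easier to see (FileSummary and FileSummaryCoder are made-up names, not classes from this change):

import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import org.apache.beam.sdk.coders.AtomicCoder;
import org.apache.beam.sdk.coders.CoderException;
import org.apache.beam.sdk.coders.StringUtf8Coder;
import org.apache.beam.sdk.coders.VarLongCoder;

// Hypothetical value with two fields, used only to illustrate the coder pattern.
class FileSummary {
  final String filename;
  final long byteSize;

  FileSummary(String filename, long byteSize) {
    this.filename = filename;
    this.byteSize = byteSize;
  }
}

// Encodes a FileSummary by writing each field with the coder for its type, in a
// fixed order; decode reads the fields back in the same order.
class FileSummaryCoder extends AtomicCoder<FileSummary> {
  private static final StringUtf8Coder STRING_CODER = StringUtf8Coder.of();
  private static final VarLongCoder LONG_CODER = VarLongCoder.of();

  @Override
  public void encode(FileSummary value, OutputStream outStream, Context context)
      throws IOException {
    if (value == null) {
      throw new CoderException("cannot encode a null FileSummary");
    }
    // Nested contexts, because each field is followed by more data in the stream.
    STRING_CODER.encode(value.filename, outStream, context.nested());
    LONG_CODER.encode(value.byteSize, outStream, context.nested());
  }

  @Override
  public FileSummary decode(InputStream inStream, Context context) throws IOException {
    return new FileSummary(
        STRING_CODER.decode(inStream, context.nested()),
        LONG_CODER.decode(inStream, context.nested()));
  }

  @Override
  public void verifyDeterministic() throws NonDeterministicException {
    // Both field coders are deterministic, so there is nothing to flag here.
  }
}
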
http://git-wip-us.apache.org/repos/asf/beam/blob/760a9458/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/WritePartition.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/WritePartition.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/WritePartition.java
index 8e1b16d..c48955b 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/WritePartition.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/WritePartition.java
@@ -37,20 +37,20 @@ import org.apache.beam.sdk.values.TupleTag;
* Partitions temporary files based on number of files and file sizes. Output key is a pair of
* tablespec and the list of files corresponding to each partition of that table.
*/
-class WritePartition extends DoFn<String, KV<KV<TableDestination, Integer>, List<String>>> {
- private final ValueProvider<TableReference> singletonOutputTable;
+class WritePartition extends DoFn<String, KV<ShardedKey<TableDestination>, List<String>>> {
+ private final ValueProvider<String> singletonOutputJsonTableRef;
private final String singletonOutputTableDescription;
private final PCollectionView<Iterable<WriteBundlesToFiles.Result>> resultsView;
- private TupleTag<KV<KV<TableDestination, Integer>, List<String>>> multiPartitionsTag;
- private TupleTag<KV<KV<TableDestination, Integer>, List<String>>> singlePartitionTag;
+ private TupleTag<KV<ShardedKey<TableDestination>, List<String>>> multiPartitionsTag;
+ private TupleTag<KV<ShardedKey<TableDestination>, List<String>>> singlePartitionTag;
public WritePartition(
- ValueProvider<TableReference> singletonOutputTable,
+ ValueProvider<String> singletonOutputJsonTableRef,
String singletonOutputTableDescription,
PCollectionView<Iterable<WriteBundlesToFiles.Result>> resultsView,
- TupleTag<KV<KV<TableDestination, Integer>, List<String>>> multiPartitionsTag,
- TupleTag<KV<KV<TableDestination, Integer>, List<String>>> singlePartitionTag) {
- this.singletonOutputTable = singletonOutputTable;
+ TupleTag<KV<ShardedKey<TableDestination>, List<String>>> multiPartitionsTag,
+ TupleTag<KV<ShardedKey<TableDestination>, List<String>>> singlePartitionTag) {
+ this.singletonOutputJsonTableRef = singletonOutputJsonTableRef;
this.singletonOutputTableDescription = singletonOutputTableDescription;
this.resultsView = resultsView;
this.multiPartitionsTag = multiPartitionsTag;
@@ -63,8 +63,9 @@ class WritePartition extends DoFn<String, KV<KV<TableDestination, Integer>, List
// If there are no elements to write _and_ the user specified a constant output table, then
// generate an empty table of that name.
- if (results.isEmpty() && singletonOutputTable != null) {
- TableReference singletonTable = singletonOutputTable.get();
+ if (results.isEmpty() && singletonOutputJsonTableRef != null) {
+ TableReference singletonTable = BigQueryHelpers.fromJsonString(
+ singletonOutputJsonTableRef.get(), TableReference.class);
if (singletonTable != null) {
TableRowWriter writer = new TableRowWriter(c.element());
writer.open(UUID.randomUUID().toString());
@@ -82,8 +83,7 @@ class WritePartition extends DoFn<String, KV<KV<TableDestination, Integer>, List
for (int i = 0; i < results.size(); ++i) {
WriteBundlesToFiles.Result fileResult = results.get(i);
TableDestination tableDestination = fileResult.tableDestination;
- // JAVA8
- List<List<String>> partitions = currResultsMap.getOrDefault(tableDestination, null);
+ List<List<String>> partitions = currResultsMap.get(tableDestination);
if (partitions == null) {
partitions = Lists.newArrayList();
partitions.add(Lists.<String>newArrayList());
@@ -110,10 +110,10 @@ class WritePartition extends DoFn<String, KV<KV<TableDestination, Integer>, List
for (Map.Entry<TableDestination, List<List<String>>> entry : currResultsMap.entrySet()) {
TableDestination tableDestination = entry.getKey();
List<List<String>> partitions = entry.getValue();
- TupleTag<KV<KV<TableDestination, Integer>, List<String>>> outputTag =
+ TupleTag<KV<ShardedKey<TableDestination>, List<String>>> outputTag =
(partitions.size() == 1) ? singlePartitionTag : multiPartitionsTag;
for (int i = 0; i < partitions.size(); ++i) {
- c.output(outputTag, KV.of(KV.of(tableDestination, i + 1), partitions.get(i)));
+ c.output(outputTag, KV.of(ShardedKey.of(tableDestination, i + 1), partitions.get(i)));
}
}
}
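
WritePartition now routes each partition to one of two TupleTags via c.output(tag, value), and the caller reads both outputs from the resulting tuple. A pared-down sketch of that multi-output pattern with simplified element types; the String keys, the class name RoutingSketch, and the ParDo.withOutputTags wiring are illustrative assumptions rather than code copied from this diff:

import java.util.List;
import org.apache.beam.sdk.transforms.DoFn;
import org.apache.beam.sdk.transforms.ParDo;
import org.apache.beam.sdk.values.KV;
import org.apache.beam.sdk.values.PCollection;
import org.apache.beam.sdk.values.PCollectionTuple;
import org.apache.beam.sdk.values.TupleTag;
import org.apache.beam.sdk.values.TupleTagList;

class RoutingSketch {
  // Partitions with a single file go to the main (single-partition) output;
  // everything else goes to the multi-partition tag, mirroring WritePartition's split.
  static final TupleTag<KV<String, List<String>>> SINGLE =
      new TupleTag<KV<String, List<String>>>("single") {};
  static final TupleTag<KV<String, List<String>>> MULTI =
      new TupleTag<KV<String, List<String>>>("multi") {};

  static PCollectionTuple route(PCollection<KV<String, List<String>>> input) {
    return input.apply("RoutePartitions",
        ParDo.of(new DoFn<KV<String, List<String>>, KV<String, List<String>>>() {
              @ProcessElement
              public void processElement(ProcessContext c) {
                if (c.element().getValue().size() > 1) {
                  c.output(MULTI, c.element());  // additional output, selected by tag
                } else {
                  c.output(c.element());         // main output, i.e. the SINGLE tag
                }
              }
            })
            .withOutputTags(SINGLE, TupleTagList.of(MULTI)));
  }
}
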
http://git-wip-us.apache.org/repos/asf/beam/blob/760a9458/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/WriteRename.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/WriteRename.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/WriteRename.java
index fbfb290..752e7d3 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/WriteRename.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/WriteRename.java
@@ -18,12 +18,12 @@
package org.apache.beam.sdk.io.gcp.bigquery;
-import avro.shaded.com.google.common.collect.Maps;
import com.google.api.services.bigquery.model.Job;
import com.google.api.services.bigquery.model.JobConfigurationTableCopy;
import com.google.api.services.bigquery.model.JobReference;
import com.google.api.services.bigquery.model.TableReference;
import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
import java.io.IOException;
import java.util.List;
import java.util.Map;
@@ -36,7 +36,6 @@ import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.WriteDisposition;
import org.apache.beam.sdk.io.gcp.bigquery.BigQueryServices.DatasetService;
import org.apache.beam.sdk.io.gcp.bigquery.BigQueryServices.JobService;
import org.apache.beam.sdk.options.BigQueryOptions;
-import org.apache.beam.sdk.options.ValueProvider;
import org.apache.beam.sdk.transforms.DoFn;
import org.apache.beam.sdk.transforms.display.DisplayData;
import org.apache.beam.sdk.values.PCollectionView;
@@ -53,23 +52,21 @@ class WriteRename extends DoFn<String, Void> {
private final PCollectionView<String> jobIdToken;
private final WriteDisposition writeDisposition;
private final CreateDisposition createDisposition;
+ // Map from final destination to a list of temporary tables that need to be copied into it.
private final PCollectionView<Map<TableDestination, Iterable<String>>> tempTablesView;
- @Nullable
- private final String tableDescription;
+
public WriteRename(
BigQueryServices bqServices,
PCollectionView<String> jobIdToken,
WriteDisposition writeDisposition,
CreateDisposition createDisposition,
- PCollectionView<Map<TableDestination, Iterable<String>>> tempTablesView,
- @Nullable String tableDescription) {
+ PCollectionView<Map<TableDestination, Iterable<String>>> tempTablesView) {
this.bqServices = bqServices;
this.jobIdToken = jobIdToken;
this.writeDisposition = writeDisposition;
this.createDisposition = createDisposition;
this.tempTablesView = tempTablesView;
- this.tableDescription = tableDescription;
}
@ProcessElement
@@ -102,7 +99,7 @@ class WriteRename extends DoFn<String, Void> {
tempTables,
writeDisposition,
createDisposition,
- tableDescription);
+ finalTableDestination.getTableDescription());
DatasetService tableService =
bqServices.getDatasetService(c.getPipelineOptions().as(BigQueryOptions.class));
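
WriteRename no longer takes a single table description; it reads the map-valued side input and pulls the description from each final TableDestination. A trimmed sketch of that side-input access pattern (the class name RenameSketch is illustrative, and the copy-job and cleanup logic of the real DoFn is elided):

import java.util.Map;
import org.apache.beam.sdk.transforms.DoFn;
import org.apache.beam.sdk.values.PCollectionView;

class RenameSketch extends DoFn<String, Void> {
  // View produced by View.asMultimap(): final destination -> temporary tables to copy.
  private final PCollectionView<Map<TableDestination, Iterable<String>>> tempTablesView;

  RenameSketch(PCollectionView<Map<TableDestination, Iterable<String>>> tempTablesView) {
    this.tempTablesView = tempTablesView;
  }

  @ProcessElement
  public void processElement(ProcessContext c) {
    Map<TableDestination, Iterable<String>> tempTables = c.sideInput(tempTablesView);
    for (Map.Entry<TableDestination, Iterable<String>> entry : tempTables.entrySet()) {
      TableDestination finalDestination = entry.getKey();
      // The description now travels with the destination itself instead of being a
      // separate constructor argument; a real implementation would pass it to the
      // copy job issued for the temporary tables in entry.getValue().
      String tableDescription = finalDestination.getTableDescription();
    }
  }
}
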
http://git-wip-us.apache.org/repos/asf/beam/blob/760a9458/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/WriteTables.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/WriteTables.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/WriteTables.java
index 5051c95..f7fe87b 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/WriteTables.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/WriteTables.java
@@ -39,7 +39,6 @@ import org.apache.beam.sdk.io.gcp.bigquery.BigQueryServices.DatasetService;
import org.apache.beam.sdk.io.gcp.bigquery.BigQueryServices.JobService;
import org.apache.beam.sdk.options.BigQueryOptions;
import org.apache.beam.sdk.options.PipelineOptions;
-import org.apache.beam.sdk.options.ValueProvider;
import org.apache.beam.sdk.transforms.DoFn;
import org.apache.beam.sdk.transforms.SerializableFunction;
import org.apache.beam.sdk.transforms.display.DisplayData;
@@ -57,8 +56,12 @@ import org.slf4j.LoggerFactory;
/**
* Writes partitions to BigQuery tables.
+ *
+ * <p>The input is a list of files corresponding to a partition of a table. These files are
+ * loaded into a temporary table (or into the final table if there is only one partition). The output
+ * is a {@link KV} mapping the final table to the temporary tables for each partition of that table.
*/
-class WriteTables extends DoFn<KV<KV<TableDestination, Integer>, Iterable<List<String>>>,
+class WriteTables extends DoFn<KV<ShardedKey<TableDestination>, Iterable<List<String>>>,
KV<TableDestination, String>> {
private static final Logger LOG = LoggerFactory.getLogger(WriteTables.class);
@@ -90,23 +93,24 @@ class WriteTables extends DoFn<KV<KV<TableDestination, Integer>, Iterable<List<S
@ProcessElement
public void processElement(ProcessContext c) throws Exception {
TableDestination tableDestination = c.element().getKey().getKey();
- Integer partition = c.element().getKey().getValue();
+ Integer partition = c.element().getKey().getShardNumber();
List<String> partitionFiles = Lists.newArrayList(c.element().getValue()).get(0);
// Job ID must be different for each partition of each table.
String jobIdPrefix = String.format(
- c.sideInput(jobIdToken) + "0x%08x_%05d", tableDestination.hashCode(), partition);
+ c.sideInput(jobIdToken) + "_0x%08x_%05d", tableDestination.hashCode(), partition);
TableReference ref = tableDestination.getTableReference();
if (!singlePartition) {
ref.setTableId(jobIdPrefix);
}
+ TableSchema schema = (schemaFunction != null) ? schemaFunction.apply(tableDestination) : null;
load(
bqServices.getJobService(c.getPipelineOptions().as(BigQueryOptions.class)),
bqServices.getDatasetService(c.getPipelineOptions().as(BigQueryOptions.class)),
jobIdPrefix,
ref,
- schemaFunction.apply(tableDestination),
+ schema,
partitionFiles,
writeDisposition,
createDisposition,
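
Two behavioural details in the hunk above: the load job id now has an underscore separating the token from the hex hash, and the schema function is only applied when one was supplied. A standalone illustration of how the per-partition job id prefix is formed; the token and table values are made-up examples (the real token arrives through the jobIdToken side input), and the sketch assumes it is placed where TableDestination is visible:

class JobIdSketch {
  public static void main(String[] args) {
    String jobIdToken = "beam_load_20170418";  // example value, really a side input
    TableDestination destination =
        new TableDestination("project-id:dataset_id.table_id", "example description");
    int partition = 3;
    // Unique per (destination, partition) pair: the hex hash identifies the table,
    // the zero-padded suffix identifies the partition, and the underscore keeps the
    // token visually separate from the hash.
    String jobIdPrefix =
        String.format(jobIdToken + "_0x%08x_%05d", destination.hashCode(), partition);
    System.out.println(jobIdPrefix);  // e.g. beam_load_20170418_0x3f9c2a41_00003
  }
}
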
http://git-wip-us.apache.org/repos/asf/beam/blob/760a9458/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOTest.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOTest.java b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOTest.java
index af39483..d1ef8e2 100644
--- a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOTest.java
+++ b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOTest.java
@@ -18,9 +18,6 @@
package org.apache.beam.sdk.io.gcp.bigquery;
import static com.google.common.base.Preconditions.checkArgument;
-import static com.google.common.base.Preconditions.checkNotNull;
-import static com.google.common.base.Preconditions.checkState;
-import static org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.fromJsonString;
import static org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.toJsonString;
import static org.apache.beam.sdk.transforms.display.DisplayDataMatchers.hasDisplayItem;
import static org.hamcrest.Matchers.containsInAnyOrder;
@@ -38,13 +35,7 @@ import static org.mockito.Mockito.when;
import com.google.api.client.json.GenericJson;
import com.google.api.client.util.Data;
-import com.google.api.services.bigquery.model.Dataset;
-import com.google.api.services.bigquery.model.ErrorProto;
import com.google.api.services.bigquery.model.Job;
-import com.google.api.services.bigquery.model.JobConfigurationExtract;
-import com.google.api.services.bigquery.model.JobConfigurationLoad;
-import com.google.api.services.bigquery.model.JobConfigurationQuery;
-import com.google.api.services.bigquery.model.JobConfigurationTableCopy;
import com.google.api.services.bigquery.model.JobReference;
import com.google.api.services.bigquery.model.JobStatistics;
import com.google.api.services.bigquery.model.JobStatistics2;
@@ -55,18 +46,16 @@ import com.google.api.services.bigquery.model.TableFieldSchema;
import com.google.api.services.bigquery.model.TableReference;
import com.google.api.services.bigquery.model.TableRow;
import com.google.api.services.bigquery.model.TableSchema;
-import com.google.common.base.Strings;
import com.google.common.collect.HashBasedTable;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Lists;
-import java.io.ByteArrayInputStream;
+import com.google.common.collect.Maps;
+
import java.io.File;
import java.io.FileFilter;
import java.io.IOException;
import java.io.InputStream;
-import java.io.ObjectInputStream;
-import java.io.ObjectOutputStream;
import java.io.OutputStream;
import java.io.Serializable;
import java.nio.channels.Channels;
@@ -74,15 +63,12 @@ import java.nio.channels.WritableByteChannel;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.ArrayList;
-import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
-import java.util.HashMap;
import java.util.List;
import java.util.Map;
-import java.util.NoSuchElementException;
import java.util.Set;
-import javax.annotation.Nullable;
+
import org.apache.avro.Schema;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.generic.GenericDatumWriter;
@@ -96,17 +82,15 @@ import org.apache.beam.sdk.coders.KvCoder;
import org.apache.beam.sdk.coders.StringUtf8Coder;
import org.apache.beam.sdk.coders.TableRowJsonCoder;
import org.apache.beam.sdk.coders.VarIntCoder;
-import org.apache.beam.sdk.coders.VarLongCoder;
import org.apache.beam.sdk.io.BoundedSource;
import org.apache.beam.sdk.io.CountingInput;
import org.apache.beam.sdk.io.CountingSource;
import org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.JsonSchemaToTableSchema;
-import org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.Status;
import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.CreateDisposition;
import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.WriteDisposition;
import org.apache.beam.sdk.io.gcp.bigquery.BigQueryServices.DatasetService;
-import org.apache.beam.sdk.io.gcp.bigquery.BigQueryServices.JobService;
import org.apache.beam.sdk.io.gcp.bigquery.PassThroughThenCleanup.CleanupOperation;
+import org.apache.beam.sdk.io.gcp.bigquery.WriteBundlesToFiles.Result;
import org.apache.beam.sdk.options.BigQueryOptions;
import org.apache.beam.sdk.options.PipelineOptions;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
@@ -142,7 +126,6 @@ import org.apache.beam.sdk.util.IOChannelFactory;
import org.apache.beam.sdk.util.IOChannelUtils;
import org.apache.beam.sdk.util.MimeTypes;
import org.apache.beam.sdk.util.PCollectionViews;
-import org.apache.beam.sdk.util.Transport;
import org.apache.beam.sdk.util.WindowedValue;
import org.apache.beam.sdk.util.WindowingStrategy;
import org.apache.beam.sdk.values.KV;
@@ -175,484 +158,17 @@ import org.mockito.MockitoAnnotations;
@RunWith(JUnit4.class)
public class BigQueryIOTest implements Serializable {
- // Status.UNKNOWN maps to null
- private static final Map<Status, Job> JOB_STATUS_MAP = ImmutableMap.of(
- Status.SUCCEEDED, new Job().setStatus(new JobStatus()),
- Status.FAILED, new Job().setStatus(new JobStatus().setErrorResult(new ErrorProto())));
-
-
- private static class FakeBigQueryServices implements BigQueryServices {
-
- private String[] jsonTableRowReturns = new String[0];
- private JobService jobService;
- private DatasetService datasetService;
-
- public FakeBigQueryServices withJobService(JobService jobService) {
- this.jobService = jobService;
- return this;
- }
-
- public FakeBigQueryServices withDatasetService(DatasetService datasetService) {
- this.datasetService = datasetService;
- return this;
- }
-
- public FakeBigQueryServices readerReturns(String... jsonTableRowReturns) {
- this.jsonTableRowReturns = jsonTableRowReturns;
- return this;
- }
-
- @Override
- public JobService getJobService(BigQueryOptions bqOptions) {
- return jobService;
- }
-
- @Override
- public DatasetService getDatasetService(BigQueryOptions bqOptions) {
- return datasetService;
- }
-
- @Override
- public BigQueryJsonReader getReaderFromTable(
- BigQueryOptions bqOptions, TableReference tableRef) {
- return new FakeBigQueryReader(jsonTableRowReturns);
- }
-
- @Override
- public BigQueryJsonReader getReaderFromQuery(
- BigQueryOptions bqOptions, String projectId, JobConfigurationQuery queryConfig) {
- return new FakeBigQueryReader(jsonTableRowReturns);
- }
-
- private static class FakeBigQueryReader implements BigQueryJsonReader {
- private static final int UNSTARTED = -1;
- private static final int CLOSED = Integer.MAX_VALUE;
-
- private String[] jsonTableRowReturns;
- private int currIndex;
-
- FakeBigQueryReader(String[] jsonTableRowReturns) {
- this.jsonTableRowReturns = jsonTableRowReturns;
- this.currIndex = UNSTARTED;
- }
-
- @Override
- public boolean start() throws IOException {
- assertEquals(UNSTARTED, currIndex);
- currIndex = 0;
- return currIndex < jsonTableRowReturns.length;
- }
-
- @Override
- public boolean advance() throws IOException {
- return ++currIndex < jsonTableRowReturns.length;
- }
-
- @Override
- public TableRow getCurrent() throws NoSuchElementException {
- if (currIndex >= jsonTableRowReturns.length) {
- throw new NoSuchElementException();
- }
- return fromJsonString(jsonTableRowReturns[currIndex], TableRow.class);
- }
-
- @Override
- public void close() throws IOException {
- currIndex = CLOSED;
- }
- }
- }
-
- private static class FakeJobService implements JobService, Serializable {
-
- private Object[] startJobReturns;
- private Object[] pollJobReturns;
- private Object[] getJobReturns;
- private String executingProject;
- // Both counts will be reset back to zeros after serialization.
- // This is a work around for DoFn's verifyUnmodified check.
- private transient int startJobCallsCount;
- private transient int pollJobStatusCallsCount;
- private transient int getJobCallsCount;
-
- public FakeJobService() {
- this.startJobReturns = new Object[0];
- this.pollJobReturns = new Object[0];
- this.getJobReturns = new Object[0];
- this.startJobCallsCount = 0;
- this.pollJobStatusCallsCount = 0;
- this.getJobCallsCount = 0;
- }
-
- /**
- * Sets the return values to mock {@link JobService#startLoadJob},
- * {@link JobService#startExtractJob} and {@link JobService#startQueryJob}.
- *
- * <p>Throws if the {@link Object} is a {@link Exception}, returns otherwise.
- */
- public FakeJobService startJobReturns(Object... startJobReturns) {
- this.startJobReturns = startJobReturns;
- return this;
- }
-
- /**
- * Sets the return values to mock {@link JobService#getJob}.
- *
- * <p>Throws if the {@link Object} is a {@link InterruptedException}, returns otherwise.
- */
- public FakeJobService getJobReturns(Object... getJobReturns) {
- this.getJobReturns = getJobReturns;
- return this;
- }
-
- /**
- * Sets the return values to mock {@link JobService#pollJob}.
- *
- * <p>Throws if the {@link Object} is a {@link Exception}, returns otherwise.
- */
- public FakeJobService pollJobReturns(Object... pollJobReturns) {
- this.pollJobReturns = pollJobReturns;
- return this;
- }
-
- /**
- * Verifies executing project.
- */
- public FakeJobService verifyExecutingProject(String executingProject) {
- this.executingProject = executingProject;
- return this;
- }
-
- @Override
- public void startLoadJob(JobReference jobRef, JobConfigurationLoad loadConfig)
- throws InterruptedException, IOException {
- startJob(jobRef, loadConfig);
- }
-
- @Override
- public void startExtractJob(JobReference jobRef, JobConfigurationExtract extractConfig)
- throws InterruptedException, IOException {
- startJob(jobRef, extractConfig);
- }
-
- @Override
- public void startQueryJob(JobReference jobRef, JobConfigurationQuery query)
- throws IOException, InterruptedException {
- startJob(jobRef, query);
- }
-
- @Override
- public void startCopyJob(JobReference jobRef, JobConfigurationTableCopy copyConfig)
- throws IOException, InterruptedException {
- startJob(jobRef, copyConfig);
- }
-
- @Override
- public Job pollJob(JobReference jobRef, int maxAttempts)
- throws InterruptedException {
- if (!Strings.isNullOrEmpty(executingProject)) {
- checkArgument(
- jobRef.getProjectId().equals(executingProject),
- "Project id: %s is not equal to executing project: %s",
- jobRef.getProjectId(), executingProject);
- }
-
- if (pollJobStatusCallsCount < pollJobReturns.length) {
- Object ret = pollJobReturns[pollJobStatusCallsCount++];
- if (ret instanceof Job) {
- return (Job) ret;
- } else if (ret instanceof Status) {
- return JOB_STATUS_MAP.get(ret);
- } else if (ret instanceof InterruptedException) {
- throw (InterruptedException) ret;
- } else {
- throw new RuntimeException("Unexpected return type: " + ret.getClass());
- }
- } else {
- throw new RuntimeException(
- "Exceeded expected number of calls: " + pollJobReturns.length);
- }
- }
-
- private void startJob(JobReference jobRef, GenericJson config)
- throws IOException, InterruptedException {
- if (!Strings.isNullOrEmpty(executingProject)) {
- checkArgument(
- jobRef.getProjectId().equals(executingProject),
- "Project id: %s is not equal to executing project: %s",
- jobRef.getProjectId(), executingProject);
- }
-
- if (startJobCallsCount < startJobReturns.length) {
- Object ret = startJobReturns[startJobCallsCount++];
- if (ret instanceof IOException) {
- throw (IOException) ret;
- } else if (ret instanceof InterruptedException) {
- throw (InterruptedException) ret;
- } else if (ret instanceof SerializableFunction) {
- SerializableFunction<GenericJson, Void> fn =
- (SerializableFunction<GenericJson, Void>) ret;
- fn.apply(config);
- return;
- } else {
- return;
- }
- } else {
- throw new RuntimeException(
- "Exceeded expected number of calls: " + startJobReturns.length);
- }
- }
-
- @Override
- public JobStatistics dryRunQuery(String projectId, JobConfigurationQuery query)
- throws InterruptedException, IOException {
- throw new UnsupportedOperationException();
- }
-
- @Override
- public Job getJob(JobReference jobRef) throws InterruptedException {
- if (!Strings.isNullOrEmpty(executingProject)) {
- checkArgument(
- jobRef.getProjectId().equals(executingProject),
- "Project id: %s is not equal to executing project: %s",
- jobRef.getProjectId(), executingProject);
- }
-
- if (getJobCallsCount < getJobReturns.length) {
- Object ret = getJobReturns[getJobCallsCount++];
- if (ret == null) {
- return null;
- } else if (ret instanceof Job) {
- return (Job) ret;
- } else if (ret instanceof InterruptedException) {
- throw (InterruptedException) ret;
- } else {
- throw new RuntimeException("Unexpected return type: " + ret.getClass());
- }
- } else {
- throw new RuntimeException(
- "Exceeded expected number of calls: " + getJobReturns.length);
- }
- }
-
- ////////////////////////////////// SERIALIZATION METHODS ////////////////////////////////////
- private void writeObject(ObjectOutputStream out) throws IOException {
- out.writeObject(replaceJobsWithBytes(startJobReturns));
- out.writeObject(replaceJobsWithBytes(pollJobReturns));
- out.writeObject(replaceJobsWithBytes(getJobReturns));
- out.writeObject(executingProject);
- }
-
- private Object[] replaceJobsWithBytes(Object[] objs) {
- Object[] copy = Arrays.copyOf(objs, objs.length);
- for (int i = 0; i < copy.length; i++) {
- checkArgument(
- copy[i] == null || copy[i] instanceof Serializable || copy[i] instanceof Job,
- "Only serializable elements and jobs can be added add to Job Returns");
- if (copy[i] instanceof Job) {
- try {
- // Job is not serializable, so encode the job as a byte array.
- copy[i] = Transport.getJsonFactory().toByteArray(copy[i]);
- } catch (IOException e) {
- throw new IllegalArgumentException(
- String.format("Could not encode Job %s via available JSON factory", copy[i]));
- }
- }
- }
- return copy;
- }
-
- private void readObject(ObjectInputStream in) throws IOException, ClassNotFoundException {
- this.startJobReturns = replaceBytesWithJobs(in.readObject());
- this.pollJobReturns = replaceBytesWithJobs(in.readObject());
- this.getJobReturns = replaceBytesWithJobs(in.readObject());
- this.executingProject = (String) in.readObject();
- }
-
- private Object[] replaceBytesWithJobs(Object obj) throws IOException {
- checkState(obj instanceof Object[]);
- Object[] objs = (Object[]) obj;
- Object[] copy = Arrays.copyOf(objs, objs.length);
- for (int i = 0; i < copy.length; i++) {
- if (copy[i] instanceof byte[]) {
- Job job = Transport.getJsonFactory()
- .createJsonParser(new ByteArrayInputStream((byte[]) copy[i]))
- .parse(Job.class);
- copy[i] = job;
- }
- }
- return copy;
- }
- }
-
- private static class TableContainer {
- Table table;
- List<TableRow> rows;
- List<String> ids;
-
- TableContainer(Table table) {
- this.table = table;
- this.rows = new ArrayList<>();
- this.ids = new ArrayList<>();
- }
-
- TableContainer addRow(TableRow row, String id) {
- rows.add(row);
- ids.add(id);
- return this;
- }
-
- Table getTable() {
- return table;
- }
-
- List<TableRow> getRows() {
- return rows;
- }
- }
-
// Table information must be static, as each ParDo will get a separate instance of
// FakeDatasetServices, and they must all modify the same storage.
- private static com.google.common.collect.Table<String, String, Map<String, TableContainer>>
+ static com.google.common.collect.Table<String, String, Map<String, TableContainer>>
tables = HashBasedTable.create();
- /** A fake dataset service that can be serialized, for use in testReadFromTable. */
- private static class FakeDatasetService implements DatasetService, Serializable {
- @Override
- public Table getTable(TableReference tableRef)
- throws InterruptedException, IOException {
- synchronized (tables) {
- Map<String, TableContainer> dataset =
- checkNotNull(
- tables.get(tableRef.getProjectId(), tableRef.getDatasetId()),
- "Tried to get a dataset %s:%s from %s, but no such dataset was set",
- tableRef.getProjectId(),
- tableRef.getDatasetId(),
- tableRef.getTableId(),
- FakeDatasetService.class.getSimpleName());
- TableContainer tableContainer = dataset.get(tableRef.getTableId());
- return tableContainer == null ? null : tableContainer.getTable();
- }
- }
-
- public List<TableRow> getAllRows(String projectId, String datasetId, String tableId)
- throws InterruptedException, IOException {
- synchronized (tables) {
- return getTableContainer(projectId, datasetId, tableId).getRows();
- }
- }
-
- private TableContainer getTableContainer(String projectId, String datasetId, String tableId)
- throws InterruptedException, IOException {
- synchronized (tables) {
- Map<String, TableContainer> dataset =
- checkNotNull(
- tables.get(projectId, datasetId),
- "Tried to get a dataset %s:%s from %s, but no such dataset was set",
- projectId,
- datasetId,
- FakeDatasetService.class.getSimpleName());
- return checkNotNull(dataset.get(tableId),
- "Tried to get a table %s:%s.%s from %s, but no such table was set",
- projectId,
- datasetId,
- tableId,
- FakeDatasetService.class.getSimpleName());
- }
- }
-
- @Override
- public void deleteTable(TableReference tableRef) throws IOException, InterruptedException {
- throw new UnsupportedOperationException("Unsupported");
- }
-
-
- @Override
- public void createTable(Table table) throws IOException {
- TableReference tableReference = table.getTableReference();
- synchronized (tables) {
- Map<String, TableContainer> dataset =
- checkNotNull(
- tables.get(tableReference.getProjectId(), tableReference.getDatasetId()),
- "Tried to get a dataset %s:%s from %s, but no such table was set",
- tableReference.getProjectId(),
- tableReference.getDatasetId(),
- FakeDatasetService.class.getSimpleName());
- TableContainer tableContainer = dataset.get(tableReference.getTableId());
- if (tableContainer == null) {
- tableContainer = new TableContainer(table);
- dataset.put(tableReference.getTableId(), tableContainer);
- }
- }
- }
-
- @Override
- public boolean isTableEmpty(TableReference tableRef)
- throws IOException, InterruptedException {
- Long numBytes = getTable(tableRef).getNumBytes();
- return numBytes == null || numBytes == 0L;
- }
-
- @Override
- public Dataset getDataset(
- String projectId, String datasetId) throws IOException, InterruptedException {
- throw new UnsupportedOperationException("Unsupported");
- }
-
- @Override
- public void createDataset(
- String projectId, String datasetId, String location, String description)
- throws IOException, InterruptedException {
- synchronized (tables) {
- Map<String, TableContainer> dataset = tables.get(projectId, datasetId);
- if (dataset == null) {
- dataset = new HashMap<>();
- tables.put(projectId, datasetId, dataset);
- }
- }
- }
-
- @Override
- public void deleteDataset(String projectId, String datasetId)
- throws IOException, InterruptedException {
- throw new UnsupportedOperationException("Unsupported");
- }
-
- @Override
- public long insertAll(
- TableReference ref, List<TableRow> rowList, @Nullable List<String> insertIdList)
- throws IOException, InterruptedException {
- synchronized (tables) {
- assertEquals(rowList.size(), insertIdList.size());
-
- long dataSize = 0;
- TableContainer tableContainer = getTableContainer(
- ref.getProjectId(), ref.getDatasetId(), ref.getTableId());
- for (int i = 0; i < rowList.size(); ++i) {
- System.out.println("adding row " + rowList.get(i));
- tableContainer.addRow(rowList.get(i), insertIdList.get(i));
- dataSize += rowList.get(i).toString().length();
- }
- return dataSize;
- }
- }
-
- @Override
- public Table patchTableDescription(TableReference tableReference,
- @Nullable String tableDescription)
- throws IOException, InterruptedException {
- throw new UnsupportedOperationException("Unsupported");
- }
- }
-
@Rule public final transient TestPipeline p = TestPipeline.create();
@Rule public transient ExpectedException thrown = ExpectedException.none();
@Rule public transient ExpectedLogs loggedBigQueryIO = ExpectedLogs.none(BigQueryIO.class);
@Rule public transient ExpectedLogs loggedWriteRename = ExpectedLogs.none(WriteRename.class);
@Rule public transient ExpectedLogs loggedWriteTables = ExpectedLogs.none(WriteTables.class);
@Rule public transient TemporaryFolder testFolder = new TemporaryFolder();
- @Mock(extraInterfaces = Serializable.class)
- public transient BigQueryServices.JobService mockJobService;
@Mock private transient IOChannelFactory mockIOChannelFactory;
@Mock(extraInterfaces = Serializable.class) private transient DatasetService mockDatasetService;
@@ -801,7 +317,7 @@ public class BigQueryIOTest implements Serializable {
@Test
public void testBuildSourceWithTableAndFlatten() {
BigQueryOptions bqOptions = TestPipeline.testingPipelineOptions().as(BigQueryOptions.class);
- bqOptions.setProject("defaultProject");
+ bqOptions.setProject("defaultproject");
bqOptions.setTempLocation("gs://testbucket/testdir");
Pipeline p = TestPipeline.create(bqOptions);
@@ -819,7 +335,7 @@ public class BigQueryIOTest implements Serializable {
@Test
public void testBuildSourceWithTableAndFlattenWithoutValidation() {
BigQueryOptions bqOptions = TestPipeline.testingPipelineOptions().as(BigQueryOptions.class);
- bqOptions.setProject("defaultProject");
+ bqOptions.setProject("defaultproject");
bqOptions.setTempLocation("gs://testbucket/testdir");
Pipeline p = TestPipeline.create(bqOptions);
@@ -838,7 +354,7 @@ public class BigQueryIOTest implements Serializable {
@Test
public void testBuildSourceWithTableAndSqlDialect() {
BigQueryOptions bqOptions = PipelineOptionsFactory.as(BigQueryOptions.class);
- bqOptions.setProject("defaultProject");
+ bqOptions.setProject("defaultproject");
bqOptions.setTempLocation("gs://testbucket/testdir");
Pipeline p = TestPipeline.create(bqOptions);
@@ -856,7 +372,7 @@ public class BigQueryIOTest implements Serializable {
@Test
public void testReadFromTable() throws IOException, InterruptedException {
BigQueryOptions bqOptions = TestPipeline.testingPipelineOptions().as(BigQueryOptions.class);
- bqOptions.setProject("defaultProject");
+ bqOptions.setProject("defaultproject");
bqOptions.setTempLocation(testFolder.newFolder("BigQueryIOTest").getAbsolutePath());
Job job = new Job();
@@ -906,11 +422,11 @@ public class BigQueryIOTest implements Serializable {
new WriteExtractFiles(schemaGenerator, records);
FakeBigQueryServices fakeBqServices = new FakeBigQueryServices()
- .withJobService(new FakeJobService()
- .startJobReturns(onStartJob, "done")
- .pollJobReturns(job)
- .getJobReturns((Job) null)
- .verifyExecutingProject(bqOptions.getProject()))
+ .withJobService(new FakeJobService())
+ // .startJobReturns(onStartJob, "done")
+ // .pollJobReturns(job)
+ // .getJobReturns((Job) null)
+ // .verifyExecutingProject(bqOptions.getProject()))
.withDatasetService(fakeDatasetService)
.readerReturns(
toJsonString(new TableRow().set("name", "a").set("number", 1)),
@@ -938,13 +454,16 @@ public class BigQueryIOTest implements Serializable {
@Test
public void testWrite() throws Exception {
BigQueryOptions bqOptions = TestPipeline.testingPipelineOptions().as(BigQueryOptions.class);
- bqOptions.setProject("defaultProject");
+ bqOptions.setProject("defaultproject");
bqOptions.setTempLocation(testFolder.newFolder("BigQueryIOTest").getAbsolutePath());
FakeBigQueryServices fakeBqServices = new FakeBigQueryServices()
- .withJobService(new FakeJobService()
- .startJobReturns("done", "done", "done")
- .pollJobReturns(Status.FAILED, Status.FAILED, Status.SUCCEEDED));
+ .withJobService(new FakeJobService())
+ // .startJobReturns("done", "done", "done")
+ // .pollJobReturns(Status.FAILED, Status.FAILED, Status.SUCCEEDED))
+ .withDatasetService(mockDatasetService);
+
+ mockDatasetService.createDataset("defaultproject", "dataset-id", "", "");
Pipeline p = TestPipeline.create(bqOptions);
p.apply(Create.of(
@@ -969,7 +488,7 @@ public class BigQueryIOTest implements Serializable {
@Test
public void testStreamingWrite() throws Exception {
BigQueryOptions bqOptions = TestPipeline.testingPipelineOptions().as(BigQueryOptions.class);
- bqOptions.setProject("defaultProject");
+ bqOptions.setProject("defaultproject");
bqOptions.setTempLocation(testFolder.newFolder("BigQueryIOTest").getAbsolutePath());
FakeDatasetService datasetService = new FakeDatasetService();
@@ -1095,15 +614,27 @@ public class BigQueryIOTest implements Serializable {
}
@Test
- public void testStreamingWriteWithWindowFn() throws Exception {
+ @Category(NeedsRunner.class)
+ public void testStreamingWriteWithDynamicTables() throws Exception {
+ testWriteWithDynamicTables(true);
+ }
+
+ @Test
+ @Category(NeedsRunner.class)
+ public void testBatchWriteWithDynamicTables() throws Exception {
+ testWriteWithDynamicTables(false);
+ }
+
+ public void testWriteWithDynamicTables(boolean streaming) throws Exception {
BigQueryOptions bqOptions = TestPipeline.testingPipelineOptions().as(BigQueryOptions.class);
- bqOptions.setProject("defaultProject");
+ bqOptions.setProject("defaultproject");
bqOptions.setTempLocation(testFolder.newFolder("BigQueryIOTest").getAbsolutePath());
FakeDatasetService datasetService = new FakeDatasetService();
datasetService.createDataset("project-id", "dataset-id", "", "");
FakeBigQueryServices fakeBqServices = new FakeBigQueryServices()
- .withDatasetService(datasetService);
+ .withDatasetService(datasetService)
+ .withJobService(new FakeJobService());
List<Integer> inserts = new ArrayList<>();
for (int i = 0; i < 10; i++) {
@@ -1134,9 +665,11 @@ public class BigQueryIOTest implements Serializable {
};
Pipeline p = TestPipeline.create(bqOptions);
- p.apply(Create.of(inserts))
- .setIsBoundedInternal(PCollection.IsBounded.UNBOUNDED)
- .apply(Window.<Integer>into(window))
+ PCollection<Integer> input = p.apply(Create.of(inserts));
+ if (streaming) {
+ input = input.setIsBoundedInternal(PCollection.IsBounded.UNBOUNDED);
+ }
+ input.apply(Window.<Integer>into(window))
.apply(BigQueryIO.<Integer>write()
.to(tableFunction)
.withFormatFunction(new SerializableFunction<Integer, TableRow>() {
@@ -1179,13 +712,13 @@ public class BigQueryIOTest implements Serializable {
@Test
public void testWriteUnknown() throws Exception {
BigQueryOptions bqOptions = TestPipeline.testingPipelineOptions().as(BigQueryOptions.class);
- bqOptions.setProject("defaultProject");
+ bqOptions.setProject("defaultproject");
bqOptions.setTempLocation(testFolder.newFolder("BigQueryIOTest").getAbsolutePath());
FakeBigQueryServices fakeBqServices = new FakeBigQueryServices()
- .withJobService(new FakeJobService()
- .startJobReturns("done", "done")
- .pollJobReturns(Status.FAILED, Status.UNKNOWN));
+ .withJobService(new FakeJobService());
+ // .startJobReturns("done", "done")
+ // .pollJobReturns(Status.FAILED, Status.UNKNOWN));
Pipeline p = TestPipeline.create(bqOptions);
p.apply(Create.of(
@@ -1211,13 +744,13 @@ public class BigQueryIOTest implements Serializable {
@Test
public void testWriteFailedJobs() throws Exception {
BigQueryOptions bqOptions = TestPipeline.testingPipelineOptions().as(BigQueryOptions.class);
- bqOptions.setProject("defaultProject");
+ bqOptions.setProject("defaultproject");
bqOptions.setTempLocation(testFolder.newFolder("BigQueryIOTest").getAbsolutePath());
FakeBigQueryServices fakeBqServices = new FakeBigQueryServices()
- .withJobService(new FakeJobService()
- .startJobReturns("done", "done", "done")
- .pollJobReturns(Status.FAILED, Status.FAILED, Status.FAILED));
+ .withJobService(new FakeJobService());
+ // .startJobReturns("done", "done", "done")
+ // .pollJobReturns(Status.FAILED, Status.FAILED, Status.FAILED));
Pipeline p = TestPipeline.create(bqOptions);
p.apply(Create.of(
@@ -1285,7 +818,7 @@ public class BigQueryIOTest implements Serializable {
.from("project:dataset.tableId")
.withTestServices(new FakeBigQueryServices()
.withDatasetService(mockDatasetService)
- .withJobService(mockJobService))
+ .withJobService(new FakeJobService()))
.withoutValidation();
Set<DisplayData> displayData = evaluator.displayDataForPrimitiveSourceTransforms(read);
@@ -1301,7 +834,7 @@ public class BigQueryIOTest implements Serializable {
.fromQuery("foobar")
.withTestServices(new FakeBigQueryServices()
.withDatasetService(mockDatasetService)
- .withJobService(mockJobService))
+ .withJobService(new FakeJobService()))
.withoutValidation();
Set<DisplayData> displayData = evaluator.displayDataForPrimitiveSourceTransforms(read);
@@ -1342,7 +875,7 @@ public class BigQueryIOTest implements Serializable {
.withSchema(new TableSchema().set("col1", "type1").set("col2", "type2"))
.withTestServices(new FakeBigQueryServices()
.withDatasetService(mockDatasetService)
- .withJobService(mockJobService))
+ .withJobService(new FakeJobService()))
.withoutValidation();
Set<DisplayData> displayData = evaluator.displayDataForPrimitiveTransforms(write);
@@ -1506,7 +1039,7 @@ public class BigQueryIOTest implements Serializable {
options.setProject(projectId);
FakeBigQueryServices fakeBqServices = new FakeBigQueryServices()
- .withJobService(mockJobService)
+ .withJobService(new FakeJobService())
.withDatasetService(mockDatasetService);
when(mockDatasetService.getDataset(projectId, datasetId)).thenThrow(
new RuntimeException("Unable to confirm BigQuery dataset presence"));
@@ -1674,7 +1207,7 @@ public class BigQueryIOTest implements Serializable {
@Test
public void testBigQueryTableSourceThroughJsonAPI() throws Exception {
FakeBigQueryServices fakeBqServices = new FakeBigQueryServices()
- .withJobService(mockJobService)
+ .withJobService(new FakeJobService())
.readerReturns(
toJsonString(new TableRow().set("name", "a").set("number", "1")),
toJsonString(new TableRow().set("name", "b").set("number", "2")),
@@ -1712,7 +1245,7 @@ public class BigQueryIOTest implements Serializable {
.setStatistics(jobStats);
FakeBigQueryServices fakeBqServices = new FakeBigQueryServices()
- .withJobService(mockJobService)
+ .withJobService(new FakeJobService())
.withDatasetService(mockDatasetService)
.readerReturns(
toJsonString(new TableRow().set("name", "a").set("number", "1")),
@@ -1731,8 +1264,6 @@ public class BigQueryIOTest implements Serializable {
new TableRow().set("name", "b").set("number", "2"),
new TableRow().set("name", "c").set("number", "3"));
- when(mockJobService.pollJob(Mockito.<JobReference>any(), Mockito.anyInt()))
- .thenReturn(extractJob);
PipelineOptions options = PipelineOptionsFactory.create();
options.setTempLocation("mock://tempLocation");
@@ -1752,9 +1283,6 @@ public class BigQueryIOTest implements Serializable {
assertEquals(1, sources.size());
BoundedSource<TableRow> actual = sources.get(0);
assertThat(actual, CoreMatchers.instanceOf(TransformingSource.class));
-
- Mockito.verify(mockJobService)
- .startExtractJob(Mockito.<JobReference>any(), Mockito.<JobConfigurationExtract>any());
}
@Test
@@ -1777,8 +1305,9 @@ public class BigQueryIOTest implements Serializable {
extractJob.setStatus(new JobStatus())
.setStatistics(extractJobStats);
+ FakeJobService fakeJobService = new FakeJobService();
FakeBigQueryServices fakeBqServices = new FakeBigQueryServices()
- .withJobService(mockJobService)
+ .withJobService(fakeJobService)
.withDatasetService(mockDatasetService)
.readerReturns(
toJsonString(new TableRow().set("name", "a").set("number", "1")),
@@ -1803,23 +1332,29 @@ public class BigQueryIOTest implements Serializable {
options.setTempLocation(extractDestinationDir);
TableReference queryTable = new TableReference()
- .setProjectId("testProejct")
+ .setProjectId("testproject")
.setDatasetId("testDataset")
.setTableId("testTable");
- when(mockJobService.dryRunQuery(anyString(), Mockito.<JobConfigurationQuery>any()))
- .thenReturn(new JobStatistics().setQuery(
+ // when(mockJobService.dryRunQuery(anyString(), Mockito.<JobConfigurationQuery>any()))
+ // .thenReturn(new JobStatistics().setQuery(
+ // new JobStatistics2()
+ // .setTotalBytesProcessed(100L)
+ // .setReferencedTables(ImmutableList.of(queryTable))));
+ fakeJobService.expectDryRunQuery("testproject", "query",
+ new JobStatistics().setQuery(
new JobStatistics2()
.setTotalBytesProcessed(100L)
.setReferencedTables(ImmutableList.of(queryTable))));
- when(mockDatasetService.getTable(eq(queryTable)))
- .thenReturn(new Table().setSchema(new TableSchema()));
- when(mockDatasetService.getTable(eq(destinationTable)))
- .thenReturn(new Table().setSchema(new TableSchema()));
+
+ // when(mockDatasetService.getTable(eq(queryTable)))
+ // .thenReturn(new Table().setSchema(new TableSchema()));
+ // when(mockDatasetService.getTable(eq(destinationTable)))
+ // .thenReturn(new Table().setSchema(new TableSchema()));
IOChannelUtils.setIOFactoryInternal("mock", mockIOChannelFactory, true /* override */);
when(mockIOChannelFactory.resolve(anyString(), anyString()))
.thenReturn("mock://tempLocation/output");
- when(mockJobService.pollJob(Mockito.<JobReference>any(), Mockito.anyInt()))
- .thenReturn(extractJob);
+ //when(mockJobService.pollJob(Mockito.<JobReference>any(), Mockito.anyInt()))
+ // .thenReturn(extractJob);
Assert.assertThat(
SourceTestUtils.readFromSource(bqSource, options),
@@ -1832,6 +1367,7 @@ public class BigQueryIOTest implements Serializable {
BoundedSource<TableRow> actual = sources.get(0);
assertThat(actual, CoreMatchers.instanceOf(TransformingSource.class));
+ /*
Mockito.verify(mockJobService)
.startQueryJob(
Mockito.<JobReference>any(), Mockito.<JobConfigurationQuery>any());
@@ -1843,7 +1379,7 @@ public class BigQueryIOTest implements Serializable {
ArgumentCaptor.forClass(JobConfigurationQuery.class);
Mockito.verify(mockJobService).dryRunQuery(anyString(), queryConfigArg.capture());
assertEquals(true, queryConfigArg.getValue().getFlattenResults());
- assertEquals(true, queryConfigArg.getValue().getUseLegacySql());
+ assertEquals(true, queryConfigArg.getValue().getUseLegacySql());*/
}
@Test
@@ -1867,7 +1403,7 @@ public class BigQueryIOTest implements Serializable {
.setStatistics(extractJobStats);
FakeBigQueryServices fakeBqServices = new FakeBigQueryServices()
- .withJobService(mockJobService)
+ .withJobService(new FakeJobService())
.withDatasetService(mockDatasetService)
.readerReturns(
toJsonString(new TableRow().set("name", "a").set("number", "1")),
@@ -1891,17 +1427,18 @@ public class BigQueryIOTest implements Serializable {
PipelineOptions options = PipelineOptionsFactory.create();
options.setTempLocation(extractDestinationDir);
+ /*
when(mockJobService.dryRunQuery(anyString(), Mockito.<JobConfigurationQuery>any()))
.thenReturn(new JobStatistics().setQuery(
new JobStatistics2()
.setTotalBytesProcessed(100L)));
when(mockDatasetService.getTable(eq(destinationTable)))
.thenReturn(new Table().setSchema(new TableSchema()));
- IOChannelUtils.setIOFactoryInternal("mock", mockIOChannelFactory, true /* override */);
+ IOChannelUtils.setIOFactoryInternal("mock", mockIOChannelFactory, true);
when(mockIOChannelFactory.resolve(anyString(), anyString()))
.thenReturn("mock://tempLocation/output");
when(mockJobService.pollJob(Mockito.<JobReference>any(), Mockito.anyInt()))
- .thenReturn(extractJob);
+ .thenReturn(extractJob);*/
Assert.assertThat(
SourceTestUtils.readFromSource(bqSource, options),
@@ -1914,7 +1451,8 @@ public class BigQueryIOTest implements Serializable {
BoundedSource<TableRow> actual = sources.get(0);
assertThat(actual, CoreMatchers.instanceOf(TransformingSource.class));
- Mockito.verify(mockJobService)
+ /*
+ Mockito.verify(mockJobService)
.startQueryJob(
Mockito.<JobReference>any(), Mockito.<JobConfigurationQuery>any());
Mockito.verify(mockJobService)
@@ -1925,7 +1463,7 @@ public class BigQueryIOTest implements Serializable {
ArgumentCaptor.forClass(JobConfigurationQuery.class);
Mockito.verify(mockJobService).dryRunQuery(anyString(), queryConfigArg.capture());
assertEquals(true, queryConfigArg.getValue().getFlattenResults());
- assertEquals(true, queryConfigArg.getValue().getUseLegacySql());
+ assertEquals(true, queryConfigArg.getValue().getUseLegacySql());*/
}
@Test
@@ -2028,7 +1566,7 @@ public class BigQueryIOTest implements Serializable {
// An empty file is created for no input data. One partition is needed.
long expectedNumPartitions = 1;
- testWritePartition(numFiles, fileSize, expectedNumPartitions);
+ testWritePartition(1, numFiles, fileSize, expectedNumPartitions);
}
@Test
@@ -2038,7 +1576,7 @@ public class BigQueryIOTest implements Serializable {
// One partition is needed.
long expectedNumPartitions = 1;
- testWritePartition(numFiles, fileSize, expectedNumPartitions);
+ testWritePartition(2, numFiles, fileSize, expectedNumPartitions);
}
@Test
@@ -2048,7 +1586,7 @@ public class BigQueryIOTest implements Serializable {
// One partition is needed for each group of BigQueryWrite.MAX_NUM_FILES files.
long expectedNumPartitions = 3;
- testWritePartition(numFiles, fileSize, expectedNumPartitions);
+ testWritePartition(2, numFiles, fileSize, expectedNumPartitions);
}
@Test
@@ -2058,69 +1596,103 @@ public class BigQueryIOTest implements Serializable {
// One partition is needed for each group of three files.
long expectedNumPartitions = 4;
- testWritePartition(numFiles, fileSize, expectedNumPartitions);
+ testWritePartition(2, numFiles, fileSize, expectedNumPartitions);
}
- private void testWritePartition(long numFiles, long fileSize, long expectedNumPartitions)
+ private void testWritePartition(long numTables, long numFilesPerTable, long fileSize,
+ long expectedNumPartitionsPerTable)
throws Exception {
p.enableAbandonedNodeEnforcement(false);
- List<Long> expectedPartitionIds = Lists.newArrayList();
- for (long i = 1; i <= expectedNumPartitions; ++i) {
- expectedPartitionIds.add(i);
+ List<ShardedKey<TableDestination>> expectedPartitions = Lists.newArrayList();
+ for (int i = 0; i < numTables; ++i) {
+ for (int j = 1; j <= expectedNumPartitionsPerTable; ++j) {
+ String tableName = String.format("project-id:dataset-id.tables%05d", i);
+ TableDestination destination = new TableDestination(tableName, tableName);
+ expectedPartitions.add(ShardedKey.of(destination, j));
+ }
}
- List<KV<String, Long>> files = Lists.newArrayList();
- List<String> fileNames = Lists.newArrayList();
- for (int i = 0; i < numFiles; ++i) {
- String fileName = String.format("files%05d", i);
- fileNames.add(fileName);
- files.add(KV.of(fileName, fileSize));
+ List<WriteBundlesToFiles.Result> files = Lists.newArrayList();
+ Map<TableDestination, List<String>> filenamesPerTable = Maps.newHashMap();
+ for (int i = 0; i < numTables; ++i) {
+ String tableName = String.format("project-id:dataset-id.tables%05d", i);
+ TableDestination destination = new TableDestination(tableName, tableName);
+ List<String> filenames = filenamesPerTable.get(destination);
+ if (filenames == null) {
+ filenames = Lists.newArrayList();
+ filenamesPerTable.put(destination, filenames);
+ }
+ for (int j = 0; j < numFilesPerTable; ++j) {
+ String fileName = String.format("%s_files%05d", tableName, j);
+ filenames.add(fileName);
+ files.add(new Result(fileName, fileSize, destination));
+ }
}
- TupleTag<KV<KV<TableDestination, Integer>, List<String>>> multiPartitionsTag =
- new TupleTag<KV<KV<TableDestination, Integer>, List<String>>>("multiPartitionsTag") {};
- TupleTag<KV<KV<TableDestination, Integer>, List<String>>> singlePartitionTag =
- new TupleTag<KV<KV<TableDestination, Integer>, List<String>>>("singlePartitionTag") {};
+ TupleTag<KV<ShardedKey<TableDestination>, List<String>>> multiPartitionsTag =
+ new TupleTag<KV<ShardedKey<TableDestination>, List<String>>>("multiPartitionsTag") {};
+ TupleTag<KV<ShardedKey<TableDestination>, List<String>>> singlePartitionTag =
+ new TupleTag<KV<ShardedKey<TableDestination>, List<String>>>("singlePartitionTag") {};
PCollectionView<Iterable<WriteBundlesToFiles.Result>> resultsView =
PCollectionViews.iterableView(
p,
WindowingStrategy.globalDefault(),
- KvCoder.of(StringUtf8Coder.of(), VarLongCoder.of()));
+ WriteBundlesToFiles.ResultCoder.of());
+ ValueProvider<String> singletonTable = null;
+ if (numFilesPerTable == 0 && numTables == 1) {
+ TableReference singletonReference = new TableReference()
+ .setProjectId("projectid")
+ .setDatasetId("dataset")
+ .setTableId("table");
+ singletonTable = StaticValueProvider.of(BigQueryHelpers.toJsonString(singletonReference));
+ }
WritePartition writePartition =
- new WritePartition(null, null, resultsView,
+ new WritePartition(singletonTable,
+ "singleton", resultsView,
multiPartitionsTag, singlePartitionTag);
- DoFnTester<String, KV<KV<TableDestination, Integer>, List<String>>> tester =
+ DoFnTester<String, KV<ShardedKey<TableDestination>, List<String>>> tester =
DoFnTester.of(writePartition);
tester.setSideInput(resultsView, GlobalWindow.INSTANCE, files);
tester.processElement(testFolder.newFolder("BigQueryIOTest").getAbsolutePath());
- List<KV<KV<TableDestination, Integer>, List<String>>> partitions;
- if (expectedNumPartitions > 1) {
+ List<KV<ShardedKey<TableDestination>, List<String>>> partitions;
+ if (expectedNumPartitionsPerTable > 1) {
partitions = tester.takeOutputElements(multiPartitionsTag);
} else {
partitions = tester.takeOutputElements(singlePartitionTag);
}
- List<Long> partitionIds = Lists.newArrayList();
- List<String> partitionFileNames = Lists.newArrayList();
- for (KV<Long, List<String>> partition : partitions) {
- partitionIds.add(partition.getKey());
- for (String name : partition.getValue()) {
- partitionFileNames.add(name);
+
+
+ List<ShardedKey<TableDestination>> partitionsResult = Lists.newArrayList();
+ Map<TableDestination, List<String>> filesPerTableResult = Maps.newHashMap();
+ for (KV<ShardedKey<TableDestination>, List<String>> partition : partitions) {
+ TableDestination table = partition.getKey().getKey();
+ partitionsResult.add(partition.getKey());
+ List<String> tableFilesResult = filesPerTableResult.get(table);
+ if (tableFilesResult == null) {
+ tableFilesResult = Lists.newArrayList();
+ filesPerTableResult.put(table, tableFilesResult);
}
+ tableFilesResult.addAll(partition.getValue());
}
- assertEquals(expectedPartitionIds, partitionIds);
- if (numFiles == 0) {
- assertThat(partitionFileNames, Matchers.hasSize(1));
- assertTrue(Files.exists(Paths.get(partitionFileNames.get(0))));
- assertThat(Files.readAllBytes(Paths.get(partitionFileNames.get(0))).length,
+ assertEquals(expectedPartitions.size(), partitionsResult.size());
+
+ // assertThat(partitionsResult,
+ // containsInAnyOrder(Iterables.toArray(expectedPartitions, ShardedKey.class)));
+
+ if (numFilesPerTable == 0 && numTables == 1) {
+ assertEquals(1, filesPerTableResult.size());
+ List<String> singletonFiles = filesPerTableResult.values().iterator().next();
+ assertTrue(Files.exists(Paths.get(singletonFiles.get(0))));
+ assertThat(Files.readAllBytes(Paths.get(singletonFiles.get(0))).length,
Matchers.equalTo(0));
} else {
- assertEquals(fileNames, partitionFileNames);
+ assertEquals(filenamesPerTable, filesPerTableResult);
}
}
@@ -2129,26 +1701,46 @@ public class BigQueryIOTest implements Serializable {
p.enableAbandonedNodeEnforcement(false);
FakeBigQueryServices fakeBqServices = new FakeBigQueryServices()
- .withJobService(new FakeJobService()
- .startJobReturns("done", "done", "done", "done")
- .pollJobReturns(Status.FAILED, Status.SUCCEEDED, Status.SUCCEEDED, Status.SUCCEEDED));
+ .withJobService(new FakeJobService())
+ // .startJobReturns("done", "done", "done", "done", "done", "done", "done", "done",
+ // "done", "done")
+ // .pollJobReturns(Status.FAILED, Status.SUCCEEDED, Status.SUCCEEDED, Status.SUCCEEDED,
+ // Status.SUCCEEDED, Status.SUCCEEDED, Status.SUCCEEDED, Status.SUCCEEDED,
+ // Status.SUCCEEDED, Status.SUCCEEDED))
+ .withDatasetService(mockDatasetService);
+ long numTables = 3;
long numPartitions = 3;
long numFilesPerPartition = 10;
String jobIdToken = "jobIdToken";
String tempFilePrefix = "tempFilePrefix";
- String jsonTable = "{}";
- String jsonSchema = "{}";
- List<String> expectedTempTables = Lists.newArrayList();
-
- List<KV<Long, Iterable<List<String>>>> partitions = Lists.newArrayList();
- for (long i = 0; i < numPartitions; ++i) {
- List<String> filesPerPartition = Lists.newArrayList();
- for (int j = 0; j < numFilesPerPartition; ++j) {
- filesPerPartition.add(String.format("files%05d", j));
+ Map<TableDestination, List<String>> expectedTempTables = Maps.newHashMap();
+
+ List<KV<ShardedKey<TableDestination>, Iterable<List<String>>>> partitions =
+ Lists.newArrayList();
+ for (int i = 0; i < numTables; ++i) {
+ String tableName = String.format("project-id:dataset-id.table%05d", i);
+ TableDestination tableDestination = new TableDestination(tableName, tableName);
+ for (int j = 0; j < numPartitions; ++j) {
+ String tempTableId = String.format(
+ jobIdToken + "_0x%08x_%05d", tableDestination.hashCode(), j);
+ List<String> filesPerPartition = Lists.newArrayList();
+ for (int k = 0; k < numFilesPerPartition; ++k) {
+ filesPerPartition.add(String.format("files0x%08x_%05d", tableDestination.hashCode(), k));
+ }
+ partitions.add(KV.of(ShardedKey.of(tableDestination, j),
+ (Iterable<List<String>>) Collections.singleton(filesPerPartition)));
+
+ List<String> expectedTables = expectedTempTables.get(tableDestination);
+ if (expectedTables == null) {
+ expectedTables = Lists.newArrayList();
+ expectedTempTables.put(tableDestination, expectedTables);
+ }
+ String json = String.format(
+ "{\"datasetId\":\"dataset-id\",\"projectId\":\"project-id\",\"tableId\":\"%s\"}",
+ tempTableId);
+ expectedTables.add(json);
}
- partitions.add(KV.of(i, (Iterable<List<String>>) Collections.singleton(filesPerPartition)));
- expectedTempTables.add(String.format("{\"tableId\":\"%s_%05d\"}", jobIdToken, i));
}
PCollection<String> expectedTempTablesPCollection = p.apply(Create.of(expectedTempTables));
@@ -2165,27 +1757,33 @@ public class BigQueryIOTest implements Serializable {
fakeBqServices,
jobIdTokenView,
tempFilePrefix,
- StaticValueProvider.of(jsonTable),
- StaticValueProvider.of(jsonSchema),
WriteDisposition.WRITE_EMPTY,
CreateDisposition.CREATE_IF_NEEDED,
null);
- DoFnTester<KV<Long, Iterable<List<String>>>, String> tester = DoFnTester.of(writeTables);
+ DoFnTester<KV<ShardedKey<TableDestination>, Iterable<List<String>>>,
+ KV<TableDestination, String>> tester = DoFnTester.of(writeTables);
tester.setSideInput(jobIdTokenView, GlobalWindow.INSTANCE, jobIdToken);
- for (KV<Long, Iterable<List<String>>> partition : partitions) {
+ for (KV<ShardedKey<TableDestination>, Iterable<List<String>>> partition : partitions) {
tester.processElement(partition);
}
- List<String> tempTables = tester.takeOutputElements();
-
- assertEquals(expectedTempTables, tempTables);
+ Map<TableDestination, List<String>> tempTablesResult = Maps.newHashMap();
+ for (KV<TableDestination, String> element : tester.takeOutputElements()) {
+ List<String> tables = tempTablesResult.get(element.getKey());
+ if (tables == null) {
+ tables = Lists.newArrayList();
+ tempTablesResult.put(element.getKey(), tables);
+ }
+ tables.add(element.getValue());
+ }
+ assertEquals(expectedTempTables, tempTablesResult);
}
@Test
public void testRemoveTemporaryFiles() throws Exception {
BigQueryOptions bqOptions = PipelineOptionsFactory.as(BigQueryOptions.class);
- bqOptions.setProject("defaultProject");
+ bqOptions.setProject("defaultproject");
bqOptions.setTempLocation(testFolder.newFolder("BigQueryIOTest").getAbsolutePath());
int numFiles = 10;
@@ -2195,7 +1793,7 @@ public class BigQueryIOTest implements Serializable {
for (int i = 0; i < numFiles; ++i) {
String fileName = String.format("files%05d", i);
writer.open(fileName);
- fileNames.add(writer.close().getKey());
+ fileNames.add(writer.close().filename);
}
fileNames.add(tempFilePrefix + String.format("files%05d", numFiles));
@@ -2217,23 +1815,33 @@ public class BigQueryIOTest implements Serializable {
p.enableAbandonedNodeEnforcement(false);
FakeBigQueryServices fakeBqServices = new FakeBigQueryServices()
- .withJobService(new FakeJobService()
- .startJobReturns("done", "done")
- .pollJobReturns(Status.FAILED, Status.SUCCEEDED))
+ .withJobService(new FakeJobService())
+ // .startJobReturns("done", "done")
+ // .pollJobReturns(Status.FAILED, Status.SUCCEEDED))
.withDatasetService(mockDatasetService);
- long numTempTables = 3;
+ int numFinalTables = 3;
+ int numTempTables = 3;
String jobIdToken = "jobIdToken";
String jsonTable = "{}";
- List<String> tempTables = Lists.newArrayList();
- for (long i = 0; i < numTempTables; ++i) {
- tempTables.add(String.format("{\"tableId\":\"%s_%05d\"}", jobIdToken, i));
+ Map<TableDestination, Iterable<String>> tempTables = Maps.newHashMap();
+ for (int i = 0; i < numFinalTables; ++i) {
+ String tableName = "project-id:dataset-id.table_" + i;
+ TableDestination tableDestination = new TableDestination(tableName, tableName);
+ List<String> tables = Lists.newArrayList();
+ tempTables.put(tableDestination, tables);
+ for (int j = 0; j < numTempTables; ++j) {
+ tables.add(String.format(
+ "{\"project-id:dataset-id.tableId\":\"%s_%05d_%05d\"}", jobIdToken, i, j));
+ }
}
- PCollection<String> tempTablesPCollection = p.apply(Create.of(tempTables));
- PCollectionView<Iterable<String>> tempTablesView =
- PCollectionViews.iterableView(
- tempTablesPCollection, WindowingStrategy.globalDefault(), StringUtf8Coder.of());
+ PCollectionView<Map<TableDestination, Iterable<String>>> tempTablesView =
+ PCollectionViews.multimapView(
+ p,
+ WindowingStrategy.globalDefault(),
+ KvCoder.of(TableDestinationCoder.of(), StringUtf8Coder.of()));
+
PCollection<String> jobIdTokenCollection = p.apply("CreateJobId", Create.of("jobId"));
PCollectionView<String> jobIdTokenView =
jobIdTokenCollection.apply(View.<String>asSingleton());
@@ -2241,11 +1849,9 @@ public class BigQueryIOTest implements Serializable {
WriteRename writeRename = new WriteRename(
fakeBqServices,
jobIdTokenView,
- StaticValueProvider.of(jsonTable),
WriteDisposition.WRITE_EMPTY,
CreateDisposition.CREATE_IF_NEEDED,
- tempTablesView,
- null);
+ tempTablesView);
DoFnTester<String, Void> tester = DoFnTester.of(writeRename);
tester.setSideInput(tempTablesView, GlobalWindow.INSTANCE, tempTables);
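The test injects the temp-table map directly through DoFnTester.setSideInput, while in an actual pipeline the same view would be derived from the KV<TableDestination, String> output of the WriteTables step. One plausible way to build such a view from that collection (tempTableResults is an assumed PCollection<KV<TableDestination, String>>; the test here instead constructs the view with PCollectionViews.multimapView):
  PCollectionView<Map<TableDestination, Iterable<String>>> tempTablesView =
      tempTableResults.apply(View.<TableDestination, String>asMultimap());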
http://git-wip-us.apache.org/repos/asf/beam/blob/760a9458/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/FakeBigQueryServices.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/FakeBigQueryServices.java b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/FakeBigQueryServices.java
new file mode 100644
index 0000000..ed3ab37
--- /dev/null
+++ b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/FakeBigQueryServices.java
@@ -0,0 +1,96 @@
+package org.apache.beam.sdk.io.gcp.bigquery;
+
+import static org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.fromJsonString;
+import static org.junit.Assert.assertEquals;
+
+import com.google.api.services.bigquery.model.JobConfigurationQuery;
+import com.google.api.services.bigquery.model.TableReference;
+import com.google.api.services.bigquery.model.TableRow;
+import java.io.IOException;
+import java.util.NoSuchElementException;
+import org.apache.beam.sdk.options.BigQueryOptions;
+
+
+/**
+ * A fake implementation of {@link BigQueryServices}, returning the configured fake job and
+ * dataset services and, optionally, canned reader results.
+ */
+class FakeBigQueryServices implements BigQueryServices {
+ private String[] jsonTableRowReturns = new String[0];
+ private JobService jobService;
+ private DatasetService datasetService;
+
+ public FakeBigQueryServices withJobService(JobService jobService) {
+ this.jobService = jobService;
+ return this;
+ }
+
+ public FakeBigQueryServices withDatasetService(DatasetService datasetService) {
+ this.datasetService = datasetService;
+ return this;
+ }
+
+ public FakeBigQueryServices readerReturns(String... jsonTableRowReturns) {
+ this.jsonTableRowReturns = jsonTableRowReturns;
+ return this;
+ }
+
+ @Override
+ public JobService getJobService(BigQueryOptions bqOptions) {
+ return jobService;
+ }
+
+ @Override
+ public DatasetService getDatasetService(BigQueryOptions bqOptions) {
+ return datasetService;
+ }
+
+ @Override
+ public BigQueryJsonReader getReaderFromTable(
+ BigQueryOptions bqOptions, TableReference tableRef) {
+ return new FakeBigQueryReader(jsonTableRowReturns);
+ }
+
+ @Override
+ public BigQueryJsonReader getReaderFromQuery(
+ BigQueryOptions bqOptions, String projectId, JobConfigurationQuery queryConfig) {
+ return new FakeBigQueryReader(jsonTableRowReturns);
+ }
+
+ private static class FakeBigQueryReader implements BigQueryJsonReader {
+ private static final int UNSTARTED = -1;
+ private static final int CLOSED = Integer.MAX_VALUE;
+
+ private String[] jsonTableRowReturns;
+ private int currIndex;
+
+ FakeBigQueryReader(String[] jsonTableRowReturns) {
+ this.jsonTableRowReturns = jsonTableRowReturns;
+ this.currIndex = UNSTARTED;
+ }
+
+ @Override
+ public boolean start() throws IOException {
+ assertEquals(UNSTARTED, currIndex);
+ currIndex = 0;
+ return currIndex < jsonTableRowReturns.length;
+ }
+
+ @Override
+ public boolean advance() throws IOException {
+ return ++currIndex < jsonTableRowReturns.length;
+ }
+
+ @Override
+ public TableRow getCurrent() throws NoSuchElementException {
+ if (currIndex >= jsonTableRowReturns.length) {
+ throw new NoSuchElementException();
+ }
+ return fromJsonString(jsonTableRowReturns[currIndex], TableRow.class);
+ }
+
+ @Override
+ public void close() throws IOException {
+ currIndex = CLOSED;
+ }
+ }
+}
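As the tests above illustrate, the fake is assembled with its builder-style methods and handed to the transform under test in place of the real services. A minimal sketch of the typical wiring (the concrete fakes are the classes introduced in this commit; the surrounding test method is elided):
  FakeBigQueryServices fakeBqServices = new FakeBigQueryServices()
      .withJobService(new FakeJobService())
      .withDatasetService(new FakeDatasetService());
readerReturns(...) additionally lets read-path tests hand back canned TableRow JSON through FakeBigQueryReader without involving any job or dataset state.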
[17/50] [abbrv] beam git commit: Refactor batch loads,
and add support for windowed writes.
Posted by dh...@apache.org.
http://git-wip-us.apache.org/repos/asf/beam/blob/760a9458/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/FakeDatasetService.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/FakeDatasetService.java b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/FakeDatasetService.java
new file mode 100644
index 0000000..9b2cf63
--- /dev/null
+++ b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/FakeDatasetService.java
@@ -0,0 +1,172 @@
+package org.apache.beam.sdk.io.gcp.bigquery;
+
+import static com.google.common.base.Preconditions.checkNotNull;
+import static org.junit.Assert.assertEquals;
+
+import com.google.api.services.bigquery.model.Dataset;
+import com.google.api.services.bigquery.model.Table;
+import com.google.api.services.bigquery.model.TableReference;
+import com.google.api.services.bigquery.model.TableRow;
+import com.google.common.collect.Lists;
+import java.io.IOException;
+import java.io.Serializable;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.ThreadLocalRandom;
+import javax.annotation.Nullable;
+import org.apache.beam.sdk.io.gcp.bigquery.BigQueryServices.DatasetService;
+
+/** A fake dataset service that can be serialized, for use in testReadFromTable. */
+class FakeDatasetService implements DatasetService, Serializable {
+ @Override
+ public Table getTable(TableReference tableRef)
+ throws InterruptedException, IOException {
+ synchronized (BigQueryIOTest.tables) {
+ Map<String, TableContainer> dataset =
+ checkNotNull(
+ BigQueryIOTest.tables.get(tableRef.getProjectId(), tableRef.getDatasetId()),
+ "Tried to get a dataset %s:%s from %s, but no such dataset was set",
+ tableRef.getProjectId(),
+ tableRef.getDatasetId(),
+ FakeDatasetService.class.getSimpleName());
+ TableContainer tableContainer = dataset.get(tableRef.getTableId());
+ return tableContainer == null ? null : tableContainer.getTable();
+ }
+ }
+
+ List<TableRow> getAllRows(String projectId, String datasetId, String tableId)
+ throws InterruptedException, IOException {
+ synchronized (BigQueryIOTest.tables) {
+ return getTableContainer(projectId, datasetId, tableId).getRows();
+ }
+ }
+
+ private TableContainer getTableContainer(String projectId, String datasetId, String tableId)
+ throws InterruptedException, IOException {
+ synchronized (BigQueryIOTest.tables) {
+ Map<String, TableContainer> dataset =
+ checkNotNull(
+ BigQueryIOTest.tables.get(projectId, datasetId),
+ "Tried to get a dataset %s:%s from %s, but no such dataset was set",
+ projectId,
+ datasetId,
+ FakeDatasetService.class.getSimpleName());
+ return checkNotNull(dataset.get(tableId),
+ "Tried to get a table %s:%s.%s from %s, but no such table was set",
+ projectId,
+ datasetId,
+ tableId,
+ FakeDatasetService.class.getSimpleName());
+ }
+ }
+
+ @Override
+ public void deleteTable(TableReference tableRef) throws IOException, InterruptedException {
+ throw new UnsupportedOperationException("Unsupported");
+ }
+
+
+ @Override
+ public void createTable(Table table) throws IOException {
+ TableReference tableReference = table.getTableReference();
+ synchronized (BigQueryIOTest.tables) {
+ Map<String, TableContainer> dataset =
+ checkNotNull(
+ BigQueryIOTest.tables.get(tableReference.getProjectId(),
+ tableReference.getDatasetId()),
+ "Tried to get a dataset %s:%s from %s, but no such table was set",
+ tableReference.getProjectId(),
+ tableReference.getDatasetId(),
+ FakeDatasetService.class.getSimpleName());
+ TableContainer tableContainer = dataset.get(tableReference.getTableId());
+ if (tableContainer == null) {
+ tableContainer = new TableContainer(table);
+ dataset.put(tableReference.getTableId(), tableContainer);
+ }
+ }
+ }
+
+ @Override
+ public boolean isTableEmpty(TableReference tableRef)
+ throws IOException, InterruptedException {
+ Long numBytes = getTable(tableRef).getNumBytes();
+ return numBytes == null || numBytes == 0L;
+ }
+
+ @Override
+ public Dataset getDataset(
+ String projectId, String datasetId) throws IOException, InterruptedException {
+ throw new UnsupportedOperationException("Unsupported");
+ }
+
+ @Override
+ public void createDataset(
+ String projectId, String datasetId, String location, String description)
+ throws IOException, InterruptedException {
+ synchronized (BigQueryIOTest.tables) {
+ Map<String, TableContainer> dataset = BigQueryIOTest.tables.get(projectId, datasetId);
+ if (dataset == null) {
+ dataset = new HashMap<>();
+ BigQueryIOTest.tables.put(projectId, datasetId, dataset);
+ }
+ }
+ }
+
+ @Override
+ public void deleteDataset(String projectId, String datasetId)
+ throws IOException, InterruptedException {
+ throw new UnsupportedOperationException("Unsupported");
+ }
+
+ @Override
+ public long insertAll(
+ TableReference ref, List<TableRow> rowList, @Nullable List<String> insertIdList)
+ throws IOException, InterruptedException {
+ synchronized (BigQueryIOTest.tables) {
+ if (insertIdList != null) {
+ assertEquals(rowList.size(), insertIdList.size());
+ } else {
+ insertIdList = Lists.newArrayListWithExpectedSize(rowList.size());
+ for (int i = 0; i < rowList.size(); ++i) {
+ insertIdList.add(Integer.toString(ThreadLocalRandom.current().nextInt()));
+ }
+ }
+
+ long dataSize = 0;
+ TableContainer tableContainer = getTableContainer(
+ ref.getProjectId(), ref.getDatasetId(), ref.getTableId());
+ for (int i = 0; i < rowList.size(); ++i) {
+ tableContainer.addRow(rowList.get(i), insertIdList.get(i));
+ dataSize += rowList.get(i).toString().length();
+ }
+ return dataSize;
+ }
+ }
+
+ @Override
+ public Table patchTableDescription(TableReference tableReference,
+ @Nullable String tableDescription)
+ throws IOException, InterruptedException {
+ synchronized (BigQueryIOTest.tables) {
+ Map<String, TableContainer> dataset =
+ checkNotNull(
+ BigQueryIOTest.tables.get(tableReference.getProjectId(),
+ tableReference.getDatasetId()),
+ "Tried to get a dataset %s:%s from %s, but no such dataset was set",
+ tableReference.getProjectId(),
+ tableReference.getDatasetId(),
+ FakeDatasetService.class.getSimpleName());
+ TableContainer tableContainer = checkNotNull(dataset.get(tableReference.getTableId()),
+ "Tried to patch a table %s:%s.%s from %s, but no such table was set",
+ tableReference.getProjectId(),
+ tableReference.getDatasetId(),
+ tableReference.getTableId(),
+ FakeDatasetService.class.getSimpleName());
+ tableContainer.getTable().setDescription(tableDescription);
+ return tableContainer.getTable();
+ }
+ }
+}
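Because the fake keeps its state in the shared BigQueryIOTest.tables table, tests can seed and inspect data directly through the service. A rough sketch, assuming it runs inside the org.apache.beam.sdk.io.gcp.bigquery test package and that the enclosing test method declares the checked exceptions:
  FakeDatasetService datasetService = new FakeDatasetService();
  datasetService.createDataset("project-id", "dataset-id", "", "");
  TableReference ref = new TableReference()
      .setProjectId("project-id").setDatasetId("dataset-id").setTableId("table-id");
  datasetService.createTable(new Table().setTableReference(ref));
  // insertAll generates insert ids itself when the third argument is null.
  datasetService.insertAll(ref, Lists.newArrayList(new TableRow().set("name", "a")), null);
  assertEquals(1, datasetService.getAllRows("project-id", "dataset-id", "table-id").size());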
http://git-wip-us.apache.org/repos/asf/beam/blob/760a9458/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/FakeJobService.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/FakeJobService.java b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/FakeJobService.java
new file mode 100644
index 0000000..3c67c3d
--- /dev/null
+++ b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/FakeJobService.java
@@ -0,0 +1,273 @@
+package org.apache.beam.sdk.io.gcp.bigquery;
+
+import static com.google.common.base.Preconditions.checkArgument;
+import static com.google.common.base.Preconditions.checkState;
+
+import com.google.api.client.json.JsonFactory;
+import com.google.api.client.util.BackOff;
+import com.google.api.client.util.BackOffUtils;
+import com.google.api.client.util.Sleeper;
+import com.google.api.services.bigquery.model.Job;
+import com.google.api.services.bigquery.model.JobConfiguration;
+import com.google.api.services.bigquery.model.JobConfigurationExtract;
+import com.google.api.services.bigquery.model.JobConfigurationLoad;
+import com.google.api.services.bigquery.model.JobConfigurationQuery;
+import com.google.api.services.bigquery.model.JobConfigurationTableCopy;
+import com.google.api.services.bigquery.model.JobReference;
+import com.google.api.services.bigquery.model.JobStatistics;
+import com.google.api.services.bigquery.model.JobStatistics4;
+import com.google.api.services.bigquery.model.JobStatus;
+import com.google.api.services.bigquery.model.Table;
+import com.google.api.services.bigquery.model.TableReference;
+import com.google.api.services.bigquery.model.TableRow;
+import com.google.api.services.bigquery.model.TableSchema;
+import com.google.common.collect.HashBasedTable;
+import com.google.common.collect.Lists;
+
+import java.io.BufferedReader;
+import java.io.ByteArrayInputStream;
+import java.io.FileReader;
+import java.io.IOException;
+import java.io.Serializable;
+import java.nio.charset.StandardCharsets;
+import java.util.List;
+
+import org.apache.beam.sdk.coders.Coder;
+import org.apache.beam.sdk.coders.Coder.Context;
+import org.apache.beam.sdk.coders.TableRowJsonCoder;
+import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.CreateDisposition;
+import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.WriteDisposition;
+import org.apache.beam.sdk.io.gcp.bigquery.BigQueryServices.JobService;
+import org.apache.beam.sdk.util.FluentBackoff;
+
+import org.apache.beam.sdk.util.Transport;
+import org.joda.time.Duration;
+
+/**
+ * A fake implementation of {@link JobService} that records started jobs and, once a job has
+ * transitioned to DONE, executes load, copy, and extract jobs against {@link FakeDatasetService}.
+ */
+class FakeJobService implements JobService, Serializable {
+ static final JsonFactory JSON_FACTORY = Transport.getJsonFactory();
+
+ // Whenever a job is started, the first 5 calls to GetJob will report the job as pending,
+ // the next 5 will return the job as running, and only then will the job report as done.
+ private static final int GET_JOBS_TRANSITION_INTERVAL = 5;
+
+ private FakeDatasetService datasetService;
+
+ private static class JobInfo {
+ Job job;
+ int getJobCount = 0;
+
+ JobInfo(Job job) {
+ this.job = job;
+ }
+ }
+
+ private static final com.google.common.collect.Table<String, String, JobInfo> allJobs =
+ HashBasedTable.create();
+
+ private static final com.google.common.collect.Table<String, String, JobStatistics>
+ dryRunQueryResults = HashBasedTable.create();
+
+ FakeJobService() {
+ this.datasetService = new FakeDatasetService();
+ }
+
+ @Override
+ public void startLoadJob(JobReference jobRef, JobConfigurationLoad loadConfig)
+ throws InterruptedException, IOException {
+ synchronized (allJobs) {
+ Job job = new Job();
+ job.setJobReference(jobRef);
+ job.setConfiguration(new JobConfiguration().setLoad(loadConfig));
+ job.setKind(" bigquery#job");
+ job.setStatus(new JobStatus().setState("PENDING"));
+ allJobs.put(jobRef.getProjectId(), jobRef.getJobId(), new JobInfo(job));
+ }
+ }
+
+ @Override
+ public void startExtractJob(JobReference jobRef, JobConfigurationExtract extractConfig)
+ throws InterruptedException, IOException {
+ checkArgument(extractConfig.getDestinationFormat().equals("AVRO"),
+ "Only extract to AVRO is supported");
+ checkArgument(extractConfig.getDestinationUris().size() == 1,
+ "Must specify exactly one destination URI.");
+ synchronized (allJobs) {
+ Job job = new Job();
+ job.setJobReference(jobRef);
+ job.setConfiguration(new JobConfiguration().setExtract(extractConfig));
+ job.setKind(" bigquery#job");
+ job.setStatus(new JobStatus().setState("PENDING"));
+ allJobs.put(jobRef.getProjectId(), jobRef.getJobId(), new JobInfo(job));
+ }
+ }
+
+ @Override
+ public void startQueryJob(JobReference jobRef, JobConfigurationQuery query)
+ throws IOException, InterruptedException {
+ }
+
+ @Override
+ public void startCopyJob(JobReference jobRef, JobConfigurationTableCopy copyConfig)
+ throws IOException, InterruptedException {
+ synchronized (allJobs) {
+ Job job = new Job();
+ job.setJobReference(jobRef);
+ job.setConfiguration(new JobConfiguration().setCopy(copyConfig));
+ job.setKind(" bigquery#job");
+ job.setStatus(new JobStatus().setState("PENDING"));
+ allJobs.put(jobRef.getProjectId(), jobRef.getJobId(), new JobInfo(job));
+ }
+ }
+
+ @Override
+ public Job pollJob(JobReference jobRef, int maxAttempts)
+ throws InterruptedException {
+ BackOff backoff =
+ FluentBackoff.DEFAULT
+ .withMaxRetries(maxAttempts)
+ .withInitialBackoff(Duration.millis(50))
+ .withMaxBackoff(Duration.standardMinutes(1))
+ .backoff();
+ Sleeper sleeper = Sleeper.DEFAULT;
+ try {
+ do {
+ Job job = getJob(jobRef);
+ if (job != null) {
+ JobStatus status = job.getStatus();
+ if (status != null && status.getState() != null && status.getState().equals("DONE")) {
+ return job;
+ }
+ }
+ } while (BackOffUtils.next(sleeper, backoff));
+ } catch (IOException e) {
+ return null;
+ }
+ return null;
+ }
+
+ public void expectDryRunQuery(String projectId, String query, JobStatistics result) {
+ synchronized (dryRunQueryResults) {
+ dryRunQueryResults.put(projectId, query, result);
+ }
+ }
+
+ @Override
+ public JobStatistics dryRunQuery(String projectId, JobConfigurationQuery query)
+ throws InterruptedException, IOException {
+ synchronized (dryRunQueryResults) {
+ JobStatistics result = dryRunQueryResults.get(projectId, query.getQuery());
+ if (result != null) {
+ return result;
+ }
+ }
+ throw new UnsupportedOperationException();
+ }
+
+ @Override
+ public Job getJob(JobReference jobRef) throws InterruptedException {
+ try {
+ synchronized (allJobs) {
+ JobInfo job = allJobs.get(jobRef.getProjectId(), jobRef.getJobId());
+ if (job == null) {
+ return null;
+ }
+ ++job.getJobCount;
+ if (job.getJobCount == GET_JOBS_TRANSITION_INTERVAL + 1) {
+ job.job.getStatus().setState("RUNNING");
+ } else if (job.getJobCount == 2 * GET_JOBS_TRANSITION_INTERVAL + 1) {
+ runJob(job.job);
+ job.job.getStatus().setState("DONE");
+ }
+ return JSON_FACTORY.fromString(JSON_FACTORY.toString(job.job), Job.class);
+ }
+ } catch (IOException e) {
+ return null;
+ }
+ }
+
+ private void runJob(Job job) throws InterruptedException, IOException {
+ if (job.getConfiguration().getLoad() != null) {
+ runLoadJob(job.getConfiguration().getLoad());
+ } else if (job.getConfiguration().getCopy() != null) {
+ runCopyJob(job.getConfiguration().getCopy());
+ } else if (job.getConfiguration().getExtract() != null) {
+ runExtractJob(job, job.getConfiguration().getExtract());
+ }
+ }
+
+ private void validateDispositions(Table table, CreateDisposition createDisposition,
+ WriteDisposition writeDisposition)
+ throws InterruptedException, IOException {
+ if (table == null) {
+ checkState(createDisposition != CreateDisposition.CREATE_NEVER,
+ "CreateDisposition == CREATE_NEVER but the table doesn't exist.");
+ } else if (writeDisposition == WriteDisposition.WRITE_TRUNCATE) {
+ datasetService.deleteTable(table.getTableReference());
+ } else if (writeDisposition == WriteDisposition.WRITE_EMPTY) {
+ List<TableRow> allRows = datasetService.getAllRows(table.getTableReference().getProjectId(),
+ table.getTableReference().getDatasetId(), table.getTableReference().getTableId());
+ checkState(allRows.isEmpty(), "Write disposition was set to WRITE_EMPTY,"
+ + " but the table was not empty.");
+ }
+ }
+ private void runLoadJob(JobConfigurationLoad load)
+ throws InterruptedException, IOException {
+ TableReference destination = load.getDestinationTable();
+ TableSchema schema = load.getSchema();
+ List<String> sourceFiles = load.getSourceUris();
+ WriteDisposition writeDisposition = WriteDisposition.valueOf(load.getWriteDisposition());
+ CreateDisposition createDisposition = CreateDisposition.valueOf(load.getCreateDisposition());
+ checkArgument(load.getSourceFormat().equals("NEWLINE_DELIMITED_JSON"));
+ Table existingTable = datasetService.getTable(destination);
+ validateDispositions(existingTable, createDisposition, writeDisposition);
+
+ datasetService.createTable(new Table().setTableReference(destination).setSchema(schema));
+
+ List<TableRow> rows = Lists.newArrayList();
+ for (String filename : sourceFiles) {
+ rows.addAll(readRows(filename));
+ }
+ datasetService.insertAll(destination, rows, null);
+ }
+
+ private void runCopyJob(JobConfigurationTableCopy copy)
+ throws InterruptedException, IOException {
+ List<TableReference> sources = copy.getSourceTables();
+ TableReference destination = copy.getDestinationTable();
+ WriteDisposition writeDisposition = WriteDisposition.valueOf(copy.getWriteDisposition());
+ CreateDisposition createDisposition = CreateDisposition.valueOf(copy.getCreateDisposition());
+ Table existingTable = datasetService.getTable(destination);
+ validateDispositions(existingTable, createDisposition, writeDisposition);
+
+ List<TableRow> allRows = Lists.newArrayList();
+ for (TableReference source : sources) {
+ allRows.addAll(datasetService.getAllRows(
+ source.getProjectId(), source.getDatasetId(), source.getTableId()));
+ }
+ datasetService.insertAll(destination, allRows, null);
+ }
+
+ private void runExtractJob(Job job, JobConfigurationExtract extract) {
+ TableReference sourceTable = extract.getSourceTable();
+ extract.getDestinationUris().get(0);
+ List<Long> destinationFileCounts = Lists.newArrayList(0L);
+ job.setStatistics(new JobStatistics().setExtract(
+ new JobStatistics4().setDestinationUriFileCounts(destinationFileCounts)));
+ }
+
+ private List<TableRow> readRows(String filename) throws IOException {
+ Coder<TableRow> coder = TableRowJsonCoder.of();
+ List<TableRow> tableRows = Lists.newArrayList();
+ try (BufferedReader reader = new BufferedReader(new FileReader(filename))) {
+ String line;
+ while ((line = reader.readLine()) != null) {
+ TableRow tableRow = coder.decode(
+ new ByteArrayInputStream(line.getBytes(StandardCharsets.UTF_8)), Context.OUTER);
+ tableRows.add(tableRow);
+ }
+ }
+ return tableRows;
+ }
+}
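The GET_JOBS_TRANSITION_INTERVAL logic above means a started job reports PENDING for the first five getJob calls, RUNNING for the next five, and DONE after that, at which point runJob applies the job's side effects to FakeDatasetService. A hedged sketch of driving it from a test (loadConfig stands in for a JobConfigurationLoad the test would build; exception handling elided):
  FakeJobService jobService = new FakeJobService();
  JobReference jobRef = new JobReference().setProjectId("project-id").setJobId("job-id");
  jobService.startLoadJob(jobRef, loadConfig);
  // pollJob keeps calling getJob with backoff, so by the time it returns the fake has moved
  // the job through PENDING -> RUNNING -> DONE and executed the load against the dataset fake.
  Job job = jobService.pollJob(jobRef, 30);
  assertEquals("DONE", job.getStatus().getState());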
http://git-wip-us.apache.org/repos/asf/beam/blob/760a9458/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/TableContainer.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/TableContainer.java b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/TableContainer.java
new file mode 100644
index 0000000..b2fc170
--- /dev/null
+++ b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/TableContainer.java
@@ -0,0 +1,36 @@
+package org.apache.beam.sdk.io.gcp.bigquery;
+
+import com.google.api.services.bigquery.model.Table;
+import com.google.api.services.bigquery.model.TableRow;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * An in-memory stand-in for a BigQuery table, pairing the {@link Table} metadata with the rows
+ * and insert ids accumulated by {@link FakeDatasetService}.
+ */
+class TableContainer {
+ Table table;
+ List<TableRow> rows;
+ List<String> ids;
+
+ TableContainer(Table table) {
+ this.table = table;
+ this.rows = new ArrayList<>();
+ this.ids = new ArrayList<>();
+ }
+
+ TableContainer addRow(TableRow row, String id) {
+ rows.add(row);
+ ids.add(id);
+ return this;
+ }
+
+ Table getTable() {
+ return table;
+ }
+
+ List<TableRow> getRows() {
+ return rows;
+ }
+}
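TableContainer is just the in-memory pairing of a Table with the rows and insert ids that FakeDatasetService.insertAll accumulates, for example:
  TableContainer container = new TableContainer(new Table().setTableReference(ref));
  container.addRow(new TableRow().set("name", "a"), "insert-id-0");
  List<TableRow> rows = container.getRows();  // a single row: {name=a}
(Here ref is an assumed TableReference built by the test.)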
[11/50] [abbrv] beam git commit: Use tableRefFunction throughout
BigQueryIO. Constant table writes use ConstantTableSpecFunction.
Posted by dh...@apache.org.
Use tableRefFunction throughout BigQueryIO. Constant table writes use ConstantTableSpecFunction.
Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/c939a436
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/c939a436
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/c939a436
Branch: refs/heads/DSL_SQL
Commit: c939a43617cdb37228625a34b3545377b142fc8a
Parents: e0df7d8
Author: Reuven Lax <re...@google.com>
Authored: Tue Mar 28 11:21:59 2017 -0700
Committer: Eugene Kirpichov <ki...@google.com>
Committed: Tue Apr 18 21:12:49 2017 -0700
----------------------------------------------------------------------
.../beam/sdk/io/gcp/bigquery/BigQueryIO.java | 57 ++++++++++----------
.../sdk/io/gcp/bigquery/StreamWithDeDup.java | 4 +-
.../gcp/bigquery/TagWithUniqueIdsAndTable.java | 57 ++++++--------------
.../sdk/io/gcp/bigquery/BigQueryIOTest.java | 19 ++-----
4 files changed, 50 insertions(+), 87 deletions(-)
----------------------------------------------------------------------
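In effect, after this change every write goes through a table function, and a constant to(tableSpec) is simply wrapped in ConstantTableSpecFunction. A sketch of the per-element form of the API introduced here, with an illustrative routing scheme (events is an assumed PCollection<TableRow>, and the "type" field, table names, and schema are assumptions of the example):
  events.apply(BigQueryIO.writeTableRows()
      .to(new SerializableFunction<ValueInSingleWindow<TableRow>, String>() {
        @Override
        public String apply(ValueInSingleWindow<TableRow> input) {
          // Route each element to a table named after one of its fields.
          return "project-id:dataset-id.events_" + input.getValue().get("type");
        }
      })
      .withSchema(schema)
      .withWriteDisposition(WriteDisposition.WRITE_APPEND));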
http://git-wip-us.apache.org/repos/asf/beam/blob/c939a436/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.java
index 9753da5..af0d561 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIO.java
@@ -700,7 +700,8 @@ public class BigQueryIO {
abstract Builder<T> setJsonTableRef(ValueProvider<String> jsonTableRef);
abstract Builder<T> setTableRefFunction(
SerializableFunction<ValueInSingleWindow<T>, TableReference> tableRefFunction);
- abstract Builder<T> setFormatFunction(SerializableFunction<T, TableRow> formatFunction);
+ abstract Builder<T> setFormatFunction(
+ SerializableFunction<T, TableRow> formatFunction);
abstract Builder<T> setJsonSchema(ValueProvider<String> jsonSchema);
abstract Builder<T> setCreateDisposition(CreateDisposition createDisposition);
abstract Builder<T> setWriteDisposition(WriteDisposition writeDisposition);
@@ -781,7 +782,8 @@ public class BigQueryIO {
/** Ensures that methods of the to() family are called at most once. */
private void ensureToNotCalledYet() {
checkState(
- getJsonTableRef() == null && getTable() == null, "to() already called");
+ getJsonTableRef() == null && getTable() == null
+ && getTableRefFunction() == null, "to() already called");
}
/**
@@ -805,6 +807,8 @@ public class BigQueryIO {
NestedValueProvider.of(
NestedValueProvider.of(tableSpec, new TableSpecToTableRef()),
new TableRefToJson()))
+ .setTableRefFunction(new TranslateTableSpecFunction<T>(
+ new ConstantTableSpecFunction<T>(tableSpec)))
.build();
}
@@ -812,7 +816,8 @@ public class BigQueryIO {
* Writes to table specified by the specified table function. The table is a function of
* {@link ValueInSingleWindow}, so can be determined by the value or by the window.
*/
- public Write<T> to(SerializableFunction<ValueInSingleWindow<T>, String> tableSpecFunction) {
+ public Write<T> to(
+ SerializableFunction<ValueInSingleWindow<T>, String> tableSpecFunction) {
return toTableReference(new TranslateTableSpecFunction<T>(tableSpecFunction));
}
@@ -848,6 +853,20 @@ public class BigQueryIO {
}
}
+ static class ConstantTableSpecFunction<T> implements
+ SerializableFunction<ValueInSingleWindow<T>, String> {
+ private ValueProvider<String> tableSpec;
+
+ ConstantTableSpecFunction(ValueProvider<String> tableSpec) {
+ this.tableSpec = tableSpec;
+ }
+
+ @Override
+ public String apply(ValueInSingleWindow<T> value) {
+ return tableSpec.get();
+ }
+ }
+
/**
* Uses the specified schema for rows to be written.
*
@@ -900,13 +919,8 @@ public class BigQueryIO {
BigQueryOptions options = input.getPipeline().getOptions().as(BigQueryOptions.class);
// Exactly one of the table and table reference can be configured.
- checkState(
- getJsonTableRef() != null || getTableRefFunction() != null,
+ checkState(getTableRefFunction() != null,
"must set the table reference of a BigQueryIO.Write transform");
- checkState(
- getJsonTableRef() == null || getTableRefFunction() == null,
- "Cannot set both a table reference and a table function for a BigQueryIO.Write"
- + " transform");
checkArgument(getFormatFunction() != null,
"A function must be provided to convert type into a TableRow. "
@@ -920,6 +934,7 @@ public class BigQueryIO {
// The user specified a table.
if (getJsonTableRef() != null && getValidate()) {
TableReference table = getTableWithDefaultProject(options).get();
+ // TODO: This seems wrong - what if the ValueProvider is not accessible?
DatasetService datasetService = getBigQueryServices().getDatasetService(options);
// Check for destination table presence and emptiness for early failure notification.
@@ -935,24 +950,12 @@ public class BigQueryIO {
}
}
- if (input.isBounded() == PCollection.IsBounded.UNBOUNDED || getTableRefFunction() != null) {
+ if (input.isBounded() == PCollection.IsBounded.UNBOUNDED) {
// We will use BigQuery's streaming write API -- validate supported dispositions.
- if (getTableRefFunction() != null) {
- checkArgument(
- getCreateDisposition() != CreateDisposition.CREATE_NEVER,
- "CreateDisposition.CREATE_NEVER is not supported when using a tablespec"
- + " function.");
- }
- if (getJsonSchema() == null) {
- checkArgument(
- getCreateDisposition() == CreateDisposition.CREATE_NEVER,
- "CreateDisposition.CREATE_NEVER must be used if jsonSchema is null.");
- }
-
checkArgument(
getWriteDisposition() != WriteDisposition.WRITE_TRUNCATE,
- "WriteDisposition.WRITE_TRUNCATE is not supported for an unbounded PCollection or"
- + " when using a tablespec function.");
+ "WriteDisposition.WRITE_TRUNCATE is not supported for an unbounded"
+ + " PCollection.");
} else {
// We will use a BigQuery load job -- validate the temp location.
String tempLocation = options.getTempLocation();
@@ -977,7 +980,7 @@ public class BigQueryIO {
public WriteResult expand(PCollection<T> input) {
// When writing an Unbounded PCollection, or when a tablespec function is defined, we use
// StreamWithDeDup and BigQuery's streaming import API.
- if (input.isBounded() == IsBounded.UNBOUNDED || getTableRefFunction() != null) {
+ if (input.isBounded() == IsBounded.UNBOUNDED) {
return input.apply(new StreamWithDeDup<T>(this));
} else {
return input.apply(new BatchLoadBigQuery<T>(this));
@@ -1026,12 +1029,12 @@ public class BigQueryIO {
*
* <p>If the table's project is not specified, use the executing project.
*/
- @Nullable ValueProvider<TableReference> getTableWithDefaultProject(
- BigQueryOptions bqOptions) {
+ @Nullable ValueProvider<TableReference> getTableWithDefaultProject(BigQueryOptions bqOptions) {
ValueProvider<TableReference> table = getTable();
if (table == null) {
return table;
}
+
if (!table.isAccessible()) {
LOG.info("Using a dynamic value for table input. This must contain a project"
+ " in the table reference: {}", table);
http://git-wip-us.apache.org/repos/asf/beam/blob/c939a436/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/StreamWithDeDup.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/StreamWithDeDup.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/StreamWithDeDup.java
index 1fa26d1..506a564 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/StreamWithDeDup.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/StreamWithDeDup.java
@@ -64,8 +64,7 @@ class StreamWithDeDup<T> extends PTransform<PCollection<T>, WriteResult> {
PCollection<KV<ShardedKey<String>, TableRowInfo>> tagged =
input.apply(ParDo.of(new TagWithUniqueIdsAndTable<T>(
- input.getPipeline().getOptions().as(BigQueryOptions.class), write.getTable(),
- write.getTableRefFunction(), write.getFormatFunction())));
+ input.getPipeline().getOptions().as(BigQueryOptions.class), write)));
// To prevent having the same TableRow processed more than once with regenerated
// different unique ids, this implementation relies on "checkpointing", which is
@@ -85,6 +84,7 @@ class StreamWithDeDup<T> extends PTransform<PCollection<T>, WriteResult> {
write.getCreateDisposition(),
write.getTableDescription(),
write.getBigQueryServices())));
+
return WriteResult.in(input.getPipeline());
}
}
http://git-wip-us.apache.org/repos/asf/beam/blob/c939a436/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/TagWithUniqueIdsAndTable.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/TagWithUniqueIdsAndTable.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/TagWithUniqueIdsAndTable.java
index a6608e4..8d7d1e6 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/TagWithUniqueIdsAndTable.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/TagWithUniqueIdsAndTable.java
@@ -18,23 +18,18 @@
package org.apache.beam.sdk.io.gcp.bigquery;
-import static com.google.common.base.Preconditions.checkArgument;
-
import com.google.api.services.bigquery.model.TableReference;
-import com.google.api.services.bigquery.model.TableRow;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Strings;
import java.io.IOException;
import java.util.UUID;
import java.util.concurrent.ThreadLocalRandom;
-import org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.JsonTableRefToTableRef;
import org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.TableRefToTableSpec;
+import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write;
import org.apache.beam.sdk.options.BigQueryOptions;
import org.apache.beam.sdk.options.ValueProvider;
import org.apache.beam.sdk.options.ValueProvider.NestedValueProvider;
-import org.apache.beam.sdk.options.ValueProvider.StaticValueProvider;
import org.apache.beam.sdk.transforms.DoFn;
-import org.apache.beam.sdk.transforms.SerializableFunction;
import org.apache.beam.sdk.transforms.display.DisplayData;
import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
import org.apache.beam.sdk.values.KV;
@@ -49,39 +44,22 @@ import org.apache.beam.sdk.values.ValueInSingleWindow;
@VisibleForTesting
class TagWithUniqueIdsAndTable<T>
extends DoFn<T, KV<ShardedKey<String>, TableRowInfo>> {
- /** TableSpec to write to. */
- private final ValueProvider<String> tableSpec;
-
- /** User function mapping windowed values to {@link TableReference} in JSON. */
- private final SerializableFunction<ValueInSingleWindow<T>, TableReference> tableRefFunction;
+ /** TableSpec to write to in the case of a single static destination. */
+ private ValueProvider<String> tableSpec = null;
- /** User function mapping user type to a TableRow. */
- private final SerializableFunction<T, TableRow> formatFunction;
+ private final Write<T, ?> write;
private transient String randomUUID;
private transient long sequenceNo = 0L;
TagWithUniqueIdsAndTable(BigQueryOptions options,
- ValueProvider<TableReference> table,
- SerializableFunction<ValueInSingleWindow<T>, TableReference>
- tableRefFunction,
- SerializableFunction<T, TableRow> formatFunction) {
- checkArgument(table == null ^ tableRefFunction == null,
- "Exactly one of table or tableRefFunction should be set");
+ Write<T, ?> write) {
+ ValueProvider<TableReference> table = write.getTableWithDefaultProject(
+ options.as(BigQueryOptions.class));
if (table != null) {
- if (table.isAccessible() && Strings.isNullOrEmpty(table.get().getProjectId())) {
- TableReference tableRef = table.get()
- .setProjectId(options.as(BigQueryOptions.class).getProject());
- table = NestedValueProvider.of(
- StaticValueProvider.of(BigQueryHelpers.toJsonString(tableRef)),
- new JsonTableRefToTableRef());
- }
this.tableSpec = NestedValueProvider.of(table, new TableRefToTableSpec());
- } else {
- tableSpec = null;
}
- this.tableRefFunction = tableRefFunction;
- this.formatFunction = formatFunction;
+ this.write = write;
}
@@ -101,7 +79,7 @@ class TagWithUniqueIdsAndTable<T>
// We output on keys 0-50 to ensure that there's enough batching for
// BigQuery.
context.output(KV.of(ShardedKey.of(tableSpec, randomGenerator.nextInt(0, 50)),
- new TableRowInfo(formatFunction.apply(context.element()), uniqueId)));
+ new TableRowInfo(write.getFormatFunction().apply(context.element()), uniqueId)));
}
@Override
@@ -109,10 +87,8 @@ class TagWithUniqueIdsAndTable<T>
super.populateDisplayData(builder);
builder.addIfNotNull(DisplayData.item("table", tableSpec));
- if (tableRefFunction != null) {
- builder.add(DisplayData.item("tableFn", tableRefFunction.getClass())
+ builder.add(DisplayData.item("tableFn", write.getTableRefFunction().getClass())
.withLabel("Table Reference Function"));
- }
}
@VisibleForTesting
@@ -120,16 +96,13 @@ class TagWithUniqueIdsAndTable<T>
return tableSpec;
}
+
private String tableSpecFromWindowedValue(BigQueryOptions options,
ValueInSingleWindow<T> value) {
- if (tableSpec != null) {
- return tableSpec.get();
- } else {
- TableReference table = tableRefFunction.apply(value);
- if (table.getProjectId() == null) {
- table.setProjectId(options.getProject());
- }
- return BigQueryHelpers.toTableSpec(table);
+ TableReference table = write.getTableRefFunction().apply(value);
+ if (Strings.isNullOrEmpty(table.getProjectId())) {
+ table.setProjectId(options.getProject());
}
+ return BigQueryHelpers.toTableSpec(table);
}
}
http://git-wip-us.apache.org/repos/asf/beam/blob/c939a436/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOTest.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOTest.java b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOTest.java
index 83fd8d9..499aa74 100644
--- a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOTest.java
+++ b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/bigquery/BigQueryIOTest.java
@@ -26,7 +26,6 @@ import static org.apache.beam.sdk.transforms.display.DisplayDataMatchers.hasDisp
import static org.hamcrest.Matchers.containsInAnyOrder;
import static org.hamcrest.Matchers.hasItem;
import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertThat;
import static org.junit.Assert.assertTrue;
@@ -103,7 +102,6 @@ import org.apache.beam.sdk.io.CountingInput;
import org.apache.beam.sdk.io.CountingSource;
import org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.JsonSchemaToTableSchema;
import org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.Status;
-import org.apache.beam.sdk.io.gcp.bigquery.BigQueryHelpers.TableSpecToTableRef;
import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.CreateDisposition;
import org.apache.beam.sdk.io.gcp.bigquery.BigQueryIO.Write.WriteDisposition;
import org.apache.beam.sdk.io.gcp.bigquery.BigQueryServices.DatasetService;
@@ -150,6 +148,7 @@ import org.apache.beam.sdk.util.WindowingStrategy;
import org.apache.beam.sdk.values.KV;
import org.apache.beam.sdk.values.PCollection;
import org.apache.beam.sdk.values.PCollectionView;
+import org.apache.beam.sdk.values.PDone;
import org.apache.beam.sdk.values.TupleTag;
import org.apache.beam.sdk.values.TypeDescriptor;
import org.apache.beam.sdk.values.ValueInSingleWindow;
@@ -1375,7 +1374,8 @@ public class BigQueryIOTest implements Serializable {
@Test
public void testBuildWriteDefaultProject() {
- BigQueryIO.Write<TableRow> write = BigQueryIO.writeTableRows().to("somedataset.sometable");
+ BigQueryIO.Write<TableRow> write = BigQueryIO.writeTableRows()
+ .to("somedataset" + ".sometable");
checkWriteObject(
write, null, "somedataset", "sometable",
null, CreateDisposition.CREATE_IF_NEEDED, WriteDisposition.WRITE_EMPTY,
@@ -2350,19 +2350,6 @@ public class BigQueryIOTest implements Serializable {
DisplayData.from(write);
}
- @Test
- public void testTagWithUniqueIdsAndTableProjectNotNullWithNvp() {
- BigQueryOptions bqOptions = PipelineOptionsFactory.as(BigQueryOptions.class);
- bqOptions.setProject("project");
- TagWithUniqueIdsAndTable<TableRow> tag =
- new TagWithUniqueIdsAndTable<TableRow>(
- bqOptions, NestedValueProvider.of(
- StaticValueProvider.of("data_set.table_name"),
- new TableSpecToTableRef()), null, null);
- TableReference table = BigQueryHelpers.parseTableSpec(tag.getTableSpec().get());
- assertNotNull(table.getProjectId());
- }
-
private static void testNumFiles(File tempDir, int expectedNumFiles) {
assertEquals(expectedNumFiles, tempDir.listFiles(new FileFilter() {
@Override
[38/50] [abbrv] beam git commit: [BEAM-1994] Remove Flink examples
package
Posted by dh...@apache.org.
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/FlinkRunnerRegistrar.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/FlinkRunnerRegistrar.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/FlinkRunnerRegistrar.java
deleted file mode 100644
index 681459a..0000000
--- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/FlinkRunnerRegistrar.java
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.beam.runners.flink;
-
-import com.google.auto.service.AutoService;
-import com.google.common.collect.ImmutableList;
-import org.apache.beam.sdk.options.PipelineOptions;
-import org.apache.beam.sdk.options.PipelineOptionsRegistrar;
-import org.apache.beam.sdk.runners.PipelineRunner;
-import org.apache.beam.sdk.runners.PipelineRunnerRegistrar;
-
-
-/**
- * AutoService registrar - will register FlinkRunner and FlinkOptions
- * as possible pipeline runner services.
- *
- * <p>It ends up in META-INF/services and gets picked up by Beam.
- *
- */
-public class FlinkRunnerRegistrar {
- private FlinkRunnerRegistrar() { }
-
- /**
- * Pipeline runner registrar.
- */
- @AutoService(PipelineRunnerRegistrar.class)
- public static class Runner implements PipelineRunnerRegistrar {
- @Override
- public Iterable<Class<? extends PipelineRunner<?>>> getPipelineRunners() {
- return ImmutableList.<Class<? extends PipelineRunner<?>>>of(
- FlinkRunner.class,
- TestFlinkRunner.class);
- }
- }
-
- /**
- * Pipeline options registrar.
- */
- @AutoService(PipelineOptionsRegistrar.class)
- public static class Options implements PipelineOptionsRegistrar {
- @Override
- public Iterable<Class<? extends PipelineOptions>> getPipelineOptions() {
- return ImmutableList.<Class<? extends PipelineOptions>>of(FlinkPipelineOptions.class);
- }
- }
-}
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/FlinkRunnerResult.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/FlinkRunnerResult.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/FlinkRunnerResult.java
deleted file mode 100644
index 0682b56..0000000
--- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/FlinkRunnerResult.java
+++ /dev/null
@@ -1,98 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.beam.runners.flink;
-
-import java.io.IOException;
-import java.util.Collections;
-import java.util.Map;
-import org.apache.beam.sdk.AggregatorRetrievalException;
-import org.apache.beam.sdk.AggregatorValues;
-import org.apache.beam.sdk.PipelineResult;
-import org.apache.beam.sdk.metrics.MetricResults;
-import org.apache.beam.sdk.transforms.Aggregator;
-import org.joda.time.Duration;
-
-/**
- * Result of executing a {@link org.apache.beam.sdk.Pipeline} with Flink. This
- * has methods to query to job runtime and the final values of
- * {@link org.apache.beam.sdk.transforms.Aggregator}s.
- */
-public class FlinkRunnerResult implements PipelineResult {
-
- private final Map<String, Object> aggregators;
-
- private final long runtime;
-
- FlinkRunnerResult(Map<String, Object> aggregators, long runtime) {
- this.aggregators = (aggregators == null || aggregators.isEmpty())
- ? Collections.<String, Object>emptyMap()
- : Collections.unmodifiableMap(aggregators);
- this.runtime = runtime;
- }
-
- @Override
- public State getState() {
- return State.DONE;
- }
-
- @Override
- public <T> AggregatorValues<T> getAggregatorValues(final Aggregator<?, T> aggregator)
- throws AggregatorRetrievalException {
- // TODO provide a list of all accumulator step values
- Object value = aggregators.get(aggregator.getName());
- if (value != null) {
- return new AggregatorValues<T>() {
- @Override
- public Map<String, T> getValuesAtSteps() {
- return (Map<String, T>) aggregators;
- }
- };
- } else {
- throw new AggregatorRetrievalException("Accumulator results not found.",
- new RuntimeException("Accumulator does not exist."));
- }
- }
-
- @Override
- public String toString() {
- return "FlinkRunnerResult{"
- + "aggregators=" + aggregators
- + ", runtime=" + runtime
- + '}';
- }
-
- @Override
- public State cancel() throws IOException {
- throw new UnsupportedOperationException("FlinkRunnerResult does not support cancel.");
- }
-
- @Override
- public State waitUntilFinish() {
- return State.DONE;
- }
-
- @Override
- public State waitUntilFinish(Duration duration) {
- return State.DONE;
- }
-
- @Override
- public MetricResults metrics() {
- throw new UnsupportedOperationException("The FlinkRunner does not currently support metrics.");
- }
-}
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/FlinkStreamingPipelineTranslator.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/FlinkStreamingPipelineTranslator.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/FlinkStreamingPipelineTranslator.java
deleted file mode 100644
index 0459ef7..0000000
--- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/FlinkStreamingPipelineTranslator.java
+++ /dev/null
@@ -1,276 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.beam.runners.flink;
-
-import com.google.common.collect.ImmutableList;
-import java.util.List;
-import java.util.Map;
-import org.apache.beam.runners.core.SplittableParDo;
-import org.apache.beam.runners.core.construction.PTransformMatchers;
-import org.apache.beam.runners.core.construction.PTransformReplacements;
-import org.apache.beam.runners.core.construction.ReplacementOutputs;
-import org.apache.beam.runners.core.construction.SingleInputOutputOverrideFactory;
-import org.apache.beam.sdk.Pipeline;
-import org.apache.beam.sdk.options.PipelineOptions;
-import org.apache.beam.sdk.runners.PTransformOverride;
-import org.apache.beam.sdk.runners.PTransformOverrideFactory;
-import org.apache.beam.sdk.runners.TransformHierarchy;
-import org.apache.beam.sdk.transforms.AppliedPTransform;
-import org.apache.beam.sdk.transforms.Combine;
-import org.apache.beam.sdk.transforms.PTransform;
-import org.apache.beam.sdk.transforms.ParDo.MultiOutput;
-import org.apache.beam.sdk.transforms.View;
-import org.apache.beam.sdk.util.InstanceBuilder;
-import org.apache.beam.sdk.values.PCollection;
-import org.apache.beam.sdk.values.PCollectionTuple;
-import org.apache.beam.sdk.values.PValue;
-import org.apache.beam.sdk.values.TupleTag;
-import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-/**
- * This is a {@link FlinkPipelineTranslator} for streaming jobs. Its role is to translate
- * the user-provided {@link org.apache.beam.sdk.values.PCollection}-based job into a
- * {@link org.apache.flink.streaming.api.datastream.DataStream} one.
- *
- */
-class FlinkStreamingPipelineTranslator extends FlinkPipelineTranslator {
-
- private static final Logger LOG = LoggerFactory.getLogger(FlinkStreamingPipelineTranslator.class);
-
- /** The necessary context in the case of a streaming job. */
- private final FlinkStreamingTranslationContext streamingContext;
-
- private int depth = 0;
-
- private FlinkRunner flinkRunner;
-
- public FlinkStreamingPipelineTranslator(
- FlinkRunner flinkRunner,
- StreamExecutionEnvironment env,
- PipelineOptions options) {
- this.streamingContext = new FlinkStreamingTranslationContext(env, options);
- this.flinkRunner = flinkRunner;
- }
-
- @Override
- public void translate(Pipeline pipeline) {
- List<PTransformOverride> transformOverrides =
- ImmutableList.<PTransformOverride>builder()
- .add(
- PTransformOverride.of(
- PTransformMatchers.splittableParDoMulti(),
- new SplittableParDoOverrideFactory()))
- .add(
- PTransformOverride.of(
- PTransformMatchers.classEqualTo(View.AsIterable.class),
- new ReflectiveOneToOneOverrideFactory(
- FlinkStreamingViewOverrides.StreamingViewAsIterable.class, flinkRunner)))
- .add(
- PTransformOverride.of(
- PTransformMatchers.classEqualTo(View.AsList.class),
- new ReflectiveOneToOneOverrideFactory(
- FlinkStreamingViewOverrides.StreamingViewAsList.class, flinkRunner)))
- .add(
- PTransformOverride.of(
- PTransformMatchers.classEqualTo(View.AsMap.class),
- new ReflectiveOneToOneOverrideFactory(
- FlinkStreamingViewOverrides.StreamingViewAsMap.class, flinkRunner)))
- .add(
- PTransformOverride.of(
- PTransformMatchers.classEqualTo(View.AsMultimap.class),
- new ReflectiveOneToOneOverrideFactory(
- FlinkStreamingViewOverrides.StreamingViewAsMultimap.class, flinkRunner)))
- .add(
- PTransformOverride.of(
- PTransformMatchers.classEqualTo(View.AsSingleton.class),
- new ReflectiveOneToOneOverrideFactory(
- FlinkStreamingViewOverrides.StreamingViewAsSingleton.class, flinkRunner)))
- // this has to be last since the ViewAsSingleton override
- // can expand to a Combine.GloballyAsSingletonView
- .add(
- PTransformOverride.of(
- PTransformMatchers.classEqualTo(Combine.GloballyAsSingletonView.class),
- new ReflectiveOneToOneOverrideFactory(
- FlinkStreamingViewOverrides.StreamingCombineGloballyAsSingletonView.class,
- flinkRunner)))
- .build();
-
- pipeline.replaceAll(transformOverrides);
- super.translate(pipeline);
- }
-
- // --------------------------------------------------------------------------------------------
- // Pipeline Visitor Methods
- // --------------------------------------------------------------------------------------------
-
- @Override
- public CompositeBehavior enterCompositeTransform(TransformHierarchy.Node node) {
- LOG.info("{} enterCompositeTransform- {}", genSpaces(this.depth), node.getFullName());
- this.depth++;
-
- PTransform<?, ?> transform = node.getTransform();
- if (transform != null) {
- StreamTransformTranslator<?> translator =
- FlinkStreamingTransformTranslators.getTranslator(transform);
-
- if (translator != null && applyCanTranslate(transform, node, translator)) {
- applyStreamingTransform(transform, node, translator);
- LOG.info("{} translated- {}", genSpaces(this.depth), node.getFullName());
- return CompositeBehavior.DO_NOT_ENTER_TRANSFORM;
- }
- }
- return CompositeBehavior.ENTER_TRANSFORM;
- }
-
- @Override
- public void leaveCompositeTransform(TransformHierarchy.Node node) {
- this.depth--;
- LOG.info("{} leaveCompositeTransform- {}", genSpaces(this.depth), node.getFullName());
- }
-
- @Override
- public void visitPrimitiveTransform(TransformHierarchy.Node node) {
- LOG.info("{} visitPrimitiveTransform- {}", genSpaces(this.depth), node.getFullName());
- // get the transformation corresponding to the node we are
- // currently visiting and translate it into its Flink alternative.
-
- PTransform<?, ?> transform = node.getTransform();
- StreamTransformTranslator<?> translator =
- FlinkStreamingTransformTranslators.getTranslator(transform);
-
- if (translator == null || !applyCanTranslate(transform, node, translator)) {
- LOG.info(node.getTransform().getClass().toString());
- throw new UnsupportedOperationException(
- "The transform " + transform + " is currently not supported.");
- }
- applyStreamingTransform(transform, node, translator);
- }
-
- @Override
- public void visitValue(PValue value, TransformHierarchy.Node producer) {
- // do nothing here
- }
-
- private <T extends PTransform<?, ?>> void applyStreamingTransform(
- PTransform<?, ?> transform,
- TransformHierarchy.Node node,
- StreamTransformTranslator<?> translator) {
-
- @SuppressWarnings("unchecked")
- T typedTransform = (T) transform;
-
- @SuppressWarnings("unchecked")
- StreamTransformTranslator<T> typedTranslator = (StreamTransformTranslator<T>) translator;
-
- // create the applied PTransform on the streamingContext
- streamingContext.setCurrentTransform(node.toAppliedPTransform());
- typedTranslator.translateNode(typedTransform, streamingContext);
- }
-
- private <T extends PTransform<?, ?>> boolean applyCanTranslate(
- PTransform<?, ?> transform,
- TransformHierarchy.Node node,
- StreamTransformTranslator<?> translator) {
-
- @SuppressWarnings("unchecked")
- T typedTransform = (T) transform;
-
- @SuppressWarnings("unchecked")
- StreamTransformTranslator<T> typedTranslator = (StreamTransformTranslator<T>) translator;
-
- streamingContext.setCurrentTransform(node.toAppliedPTransform());
-
- return typedTranslator.canTranslate(typedTransform, streamingContext);
- }
-
- /**
- * The base class that every Flink translator of a Beam operator should extend.
- * It is used for <b>streaming</b> jobs. For examples of such translators see
- * {@link FlinkStreamingTransformTranslators}.
- */
- abstract static class StreamTransformTranslator<T extends PTransform> {
-
- /**
- * Translate the given transform.
- */
- abstract void translateNode(T transform, FlinkStreamingTranslationContext context);
-
- /**
- * Returns true iff this translator can translate the given transform.
- */
- boolean canTranslate(T transform, FlinkStreamingTranslationContext context) {
- return true;
- }
- }
-
- private static class ReflectiveOneToOneOverrideFactory<
- InputT, OutputT, TransformT extends PTransform<PCollection<InputT>, PCollection<OutputT>>>
- extends SingleInputOutputOverrideFactory<
- PCollection<InputT>, PCollection<OutputT>, TransformT> {
- private final Class<PTransform<PCollection<InputT>, PCollection<OutputT>>> replacement;
- private final FlinkRunner runner;
-
- private ReflectiveOneToOneOverrideFactory(
- Class<PTransform<PCollection<InputT>, PCollection<OutputT>>> replacement,
- FlinkRunner runner) {
- this.replacement = replacement;
- this.runner = runner;
- }
-
- @Override
- public PTransformReplacement<PCollection<InputT>, PCollection<OutputT>> getReplacementTransform(
- AppliedPTransform<PCollection<InputT>, PCollection<OutputT>, TransformT> transform) {
- return PTransformReplacement.of(
- PTransformReplacements.getSingletonMainInput(transform),
- InstanceBuilder.ofType(replacement)
- .withArg(FlinkRunner.class, runner)
- .withArg(
- (Class<PTransform<PCollection<InputT>, PCollection<OutputT>>>)
- transform.getTransform().getClass(),
- transform.getTransform())
- .build());
- }
- }
-
- /**
- * A {@link PTransformOverrideFactory} that overrides a <a
- * href="https://s.apache.org/splittable-do-fn">Splittable DoFn</a> with {@link SplittableParDo}.
- */
- static class SplittableParDoOverrideFactory<InputT, OutputT>
- implements PTransformOverrideFactory<
- PCollection<InputT>, PCollectionTuple, MultiOutput<InputT, OutputT>> {
- @Override
- public PTransformReplacement<PCollection<InputT>, PCollectionTuple>
- getReplacementTransform(
- AppliedPTransform<
- PCollection<InputT>, PCollectionTuple, MultiOutput<InputT, OutputT>>
- transform) {
- return PTransformReplacement.of(
- PTransformReplacements.getSingletonMainInput(transform),
- new SplittableParDo<>(transform.getTransform()));
- }
-
- @Override
- public Map<PValue, ReplacementOutput> mapOutputs(
- Map<TupleTag<?>, PValue> outputs, PCollectionTuple newOutput) {
- return ReplacementOutputs.tagged(outputs, newOutput);
- }
- }
-}
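A rough sketch, not part of this commit, of what a translator extending the StreamTransformTranslator base class above might look like; MyTransform is a hypothetical PTransform used only for illustration.

// Hypothetical translator shape; MyTransform and the body are illustrative only.
static class MyTransformTranslator
    extends FlinkStreamingPipelineTranslator.StreamTransformTranslator<MyTransform> {

  @Override
  void translateNode(MyTransform transform, FlinkStreamingTranslationContext context) {
    // resolve the Flink DataStream that was registered for the transform's input ...
    DataStream<WindowedValue<String>> input =
        context.getInputDataStream(context.getInput(transform));
    // ... apply the equivalent Flink operation here, then register the output stream
    context.setOutputDataStream(context.getOutput(transform), input);
  }

  @Override
  boolean canTranslate(MyTransform transform, FlinkStreamingTranslationContext context) {
    return true; // this sketch claims to handle every instance of MyTransform
  }
}

Such a translator would additionally be registered in the TRANSLATORS map of FlinkStreamingTransformTranslators (deleted below) so that getTranslator() can find it.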
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/FlinkStreamingTransformTranslators.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/FlinkStreamingTransformTranslators.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/FlinkStreamingTransformTranslators.java
deleted file mode 100644
index 123d5e7..0000000
--- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/FlinkStreamingTransformTranslators.java
+++ /dev/null
@@ -1,1044 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.beam.runners.flink;
-
-import static com.google.common.base.Preconditions.checkArgument;
-
-import com.google.common.collect.Lists;
-import com.google.common.collect.Maps;
-import java.nio.ByteBuffer;
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.Map.Entry;
-import org.apache.beam.runners.core.ElementAndRestriction;
-import org.apache.beam.runners.core.KeyedWorkItem;
-import org.apache.beam.runners.core.SplittableParDo;
-import org.apache.beam.runners.core.SystemReduceFn;
-import org.apache.beam.runners.flink.translation.functions.FlinkAssignWindows;
-import org.apache.beam.runners.flink.translation.types.CoderTypeInformation;
-import org.apache.beam.runners.flink.translation.wrappers.streaming.DoFnOperator;
-import org.apache.beam.runners.flink.translation.wrappers.streaming.KvToByteBufferKeySelector;
-import org.apache.beam.runners.flink.translation.wrappers.streaming.SingletonKeyedWorkItem;
-import org.apache.beam.runners.flink.translation.wrappers.streaming.SingletonKeyedWorkItemCoder;
-import org.apache.beam.runners.flink.translation.wrappers.streaming.SplittableDoFnOperator;
-import org.apache.beam.runners.flink.translation.wrappers.streaming.WindowDoFnOperator;
-import org.apache.beam.runners.flink.translation.wrappers.streaming.WorkItemKeySelector;
-import org.apache.beam.runners.flink.translation.wrappers.streaming.io.BoundedSourceWrapper;
-import org.apache.beam.runners.flink.translation.wrappers.streaming.io.UnboundedSourceWrapper;
-import org.apache.beam.sdk.coders.Coder;
-import org.apache.beam.sdk.coders.KvCoder;
-import org.apache.beam.sdk.coders.StringUtf8Coder;
-import org.apache.beam.sdk.coders.VoidCoder;
-import org.apache.beam.sdk.io.Read;
-import org.apache.beam.sdk.io.TextIO;
-import org.apache.beam.sdk.transforms.Combine;
-import org.apache.beam.sdk.transforms.DoFn;
-import org.apache.beam.sdk.transforms.Flatten;
-import org.apache.beam.sdk.transforms.GroupByKey;
-import org.apache.beam.sdk.transforms.PTransform;
-import org.apache.beam.sdk.transforms.ParDo;
-import org.apache.beam.sdk.transforms.join.RawUnionValue;
-import org.apache.beam.sdk.transforms.join.UnionCoder;
-import org.apache.beam.sdk.transforms.reflect.DoFnSignature;
-import org.apache.beam.sdk.transforms.reflect.DoFnSignatures;
-import org.apache.beam.sdk.transforms.splittabledofn.RestrictionTracker;
-import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
-import org.apache.beam.sdk.transforms.windowing.GlobalWindow;
-import org.apache.beam.sdk.transforms.windowing.Window;
-import org.apache.beam.sdk.transforms.windowing.WindowFn;
-import org.apache.beam.sdk.util.AppliedCombineFn;
-import org.apache.beam.sdk.util.Reshuffle;
-import org.apache.beam.sdk.util.WindowedValue;
-import org.apache.beam.sdk.util.WindowingStrategy;
-import org.apache.beam.sdk.values.KV;
-import org.apache.beam.sdk.values.PCollection;
-import org.apache.beam.sdk.values.PCollectionView;
-import org.apache.beam.sdk.values.PValue;
-import org.apache.beam.sdk.values.TupleTag;
-import org.apache.flink.api.common.functions.FlatMapFunction;
-import org.apache.flink.api.common.functions.MapFunction;
-import org.apache.flink.api.common.functions.RichFlatMapFunction;
-import org.apache.flink.api.common.typeinfo.TypeInformation;
-import org.apache.flink.api.java.tuple.Tuple2;
-import org.apache.flink.core.fs.FileSystem;
-import org.apache.flink.streaming.api.collector.selector.OutputSelector;
-import org.apache.flink.streaming.api.datastream.DataStream;
-import org.apache.flink.streaming.api.datastream.DataStreamSink;
-import org.apache.flink.streaming.api.datastream.DataStreamSource;
-import org.apache.flink.streaming.api.datastream.KeyedStream;
-import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
-import org.apache.flink.streaming.api.datastream.SplitStream;
-import org.apache.flink.streaming.api.operators.OneInputStreamOperator;
-import org.apache.flink.streaming.api.operators.TwoInputStreamOperator;
-import org.apache.flink.streaming.api.transformations.TwoInputTransformation;
-import org.apache.flink.util.Collector;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-/**
- * This class contains all the mappings between Beam and Flink
- * <b>streaming</b> transformations. The {@link FlinkStreamingPipelineTranslator}
- * traverses the Beam job and comes here to translate the encountered Beam transformations
- * into their Flink counterparts, based on the mappings available in this class.
- */
-class FlinkStreamingTransformTranslators {
-
- // --------------------------------------------------------------------------------------------
- // Transform Translator Registry
- // --------------------------------------------------------------------------------------------
-
- @SuppressWarnings("rawtypes")
- private static final Map<
- Class<? extends PTransform>,
- FlinkStreamingPipelineTranslator.StreamTransformTranslator> TRANSLATORS = new HashMap<>();
-
- // here you can find all the available translators.
- static {
- TRANSLATORS.put(Read.Bounded.class, new BoundedReadSourceTranslator());
- TRANSLATORS.put(Read.Unbounded.class, new UnboundedReadSourceTranslator());
- TRANSLATORS.put(TextIO.Write.Bound.class, new TextIOWriteBoundStreamingTranslator());
-
- TRANSLATORS.put(ParDo.MultiOutput.class, new ParDoStreamingTranslator());
- TRANSLATORS.put(
- SplittableParDo.ProcessElements.class, new SplittableProcessElementsStreamingTranslator());
- TRANSLATORS.put(
- SplittableParDo.GBKIntoKeyedWorkItems.class, new GBKIntoKeyedWorkItemsTranslator());
-
-
- TRANSLATORS.put(Window.Assign.class, new WindowAssignTranslator());
- TRANSLATORS.put(Flatten.PCollections.class, new FlattenPCollectionTranslator());
- TRANSLATORS.put(
- FlinkStreamingViewOverrides.CreateFlinkPCollectionView.class,
- new CreateViewStreamingTranslator());
-
- TRANSLATORS.put(Reshuffle.class, new ReshuffleTranslatorStreaming());
- TRANSLATORS.put(GroupByKey.class, new GroupByKeyTranslator());
- TRANSLATORS.put(Combine.PerKey.class, new CombinePerKeyTranslator());
- }
-
- public static FlinkStreamingPipelineTranslator.StreamTransformTranslator<?> getTranslator(
- PTransform<?, ?> transform) {
- return TRANSLATORS.get(transform.getClass());
- }
-
- // --------------------------------------------------------------------------------------------
- // Transformation Implementations
- // --------------------------------------------------------------------------------------------
-
- private static class TextIOWriteBoundStreamingTranslator
- extends FlinkStreamingPipelineTranslator.StreamTransformTranslator<TextIO.Write.Bound> {
-
- private static final Logger LOG =
- LoggerFactory.getLogger(TextIOWriteBoundStreamingTranslator.class);
-
- @Override
- public void translateNode(
- TextIO.Write.Bound transform,
- FlinkStreamingTranslationContext context) {
- PValue input = context.getInput(transform);
- DataStream<WindowedValue<String>> inputDataStream = context.getInputDataStream(input);
-
- String filenamePrefix = transform.getFilenamePrefix();
- String filenameSuffix = transform.getFilenameSuffix();
- boolean needsValidation = transform.needsValidation();
- int numShards = transform.getNumShards();
- String shardNameTemplate = transform.getShardNameTemplate();
-
- // TODO: Implement these. We need Flink support for this.
- LOG.warn(
- "Translation of TextIO.Write.needsValidation not yet supported. Is: {}.",
- needsValidation);
- LOG.warn(
- "Translation of TextIO.Write.filenameSuffix not yet supported. Is: {}.",
- filenameSuffix);
- LOG.warn(
- "Translation of TextIO.Write.shardNameTemplate not yet supported. Is: {}.",
- shardNameTemplate);
-
- DataStream<String> dataSink = inputDataStream
- .flatMap(new FlatMapFunction<WindowedValue<String>, String>() {
- @Override
- public void flatMap(
- WindowedValue<String> value,
- Collector<String> out)
- throws Exception {
- out.collect(value.getValue());
- }
- });
- DataStreamSink<String> output =
- dataSink.writeAsText(filenamePrefix, FileSystem.WriteMode.OVERWRITE);
-
- if (numShards > 0) {
- output.setParallelism(numShards);
- }
- }
- }
-
- private static class UnboundedReadSourceTranslator<T>
- extends FlinkStreamingPipelineTranslator.StreamTransformTranslator<Read.Unbounded<T>> {
-
- @Override
- public void translateNode(
- Read.Unbounded<T> transform,
- FlinkStreamingTranslationContext context) {
- PCollection<T> output = context.getOutput(transform);
-
- TypeInformation<WindowedValue<T>> outputTypeInfo =
- context.getTypeInfo(context.getOutput(transform));
-
- DataStream<WindowedValue<T>> source;
- try {
- UnboundedSourceWrapper<T, ?> sourceWrapper =
- new UnboundedSourceWrapper<>(
- context.getPipelineOptions(),
- transform.getSource(),
- context.getExecutionEnvironment().getParallelism());
- source = context
- .getExecutionEnvironment()
- .addSource(sourceWrapper).name(transform.getName()).returns(outputTypeInfo);
- } catch (Exception e) {
- throw new RuntimeException(
- "Error while translating UnboundedSource: " + transform.getSource(), e);
- }
-
- context.setOutputDataStream(output, source);
- }
- }
-
- private static class BoundedReadSourceTranslator<T>
- extends FlinkStreamingPipelineTranslator.StreamTransformTranslator<Read.Bounded<T>> {
-
- @Override
- public void translateNode(
- Read.Bounded<T> transform,
- FlinkStreamingTranslationContext context) {
- PCollection<T> output = context.getOutput(transform);
-
- TypeInformation<WindowedValue<T>> outputTypeInfo =
- context.getTypeInfo(context.getOutput(transform));
-
-
- DataStream<WindowedValue<T>> source;
- try {
- BoundedSourceWrapper<T> sourceWrapper =
- new BoundedSourceWrapper<>(
- context.getPipelineOptions(),
- transform.getSource(),
- context.getExecutionEnvironment().getParallelism());
- source = context
- .getExecutionEnvironment()
- .addSource(sourceWrapper).name(transform.getName()).returns(outputTypeInfo);
- } catch (Exception e) {
- throw new RuntimeException(
- "Error while translating BoundedSource: " + transform.getSource(), e);
- }
-
- context.setOutputDataStream(output, source);
- }
- }
-
- /**
- * Wraps each element in a {@link RawUnionValue} with the given tag id.
- */
- private static class ToRawUnion<T> implements MapFunction<T, RawUnionValue> {
- private final int intTag;
-
- public ToRawUnion(int intTag) {
- this.intTag = intTag;
- }
-
- @Override
- public RawUnionValue map(T o) throws Exception {
- return new RawUnionValue(intTag, o);
- }
- }
-
- private static Tuple2<Map<Integer, PCollectionView<?>>, DataStream<RawUnionValue>>
- transformSideInputs(
- Collection<PCollectionView<?>> sideInputs,
- FlinkStreamingTranslationContext context) {
-
- // collect all side inputs
- Map<TupleTag<?>, Integer> tagToIntMapping = new HashMap<>();
- Map<Integer, PCollectionView<?>> intToViewMapping = new HashMap<>();
- int count = 0;
- for (PCollectionView<?> sideInput: sideInputs) {
- TupleTag<?> tag = sideInput.getTagInternal();
- intToViewMapping.put(count, sideInput);
- tagToIntMapping.put(tag, count);
- count++;
- Coder<Iterable<WindowedValue<?>>> coder = sideInput.getCoderInternal();
- }
-
-
- List<Coder<?>> inputCoders = new ArrayList<>();
- for (PCollectionView<?> sideInput: sideInputs) {
- DataStream<Object> sideInputStream = context.getInputDataStream(sideInput);
- TypeInformation<Object> tpe = sideInputStream.getType();
- if (!(tpe instanceof CoderTypeInformation)) {
- throw new IllegalStateException(
- "Input Stream TypeInformation is no CoderTypeInformation.");
- }
-
- Coder<?> coder = ((CoderTypeInformation) tpe).getCoder();
- inputCoders.add(coder);
- }
-
- UnionCoder unionCoder = UnionCoder.of(inputCoders);
-
- CoderTypeInformation<RawUnionValue> unionTypeInformation =
- new CoderTypeInformation<>(unionCoder);
-
- // transform each side input to RawUnionValue and union them
- DataStream<RawUnionValue> sideInputUnion = null;
-
- for (PCollectionView<?> sideInput: sideInputs) {
- TupleTag<?> tag = sideInput.getTagInternal();
- final int intTag = tagToIntMapping.get(tag);
- DataStream<Object> sideInputStream = context.getInputDataStream(sideInput);
- DataStream<RawUnionValue> unionValueStream =
- sideInputStream.map(new ToRawUnion<>(intTag)).returns(unionTypeInformation);
-
- if (sideInputUnion == null) {
- sideInputUnion = unionValueStream;
- } else {
- sideInputUnion = sideInputUnion.union(unionValueStream);
- }
- }
-
- if (sideInputUnion == null) {
- throw new IllegalStateException("No unioned side inputs, this indicates a bug.");
- }
-
- return new Tuple2<>(intToViewMapping, sideInputUnion);
- }
-
- /**
- * Helper for translating {@link ParDo.MultiOutput} and {@link SplittableParDo.ProcessElements}.
- */
- static class ParDoTranslationHelper {
-
- interface DoFnOperatorFactory<InputT, OutputT> {
- DoFnOperator<InputT, OutputT, RawUnionValue> createDoFnOperator(
- DoFn<InputT, OutputT> doFn,
- List<PCollectionView<?>> sideInputs,
- TupleTag<OutputT> mainOutputTag,
- List<TupleTag<?>> additionalOutputTags,
- FlinkStreamingTranslationContext context,
- WindowingStrategy<?, ?> windowingStrategy,
- Map<TupleTag<?>, Integer> tagsToLabels,
- Coder<WindowedValue<InputT>> inputCoder,
- Coder keyCoder,
- Map<Integer, PCollectionView<?>> transformedSideInputs);
- }
-
- static <InputT, OutputT> void translateParDo(
- String transformName,
- DoFn<InputT, OutputT> doFn,
- PCollection<InputT> input,
- List<PCollectionView<?>> sideInputs,
- Map<TupleTag<?>, PValue> outputs,
- TupleTag<OutputT> mainOutputTag,
- List<TupleTag<?>> additionalOutputTags,
- FlinkStreamingTranslationContext context,
- DoFnOperatorFactory<InputT, OutputT> doFnOperatorFactory) {
-
- // we assume that the transformation does not change the windowing strategy.
- WindowingStrategy<?, ?> windowingStrategy = input.getWindowingStrategy();
-
- Map<TupleTag<?>, Integer> tagsToLabels =
- transformTupleTagsToLabels(mainOutputTag, outputs);
-
- SingleOutputStreamOperator<RawUnionValue> unionOutputStream;
-
- Coder<WindowedValue<InputT>> inputCoder = context.getCoder(input);
-
- DataStream<WindowedValue<InputT>> inputDataStream = context.getInputDataStream(input);
-
- Coder keyCoder = null;
- boolean stateful = false;
- DoFnSignature signature = DoFnSignatures.getSignature(doFn.getClass());
- if (signature.stateDeclarations().size() > 0
- || signature.timerDeclarations().size() > 0) {
- // Based on the fact that the signature is stateful, DoFnSignatures ensures
- // that it is also keyed
- keyCoder = ((KvCoder) input.getCoder()).getKeyCoder();
- inputDataStream = inputDataStream.keyBy(new KvToByteBufferKeySelector(keyCoder));
- stateful = true;
- } else if (doFn instanceof SplittableParDo.ProcessFn) {
- // we know that it is keyed on String
- keyCoder = StringUtf8Coder.of();
- stateful = true;
- }
-
- if (sideInputs.isEmpty()) {
- DoFnOperator<InputT, OutputT, RawUnionValue> doFnOperator =
- doFnOperatorFactory.createDoFnOperator(
- doFn,
- sideInputs,
- mainOutputTag,
- additionalOutputTags,
- context,
- windowingStrategy,
- tagsToLabels,
- inputCoder,
- keyCoder,
- new HashMap<Integer, PCollectionView<?>>() /* side-input mapping */);
-
- UnionCoder outputUnionCoder = createUnionCoder(outputs);
-
- CoderTypeInformation<RawUnionValue> outputUnionTypeInformation =
- new CoderTypeInformation<>(outputUnionCoder);
-
- unionOutputStream = inputDataStream
- .transform(transformName, outputUnionTypeInformation, doFnOperator);
-
- } else {
- Tuple2<Map<Integer, PCollectionView<?>>, DataStream<RawUnionValue>> transformedSideInputs =
- transformSideInputs(sideInputs, context);
-
- DoFnOperator<InputT, OutputT, RawUnionValue> doFnOperator =
- doFnOperatorFactory.createDoFnOperator(
- doFn,
- sideInputs,
- mainOutputTag,
- additionalOutputTags,
- context,
- windowingStrategy,
- tagsToLabels,
- inputCoder,
- keyCoder,
- transformedSideInputs.f0);
-
- UnionCoder outputUnionCoder = createUnionCoder(outputs);
-
- CoderTypeInformation<RawUnionValue> outputUnionTypeInformation =
- new CoderTypeInformation<>(outputUnionCoder);
-
- if (stateful) {
- // we have to manually construct the two-input transform because, normally,
- // we're not allowed to have only one of the two inputs keyed.
- KeyedStream keyedStream = (KeyedStream<?, InputT>) inputDataStream;
- TwoInputTransformation<
- WindowedValue<KV<?, InputT>>,
- RawUnionValue,
- WindowedValue<OutputT>> rawFlinkTransform = new TwoInputTransformation(
- keyedStream.getTransformation(),
- transformedSideInputs.f1.broadcast().getTransformation(),
- transformName,
- (TwoInputStreamOperator) doFnOperator,
- outputUnionTypeInformation,
- keyedStream.getParallelism());
-
- rawFlinkTransform.setStateKeyType(keyedStream.getKeyType());
- rawFlinkTransform.setStateKeySelectors(keyedStream.getKeySelector(), null);
-
- unionOutputStream = new SingleOutputStreamOperator(
- keyedStream.getExecutionEnvironment(),
- rawFlinkTransform) {}; // we have to cheat around the ctor being protected
-
- keyedStream.getExecutionEnvironment().addOperator(rawFlinkTransform);
-
- } else {
- unionOutputStream = inputDataStream
- .connect(transformedSideInputs.f1.broadcast())
- .transform(transformName, outputUnionTypeInformation, doFnOperator);
- }
- }
-
- SplitStream<RawUnionValue> splitStream = unionOutputStream
- .split(new OutputSelector<RawUnionValue>() {
- @Override
- public Iterable<String> select(RawUnionValue value) {
- return Collections.singletonList(Integer.toString(value.getUnionTag()));
- }
- });
-
- for (Entry<TupleTag<?>, PValue> output : outputs.entrySet()) {
- final int outputTag = tagsToLabels.get(output.getKey());
-
- TypeInformation outputTypeInfo = context.getTypeInfo((PCollection<?>) output.getValue());
-
- @SuppressWarnings("unchecked")
- DataStream unwrapped = splitStream.select(String.valueOf(outputTag))
- .flatMap(new FlatMapFunction<RawUnionValue, Object>() {
- @Override
- public void flatMap(RawUnionValue value, Collector<Object> out) throws Exception {
- out.collect(value.getValue());
- }
- }).returns(outputTypeInfo);
-
- context.setOutputDataStream(output.getValue(), unwrapped);
- }
- }
-
- private static Map<TupleTag<?>, Integer> transformTupleTagsToLabels(
- TupleTag<?> mainTag,
- Map<TupleTag<?>, PValue> allTaggedValues) {
-
- Map<TupleTag<?>, Integer> tagToLabelMap = Maps.newHashMap();
- int count = 0;
- tagToLabelMap.put(mainTag, count++);
- for (TupleTag<?> key : allTaggedValues.keySet()) {
- if (!tagToLabelMap.containsKey(key)) {
- tagToLabelMap.put(key, count++);
- }
- }
- return tagToLabelMap;
- }
-
- private static UnionCoder createUnionCoder(Map<TupleTag<?>, PValue> taggedCollections) {
- List<Coder<?>> outputCoders = Lists.newArrayList();
- for (PValue taggedColl : taggedCollections.values()) {
- checkArgument(
- taggedColl instanceof PCollection,
- "A Union Coder can only be created for a Collection of Tagged %s. Got %s",
- PCollection.class.getSimpleName(),
- taggedColl.getClass().getSimpleName());
- PCollection<?> coll = (PCollection<?>) taggedColl;
- WindowedValue.FullWindowedValueCoder<?> windowedValueCoder =
- WindowedValue.getFullCoder(
- coll.getCoder(),
- coll.getWindowingStrategy().getWindowFn().windowCoder());
- outputCoders.add(windowedValueCoder);
- }
- return UnionCoder.of(outputCoders);
- }
- }
-
- private static class ParDoStreamingTranslator<InputT, OutputT>
- extends FlinkStreamingPipelineTranslator.StreamTransformTranslator<
- ParDo.MultiOutput<InputT, OutputT>> {
-
- @Override
- public void translateNode(
- ParDo.MultiOutput<InputT, OutputT> transform,
- FlinkStreamingTranslationContext context) {
-
- ParDoTranslationHelper.translateParDo(
- transform.getName(),
- transform.getFn(),
- (PCollection<InputT>) context.getInput(transform),
- transform.getSideInputs(),
- context.getOutputs(transform),
- transform.getMainOutputTag(),
- transform.getAdditionalOutputTags().getAll(),
- context,
- new ParDoTranslationHelper.DoFnOperatorFactory<InputT, OutputT>() {
- @Override
- public DoFnOperator<InputT, OutputT, RawUnionValue> createDoFnOperator(
- DoFn<InputT, OutputT> doFn,
- List<PCollectionView<?>> sideInputs,
- TupleTag<OutputT> mainOutputTag,
- List<TupleTag<?>> additionalOutputTags,
- FlinkStreamingTranslationContext context,
- WindowingStrategy<?, ?> windowingStrategy,
- Map<TupleTag<?>, Integer> tagsToLabels,
- Coder<WindowedValue<InputT>> inputCoder,
- Coder keyCoder,
- Map<Integer, PCollectionView<?>> transformedSideInputs) {
- return new DoFnOperator<>(
- doFn,
- inputCoder,
- mainOutputTag,
- additionalOutputTags,
- new DoFnOperator.MultiOutputOutputManagerFactory(tagsToLabels),
- windowingStrategy,
- transformedSideInputs,
- sideInputs,
- context.getPipelineOptions(),
- keyCoder);
- }
- });
- }
- }
-
- private static class SplittableProcessElementsStreamingTranslator<
- InputT, OutputT, RestrictionT, TrackerT extends RestrictionTracker<RestrictionT>>
- extends FlinkStreamingPipelineTranslator.StreamTransformTranslator<
- SplittableParDo.ProcessElements<InputT, OutputT, RestrictionT, TrackerT>> {
-
- @Override
- public void translateNode(
- SplittableParDo.ProcessElements<InputT, OutputT, RestrictionT, TrackerT> transform,
- FlinkStreamingTranslationContext context) {
-
- ParDoTranslationHelper.translateParDo(
- transform.getName(),
- transform.newProcessFn(transform.getFn()),
- (PCollection<KeyedWorkItem<String, ElementAndRestriction<InputT, RestrictionT>>>)
- context.getInput(transform),
- transform.getSideInputs(),
- context.getOutputs(transform),
- transform.getMainOutputTag(),
- transform.getAdditionalOutputTags().getAll(),
- context,
- new ParDoTranslationHelper.DoFnOperatorFactory<
- KeyedWorkItem<String, ElementAndRestriction<InputT, RestrictionT>>, OutputT>() {
- @Override
- public DoFnOperator<
- KeyedWorkItem<String, ElementAndRestriction<InputT, RestrictionT>>,
- OutputT,
- RawUnionValue> createDoFnOperator(
- DoFn<
- KeyedWorkItem<String, ElementAndRestriction<InputT, RestrictionT>>,
- OutputT> doFn,
- List<PCollectionView<?>> sideInputs,
- TupleTag<OutputT> mainOutputTag,
- List<TupleTag<?>> additionalOutputTags,
- FlinkStreamingTranslationContext context,
- WindowingStrategy<?, ?> windowingStrategy,
- Map<TupleTag<?>, Integer> tagsToLabels,
- Coder<
- WindowedValue<
- KeyedWorkItem<
- String,
- ElementAndRestriction<InputT, RestrictionT>>>> inputCoder,
- Coder keyCoder,
- Map<Integer, PCollectionView<?>> transformedSideInputs) {
- return new SplittableDoFnOperator<>(
- doFn,
- inputCoder,
- mainOutputTag,
- additionalOutputTags,
- new DoFnOperator.MultiOutputOutputManagerFactory(tagsToLabels),
- windowingStrategy,
- transformedSideInputs,
- sideInputs,
- context.getPipelineOptions(),
- keyCoder);
- }
- });
- }
- }
-
- private static class CreateViewStreamingTranslator<ElemT, ViewT>
- extends FlinkStreamingPipelineTranslator.StreamTransformTranslator<
- FlinkStreamingViewOverrides.CreateFlinkPCollectionView<ElemT, ViewT>> {
-
- @Override
- public void translateNode(
- FlinkStreamingViewOverrides.CreateFlinkPCollectionView<ElemT, ViewT> transform,
- FlinkStreamingTranslationContext context) {
- // just forward
- DataStream<WindowedValue<List<ElemT>>> inputDataSet =
- context.getInputDataStream(context.getInput(transform));
-
- PCollectionView<ViewT> view = context.getOutput(transform);
-
- context.setOutputDataStream(view, inputDataSet);
- }
- }
-
- private static class WindowAssignTranslator<T>
- extends FlinkStreamingPipelineTranslator.StreamTransformTranslator<Window.Assign<T>> {
-
- @Override
- public void translateNode(
- Window.Assign<T> transform,
- FlinkStreamingTranslationContext context) {
-
- @SuppressWarnings("unchecked")
- WindowingStrategy<T, BoundedWindow> windowingStrategy =
- (WindowingStrategy<T, BoundedWindow>)
- context.getOutput(transform).getWindowingStrategy();
-
- TypeInformation<WindowedValue<T>> typeInfo =
- context.getTypeInfo(context.getOutput(transform));
-
- DataStream<WindowedValue<T>> inputDataStream =
- context.getInputDataStream(context.getInput(transform));
-
- WindowFn<T, ? extends BoundedWindow> windowFn = windowingStrategy.getWindowFn();
-
- FlinkAssignWindows<T, ? extends BoundedWindow> assignWindowsFunction =
- new FlinkAssignWindows<>(windowFn);
-
- SingleOutputStreamOperator<WindowedValue<T>> outputDataStream = inputDataStream
- .flatMap(assignWindowsFunction)
- .name(context.getOutput(transform).getName())
- .returns(typeInfo);
-
- context.setOutputDataStream(context.getOutput(transform), outputDataStream);
- }
- }
-
- private static class ReshuffleTranslatorStreaming<K, InputT>
- extends FlinkStreamingPipelineTranslator.StreamTransformTranslator<Reshuffle<K, InputT>> {
-
- @Override
- public void translateNode(
- Reshuffle<K, InputT> transform,
- FlinkStreamingTranslationContext context) {
-
- DataStream<WindowedValue<KV<K, InputT>>> inputDataSet =
- context.getInputDataStream(context.getInput(transform));
-
- context.setOutputDataStream(context.getOutput(transform), inputDataSet.rebalance());
-
- }
- }
-
-
- private static class GroupByKeyTranslator<K, InputT>
- extends FlinkStreamingPipelineTranslator.StreamTransformTranslator<GroupByKey<K, InputT>> {
-
- @Override
- public void translateNode(
- GroupByKey<K, InputT> transform,
- FlinkStreamingTranslationContext context) {
-
- PCollection<KV<K, InputT>> input = context.getInput(transform);
-
- @SuppressWarnings("unchecked")
- WindowingStrategy<?, BoundedWindow> windowingStrategy =
- (WindowingStrategy<?, BoundedWindow>) input.getWindowingStrategy();
-
- KvCoder<K, InputT> inputKvCoder = (KvCoder<K, InputT>) input.getCoder();
-
- SingletonKeyedWorkItemCoder<K, InputT> workItemCoder = SingletonKeyedWorkItemCoder.of(
- inputKvCoder.getKeyCoder(),
- inputKvCoder.getValueCoder(),
- input.getWindowingStrategy().getWindowFn().windowCoder());
-
- DataStream<WindowedValue<KV<K, InputT>>> inputDataStream = context.getInputDataStream(input);
-
- WindowedValue.
- FullWindowedValueCoder<SingletonKeyedWorkItem<K, InputT>> windowedWorkItemCoder =
- WindowedValue.getFullCoder(
- workItemCoder,
- input.getWindowingStrategy().getWindowFn().windowCoder());
-
- CoderTypeInformation<WindowedValue<SingletonKeyedWorkItem<K, InputT>>> workItemTypeInfo =
- new CoderTypeInformation<>(windowedWorkItemCoder);
-
- DataStream<WindowedValue<SingletonKeyedWorkItem<K, InputT>>> workItemStream =
- inputDataStream
- .flatMap(new ToKeyedWorkItem<K, InputT>())
- .returns(workItemTypeInfo).name("ToKeyedWorkItem");
-
- KeyedStream<
- WindowedValue<
- SingletonKeyedWorkItem<K, InputT>>, ByteBuffer> keyedWorkItemStream = workItemStream
- .keyBy(new WorkItemKeySelector<K, InputT>(inputKvCoder.getKeyCoder()));
-
- SystemReduceFn<K, InputT, Iterable<InputT>, Iterable<InputT>, BoundedWindow> reduceFn =
- SystemReduceFn.buffering(inputKvCoder.getValueCoder());
-
- TypeInformation<WindowedValue<KV<K, Iterable<InputT>>>> outputTypeInfo =
- context.getTypeInfo(context.getOutput(transform));
-
- DoFnOperator.DefaultOutputManagerFactory<
- WindowedValue<KV<K, Iterable<InputT>>>> outputManagerFactory =
- new DoFnOperator.DefaultOutputManagerFactory<>();
-
- WindowDoFnOperator<K, InputT, Iterable<InputT>> doFnOperator =
- new WindowDoFnOperator<>(
- reduceFn,
- (Coder) windowedWorkItemCoder,
- new TupleTag<KV<K, Iterable<InputT>>>("main output"),
- Collections.<TupleTag<?>>emptyList(),
- outputManagerFactory,
- windowingStrategy,
- new HashMap<Integer, PCollectionView<?>>(), /* side-input mapping */
- Collections.<PCollectionView<?>>emptyList(), /* side inputs */
- context.getPipelineOptions(),
- inputKvCoder.getKeyCoder());
-
- // our operator expects WindowedValue<KeyedWorkItem> while our input stream
- // is WindowedValue<SingletonKeyedWorkItem>, which is fine but Java doesn't like it ...
- @SuppressWarnings("unchecked")
- SingleOutputStreamOperator<WindowedValue<KV<K, Iterable<InputT>>>> outDataStream =
- keyedWorkItemStream
- .transform(
- transform.getName(),
- outputTypeInfo,
- (OneInputStreamOperator) doFnOperator);
-
- context.setOutputDataStream(context.getOutput(transform), outDataStream);
-
- }
- }
-
- private static class CombinePerKeyTranslator<K, InputT, OutputT>
- extends FlinkStreamingPipelineTranslator.StreamTransformTranslator<
- Combine.PerKey<K, InputT, OutputT>> {
-
- @Override
- boolean canTranslate(
- Combine.PerKey<K, InputT, OutputT> transform,
- FlinkStreamingTranslationContext context) {
-
- // if we have a merging window strategy and side inputs we cannot
- // translate as a proper combine. We have to group and then run the combine
- // over the final grouped values.
- PCollection<KV<K, InputT>> input = context.getInput(transform);
-
- @SuppressWarnings("unchecked")
- WindowingStrategy<?, BoundedWindow> windowingStrategy =
- (WindowingStrategy<?, BoundedWindow>) input.getWindowingStrategy();
-
- return windowingStrategy.getWindowFn().isNonMerging() || transform.getSideInputs().isEmpty();
- }
-
- @Override
- public void translateNode(
- Combine.PerKey<K, InputT, OutputT> transform,
- FlinkStreamingTranslationContext context) {
-
- PCollection<KV<K, InputT>> input = context.getInput(transform);
-
- @SuppressWarnings("unchecked")
- WindowingStrategy<?, BoundedWindow> windowingStrategy =
- (WindowingStrategy<?, BoundedWindow>) input.getWindowingStrategy();
-
- KvCoder<K, InputT> inputKvCoder = (KvCoder<K, InputT>) input.getCoder();
-
- SingletonKeyedWorkItemCoder<K, InputT> workItemCoder = SingletonKeyedWorkItemCoder.of(
- inputKvCoder.getKeyCoder(),
- inputKvCoder.getValueCoder(),
- input.getWindowingStrategy().getWindowFn().windowCoder());
-
- DataStream<WindowedValue<KV<K, InputT>>> inputDataStream = context.getInputDataStream(input);
-
- WindowedValue.
- FullWindowedValueCoder<SingletonKeyedWorkItem<K, InputT>> windowedWorkItemCoder =
- WindowedValue.getFullCoder(
- workItemCoder,
- input.getWindowingStrategy().getWindowFn().windowCoder());
-
- CoderTypeInformation<WindowedValue<SingletonKeyedWorkItem<K, InputT>>> workItemTypeInfo =
- new CoderTypeInformation<>(windowedWorkItemCoder);
-
- DataStream<WindowedValue<SingletonKeyedWorkItem<K, InputT>>> workItemStream =
- inputDataStream
- .flatMap(new ToKeyedWorkItem<K, InputT>())
- .returns(workItemTypeInfo).name("ToKeyedWorkItem");
-
- KeyedStream<
- WindowedValue<
- SingletonKeyedWorkItem<K, InputT>>, ByteBuffer> keyedWorkItemStream = workItemStream
- .keyBy(new WorkItemKeySelector<K, InputT>(inputKvCoder.getKeyCoder()));
-
- SystemReduceFn<K, InputT, ?, OutputT, BoundedWindow> reduceFn = SystemReduceFn.combining(
- inputKvCoder.getKeyCoder(),
- AppliedCombineFn.withInputCoder(
- transform.getFn(), input.getPipeline().getCoderRegistry(), inputKvCoder));
-
- TypeInformation<WindowedValue<KV<K, OutputT>>> outputTypeInfo =
- context.getTypeInfo(context.getOutput(transform));
-
- List<PCollectionView<?>> sideInputs = transform.getSideInputs();
-
- if (sideInputs.isEmpty()) {
-
- WindowDoFnOperator<K, InputT, OutputT> doFnOperator =
- new WindowDoFnOperator<>(
- reduceFn,
- (Coder) windowedWorkItemCoder,
- new TupleTag<KV<K, OutputT>>("main output"),
- Collections.<TupleTag<?>>emptyList(),
- new DoFnOperator.DefaultOutputManagerFactory<WindowedValue<KV<K, OutputT>>>(),
- windowingStrategy,
- new HashMap<Integer, PCollectionView<?>>(), /* side-input mapping */
- Collections.<PCollectionView<?>>emptyList(), /* side inputs */
- context.getPipelineOptions(),
- inputKvCoder.getKeyCoder());
-
- // our operator expects WindowedValue<KeyedWorkItem> while our input stream
- // is WindowedValue<SingletonKeyedWorkItem>, which is fine but Java doesn't like it ...
- @SuppressWarnings("unchecked")
- SingleOutputStreamOperator<WindowedValue<KV<K, OutputT>>> outDataStream =
- keyedWorkItemStream.transform(
- transform.getName(), outputTypeInfo, (OneInputStreamOperator) doFnOperator);
-
- context.setOutputDataStream(context.getOutput(transform), outDataStream);
- } else {
- Tuple2<Map<Integer, PCollectionView<?>>, DataStream<RawUnionValue>> transformSideInputs =
- transformSideInputs(sideInputs, context);
-
- WindowDoFnOperator<K, InputT, OutputT> doFnOperator =
- new WindowDoFnOperator<>(
- reduceFn,
- (Coder) windowedWorkItemCoder,
- new TupleTag<KV<K, OutputT>>("main output"),
- Collections.<TupleTag<?>>emptyList(),
- new DoFnOperator.DefaultOutputManagerFactory<WindowedValue<KV<K, OutputT>>>(),
- windowingStrategy,
- transformSideInputs.f0,
- sideInputs,
- context.getPipelineOptions(),
- inputKvCoder.getKeyCoder());
-
- // we have to manually construct the two-input transform because, normally,
- // we're not allowed to have only one of the two inputs keyed.
-
- TwoInputTransformation<
- WindowedValue<SingletonKeyedWorkItem<K, InputT>>,
- RawUnionValue,
- WindowedValue<KV<K, OutputT>>> rawFlinkTransform = new TwoInputTransformation<>(
- keyedWorkItemStream.getTransformation(),
- transformSideInputs.f1.broadcast().getTransformation(),
- transform.getName(),
- (TwoInputStreamOperator) doFnOperator,
- outputTypeInfo,
- keyedWorkItemStream.getParallelism());
-
- rawFlinkTransform.setStateKeyType(keyedWorkItemStream.getKeyType());
- rawFlinkTransform.setStateKeySelectors(keyedWorkItemStream.getKeySelector(), null);
-
- @SuppressWarnings({ "unchecked", "rawtypes" })
- SingleOutputStreamOperator<WindowedValue<KV<K, OutputT>>> outDataStream =
- new SingleOutputStreamOperator(
- keyedWorkItemStream.getExecutionEnvironment(),
- rawFlinkTransform) {}; // we have to cheat around the ctor being protected
-
- keyedWorkItemStream.getExecutionEnvironment().addOperator(rawFlinkTransform);
-
- context.setOutputDataStream(context.getOutput(transform), outDataStream);
- }
- }
- }
-
- private static class GBKIntoKeyedWorkItemsTranslator<K, InputT>
- extends FlinkStreamingPipelineTranslator.StreamTransformTranslator<
- SplittableParDo.GBKIntoKeyedWorkItems<K, InputT>> {
-
- @Override
- boolean canTranslate(
- SplittableParDo.GBKIntoKeyedWorkItems<K, InputT> transform,
- FlinkStreamingTranslationContext context) {
- return true;
- }
-
- @Override
- public void translateNode(
- SplittableParDo.GBKIntoKeyedWorkItems<K, InputT> transform,
- FlinkStreamingTranslationContext context) {
-
- PCollection<KV<K, InputT>> input = context.getInput(transform);
-
- KvCoder<K, InputT> inputKvCoder = (KvCoder<K, InputT>) input.getCoder();
-
- SingletonKeyedWorkItemCoder<K, InputT> workItemCoder = SingletonKeyedWorkItemCoder.of(
- inputKvCoder.getKeyCoder(),
- inputKvCoder.getValueCoder(),
- input.getWindowingStrategy().getWindowFn().windowCoder());
-
-
- WindowedValue.
- FullWindowedValueCoder<SingletonKeyedWorkItem<K, InputT>> windowedWorkItemCoder =
- WindowedValue.getFullCoder(
- workItemCoder,
- input.getWindowingStrategy().getWindowFn().windowCoder());
-
- CoderTypeInformation<WindowedValue<SingletonKeyedWorkItem<K, InputT>>> workItemTypeInfo =
- new CoderTypeInformation<>(windowedWorkItemCoder);
-
- DataStream<WindowedValue<KV<K, InputT>>> inputDataStream = context.getInputDataStream(input);
-
- DataStream<WindowedValue<SingletonKeyedWorkItem<K, InputT>>> workItemStream =
- inputDataStream
- .flatMap(new ToKeyedWorkItem<K, InputT>())
- .returns(workItemTypeInfo).name("ToKeyedWorkItem");
-
- KeyedStream<
- WindowedValue<
- SingletonKeyedWorkItem<K, InputT>>, ByteBuffer> keyedWorkItemStream = workItemStream
- .keyBy(new WorkItemKeySelector<K, InputT>(inputKvCoder.getKeyCoder()));
-
- context.setOutputDataStream(context.getOutput(transform), keyedWorkItemStream);
- }
- }
-
- private static class FlattenPCollectionTranslator<T>
- extends FlinkStreamingPipelineTranslator.StreamTransformTranslator<
- Flatten.PCollections<T>> {
-
- @Override
- public void translateNode(
- Flatten.PCollections<T> transform,
- FlinkStreamingTranslationContext context) {
- Map<TupleTag<?>, PValue> allInputs = context.getInputs(transform);
-
- if (allInputs.isEmpty()) {
-
- // create an empty dummy source to satisfy downstream operations;
- // we cannot create an empty source in Flink, therefore we have to
- // add a flatMap that simply never forwards the single element
- DataStreamSource<String> dummySource =
- context.getExecutionEnvironment().fromElements("dummy");
-
- DataStream<WindowedValue<T>> result = dummySource.flatMap(
- new FlatMapFunction<String, WindowedValue<T>>() {
- @Override
- public void flatMap(
- String s,
- Collector<WindowedValue<T>> collector) throws Exception {
- // never return anything
- }
- }).returns(
- new CoderTypeInformation<>(
- WindowedValue.getFullCoder(
- (Coder<T>) VoidCoder.of(),
- GlobalWindow.Coder.INSTANCE)));
- context.setOutputDataStream(context.getOutput(transform), result);
-
- } else {
- DataStream<T> result = null;
- for (PValue input : allInputs.values()) {
- DataStream<T> current = context.getInputDataStream(input);
- result = (result == null) ? current : result.union(current);
- }
- context.setOutputDataStream(context.getOutput(transform), result);
- }
- }
- }
-
- private static class ToKeyedWorkItem<K, InputT>
- extends RichFlatMapFunction<
- WindowedValue<KV<K, InputT>>,
- WindowedValue<SingletonKeyedWorkItem<K, InputT>>> {
-
- @Override
- public void flatMap(
- WindowedValue<KV<K, InputT>> inWithMultipleWindows,
- Collector<WindowedValue<SingletonKeyedWorkItem<K, InputT>>> out) throws Exception {
-
- // for now we need to emit one work item per window,
- // since otherwise the PushbackSideInputRunner will not correctly
- // determine whether side inputs are ready
- //
- // this is tracked as https://issues.apache.org/jira/browse/BEAM-1850
- for (WindowedValue<KV<K, InputT>> in : inWithMultipleWindows.explodeWindows()) {
- SingletonKeyedWorkItem<K, InputT> workItem =
- new SingletonKeyedWorkItem<>(
- in.getValue().getKey(),
- in.withValue(in.getValue().getValue()));
-
- out.collect(in.withValue(workItem));
- }
- }
- }
-
-}
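To make the per-window wrapping in ToKeyedWorkItem (end of the file above) concrete, here is a small illustration of WindowedValue.explodeWindows(); the element and the two windows are made-up values, not taken from the commit.

// Illustration only: explodeWindows() turns one value assigned to N windows into
// N single-window copies, which is what ToKeyedWorkItem relies on so that the
// pushback runner can check side-input readiness per window.
Collection<IntervalWindow> windows = ImmutableList.of(
    new IntervalWindow(new Instant(0), new Instant(10)),     // hypothetical windows
    new IntervalWindow(new Instant(10), new Instant(20)));
WindowedValue<KV<String, Integer>> multi =
    WindowedValue.of(KV.of("key", 1), new Instant(5), windows, PaneInfo.NO_FIRING);
for (WindowedValue<KV<String, Integer>> single : multi.explodeWindows()) {
  System.out.println(single.getWindows());                   // exactly one window each
}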
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/FlinkStreamingTranslationContext.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/FlinkStreamingTranslationContext.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/FlinkStreamingTranslationContext.java
deleted file mode 100644
index 1a943a3..0000000
--- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/FlinkStreamingTranslationContext.java
+++ /dev/null
@@ -1,130 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.beam.runners.flink;
-
-import static com.google.common.base.Preconditions.checkNotNull;
-
-import com.google.common.collect.Iterables;
-import java.util.HashMap;
-import java.util.Map;
-import org.apache.beam.runners.flink.translation.types.CoderTypeInformation;
-import org.apache.beam.sdk.coders.Coder;
-import org.apache.beam.sdk.options.PipelineOptions;
-import org.apache.beam.sdk.transforms.AppliedPTransform;
-import org.apache.beam.sdk.transforms.PTransform;
-import org.apache.beam.sdk.util.WindowedValue;
-import org.apache.beam.sdk.values.PCollection;
-import org.apache.beam.sdk.values.PInput;
-import org.apache.beam.sdk.values.POutput;
-import org.apache.beam.sdk.values.PValue;
-import org.apache.beam.sdk.values.TupleTag;
-import org.apache.flink.api.common.typeinfo.TypeInformation;
-import org.apache.flink.streaming.api.datastream.DataStream;
-import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
-
-/**
- * Helper for keeping track of which {@link DataStream DataStreams} map
- * to which {@link PTransform PTransforms}.
- */
-class FlinkStreamingTranslationContext {
-
- private final StreamExecutionEnvironment env;
- private final PipelineOptions options;
-
- /**
- * Keeps a mapping between the output value of the PTransform (in Beam) and the
- * Flink Operator that produced it, after the translation of the corresponding PTransform
- * to its Flink equivalent.
- */
- private final Map<PValue, DataStream<?>> dataStreams;
-
- private AppliedPTransform<?, ?, ?> currentTransform;
-
- public FlinkStreamingTranslationContext(StreamExecutionEnvironment env, PipelineOptions options) {
- this.env = checkNotNull(env);
- this.options = checkNotNull(options);
- this.dataStreams = new HashMap<>();
- }
-
- public StreamExecutionEnvironment getExecutionEnvironment() {
- return env;
- }
-
- public PipelineOptions getPipelineOptions() {
- return options;
- }
-
- @SuppressWarnings("unchecked")
- public <T> DataStream<T> getInputDataStream(PValue value) {
- return (DataStream<T>) dataStreams.get(value);
- }
-
- public void setOutputDataStream(PValue value, DataStream<?> set) {
- if (!dataStreams.containsKey(value)) {
- dataStreams.put(value, set);
- }
- }
-
- /**
- * Sets the AppliedPTransform which carries input/output.
- * @param currentTransform the {@link AppliedPTransform} currently being translated
- */
- public void setCurrentTransform(AppliedPTransform<?, ?, ?> currentTransform) {
- this.currentTransform = currentTransform;
- }
-
- public <T> Coder<WindowedValue<T>> getCoder(PCollection<T> collection) {
- Coder<T> valueCoder = collection.getCoder();
-
- return WindowedValue.getFullCoder(
- valueCoder,
- collection.getWindowingStrategy().getWindowFn().windowCoder());
- }
-
- @SuppressWarnings("unchecked")
- public <T> TypeInformation<WindowedValue<T>> getTypeInfo(PCollection<T> collection) {
- Coder<T> valueCoder = collection.getCoder();
- WindowedValue.FullWindowedValueCoder<T> windowedValueCoder =
- WindowedValue.getFullCoder(
- valueCoder,
- collection.getWindowingStrategy().getWindowFn().windowCoder());
-
- return new CoderTypeInformation<>(windowedValueCoder);
- }
-
-
- @SuppressWarnings("unchecked")
- public <T extends PValue> T getInput(PTransform<T, ?> transform) {
- return (T) Iterables.getOnlyElement(currentTransform.getInputs().values());
- }
-
- public <T extends PInput> Map<TupleTag<?>, PValue> getInputs(PTransform<T, ?> transform) {
- return currentTransform.getInputs();
- }
-
- @SuppressWarnings("unchecked")
- public <T extends PValue> T getOutput(PTransform<?, T> transform) {
- return (T) Iterables.getOnlyElement(currentTransform.getOutputs().values());
- }
-
- public <OutputT extends POutput> Map<TupleTag<?>, PValue> getOutputs(
- PTransform<?, OutputT> transform) {
- return currentTransform.getOutputs();
- }
-
-}
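As a hedged illustration of how the context above is driven, the following sketch shows the usual call order inside a translator once the pipeline visitor has bound the current transform; 'node' and 'myTransform' (a hypothetical PTransform<PCollection<String>, PCollection<String>>) are assumptions.

// Sketch only; the visitor binds the current transform, then the translator
// resolves its input stream and registers the translated output stream.
context.setCurrentTransform(node.toAppliedPTransform());    // done by the translator driver
PCollection<String> input = context.getInput(myTransform);  // resolved from the current transform
DataStream<WindowedValue<String>> stream = context.getInputDataStream(input);
TypeInformation<WindowedValue<String>> typeInfo = context.getTypeInfo(input);
DataStream<WindowedValue<String>> translated =
    stream.map(value -> value).returns(typeInfo);            // stand-in for the real translation work
context.setOutputDataStream(context.getOutput(myTransform), translated);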
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/FlinkStreamingViewOverrides.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/FlinkStreamingViewOverrides.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/FlinkStreamingViewOverrides.java
deleted file mode 100644
index f955f2a..0000000
--- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/FlinkStreamingViewOverrides.java
+++ /dev/null
@@ -1,372 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.beam.runners.flink;
-
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.List;
-import java.util.Map;
-import org.apache.beam.sdk.coders.Coder;
-import org.apache.beam.sdk.coders.CoderRegistry;
-import org.apache.beam.sdk.coders.KvCoder;
-import org.apache.beam.sdk.coders.ListCoder;
-import org.apache.beam.sdk.transforms.Combine;
-import org.apache.beam.sdk.transforms.DoFn;
-import org.apache.beam.sdk.transforms.PTransform;
-import org.apache.beam.sdk.transforms.ParDo;
-import org.apache.beam.sdk.transforms.View;
-import org.apache.beam.sdk.util.PCollectionViews;
-import org.apache.beam.sdk.values.KV;
-import org.apache.beam.sdk.values.PCollection;
-import org.apache.beam.sdk.values.PCollectionView;
-
-/**
- * Flink streaming overrides for various view (side input) transforms.
- */
-class FlinkStreamingViewOverrides {
-
- /**
- * Specialized implementation for
- * {@link org.apache.beam.sdk.transforms.View.AsMap View.AsMap}
- * for the Flink runner in streaming mode.
- */
- static class StreamingViewAsMap<K, V>
- extends PTransform<PCollection<KV<K, V>>, PCollectionView<Map<K, V>>> {
-
- private final transient FlinkRunner runner;
-
- @SuppressWarnings("unused") // used via reflection in FlinkRunner#apply()
- public StreamingViewAsMap(FlinkRunner runner, View.AsMap<K, V> transform) {
- this.runner = runner;
- }
-
- @Override
- public PCollectionView<Map<K, V>> expand(PCollection<KV<K, V>> input) {
- PCollectionView<Map<K, V>> view =
- PCollectionViews.mapView(
- input,
- input.getWindowingStrategy(),
- input.getCoder());
-
- @SuppressWarnings({"rawtypes", "unchecked"})
- KvCoder<K, V> inputCoder = (KvCoder) input.getCoder();
- try {
- inputCoder.getKeyCoder().verifyDeterministic();
- } catch (Coder.NonDeterministicException e) {
- runner.recordViewUsesNonDeterministicKeyCoder(this);
- }
-
- return input
- .apply(Combine.globally(new Concatenate<KV<K, V>>()).withoutDefaults())
- .apply(CreateFlinkPCollectionView.<KV<K, V>, Map<K, V>>of(view));
- }
-
- @Override
- protected String getKindString() {
- return "StreamingViewAsMap";
- }
- }
-
- /**
- * Specialized expansion for {@link
- * View.AsMultimap View.AsMultimap} for the
- * Flink runner in streaming mode.
- */
- static class StreamingViewAsMultimap<K, V>
- extends PTransform<PCollection<KV<K, V>>, PCollectionView<Map<K, Iterable<V>>>> {
-
- private final transient FlinkRunner runner;
-
- /**
- * Builds an instance of this class from the overridden transform.
- */
- @SuppressWarnings("unused") // used via reflection in FlinkRunner#apply()
- public StreamingViewAsMultimap(FlinkRunner runner, View.AsMultimap<K, V> transform) {
- this.runner = runner;
- }
-
- @Override
- public PCollectionView<Map<K, Iterable<V>>> expand(PCollection<KV<K, V>> input) {
- PCollectionView<Map<K, Iterable<V>>> view =
- PCollectionViews.multimapView(
- input,
- input.getWindowingStrategy(),
- input.getCoder());
-
- @SuppressWarnings({"rawtypes", "unchecked"})
- KvCoder<K, V> inputCoder = (KvCoder) input.getCoder();
- try {
- inputCoder.getKeyCoder().verifyDeterministic();
- } catch (Coder.NonDeterministicException e) {
- runner.recordViewUsesNonDeterministicKeyCoder(this);
- }
-
- return input
- .apply(Combine.globally(new Concatenate<KV<K, V>>()).withoutDefaults())
- .apply(CreateFlinkPCollectionView.<KV<K, V>, Map<K, Iterable<V>>>of(view));
- }
-
- @Override
- protected String getKindString() {
- return "StreamingViewAsMultimap";
- }
- }
-
- /**
- * Specialized implementation for
- * {@link View.AsList View.AsList} for the
- * Flink runner in streaming mode.
- */
- static class StreamingViewAsList<T>
- extends PTransform<PCollection<T>, PCollectionView<List<T>>> {
- /**
- * Builds an instance of this class from the overridden transform.
- */
- @SuppressWarnings("unused") // used via reflection in FlinkRunner#apply()
- public StreamingViewAsList(FlinkRunner runner, View.AsList<T> transform) {}
-
- @Override
- public PCollectionView<List<T>> expand(PCollection<T> input) {
- PCollectionView<List<T>> view =
- PCollectionViews.listView(
- input,
- input.getWindowingStrategy(),
- input.getCoder());
-
- return input.apply(Combine.globally(new Concatenate<T>()).withoutDefaults())
- .apply(CreateFlinkPCollectionView.<T, List<T>>of(view));
- }
-
- @Override
- protected String getKindString() {
- return "StreamingViewAsList";
- }
- }
-
- /**
- * Specialized implementation for
- * {@link View.AsIterable View.AsIterable} for the
- * Flink runner in streaming mode.
- */
- static class StreamingViewAsIterable<T>
- extends PTransform<PCollection<T>, PCollectionView<Iterable<T>>> {
- /**
- * Builds an instance of this class from the overridden transform.
- */
- @SuppressWarnings("unused") // used via reflection in FlinkRunner#apply()
- public StreamingViewAsIterable(FlinkRunner runner, View.AsIterable<T> transform) { }
-
- @Override
- public PCollectionView<Iterable<T>> expand(PCollection<T> input) {
- PCollectionView<Iterable<T>> view =
- PCollectionViews.iterableView(
- input,
- input.getWindowingStrategy(),
- input.getCoder());
-
- return input.apply(Combine.globally(new Concatenate<T>()).withoutDefaults())
- .apply(CreateFlinkPCollectionView.<T, Iterable<T>>of(view));
- }
-
- @Override
- protected String getKindString() {
- return "StreamingViewAsIterable";
- }
- }
-
- /**
- * Specialized expansion for
- * {@link View.AsSingleton View.AsSingleton} for the
- * Flink runner in streaming mode.
- */
- static class StreamingViewAsSingleton<T>
- extends PTransform<PCollection<T>, PCollectionView<T>> {
- private View.AsSingleton<T> transform;
-
- /**
- * Builds an instance of this class from the overridden transform.
- */
- @SuppressWarnings("unused") // used via reflection in FlinkRunner#apply()
- public StreamingViewAsSingleton(FlinkRunner runner, View.AsSingleton<T> transform) {
- this.transform = transform;
- }
-
- @Override
- public PCollectionView<T> expand(PCollection<T> input) {
- Combine.Globally<T, T> combine = Combine.globally(
- new SingletonCombine<>(transform.hasDefaultValue(), transform.defaultValue()));
- if (!transform.hasDefaultValue()) {
- combine = combine.withoutDefaults();
- }
- return input.apply(combine.asSingletonView());
- }
-
- @Override
- protected String getKindString() {
- return "StreamingViewAsSingleton";
- }
-
- private static class SingletonCombine<T> extends Combine.BinaryCombineFn<T> {
- private boolean hasDefaultValue;
- private T defaultValue;
-
- SingletonCombine(boolean hasDefaultValue, T defaultValue) {
- this.hasDefaultValue = hasDefaultValue;
- this.defaultValue = defaultValue;
- }
-
- @Override
- public T apply(T left, T right) {
- throw new IllegalArgumentException("PCollection with more than one element "
- + "accessed as a singleton view. Consider using Combine.globally().asSingleton() to "
- + "combine the PCollection into a single value");
- }
-
- @Override
- public T identity() {
- if (hasDefaultValue) {
- return defaultValue;
- } else {
- throw new IllegalArgumentException(
- "Empty PCollection accessed as a singleton view. "
- + "Consider setting withDefault to provide a default value");
- }
- }
- }
- }
-
- static class StreamingCombineGloballyAsSingletonView<InputT, OutputT>
- extends PTransform<PCollection<InputT>, PCollectionView<OutputT>> {
- Combine.GloballyAsSingletonView<InputT, OutputT> transform;
-
- /**
- * Builds an instance of this class from the overridden transform.
- */
- @SuppressWarnings("unused") // used via reflection in FlinkRunner#apply()
- public StreamingCombineGloballyAsSingletonView(
- FlinkRunner runner,
- Combine.GloballyAsSingletonView<InputT, OutputT> transform) {
- this.transform = transform;
- }
-
- @Override
- public PCollectionView<OutputT> expand(PCollection<InputT> input) {
- PCollection<OutputT> combined =
- input.apply(Combine.globally(transform.getCombineFn())
- .withoutDefaults()
- .withFanout(transform.getFanout()));
-
- PCollectionView<OutputT> view = PCollectionViews.singletonView(
- combined,
- combined.getWindowingStrategy(),
- transform.getInsertDefault(),
- transform.getInsertDefault()
- ? transform.getCombineFn().defaultValue() : null,
- combined.getCoder());
- return combined
- .apply(ParDo.of(new WrapAsList<OutputT>()))
- .apply(CreateFlinkPCollectionView.<OutputT, OutputT>of(view));
- }
-
- @Override
- protected String getKindString() {
- return "StreamingCombineGloballyAsSingletonView";
- }
- }
-
- private static class WrapAsList<T> extends DoFn<T, List<T>> {
- @ProcessElement
- public void processElement(ProcessContext c) {
- c.output(Collections.singletonList(c.element()));
- }
- }
-
- /**
- * Combiner that combines {@code T}s into a single {@code List<T>} containing all inputs.
- *
- * <p>For internal use by {@link StreamingViewAsMap}, {@link StreamingViewAsMultimap},
- * {@link StreamingViewAsList}, {@link StreamingViewAsIterable}.
- * They require that the input {@link PCollection} fit in memory.
- * For a large {@link PCollection} this is expected to crash!
- *
- * @param <T> the type of elements to concatenate.
- */
- private static class Concatenate<T> extends Combine.CombineFn<T, List<T>, List<T>> {
- @Override
- public List<T> createAccumulator() {
- return new ArrayList<T>();
- }
-
- @Override
- public List<T> addInput(List<T> accumulator, T input) {
- accumulator.add(input);
- return accumulator;
- }
-
- @Override
- public List<T> mergeAccumulators(Iterable<List<T>> accumulators) {
- List<T> result = createAccumulator();
- for (List<T> accumulator : accumulators) {
- result.addAll(accumulator);
- }
- return result;
- }
-
- @Override
- public List<T> extractOutput(List<T> accumulator) {
- return accumulator;
- }
-
- @Override
- public Coder<List<T>> getAccumulatorCoder(CoderRegistry registry, Coder<T> inputCoder) {
- return ListCoder.of(inputCoder);
- }
-
- @Override
- public Coder<List<T>> getDefaultOutputCoder(CoderRegistry registry, Coder<T> inputCoder) {
- return ListCoder.of(inputCoder);
- }
- }
-
- /**
- * Creates a primitive {@link PCollectionView}.
- *
- * <p>For internal use only by runner implementors.
- *
- * @param <ElemT> The type of the elements of the input PCollection
- * @param <ViewT> The type associated with the {@link PCollectionView} used as a side input
- */
- public static class CreateFlinkPCollectionView<ElemT, ViewT>
- extends PTransform<PCollection<List<ElemT>>, PCollectionView<ViewT>> {
- private PCollectionView<ViewT> view;
-
- private CreateFlinkPCollectionView(PCollectionView<ViewT> view) {
- this.view = view;
- }
-
- public static <ElemT, ViewT> CreateFlinkPCollectionView<ElemT, ViewT> of(
- PCollectionView<ViewT> view) {
- return new CreateFlinkPCollectionView<>(view);
- }
-
- @Override
- public PCollectionView<ViewT> expand(PCollection<List<ElemT>> input) {
- return view;
- }
- }
-}
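A minimal usage sketch of the Concatenate pattern above (not part of this commit): the whole PCollection is folded into one in-memory List, which is what the streaming view overrides do before materializing a side input. It assumes a visible copy of the Concatenate CombineFn from the deleted class and the 2017-era Beam Java SDK.

import java.util.List;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.transforms.Combine;
import org.apache.beam.sdk.transforms.Create;
import org.apache.beam.sdk.values.PCollection;

public class ConcatenateSketch {
  public static void main(String[] args) {
    Pipeline p = Pipeline.create(PipelineOptionsFactory.create());
    PCollection<String> words = p.apply(Create.of("a", "b", "c"));
    // Fold every element into a single List<String>; as the javadoc above warns,
    // this only works when the whole PCollection fits in memory.
    PCollection<List<String>> all =
        words.apply(Combine.globally(new Concatenate<String>()).withoutDefaults());
    p.run();
  }
}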
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/PipelineTranslationOptimizer.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/PipelineTranslationOptimizer.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/PipelineTranslationOptimizer.java
deleted file mode 100644
index 3acc3ea..0000000
--- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/PipelineTranslationOptimizer.java
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.beam.runners.flink;
-
-import org.apache.beam.sdk.io.Read;
-import org.apache.beam.sdk.runners.TransformHierarchy;
-import org.apache.beam.sdk.transforms.PTransform;
-import org.apache.beam.sdk.values.PValue;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-/**
- * Traverses the Pipeline to determine the {@link TranslationMode} for this pipeline.
- */
-class PipelineTranslationOptimizer extends FlinkPipelineTranslator {
-
- private static final Logger LOG = LoggerFactory.getLogger(PipelineTranslationOptimizer.class);
-
- private TranslationMode translationMode;
-
- private final FlinkPipelineOptions options;
-
- public PipelineTranslationOptimizer(TranslationMode defaultMode, FlinkPipelineOptions options) {
- this.translationMode = defaultMode;
- this.options = options;
- }
-
- public TranslationMode getTranslationMode() {
-
- // override user-specified translation mode
- if (options.isStreaming()) {
- return TranslationMode.STREAMING;
- }
-
- return translationMode;
- }
-
- @Override
- public CompositeBehavior enterCompositeTransform(TransformHierarchy.Node node) {
- return CompositeBehavior.ENTER_TRANSFORM;
- }
-
- @Override
- public void leaveCompositeTransform(TransformHierarchy.Node node) {}
-
- @Override
- public void visitPrimitiveTransform(TransformHierarchy.Node node) {
- Class<? extends PTransform> transformClass = node.getTransform().getClass();
- if (transformClass == Read.Unbounded.class) {
- LOG.info("Found {}. Switching to streaming execution.", transformClass);
- translationMode = TranslationMode.STREAMING;
- }
- }
-
- @Override
- public void visitValue(PValue value, TransformHierarchy.Node producer) {}
-}
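A standalone sketch of the same unbounded-read check (not part of this commit), written against the SDK's generic Pipeline.PipelineVisitor.Defaults instead of FlinkPipelineTranslator; the class and method names below are illustrative only.

import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.io.Read;
import org.apache.beam.sdk.runners.TransformHierarchy;

class UnboundedReadDetector extends Pipeline.PipelineVisitor.Defaults {
  private boolean sawUnboundedRead = false;

  @Override
  public void visitPrimitiveTransform(TransformHierarchy.Node node) {
    // Any unbounded read forces streaming execution, mirroring the optimizer above.
    if (node.getTransform() instanceof Read.Unbounded) {
      sawUnboundedRead = true;
    }
  }

  static boolean requiresStreaming(Pipeline pipeline) {
    UnboundedReadDetector detector = new UnboundedReadDetector();
    pipeline.traverseTopologically(detector);
    return detector.sawUnboundedRead;
  }
}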
[33/50] [abbrv] beam git commit: [BEAM-1994] Remove Flink examples package
Posted by dh...@apache.org.
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/ReadSourceITCase.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/ReadSourceITCase.java b/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/ReadSourceITCase.java
deleted file mode 100644
index 44c9017..0000000
--- a/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/ReadSourceITCase.java
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.beam.runners.flink;
-
-import com.google.common.base.Joiner;
-import java.io.File;
-import java.net.URI;
-import org.apache.beam.sdk.Pipeline;
-import org.apache.beam.sdk.io.CountingInput;
-import org.apache.beam.sdk.io.TextIO;
-import org.apache.beam.sdk.transforms.DoFn;
-import org.apache.beam.sdk.transforms.ParDo;
-import org.apache.beam.sdk.values.PCollection;
-import org.apache.flink.test.util.JavaProgramTestBase;
-
-/**
- * Reads from a bounded source in batch execution.
- */
-public class ReadSourceITCase extends JavaProgramTestBase {
-
- protected String resultPath;
-
- public ReadSourceITCase(){
- }
-
- private static final String[] EXPECTED_RESULT = new String[] {
- "0", "1", "2", "3", "4", "5", "6", "7", "8", "9"};
-
- @Override
- protected void preSubmit() throws Exception {
- resultPath = getTempDirPath("result");
-
- // need to create the dir, otherwise Beam sinks don't
- // work for these tests
-
- if (!new File(new URI(resultPath)).mkdirs()) {
- throw new RuntimeException("Could not create output dir.");
- }
- }
-
- @Override
- protected void postSubmit() throws Exception {
- compareResultsByLinesInMemory(Joiner.on('\n').join(EXPECTED_RESULT), resultPath);
- }
-
- @Override
- protected void testProgram() throws Exception {
- runProgram(resultPath);
- }
-
- private static void runProgram(String resultPath) throws Exception {
-
- Pipeline p = FlinkTestPipeline.createForBatch();
-
- PCollection<String> result = p
- .apply(CountingInput.upTo(10))
- .apply(ParDo.of(new DoFn<Long, String>() {
- @ProcessElement
- public void processElement(ProcessContext c) throws Exception {
- c.output(c.element().toString());
- }
- }));
-
- result.apply(TextIO.Write.to(new URI(resultPath).getPath() + "/part"));
-
- p.run();
- }
-}
-
-
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/ReadSourceStreamingITCase.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/ReadSourceStreamingITCase.java b/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/ReadSourceStreamingITCase.java
deleted file mode 100644
index 79b7882..0000000
--- a/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/ReadSourceStreamingITCase.java
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.beam.runners.flink;
-
-import com.google.common.base.Joiner;
-import org.apache.beam.sdk.Pipeline;
-import org.apache.beam.sdk.io.CountingInput;
-import org.apache.beam.sdk.io.TextIO;
-import org.apache.beam.sdk.transforms.DoFn;
-import org.apache.beam.sdk.transforms.ParDo;
-import org.apache.flink.streaming.util.StreamingProgramTestBase;
-
-/**
- * Reads from a bounded source in streaming.
- */
-public class ReadSourceStreamingITCase extends StreamingProgramTestBase {
-
- protected String resultPath;
-
- public ReadSourceStreamingITCase(){
- }
-
- private static final String[] EXPECTED_RESULT = new String[] {
- "0", "1", "2", "3", "4", "5", "6", "7", "8", "9"};
-
- @Override
- protected void preSubmit() throws Exception {
- resultPath = getTempDirPath("result");
- }
-
- @Override
- protected void postSubmit() throws Exception {
- compareResultsByLinesInMemory(Joiner.on('\n').join(EXPECTED_RESULT), resultPath);
- }
-
- @Override
- protected void testProgram() throws Exception {
- runProgram(resultPath);
- }
-
- private static void runProgram(String resultPath) {
-
- Pipeline p = FlinkTestPipeline.createForStreaming();
-
- p
- .apply(CountingInput.upTo(10))
- .apply(ParDo.of(new DoFn<Long, String>() {
- @ProcessElement
- public void processElement(ProcessContext c) throws Exception {
- c.output(c.element().toString());
- }
- }))
- .apply(TextIO.Write.to(resultPath));
-
- p.run();
- }
-}
-
-
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/WriteSinkITCase.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/WriteSinkITCase.java b/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/WriteSinkITCase.java
deleted file mode 100644
index 38b790e..0000000
--- a/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/WriteSinkITCase.java
+++ /dev/null
@@ -1,192 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.beam.runners.flink;
-
-import static org.junit.Assert.assertNotNull;
-
-import com.google.common.base.Joiner;
-import com.google.common.collect.ImmutableList;
-import java.io.File;
-import java.io.IOException;
-import java.io.PrintWriter;
-import java.net.URI;
-import org.apache.beam.sdk.Pipeline;
-import org.apache.beam.sdk.coders.Coder;
-import org.apache.beam.sdk.coders.StringUtf8Coder;
-import org.apache.beam.sdk.io.Sink;
-import org.apache.beam.sdk.io.Write;
-import org.apache.beam.sdk.options.PipelineOptions;
-import org.apache.beam.sdk.transforms.Create;
-import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
-import org.apache.beam.sdk.transforms.windowing.PaneInfo;
-import org.apache.flink.core.fs.FileSystem;
-import org.apache.flink.core.fs.Path;
-import org.apache.flink.test.util.JavaProgramTestBase;
-
-/**
- * Tests the translation of custom Write sinks.
- */
-public class WriteSinkITCase extends JavaProgramTestBase {
-
- protected String resultPath;
-
- public WriteSinkITCase(){
- }
-
- static final String[] EXPECTED_RESULT = new String[] {
- "Joe red 3", "Mary blue 4", "Max yellow 23"};
-
- @Override
- protected void preSubmit() throws Exception {
- resultPath = getTempDirPath("result-" + System.nanoTime());
- }
-
- @Override
- protected void postSubmit() throws Exception {
- compareResultsByLinesInMemory(Joiner.on('\n').join(EXPECTED_RESULT), resultPath);
- }
-
- @Override
- protected void testProgram() throws Exception {
- runProgram(resultPath);
- }
-
- @Override
- public void stopCluster() throws Exception {
- try {
- super.stopCluster();
- } catch (final IOException ioe) {
- if (ioe.getMessage().startsWith("Unable to delete file")) {
- // that's ok for the test itself, just the OS playing with us on cleanup phase
- }
- }
- }
-
- private static void runProgram(String resultPath) {
- Pipeline p = FlinkTestPipeline.createForBatch();
-
- p.apply(Create.of(ImmutableList.copyOf(EXPECTED_RESULT))).setCoder(StringUtf8Coder.of())
- .apply("CustomSink", Write.to(new MyCustomSink(resultPath)));
-
- p.run();
- }
-
- /**
- * Simple custom sink which writes to a file.
- */
- private static class MyCustomSink extends Sink<String> {
-
- private final String resultPath;
-
- public MyCustomSink(String resultPath) {
- this.resultPath = resultPath;
- }
-
- @Override
- public void validate(PipelineOptions options) {
- assertNotNull(options);
- }
-
- @Override
- public WriteOperation<String, ?> createWriteOperation(PipelineOptions options) {
- return new MyWriteOperation();
- }
-
- private class MyWriteOperation extends WriteOperation<String, String> {
-
- @Override
- public Coder<String> getWriterResultCoder() {
- return StringUtf8Coder.of();
- }
-
- @Override
- public void initialize(PipelineOptions options) throws Exception {
-
- }
-
- @Override
- public void setWindowedWrites(boolean windowedWrites) {
-
- }
-
- @Override
- public void finalize(Iterable<String> writerResults, PipelineOptions options)
- throws Exception {
-
- }
-
- @Override
- public Writer<String, String> createWriter(PipelineOptions options) throws Exception {
- return new MyWriter();
- }
-
- @Override
- public Sink<String> getSink() {
- return MyCustomSink.this;
- }
-
- /**
- * Simple Writer which writes to a file.
- */
- private class MyWriter extends Writer<String, String> {
-
- private PrintWriter internalWriter;
-
- @Override
- public final void openWindowed(String uId,
- BoundedWindow window,
- PaneInfo paneInfo,
- int shard,
- int numShards) throws Exception {
- throw new UnsupportedOperationException("Windowed writes not supported.");
- }
-
- @Override
- public final void openUnwindowed(String uId, int shard, int numShards) throws Exception {
- Path path = new Path(resultPath + "/" + uId);
- FileSystem.get(new URI("file:///")).create(path, false);
- internalWriter = new PrintWriter(new File(path.toUri()));
- }
-
- @Override
- public void cleanup() throws Exception {
-
- }
-
- @Override
- public void write(String value) throws Exception {
- internalWriter.println(value);
- }
-
- @Override
- public String close() throws Exception {
- internalWriter.close();
- return resultPath;
- }
-
- @Override
- public WriteOperation<String, String> getWriteOperation() {
- return MyWriteOperation.this;
- }
- }
- }
- }
-
-}
-
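For reference, a linear sketch (not part of this commit) of the lifecycle that the Write transform drives against a Sink such as MyCustomSink above. It assumes MyCustomSink is visible, uses the method signatures shown in the deleted test, and simplifies bundle ids and sharding to a single shard; the output path is hypothetical.

import java.util.Collections;
import org.apache.beam.sdk.io.Sink;
import org.apache.beam.sdk.options.PipelineOptions;
import org.apache.beam.sdk.options.PipelineOptionsFactory;

// Illustrative only: drives one writer for one bundle, the way the Write transform would.
public class SinkLifecycleSketch {
  public static void main(String[] args) throws Exception {
    PipelineOptions options = PipelineOptionsFactory.create();
    Sink<String> sink = new MyCustomSink("/tmp/write-sink-sketch");  // hypothetical path
    Sink.WriteOperation<String, String> op =
        (Sink.WriteOperation<String, String>) sink.createWriteOperation(options);
    op.initialize(options);
    Sink.Writer<String, String> writer = op.createWriter(options);
    writer.openUnwindowed("bundle-0", 0, 1);   // one bundle, single shard
    writer.write("Joe red 3");
    String result = writer.close();            // returns the per-writer result
    op.finalize(Collections.singletonList(result), options);
  }
}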
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/streaming/DoFnOperatorTest.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/streaming/DoFnOperatorTest.java b/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/streaming/DoFnOperatorTest.java
deleted file mode 100644
index 4c826d1..0000000
--- a/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/streaming/DoFnOperatorTest.java
+++ /dev/null
@@ -1,600 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.beam.runners.flink.streaming;
-
-import static org.hamcrest.Matchers.emptyIterable;
-import static org.hamcrest.collection.IsIterableContainingInOrder.contains;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertThat;
-
-import com.google.common.base.Function;
-import com.google.common.base.Predicate;
-import com.google.common.collect.FluentIterable;
-import com.google.common.collect.ImmutableList;
-import com.google.common.collect.ImmutableMap;
-import java.util.Collections;
-import java.util.HashMap;
-import javax.annotation.Nullable;
-import org.apache.beam.runners.core.StatefulDoFnRunner;
-import org.apache.beam.runners.flink.FlinkPipelineOptions;
-import org.apache.beam.runners.flink.translation.types.CoderTypeInformation;
-import org.apache.beam.runners.flink.translation.wrappers.streaming.DoFnOperator;
-import org.apache.beam.sdk.coders.Coder;
-import org.apache.beam.sdk.coders.KvCoder;
-import org.apache.beam.sdk.coders.StringUtf8Coder;
-import org.apache.beam.sdk.coders.VarIntCoder;
-import org.apache.beam.sdk.options.PipelineOptionsFactory;
-import org.apache.beam.sdk.testing.PCollectionViewTesting;
-import org.apache.beam.sdk.transforms.DoFn;
-import org.apache.beam.sdk.transforms.join.RawUnionValue;
-import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
-import org.apache.beam.sdk.transforms.windowing.FixedWindows;
-import org.apache.beam.sdk.transforms.windowing.IntervalWindow;
-import org.apache.beam.sdk.transforms.windowing.PaneInfo;
-import org.apache.beam.sdk.util.TimeDomain;
-import org.apache.beam.sdk.util.Timer;
-import org.apache.beam.sdk.util.TimerSpec;
-import org.apache.beam.sdk.util.TimerSpecs;
-import org.apache.beam.sdk.util.WindowedValue;
-import org.apache.beam.sdk.util.WindowingStrategy;
-import org.apache.beam.sdk.util.state.StateSpec;
-import org.apache.beam.sdk.util.state.StateSpecs;
-import org.apache.beam.sdk.util.state.ValueState;
-import org.apache.beam.sdk.values.KV;
-import org.apache.beam.sdk.values.PCollectionView;
-import org.apache.beam.sdk.values.TupleTag;
-import org.apache.flink.api.common.typeinfo.BasicTypeInfo;
-import org.apache.flink.api.java.functions.KeySelector;
-import org.apache.flink.streaming.runtime.streamrecord.StreamRecord;
-import org.apache.flink.streaming.util.KeyedOneInputStreamOperatorTestHarness;
-import org.apache.flink.streaming.util.KeyedTwoInputStreamOperatorTestHarness;
-import org.apache.flink.streaming.util.OneInputStreamOperatorTestHarness;
-import org.apache.flink.streaming.util.TwoInputStreamOperatorTestHarness;
-import org.joda.time.Duration;
-import org.joda.time.Instant;
-import org.junit.Test;
-import org.junit.runner.RunWith;
-import org.junit.runners.JUnit4;
-
-/**
- * Tests for {@link DoFnOperator}.
- */
-@RunWith(JUnit4.class)
-public class DoFnOperatorTest {
-
- // views and windows for testing side inputs
- private static final long WINDOW_MSECS_1 = 100;
- private static final long WINDOW_MSECS_2 = 500;
-
- private WindowingStrategy<Object, IntervalWindow> windowingStrategy1 =
- WindowingStrategy.of(FixedWindows.of(new Duration(WINDOW_MSECS_1)));
-
- private PCollectionView<Iterable<String>> view1 =
- PCollectionViewTesting.testingView(
- new TupleTag<Iterable<WindowedValue<String>>>() {},
- new PCollectionViewTesting.IdentityViewFn<String>(),
- StringUtf8Coder.of(),
- windowingStrategy1);
-
- private WindowingStrategy<Object, IntervalWindow> windowingStrategy2 =
- WindowingStrategy.of(FixedWindows.of(new Duration(WINDOW_MSECS_2)));
-
- private PCollectionView<Iterable<String>> view2 =
- PCollectionViewTesting.testingView(
- new TupleTag<Iterable<WindowedValue<String>>>() {},
- new PCollectionViewTesting.IdentityViewFn<String>(),
- StringUtf8Coder.of(),
- windowingStrategy2);
-
- @Test
- @SuppressWarnings("unchecked")
- public void testSingleOutput() throws Exception {
-
- WindowedValue.ValueOnlyWindowedValueCoder<String> windowedValueCoder =
- WindowedValue.getValueOnlyCoder(StringUtf8Coder.of());
-
- TupleTag<String> outputTag = new TupleTag<>("main-output");
-
- DoFnOperator<String, String, String> doFnOperator = new DoFnOperator<>(
- new IdentityDoFn<String>(),
- windowedValueCoder,
- outputTag,
- Collections.<TupleTag<?>>emptyList(),
- new DoFnOperator.DefaultOutputManagerFactory(),
- WindowingStrategy.globalDefault(),
- new HashMap<Integer, PCollectionView<?>>(), /* side-input mapping */
- Collections.<PCollectionView<?>>emptyList(), /* side inputs */
- PipelineOptionsFactory.as(FlinkPipelineOptions.class),
- null);
-
- OneInputStreamOperatorTestHarness<WindowedValue<String>, String> testHarness =
- new OneInputStreamOperatorTestHarness<>(doFnOperator);
-
- testHarness.open();
-
- testHarness.processElement(new StreamRecord<>(WindowedValue.valueInGlobalWindow("Hello")));
-
- assertThat(
- this.<String>stripStreamRecordFromWindowedValue(testHarness.getOutput()),
- contains(WindowedValue.valueInGlobalWindow("Hello")));
-
- testHarness.close();
- }
-
- @Test
- @SuppressWarnings("unchecked")
- public void testMultiOutputOutput() throws Exception {
-
- WindowedValue.ValueOnlyWindowedValueCoder<String> windowedValueCoder =
- WindowedValue.getValueOnlyCoder(StringUtf8Coder.of());
-
- TupleTag<String> mainOutput = new TupleTag<>("main-output");
- TupleTag<String> additionalOutput1 = new TupleTag<>("output-1");
- TupleTag<String> additionalOutput2 = new TupleTag<>("output-2");
- ImmutableMap<TupleTag<?>, Integer> outputMapping = ImmutableMap.<TupleTag<?>, Integer>builder()
- .put(mainOutput, 1)
- .put(additionalOutput1, 2)
- .put(additionalOutput2, 3)
- .build();
-
- DoFnOperator<String, String, RawUnionValue> doFnOperator = new DoFnOperator<>(
- new MultiOutputDoFn(additionalOutput1, additionalOutput2),
- windowedValueCoder,
- mainOutput,
- ImmutableList.<TupleTag<?>>of(additionalOutput1, additionalOutput2),
- new DoFnOperator.MultiOutputOutputManagerFactory(outputMapping),
- WindowingStrategy.globalDefault(),
- new HashMap<Integer, PCollectionView<?>>(), /* side-input mapping */
- Collections.<PCollectionView<?>>emptyList(), /* side inputs */
- PipelineOptionsFactory.as(FlinkPipelineOptions.class),
- null);
-
- OneInputStreamOperatorTestHarness<WindowedValue<String>, RawUnionValue> testHarness =
- new OneInputStreamOperatorTestHarness<>(doFnOperator);
-
- testHarness.open();
-
- testHarness.processElement(new StreamRecord<>(WindowedValue.valueInGlobalWindow("one")));
- testHarness.processElement(new StreamRecord<>(WindowedValue.valueInGlobalWindow("two")));
- testHarness.processElement(new StreamRecord<>(WindowedValue.valueInGlobalWindow("hello")));
-
- assertThat(
- this.stripStreamRecordFromRawUnion(testHarness.getOutput()),
- contains(
- new RawUnionValue(2, WindowedValue.valueInGlobalWindow("extra: one")),
- new RawUnionValue(3, WindowedValue.valueInGlobalWindow("extra: two")),
- new RawUnionValue(1, WindowedValue.valueInGlobalWindow("got: hello")),
- new RawUnionValue(2, WindowedValue.valueInGlobalWindow("got: hello")),
- new RawUnionValue(3, WindowedValue.valueInGlobalWindow("got: hello"))));
-
- testHarness.close();
- }
-
- @Test
- public void testLateDroppingForStatefulFn() throws Exception {
-
- WindowingStrategy<Object, IntervalWindow> windowingStrategy =
- WindowingStrategy.of(FixedWindows.of(new Duration(10)));
-
- DoFn<Integer, String> fn = new DoFn<Integer, String>() {
-
- @StateId("state")
- private final StateSpec<Object, ValueState<String>> stateSpec =
- StateSpecs.value(StringUtf8Coder.of());
-
- @ProcessElement
- public void processElement(ProcessContext context) {
- context.output(context.element().toString());
- }
- };
-
- WindowedValue.FullWindowedValueCoder<Integer> windowedValueCoder =
- WindowedValue.getFullCoder(
- VarIntCoder.of(),
- windowingStrategy.getWindowFn().windowCoder());
-
- TupleTag<String> outputTag = new TupleTag<>("main-output");
-
- DoFnOperator<Integer, String, WindowedValue<String>> doFnOperator = new DoFnOperator<>(
- fn,
- windowedValueCoder,
- outputTag,
- Collections.<TupleTag<?>>emptyList(),
- new DoFnOperator.DefaultOutputManagerFactory<WindowedValue<String>>(),
- windowingStrategy,
- new HashMap<Integer, PCollectionView<?>>(), /* side-input mapping */
- Collections.<PCollectionView<?>>emptyList(), /* side inputs */
- PipelineOptionsFactory.as(FlinkPipelineOptions.class),
- VarIntCoder.of() /* key coder */);
-
- OneInputStreamOperatorTestHarness<WindowedValue<Integer>, WindowedValue<String>> testHarness =
- new KeyedOneInputStreamOperatorTestHarness<>(
- doFnOperator,
- new KeySelector<WindowedValue<Integer>, Integer>() {
- @Override
- public Integer getKey(WindowedValue<Integer> integerWindowedValue) throws Exception {
- return integerWindowedValue.getValue();
- }
- },
- new CoderTypeInformation<>(VarIntCoder.of()));
-
- testHarness.open();
-
- testHarness.processWatermark(0);
-
- IntervalWindow window1 = new IntervalWindow(new Instant(0), Duration.millis(10));
-
- // this should not be late
- testHarness.processElement(
- new StreamRecord<>(WindowedValue.of(13, new Instant(0), window1, PaneInfo.NO_FIRING)));
-
- assertThat(
- this.<String>stripStreamRecordFromWindowedValue(testHarness.getOutput()),
- contains(WindowedValue.of("13", new Instant(0), window1, PaneInfo.NO_FIRING)));
-
- testHarness.getOutput().clear();
-
- testHarness.processWatermark(9);
-
- // this should still not be considered late
- testHarness.processElement(
- new StreamRecord<>(WindowedValue.of(17, new Instant(0), window1, PaneInfo.NO_FIRING)));
-
- assertThat(
- this.<String>stripStreamRecordFromWindowedValue(testHarness.getOutput()),
- contains(WindowedValue.of("17", new Instant(0), window1, PaneInfo.NO_FIRING)));
-
- testHarness.getOutput().clear();
-
- testHarness.processWatermark(10);
-
- // this should now be considered late
- testHarness.processElement(
- new StreamRecord<>(WindowedValue.of(17, new Instant(0), window1, PaneInfo.NO_FIRING)));
-
- assertThat(
- this.<String>stripStreamRecordFromWindowedValue(testHarness.getOutput()),
- emptyIterable());
-
- testHarness.close();
- }
-
- @Test
- public void testStateGCForStatefulFn() throws Exception {
-
- WindowingStrategy<Object, IntervalWindow> windowingStrategy =
- WindowingStrategy.of(FixedWindows.of(new Duration(10))).withAllowedLateness(Duration.ZERO);
-
- final String timerId = "boo";
- final String stateId = "dazzle";
-
- final int offset = 5000;
- final int timerOutput = 4093;
-
- DoFn<KV<String, Integer>, KV<String, Integer>> fn =
- new DoFn<KV<String, Integer>, KV<String, Integer>>() {
-
- @TimerId(timerId)
- private final TimerSpec spec = TimerSpecs.timer(TimeDomain.EVENT_TIME);
-
- @StateId(stateId)
- private final StateSpec<Object, ValueState<String>> stateSpec =
- StateSpecs.value(StringUtf8Coder.of());
-
- @ProcessElement
- public void processElement(
- ProcessContext context,
- @TimerId(timerId) Timer timer,
- @StateId(stateId) ValueState<String> state,
- BoundedWindow window) {
- timer.set(window.maxTimestamp());
- state.write(context.element().getKey());
- context.output(
- KV.of(context.element().getKey(), context.element().getValue() + offset));
- }
-
- @OnTimer(timerId)
- public void onTimer(OnTimerContext context, @StateId(stateId) ValueState<String> state) {
- context.output(KV.of(state.read(), timerOutput));
- }
- };
-
- WindowedValue.FullWindowedValueCoder<KV<String, Integer>> windowedValueCoder =
- WindowedValue.getFullCoder(
- KvCoder.of(StringUtf8Coder.of(), VarIntCoder.of()),
- windowingStrategy.getWindowFn().windowCoder());
-
- TupleTag<KV<String, Integer>> outputTag = new TupleTag<>("main-output");
-
- DoFnOperator<
- KV<String, Integer>, KV<String, Integer>, WindowedValue<KV<String, Integer>>> doFnOperator =
- new DoFnOperator<>(
- fn,
- windowedValueCoder,
- outputTag,
- Collections.<TupleTag<?>>emptyList(),
- new DoFnOperator.DefaultOutputManagerFactory<WindowedValue<KV<String, Integer>>>(),
- windowingStrategy,
- new HashMap<Integer, PCollectionView<?>>(), /* side-input mapping */
- Collections.<PCollectionView<?>>emptyList(), /* side inputs */
- PipelineOptionsFactory.as(FlinkPipelineOptions.class),
- StringUtf8Coder.of() /* key coder */);
-
- KeyedOneInputStreamOperatorTestHarness<
- String,
- WindowedValue<KV<String, Integer>>,
- WindowedValue<KV<String, Integer>>> testHarness =
- new KeyedOneInputStreamOperatorTestHarness<>(
- doFnOperator,
- new KeySelector<WindowedValue<KV<String, Integer>>, String>() {
- @Override
- public String getKey(
- WindowedValue<KV<String, Integer>> kvWindowedValue) throws Exception {
- return kvWindowedValue.getValue().getKey();
- }
- },
- new CoderTypeInformation<>(StringUtf8Coder.of()));
-
- testHarness.open();
-
- testHarness.processWatermark(0);
-
- assertEquals(0, testHarness.numKeyedStateEntries());
-
- IntervalWindow window1 = new IntervalWindow(new Instant(0), Duration.millis(10));
-
- testHarness.processElement(
- new StreamRecord<>(
- WindowedValue.of(KV.of("key1", 5), new Instant(1), window1, PaneInfo.NO_FIRING)));
-
- testHarness.processElement(
- new StreamRecord<>(
- WindowedValue.of(KV.of("key2", 7), new Instant(3), window1, PaneInfo.NO_FIRING)));
-
- assertThat(
- this.<KV<String, Integer>>stripStreamRecordFromWindowedValue(testHarness.getOutput()),
- contains(
- WindowedValue.of(
- KV.of("key1", 5 + offset), new Instant(1), window1, PaneInfo.NO_FIRING),
- WindowedValue.of(
- KV.of("key2", 7 + offset), new Instant(3), window1, PaneInfo.NO_FIRING)));
-
- assertEquals(2, testHarness.numKeyedStateEntries());
-
- testHarness.getOutput().clear();
-
- // this should trigger both the window.maxTimestamp() timer and the GC timer
- // this tests that the GC timer fires after the user timer
- testHarness.processWatermark(
- window1.maxTimestamp()
- .plus(windowingStrategy.getAllowedLateness())
- .plus(StatefulDoFnRunner.TimeInternalsCleanupTimer.GC_DELAY_MS)
- .getMillis());
-
- assertThat(
- this.<KV<String, Integer>>stripStreamRecordFromWindowedValue(testHarness.getOutput()),
- contains(
- WindowedValue.of(
- KV.of("key1", timerOutput), new Instant(9), window1, PaneInfo.NO_FIRING),
- WindowedValue.of(
- KV.of("key2", timerOutput), new Instant(9), window1, PaneInfo.NO_FIRING)));
-
- // ensure the state was garbage collected
- assertEquals(0, testHarness.numKeyedStateEntries());
-
- testHarness.close();
- }
-
- public void testSideInputs(boolean keyed) throws Exception {
-
- WindowedValue.ValueOnlyWindowedValueCoder<String> windowedValueCoder =
- WindowedValue.getValueOnlyCoder(StringUtf8Coder.of());
-
- TupleTag<String> outputTag = new TupleTag<>("main-output");
-
- ImmutableMap<Integer, PCollectionView<?>> sideInputMapping =
- ImmutableMap.<Integer, PCollectionView<?>>builder()
- .put(1, view1)
- .put(2, view2)
- .build();
-
- Coder<String> keyCoder = null;
- if (keyed) {
- keyCoder = StringUtf8Coder.of();
- }
-
- DoFnOperator<String, String, String> doFnOperator = new DoFnOperator<>(
- new IdentityDoFn<String>(),
- windowedValueCoder,
- outputTag,
- Collections.<TupleTag<?>>emptyList(),
- new DoFnOperator.DefaultOutputManagerFactory<String>(),
- WindowingStrategy.globalDefault(),
- sideInputMapping, /* side-input mapping */
- ImmutableList.<PCollectionView<?>>of(view1, view2), /* side inputs */
- PipelineOptionsFactory.as(FlinkPipelineOptions.class),
- keyCoder);
-
- TwoInputStreamOperatorTestHarness<WindowedValue<String>, RawUnionValue, String> testHarness =
- new TwoInputStreamOperatorTestHarness<>(doFnOperator);
-
- if (keyed) {
- // we use a dummy key for the second input since it is considered to be broadcast
- testHarness = new KeyedTwoInputStreamOperatorTestHarness<>(
- doFnOperator,
- new StringKeySelector(),
- new DummyKeySelector(),
- BasicTypeInfo.STRING_TYPE_INFO);
- }
-
- testHarness.open();
-
- IntervalWindow firstWindow = new IntervalWindow(new Instant(0), new Instant(100));
- IntervalWindow secondWindow = new IntervalWindow(new Instant(0), new Instant(500));
-
- // test that side-input events are kept
- testHarness.processElement2(
- new StreamRecord<>(
- new RawUnionValue(
- 1,
- valuesInWindow(ImmutableList.of("hello", "ciao"), new Instant(0), firstWindow))));
- testHarness.processElement2(
- new StreamRecord<>(
- new RawUnionValue(
- 2,
- valuesInWindow(ImmutableList.of("foo", "bar"), new Instant(0), secondWindow))));
-
- // push in some regular elements
- WindowedValue<String> helloElement = valueInWindow("Hello", new Instant(0), firstWindow);
- WindowedValue<String> worldElement = valueInWindow("World", new Instant(1000), firstWindow);
- testHarness.processElement1(new StreamRecord<>(helloElement));
- testHarness.processElement1(new StreamRecord<>(worldElement));
-
- // test that pushed-back events are kept
- testHarness.processElement2(
- new StreamRecord<>(
- new RawUnionValue(
- 1,
- valuesInWindow(ImmutableList.of("hello", "ciao"),
- new Instant(1000), firstWindow))));
- testHarness.processElement2(
- new StreamRecord<>(
- new RawUnionValue(
- 2,
- valuesInWindow(ImmutableList.of("foo", "bar"), new Instant(1000), secondWindow))));
-
- assertThat(
- this.<String>stripStreamRecordFromWindowedValue(testHarness.getOutput()),
- contains(helloElement, worldElement));
-
- testHarness.close();
-
- }
-
- /**
- * {@link TwoInputStreamOperatorTestHarness} supports OperatorStateBackend,
- * but not KeyedStateBackend. So we only test side inputs for a normal ParDo.
- */
- @Test
- @SuppressWarnings("unchecked")
- public void testNormalParDoSideInputs() throws Exception {
- testSideInputs(false);
- }
-
- @Test
- public void testKeyedSideInputs() throws Exception {
- testSideInputs(true);
- }
-
- private <T> Iterable<WindowedValue<T>> stripStreamRecordFromWindowedValue(
- Iterable<Object> input) {
-
- return FluentIterable.from(input).filter(new Predicate<Object>() {
- @Override
- public boolean apply(@Nullable Object o) {
- return o instanceof StreamRecord && ((StreamRecord) o).getValue() instanceof WindowedValue;
- }
- }).transform(new Function<Object, WindowedValue<T>>() {
- @Nullable
- @Override
- @SuppressWarnings({"unchecked", "rawtypes"})
- public WindowedValue<T> apply(@Nullable Object o) {
- if (o instanceof StreamRecord && ((StreamRecord) o).getValue() instanceof WindowedValue) {
- return (WindowedValue) ((StreamRecord) o).getValue();
- }
- throw new RuntimeException("unreachable");
- }
- });
- }
-
- private Iterable<RawUnionValue> stripStreamRecordFromRawUnion(Iterable<Object> input) {
- return FluentIterable.from(input).filter(new Predicate<Object>() {
- @Override
- public boolean apply(@Nullable Object o) {
- return o instanceof StreamRecord && ((StreamRecord) o).getValue() instanceof RawUnionValue;
- }
- }).transform(new Function<Object, RawUnionValue>() {
- @Nullable
- @Override
- @SuppressWarnings({"unchecked", "rawtypes"})
- public RawUnionValue apply(@Nullable Object o) {
- if (o instanceof StreamRecord && ((StreamRecord) o).getValue() instanceof RawUnionValue) {
- return (RawUnionValue) ((StreamRecord) o).getValue();
- }
- throw new RuntimeException("unreachable");
- }
- });
- }
-
- private static class MultiOutputDoFn extends DoFn<String, String> {
- private TupleTag<String> additionalOutput1;
- private TupleTag<String> additionalOutput2;
-
- public MultiOutputDoFn(TupleTag<String> additionalOutput1, TupleTag<String> additionalOutput2) {
- this.additionalOutput1 = additionalOutput1;
- this.additionalOutput2 = additionalOutput2;
- }
-
- @ProcessElement
- public void processElement(ProcessContext c) throws Exception {
- if (c.element().equals("one")) {
- c.output(additionalOutput1, "extra: one");
- } else if (c.element().equals("two")) {
- c.output(additionalOutput2, "extra: two");
- } else {
- c.output("got: " + c.element());
- c.output(additionalOutput1, "got: " + c.element());
- c.output(additionalOutput2, "got: " + c.element());
- }
- }
- }
-
- private static class IdentityDoFn<T> extends DoFn<T, T> {
- @ProcessElement
- public void processElement(ProcessContext c) throws Exception {
- c.output(c.element());
- }
- }
-
- @SuppressWarnings({"unchecked", "rawtypes"})
- private WindowedValue<Iterable<?>> valuesInWindow(
- Iterable<?> values, Instant timestamp, BoundedWindow window) {
- return (WindowedValue) WindowedValue.of(values, timestamp, window, PaneInfo.NO_FIRING);
- }
-
- @SuppressWarnings({"unchecked", "rawtypes"})
- private <T> WindowedValue<T> valueInWindow(
- T value, Instant timestamp, BoundedWindow window) {
- return WindowedValue.of(value, timestamp, window, PaneInfo.NO_FIRING);
- }
-
-
- private static class DummyKeySelector implements KeySelector<RawUnionValue, String> {
- @Override
- public String getKey(RawUnionValue stringWindowedValue) throws Exception {
- return "dummy_key";
- }
- }
-
- private static class StringKeySelector implements KeySelector<WindowedValue<String>, String> {
- @Override
- public String getKey(WindowedValue<String> stringWindowedValue) throws Exception {
- return stringWindowedValue.getValue();
- }
- }
-}
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/streaming/FlinkBroadcastStateInternalsTest.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/streaming/FlinkBroadcastStateInternalsTest.java b/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/streaming/FlinkBroadcastStateInternalsTest.java
deleted file mode 100644
index 7e7d1e1..0000000
--- a/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/streaming/FlinkBroadcastStateInternalsTest.java
+++ /dev/null
@@ -1,245 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.beam.runners.flink.streaming;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertNotEquals;
-import static org.junit.Assert.assertThat;
-
-import java.util.Arrays;
-import org.apache.beam.runners.core.StateMerging;
-import org.apache.beam.runners.core.StateNamespace;
-import org.apache.beam.runners.core.StateNamespaceForTest;
-import org.apache.beam.runners.core.StateTag;
-import org.apache.beam.runners.core.StateTags;
-import org.apache.beam.runners.flink.translation.wrappers.streaming.state.FlinkBroadcastStateInternals;
-import org.apache.beam.sdk.coders.StringUtf8Coder;
-import org.apache.beam.sdk.coders.VarIntCoder;
-import org.apache.beam.sdk.transforms.Sum;
-import org.apache.beam.sdk.util.state.BagState;
-import org.apache.beam.sdk.util.state.CombiningState;
-import org.apache.beam.sdk.util.state.GroupingState;
-import org.apache.beam.sdk.util.state.ReadableState;
-import org.apache.beam.sdk.util.state.ValueState;
-import org.apache.flink.runtime.operators.testutils.DummyEnvironment;
-import org.apache.flink.runtime.state.OperatorStateBackend;
-import org.apache.flink.runtime.state.memory.MemoryStateBackend;
-import org.hamcrest.Matchers;
-import org.junit.Before;
-import org.junit.Test;
-import org.junit.runner.RunWith;
-import org.junit.runners.JUnit4;
-
-/**
- * Tests for {@link FlinkBroadcastStateInternals}. This is based on the tests for
- * {@code InMemoryStateInternals}.
- */
-@RunWith(JUnit4.class)
-public class FlinkBroadcastStateInternalsTest {
- private static final StateNamespace NAMESPACE_1 = new StateNamespaceForTest("ns1");
- private static final StateNamespace NAMESPACE_2 = new StateNamespaceForTest("ns2");
- private static final StateNamespace NAMESPACE_3 = new StateNamespaceForTest("ns3");
-
- private static final StateTag<Object, ValueState<String>> STRING_VALUE_ADDR =
- StateTags.value("stringValue", StringUtf8Coder.of());
- private static final StateTag<Object, CombiningState<Integer, int[], Integer>>
- SUM_INTEGER_ADDR = StateTags.combiningValueFromInputInternal(
- "sumInteger", VarIntCoder.of(), Sum.ofIntegers());
- private static final StateTag<Object, BagState<String>> STRING_BAG_ADDR =
- StateTags.bag("stringBag", StringUtf8Coder.of());
-
- FlinkBroadcastStateInternals<String> underTest;
-
- @Before
- public void initStateInternals() {
- MemoryStateBackend backend = new MemoryStateBackend();
- try {
- OperatorStateBackend operatorStateBackend =
- backend.createOperatorStateBackend(new DummyEnvironment("test", 1, 0), "");
- underTest = new FlinkBroadcastStateInternals<>(1, operatorStateBackend);
-
- } catch (Exception e) {
- throw new RuntimeException(e);
- }
- }
-
- @Test
- public void testValue() throws Exception {
- ValueState<String> value = underTest.state(NAMESPACE_1, STRING_VALUE_ADDR);
-
- assertEquals(underTest.state(NAMESPACE_1, STRING_VALUE_ADDR), value);
- assertNotEquals(
- underTest.state(NAMESPACE_2, STRING_VALUE_ADDR),
- value);
-
- assertThat(value.read(), Matchers.nullValue());
- value.write("hello");
- assertThat(value.read(), Matchers.equalTo("hello"));
- value.write("world");
- assertThat(value.read(), Matchers.equalTo("world"));
-
- value.clear();
- assertThat(value.read(), Matchers.nullValue());
- assertEquals(underTest.state(NAMESPACE_1, STRING_VALUE_ADDR), value);
-
- }
-
- @Test
- public void testBag() throws Exception {
- BagState<String> value = underTest.state(NAMESPACE_1, STRING_BAG_ADDR);
-
- assertEquals(value, underTest.state(NAMESPACE_1, STRING_BAG_ADDR));
- assertFalse(value.equals(underTest.state(NAMESPACE_2, STRING_BAG_ADDR)));
-
- assertThat(value.read(), Matchers.emptyIterable());
- value.add("hello");
- assertThat(value.read(), Matchers.containsInAnyOrder("hello"));
-
- value.add("world");
- assertThat(value.read(), Matchers.containsInAnyOrder("hello", "world"));
-
- value.clear();
- assertThat(value.read(), Matchers.emptyIterable());
- assertEquals(underTest.state(NAMESPACE_1, STRING_BAG_ADDR), value);
-
- }
-
- @Test
- public void testBagIsEmpty() throws Exception {
- BagState<String> value = underTest.state(NAMESPACE_1, STRING_BAG_ADDR);
-
- assertThat(value.isEmpty().read(), Matchers.is(true));
- ReadableState<Boolean> readFuture = value.isEmpty();
- value.add("hello");
- assertThat(readFuture.read(), Matchers.is(false));
-
- value.clear();
- assertThat(readFuture.read(), Matchers.is(true));
- }
-
- @Test
- public void testMergeBagIntoSource() throws Exception {
- BagState<String> bag1 = underTest.state(NAMESPACE_1, STRING_BAG_ADDR);
- BagState<String> bag2 = underTest.state(NAMESPACE_2, STRING_BAG_ADDR);
-
- bag1.add("Hello");
- bag2.add("World");
- bag1.add("!");
-
- StateMerging.mergeBags(Arrays.asList(bag1, bag2), bag1);
-
- // Reading the merged bag gets both the contents
- assertThat(bag1.read(), Matchers.containsInAnyOrder("Hello", "World", "!"));
- assertThat(bag2.read(), Matchers.emptyIterable());
- }
-
- @Test
- public void testMergeBagIntoNewNamespace() throws Exception {
- BagState<String> bag1 = underTest.state(NAMESPACE_1, STRING_BAG_ADDR);
- BagState<String> bag2 = underTest.state(NAMESPACE_2, STRING_BAG_ADDR);
- BagState<String> bag3 = underTest.state(NAMESPACE_3, STRING_BAG_ADDR);
-
- bag1.add("Hello");
- bag2.add("World");
- bag1.add("!");
-
- StateMerging.mergeBags(Arrays.asList(bag1, bag2, bag3), bag3);
-
- // Reading the merged bag gets both the contents
- assertThat(bag3.read(), Matchers.containsInAnyOrder("Hello", "World", "!"));
- assertThat(bag1.read(), Matchers.emptyIterable());
- assertThat(bag2.read(), Matchers.emptyIterable());
- }
-
- @Test
- public void testCombiningValue() throws Exception {
- GroupingState<Integer, Integer> value = underTest.state(NAMESPACE_1, SUM_INTEGER_ADDR);
-
- // State instances are cached, but depend on the namespace.
- assertEquals(value, underTest.state(NAMESPACE_1, SUM_INTEGER_ADDR));
- assertFalse(value.equals(underTest.state(NAMESPACE_2, SUM_INTEGER_ADDR)));
-
- assertThat(value.read(), Matchers.equalTo(0));
- value.add(2);
- assertThat(value.read(), Matchers.equalTo(2));
-
- value.add(3);
- assertThat(value.read(), Matchers.equalTo(5));
-
- value.clear();
- assertThat(value.read(), Matchers.equalTo(0));
- assertEquals(underTest.state(NAMESPACE_1, SUM_INTEGER_ADDR), value);
- }
-
- @Test
- public void testCombiningIsEmpty() throws Exception {
- GroupingState<Integer, Integer> value = underTest.state(NAMESPACE_1, SUM_INTEGER_ADDR);
-
- assertThat(value.isEmpty().read(), Matchers.is(true));
- ReadableState<Boolean> readFuture = value.isEmpty();
- value.add(5);
- assertThat(readFuture.read(), Matchers.is(false));
-
- value.clear();
- assertThat(readFuture.read(), Matchers.is(true));
- }
-
- @Test
- public void testMergeCombiningValueIntoSource() throws Exception {
- CombiningState<Integer, int[], Integer> value1 =
- underTest.state(NAMESPACE_1, SUM_INTEGER_ADDR);
- CombiningState<Integer, int[], Integer> value2 =
- underTest.state(NAMESPACE_2, SUM_INTEGER_ADDR);
-
- value1.add(5);
- value2.add(10);
- value1.add(6);
-
- assertThat(value1.read(), Matchers.equalTo(11));
- assertThat(value2.read(), Matchers.equalTo(10));
-
- // Merging clears the old values and updates the result value.
- StateMerging.mergeCombiningValues(Arrays.asList(value1, value2), value1);
-
- assertThat(value1.read(), Matchers.equalTo(21));
- assertThat(value2.read(), Matchers.equalTo(0));
- }
-
- @Test
- public void testMergeCombiningValueIntoNewNamespace() throws Exception {
- CombiningState<Integer, int[], Integer> value1 =
- underTest.state(NAMESPACE_1, SUM_INTEGER_ADDR);
- CombiningState<Integer, int[], Integer> value2 =
- underTest.state(NAMESPACE_2, SUM_INTEGER_ADDR);
- CombiningState<Integer, int[], Integer> value3 =
- underTest.state(NAMESPACE_3, SUM_INTEGER_ADDR);
-
- value1.add(5);
- value2.add(10);
- value1.add(6);
-
- StateMerging.mergeCombiningValues(Arrays.asList(value1, value2), value3);
-
- // Merging clears the old values and updates the result value.
- assertThat(value1.read(), Matchers.equalTo(0));
- assertThat(value2.read(), Matchers.equalTo(0));
- assertThat(value3.read(), Matchers.equalTo(21));
- }
-
-}
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/streaming/FlinkKeyGroupStateInternalsTest.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/streaming/FlinkKeyGroupStateInternalsTest.java b/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/streaming/FlinkKeyGroupStateInternalsTest.java
deleted file mode 100644
index 5433d07..0000000
--- a/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/streaming/FlinkKeyGroupStateInternalsTest.java
+++ /dev/null
@@ -1,262 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.beam.runners.flink.streaming;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertThat;
-
-import java.io.ByteArrayInputStream;
-import java.io.ByteArrayOutputStream;
-import java.io.DataInputStream;
-import java.io.DataOutputStream;
-import java.nio.ByteBuffer;
-import java.util.Arrays;
-import org.apache.beam.runners.core.StateMerging;
-import org.apache.beam.runners.core.StateNamespace;
-import org.apache.beam.runners.core.StateNamespaceForTest;
-import org.apache.beam.runners.core.StateTag;
-import org.apache.beam.runners.core.StateTags;
-import org.apache.beam.runners.flink.translation.wrappers.streaming.state.FlinkKeyGroupStateInternals;
-import org.apache.beam.sdk.coders.StringUtf8Coder;
-import org.apache.beam.sdk.util.CoderUtils;
-import org.apache.beam.sdk.util.state.BagState;
-import org.apache.beam.sdk.util.state.ReadableState;
-import org.apache.flink.api.common.ExecutionConfig;
-import org.apache.flink.api.common.JobID;
-import org.apache.flink.api.java.typeutils.GenericTypeInfo;
-import org.apache.flink.runtime.jobgraph.JobVertexID;
-import org.apache.flink.runtime.operators.testutils.DummyEnvironment;
-import org.apache.flink.runtime.query.KvStateRegistry;
-import org.apache.flink.runtime.state.AbstractKeyedStateBackend;
-import org.apache.flink.runtime.state.KeyGroupRange;
-import org.apache.flink.runtime.state.KeyedStateBackend;
-import org.apache.flink.runtime.state.memory.MemoryStateBackend;
-import org.apache.flink.streaming.api.operators.KeyContext;
-import org.hamcrest.Matchers;
-import org.junit.Before;
-import org.junit.Test;
-import org.junit.runner.RunWith;
-import org.junit.runners.JUnit4;
-
-/**
- * Tests for {@link FlinkKeyGroupStateInternals}. This is based on the tests for
- * {@code InMemoryStateInternals}.
- */
-@RunWith(JUnit4.class)
-public class FlinkKeyGroupStateInternalsTest {
- private static final StateNamespace NAMESPACE_1 = new StateNamespaceForTest("ns1");
- private static final StateNamespace NAMESPACE_2 = new StateNamespaceForTest("ns2");
- private static final StateNamespace NAMESPACE_3 = new StateNamespaceForTest("ns3");
-
- private static final StateTag<Object, BagState<String>> STRING_BAG_ADDR =
- StateTags.bag("stringBag", StringUtf8Coder.of());
-
- FlinkKeyGroupStateInternals<String> underTest;
- private KeyedStateBackend keyedStateBackend;
-
- @Before
- public void initStateInternals() {
- try {
- keyedStateBackend = getKeyedStateBackend(2, new KeyGroupRange(0, 1));
- underTest = new FlinkKeyGroupStateInternals<>(StringUtf8Coder.of(), keyedStateBackend);
- } catch (Exception e) {
- throw new RuntimeException(e);
- }
- }
-
- private KeyedStateBackend getKeyedStateBackend(int numberOfKeyGroups,
- KeyGroupRange keyGroupRange) {
- MemoryStateBackend backend = new MemoryStateBackend();
- try {
- AbstractKeyedStateBackend<ByteBuffer> keyedStateBackend = backend.createKeyedStateBackend(
- new DummyEnvironment("test", 1, 0),
- new JobID(),
- "test_op",
- new GenericTypeInfo<>(ByteBuffer.class).createSerializer(new ExecutionConfig()),
- numberOfKeyGroups,
- keyGroupRange,
- new KvStateRegistry().createTaskRegistry(new JobID(), new JobVertexID()));
- keyedStateBackend.setCurrentKey(ByteBuffer.wrap(
- CoderUtils.encodeToByteArray(StringUtf8Coder.of(), "1")));
- return keyedStateBackend;
- } catch (Exception e) {
- throw new RuntimeException(e);
- }
- }
-
- @Test
- public void testBag() throws Exception {
- BagState<String> value = underTest.state(NAMESPACE_1, STRING_BAG_ADDR);
-
- assertEquals(value, underTest.state(NAMESPACE_1, STRING_BAG_ADDR));
- assertFalse(value.equals(underTest.state(NAMESPACE_2, STRING_BAG_ADDR)));
-
- assertThat(value.read(), Matchers.emptyIterable());
- value.add("hello");
- assertThat(value.read(), Matchers.containsInAnyOrder("hello"));
-
- value.add("world");
- assertThat(value.read(), Matchers.containsInAnyOrder("hello", "world"));
-
- value.clear();
- assertThat(value.read(), Matchers.emptyIterable());
- assertEquals(underTest.state(NAMESPACE_1, STRING_BAG_ADDR), value);
-
- }
-
- @Test
- public void testBagIsEmpty() throws Exception {
- BagState<String> value = underTest.state(NAMESPACE_1, STRING_BAG_ADDR);
-
- assertThat(value.isEmpty().read(), Matchers.is(true));
- ReadableState<Boolean> readFuture = value.isEmpty();
- value.add("hello");
- assertThat(readFuture.read(), Matchers.is(false));
-
- value.clear();
- assertThat(readFuture.read(), Matchers.is(true));
- }
-
- @Test
- public void testMergeBagIntoSource() throws Exception {
- BagState<String> bag1 = underTest.state(NAMESPACE_1, STRING_BAG_ADDR);
- BagState<String> bag2 = underTest.state(NAMESPACE_2, STRING_BAG_ADDR);
-
- bag1.add("Hello");
- bag2.add("World");
- bag1.add("!");
-
- StateMerging.mergeBags(Arrays.asList(bag1, bag2), bag1);
-
- // Reading the merged bag gets both the contents
- assertThat(bag1.read(), Matchers.containsInAnyOrder("Hello", "World", "!"));
- assertThat(bag2.read(), Matchers.emptyIterable());
- }
-
- @Test
- public void testMergeBagIntoNewNamespace() throws Exception {
- BagState<String> bag1 = underTest.state(NAMESPACE_1, STRING_BAG_ADDR);
- BagState<String> bag2 = underTest.state(NAMESPACE_2, STRING_BAG_ADDR);
- BagState<String> bag3 = underTest.state(NAMESPACE_3, STRING_BAG_ADDR);
-
- bag1.add("Hello");
- bag2.add("World");
- bag1.add("!");
-
- StateMerging.mergeBags(Arrays.asList(bag1, bag2, bag3), bag3);
-
- // Reading the merged bag gets both the contents
- assertThat(bag3.read(), Matchers.containsInAnyOrder("Hello", "World", "!"));
- assertThat(bag1.read(), Matchers.emptyIterable());
- assertThat(bag2.read(), Matchers.emptyIterable());
- }
-
- @Test
- public void testKeyGroupAndCheckpoint() throws Exception {
- // assign to keyGroup 0
- ByteBuffer key0 = ByteBuffer.wrap(
- CoderUtils.encodeToByteArray(StringUtf8Coder.of(), "11111111"));
- // assign to keyGroup 1
- ByteBuffer key1 = ByteBuffer.wrap(
- CoderUtils.encodeToByteArray(StringUtf8Coder.of(), "22222222"));
- FlinkKeyGroupStateInternals<String> allState;
- {
- KeyedStateBackend keyedStateBackend = getKeyedStateBackend(2, new KeyGroupRange(0, 1));
- allState = new FlinkKeyGroupStateInternals<>(
- StringUtf8Coder.of(), keyedStateBackend);
- BagState<String> valueForNamespace0 = allState.state(NAMESPACE_1, STRING_BAG_ADDR);
- BagState<String> valueForNamespace1 = allState.state(NAMESPACE_2, STRING_BAG_ADDR);
- keyedStateBackend.setCurrentKey(key0);
- valueForNamespace0.add("0");
- valueForNamespace1.add("2");
- keyedStateBackend.setCurrentKey(key1);
- valueForNamespace0.add("1");
- valueForNamespace1.add("3");
- assertThat(valueForNamespace0.read(), Matchers.containsInAnyOrder("0", "1"));
- assertThat(valueForNamespace1.read(), Matchers.containsInAnyOrder("2", "3"));
- }
-
- ClassLoader classLoader = FlinkKeyGroupStateInternalsTest.class.getClassLoader();
-
- // 1. scale up
- ByteArrayOutputStream out0 = new ByteArrayOutputStream();
- allState.snapshotKeyGroupState(0, new DataOutputStream(out0));
- DataInputStream in0 = new DataInputStream(
- new ByteArrayInputStream(out0.toByteArray()));
- {
- KeyedStateBackend keyedStateBackend = getKeyedStateBackend(2, new KeyGroupRange(0, 0));
- FlinkKeyGroupStateInternals<String> state0 =
- new FlinkKeyGroupStateInternals<>(
- StringUtf8Coder.of(), keyedStateBackend);
- state0.restoreKeyGroupState(0, in0, classLoader);
- BagState<String> valueForNamespace0 = state0.state(NAMESPACE_1, STRING_BAG_ADDR);
- BagState<String> valueForNamespace1 = state0.state(NAMESPACE_2, STRING_BAG_ADDR);
- assertThat(valueForNamespace0.read(), Matchers.containsInAnyOrder("0"));
- assertThat(valueForNamespace1.read(), Matchers.containsInAnyOrder("2"));
- }
-
- ByteArrayOutputStream out1 = new ByteArrayOutputStream();
- allState.snapshotKeyGroupState(1, new DataOutputStream(out1));
- DataInputStream in1 = new DataInputStream(
- new ByteArrayInputStream(out1.toByteArray()));
- {
- KeyedStateBackend keyedStateBackend = getKeyedStateBackend(2, new KeyGroupRange(1, 1));
- FlinkKeyGroupStateInternals<String> state1 =
- new FlinkKeyGroupStateInternals<>(
- StringUtf8Coder.of(), keyedStateBackend);
- state1.restoreKeyGroupState(1, in1, classLoader);
- BagState<String> valueForNamespace0 = state1.state(NAMESPACE_1, STRING_BAG_ADDR);
- BagState<String> valueForNamespace1 = state1.state(NAMESPACE_2, STRING_BAG_ADDR);
- assertThat(valueForNamespace0.read(), Matchers.containsInAnyOrder("1"));
- assertThat(valueForNamespace1.read(), Matchers.containsInAnyOrder("3"));
- }
-
- // 2. scale down
- {
- KeyedStateBackend keyedStateBackend = getKeyedStateBackend(2, new KeyGroupRange(0, 1));
- FlinkKeyGroupStateInternals<String> newAllState = new FlinkKeyGroupStateInternals<>(
- StringUtf8Coder.of(), keyedStateBackend);
- in0.reset();
- in1.reset();
- newAllState.restoreKeyGroupState(0, in0, classLoader);
- newAllState.restoreKeyGroupState(1, in1, classLoader);
- BagState<String> valueForNamespace0 = newAllState.state(NAMESPACE_1, STRING_BAG_ADDR);
- BagState<String> valueForNamespace1 = newAllState.state(NAMESPACE_2, STRING_BAG_ADDR);
- assertThat(valueForNamespace0.read(), Matchers.containsInAnyOrder("0", "1"));
- assertThat(valueForNamespace1.read(), Matchers.containsInAnyOrder("2", "3"));
- }
-
- }
-
- private static class TestKeyContext implements KeyContext {
-
- private Object key;
-
- @Override
- public void setCurrentKey(Object key) {
- this.key = key;
- }
-
- @Override
- public Object getCurrentKey() {
- return key;
- }
- }
-
-}
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/streaming/FlinkSplitStateInternalsTest.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/streaming/FlinkSplitStateInternalsTest.java b/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/streaming/FlinkSplitStateInternalsTest.java
deleted file mode 100644
index 08ae0c4..0000000
--- a/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/streaming/FlinkSplitStateInternalsTest.java
+++ /dev/null
@@ -1,101 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.beam.runners.flink.streaming;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertThat;
-
-import org.apache.beam.runners.core.StateNamespace;
-import org.apache.beam.runners.core.StateNamespaceForTest;
-import org.apache.beam.runners.core.StateTag;
-import org.apache.beam.runners.core.StateTags;
-import org.apache.beam.runners.flink.translation.wrappers.streaming.state.FlinkSplitStateInternals;
-import org.apache.beam.sdk.coders.StringUtf8Coder;
-import org.apache.beam.sdk.util.state.BagState;
-import org.apache.beam.sdk.util.state.ReadableState;
-import org.apache.flink.runtime.operators.testutils.DummyEnvironment;
-import org.apache.flink.runtime.state.OperatorStateBackend;
-import org.apache.flink.runtime.state.memory.MemoryStateBackend;
-import org.hamcrest.Matchers;
-import org.junit.Before;
-import org.junit.Test;
-import org.junit.runner.RunWith;
-import org.junit.runners.JUnit4;
-
-/**
- * Tests for {@link FlinkSplitStateInternals}. This is based on the tests for
- * {@code InMemoryStateInternals}.
- */
-@RunWith(JUnit4.class)
-public class FlinkSplitStateInternalsTest {
- private static final StateNamespace NAMESPACE_1 = new StateNamespaceForTest("ns1");
- private static final StateNamespace NAMESPACE_2 = new StateNamespaceForTest("ns2");
-
- private static final StateTag<Object, BagState<String>> STRING_BAG_ADDR =
- StateTags.bag("stringBag", StringUtf8Coder.of());
-
- FlinkSplitStateInternals<String> underTest;
-
- @Before
- public void initStateInternals() {
- MemoryStateBackend backend = new MemoryStateBackend();
- try {
- OperatorStateBackend operatorStateBackend =
- backend.createOperatorStateBackend(new DummyEnvironment("test", 1, 0), "");
- underTest = new FlinkSplitStateInternals<>(operatorStateBackend);
-
- } catch (Exception e) {
- throw new RuntimeException(e);
- }
- }
-
- @Test
- public void testBag() throws Exception {
- BagState<String> value = underTest.state(NAMESPACE_1, STRING_BAG_ADDR);
-
- assertEquals(value, underTest.state(NAMESPACE_1, STRING_BAG_ADDR));
- assertFalse(value.equals(underTest.state(NAMESPACE_2, STRING_BAG_ADDR)));
-
- assertThat(value.read(), Matchers.emptyIterable());
- value.add("hello");
- assertThat(value.read(), Matchers.containsInAnyOrder("hello"));
-
- value.add("world");
- assertThat(value.read(), Matchers.containsInAnyOrder("hello", "world"));
-
- value.clear();
- assertThat(value.read(), Matchers.emptyIterable());
- assertEquals(underTest.state(NAMESPACE_1, STRING_BAG_ADDR), value);
-
- }
-
- @Test
- public void testBagIsEmpty() throws Exception {
- BagState<String> value = underTest.state(NAMESPACE_1, STRING_BAG_ADDR);
-
- assertThat(value.isEmpty().read(), Matchers.is(true));
- ReadableState<Boolean> readFuture = value.isEmpty();
- value.add("hello");
- assertThat(readFuture.read(), Matchers.is(false));
-
- value.clear();
- assertThat(readFuture.read(), Matchers.is(true));
- }
-
-}
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/streaming/FlinkStateInternalsTest.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/streaming/FlinkStateInternalsTest.java b/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/streaming/FlinkStateInternalsTest.java
deleted file mode 100644
index d140271..0000000
--- a/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/streaming/FlinkStateInternalsTest.java
+++ /dev/null
@@ -1,395 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.beam.runners.flink.streaming;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertNotEquals;
-import static org.junit.Assert.assertThat;
-
-import java.nio.ByteBuffer;
-import java.util.Arrays;
-import org.apache.beam.runners.core.StateMerging;
-import org.apache.beam.runners.core.StateNamespace;
-import org.apache.beam.runners.core.StateNamespaceForTest;
-import org.apache.beam.runners.core.StateTag;
-import org.apache.beam.runners.core.StateTags;
-import org.apache.beam.runners.flink.translation.wrappers.streaming.state.FlinkStateInternals;
-import org.apache.beam.sdk.coders.StringUtf8Coder;
-import org.apache.beam.sdk.coders.VarIntCoder;
-import org.apache.beam.sdk.transforms.Sum;
-import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
-import org.apache.beam.sdk.transforms.windowing.IntervalWindow;
-import org.apache.beam.sdk.transforms.windowing.OutputTimeFns;
-import org.apache.beam.sdk.util.CoderUtils;
-import org.apache.beam.sdk.util.state.BagState;
-import org.apache.beam.sdk.util.state.CombiningState;
-import org.apache.beam.sdk.util.state.GroupingState;
-import org.apache.beam.sdk.util.state.ReadableState;
-import org.apache.beam.sdk.util.state.ValueState;
-import org.apache.beam.sdk.util.state.WatermarkHoldState;
-import org.apache.flink.api.common.ExecutionConfig;
-import org.apache.flink.api.common.JobID;
-import org.apache.flink.api.java.typeutils.GenericTypeInfo;
-import org.apache.flink.runtime.jobgraph.JobVertexID;
-import org.apache.flink.runtime.operators.testutils.DummyEnvironment;
-import org.apache.flink.runtime.query.KvStateRegistry;
-import org.apache.flink.runtime.state.AbstractKeyedStateBackend;
-import org.apache.flink.runtime.state.KeyGroupRange;
-import org.apache.flink.runtime.state.memory.MemoryStateBackend;
-import org.hamcrest.Matchers;
-import org.joda.time.Instant;
-import org.junit.Before;
-import org.junit.Test;
-import org.junit.runner.RunWith;
-import org.junit.runners.JUnit4;
-
-/**
- * Tests for {@link FlinkStateInternals}. This is based on the tests for
- * {@code InMemoryStateInternals}.
- */
-@RunWith(JUnit4.class)
-public class FlinkStateInternalsTest {
- private static final BoundedWindow WINDOW_1 = new IntervalWindow(new Instant(0), new Instant(10));
- private static final StateNamespace NAMESPACE_1 = new StateNamespaceForTest("ns1");
- private static final StateNamespace NAMESPACE_2 = new StateNamespaceForTest("ns2");
- private static final StateNamespace NAMESPACE_3 = new StateNamespaceForTest("ns3");
-
- private static final StateTag<Object, ValueState<String>> STRING_VALUE_ADDR =
- StateTags.value("stringValue", StringUtf8Coder.of());
- private static final StateTag<Object, CombiningState<Integer, int[], Integer>>
- SUM_INTEGER_ADDR = StateTags.combiningValueFromInputInternal(
- "sumInteger", VarIntCoder.of(), Sum.ofIntegers());
- private static final StateTag<Object, BagState<String>> STRING_BAG_ADDR =
- StateTags.bag("stringBag", StringUtf8Coder.of());
- private static final StateTag<Object, WatermarkHoldState<BoundedWindow>>
- WATERMARK_EARLIEST_ADDR =
- StateTags.watermarkStateInternal("watermark", OutputTimeFns.outputAtEarliestInputTimestamp());
- private static final StateTag<Object, WatermarkHoldState<BoundedWindow>>
- WATERMARK_LATEST_ADDR =
- StateTags.watermarkStateInternal("watermark", OutputTimeFns.outputAtLatestInputTimestamp());
- private static final StateTag<Object, WatermarkHoldState<BoundedWindow>> WATERMARK_EOW_ADDR =
- StateTags.watermarkStateInternal("watermark", OutputTimeFns.outputAtEndOfWindow());
-
- FlinkStateInternals<String> underTest;
-
- @Before
- public void initStateInternals() {
- MemoryStateBackend backend = new MemoryStateBackend();
- try {
- AbstractKeyedStateBackend<ByteBuffer> keyedStateBackend = backend.createKeyedStateBackend(
- new DummyEnvironment("test", 1, 0),
- new JobID(),
- "test_op",
- new GenericTypeInfo<>(ByteBuffer.class).createSerializer(new ExecutionConfig()),
- 1,
- new KeyGroupRange(0, 0),
- new KvStateRegistry().createTaskRegistry(new JobID(), new JobVertexID()));
- underTest = new FlinkStateInternals<>(keyedStateBackend, StringUtf8Coder.of());
-
- keyedStateBackend.setCurrentKey(
- ByteBuffer.wrap(CoderUtils.encodeToByteArray(StringUtf8Coder.of(), "Hello")));
- } catch (Exception e) {
- throw new RuntimeException(e);
- }
- }
-
- @Test
- public void testValue() throws Exception {
- ValueState<String> value = underTest.state(NAMESPACE_1, STRING_VALUE_ADDR);
-
- assertEquals(underTest.state(NAMESPACE_1, STRING_VALUE_ADDR), value);
- assertNotEquals(
- underTest.state(NAMESPACE_2, STRING_VALUE_ADDR),
- value);
-
- assertThat(value.read(), Matchers.nullValue());
- value.write("hello");
- assertThat(value.read(), Matchers.equalTo("hello"));
- value.write("world");
- assertThat(value.read(), Matchers.equalTo("world"));
-
- value.clear();
- assertThat(value.read(), Matchers.nullValue());
- assertEquals(underTest.state(NAMESPACE_1, STRING_VALUE_ADDR), value);
-
- }
-
- @Test
- public void testBag() throws Exception {
- BagState<String> value = underTest.state(NAMESPACE_1, STRING_BAG_ADDR);
-
- assertEquals(value, underTest.state(NAMESPACE_1, STRING_BAG_ADDR));
- assertFalse(value.equals(underTest.state(NAMESPACE_2, STRING_BAG_ADDR)));
-
- assertThat(value.read(), Matchers.emptyIterable());
- value.add("hello");
- assertThat(value.read(), Matchers.containsInAnyOrder("hello"));
-
- value.add("world");
- assertThat(value.read(), Matchers.containsInAnyOrder("hello", "world"));
-
- value.clear();
- assertThat(value.read(), Matchers.emptyIterable());
- assertEquals(underTest.state(NAMESPACE_1, STRING_BAG_ADDR), value);
-
- }
-
- @Test
- public void testBagIsEmpty() throws Exception {
- BagState<String> value = underTest.state(NAMESPACE_1, STRING_BAG_ADDR);
-
- assertThat(value.isEmpty().read(), Matchers.is(true));
- ReadableState<Boolean> readFuture = value.isEmpty();
- value.add("hello");
- assertThat(readFuture.read(), Matchers.is(false));
-
- value.clear();
- assertThat(readFuture.read(), Matchers.is(true));
- }
-
- @Test
- public void testMergeBagIntoSource() throws Exception {
- BagState<String> bag1 = underTest.state(NAMESPACE_1, STRING_BAG_ADDR);
- BagState<String> bag2 = underTest.state(NAMESPACE_2, STRING_BAG_ADDR);
-
- bag1.add("Hello");
- bag2.add("World");
- bag1.add("!");
-
- StateMerging.mergeBags(Arrays.asList(bag1, bag2), bag1);
-
- // Reading the merged bag gets both the contents
- assertThat(bag1.read(), Matchers.containsInAnyOrder("Hello", "World", "!"));
- assertThat(bag2.read(), Matchers.emptyIterable());
- }
-
- @Test
- public void testMergeBagIntoNewNamespace() throws Exception {
- BagState<String> bag1 = underTest.state(NAMESPACE_1, STRING_BAG_ADDR);
- BagState<String> bag2 = underTest.state(NAMESPACE_2, STRING_BAG_ADDR);
- BagState<String> bag3 = underTest.state(NAMESPACE_3, STRING_BAG_ADDR);
-
- bag1.add("Hello");
- bag2.add("World");
- bag1.add("!");
-
- StateMerging.mergeBags(Arrays.asList(bag1, bag2, bag3), bag3);
-
- // Reading the merged bag gets both the contents
- assertThat(bag3.read(), Matchers.containsInAnyOrder("Hello", "World", "!"));
- assertThat(bag1.read(), Matchers.emptyIterable());
- assertThat(bag2.read(), Matchers.emptyIterable());
- }
-
- @Test
- public void testCombiningValue() throws Exception {
- GroupingState<Integer, Integer> value = underTest.state(NAMESPACE_1, SUM_INTEGER_ADDR);
-
- // State instances are cached, but depend on the namespace.
- assertEquals(value, underTest.state(NAMESPACE_1, SUM_INTEGER_ADDR));
- assertFalse(value.equals(underTest.state(NAMESPACE_2, SUM_INTEGER_ADDR)));
-
- assertThat(value.read(), Matchers.equalTo(0));
- value.add(2);
- assertThat(value.read(), Matchers.equalTo(2));
-
- value.add(3);
- assertThat(value.read(), Matchers.equalTo(5));
-
- value.clear();
- assertThat(value.read(), Matchers.equalTo(0));
- assertEquals(underTest.state(NAMESPACE_1, SUM_INTEGER_ADDR), value);
- }
-
- @Test
- public void testCombiningIsEmpty() throws Exception {
- GroupingState<Integer, Integer> value = underTest.state(NAMESPACE_1, SUM_INTEGER_ADDR);
-
- assertThat(value.isEmpty().read(), Matchers.is(true));
- ReadableState<Boolean> readFuture = value.isEmpty();
- value.add(5);
- assertThat(readFuture.read(), Matchers.is(false));
-
- value.clear();
- assertThat(readFuture.read(), Matchers.is(true));
- }
-
- @Test
- public void testMergeCombiningValueIntoSource() throws Exception {
- CombiningState<Integer, int[], Integer> value1 =
- underTest.state(NAMESPACE_1, SUM_INTEGER_ADDR);
- CombiningState<Integer, int[], Integer> value2 =
- underTest.state(NAMESPACE_2, SUM_INTEGER_ADDR);
-
- value1.add(5);
- value2.add(10);
- value1.add(6);
-
- assertThat(value1.read(), Matchers.equalTo(11));
- assertThat(value2.read(), Matchers.equalTo(10));
-
- // Merging clears the old values and updates the result value.
- StateMerging.mergeCombiningValues(Arrays.asList(value1, value2), value1);
-
- assertThat(value1.read(), Matchers.equalTo(21));
- assertThat(value2.read(), Matchers.equalTo(0));
- }
-
- @Test
- public void testMergeCombiningValueIntoNewNamespace() throws Exception {
- CombiningState<Integer, int[], Integer> value1 =
- underTest.state(NAMESPACE_1, SUM_INTEGER_ADDR);
- CombiningState<Integer, int[], Integer> value2 =
- underTest.state(NAMESPACE_2, SUM_INTEGER_ADDR);
- CombiningState<Integer, int[], Integer> value3 =
- underTest.state(NAMESPACE_3, SUM_INTEGER_ADDR);
-
- value1.add(5);
- value2.add(10);
- value1.add(6);
-
- StateMerging.mergeCombiningValues(Arrays.asList(value1, value2), value3);
-
- // Merging clears the old values and updates the result value.
- assertThat(value1.read(), Matchers.equalTo(0));
- assertThat(value2.read(), Matchers.equalTo(0));
- assertThat(value3.read(), Matchers.equalTo(21));
- }
-
- @Test
- public void testWatermarkEarliestState() throws Exception {
- WatermarkHoldState<BoundedWindow> value =
- underTest.state(NAMESPACE_1, WATERMARK_EARLIEST_ADDR);
-
- // State instances are cached, but depend on the namespace.
- assertEquals(value, underTest.state(NAMESPACE_1, WATERMARK_EARLIEST_ADDR));
- assertFalse(value.equals(underTest.state(NAMESPACE_2, WATERMARK_EARLIEST_ADDR)));
-
- assertThat(value.read(), Matchers.nullValue());
- value.add(new Instant(2000));
- assertThat(value.read(), Matchers.equalTo(new Instant(2000)));
-
- value.add(new Instant(3000));
- assertThat(value.read(), Matchers.equalTo(new Instant(2000)));
-
- value.add(new Instant(1000));
- assertThat(value.read(), Matchers.equalTo(new Instant(1000)));
-
- value.clear();
- assertThat(value.read(), Matchers.equalTo(null));
- assertEquals(underTest.state(NAMESPACE_1, WATERMARK_EARLIEST_ADDR), value);
- }
-
- @Test
- public void testWatermarkLatestState() throws Exception {
- WatermarkHoldState<BoundedWindow> value =
- underTest.state(NAMESPACE_1, WATERMARK_LATEST_ADDR);
-
- // State instances are cached, but depend on the namespace.
- assertEquals(value, underTest.state(NAMESPACE_1, WATERMARK_LATEST_ADDR));
- assertFalse(value.equals(underTest.state(NAMESPACE_2, WATERMARK_LATEST_ADDR)));
-
- assertThat(value.read(), Matchers.nullValue());
- value.add(new Instant(2000));
- assertThat(value.read(), Matchers.equalTo(new Instant(2000)));
-
- value.add(new Instant(3000));
- assertThat(value.read(), Matchers.equalTo(new Instant(3000)));
-
- value.add(new Instant(1000));
- assertThat(value.read(), Matchers.equalTo(new Instant(3000)));
-
- value.clear();
- assertThat(value.read(), Matchers.equalTo(null));
- assertEquals(underTest.state(NAMESPACE_1, WATERMARK_LATEST_ADDR), value);
- }
-
- @Test
- public void testWatermarkEndOfWindowState() throws Exception {
- WatermarkHoldState<BoundedWindow> value = underTest.state(NAMESPACE_1, WATERMARK_EOW_ADDR);
-
- // State instances are cached, but depend on the namespace.
- assertEquals(value, underTest.state(NAMESPACE_1, WATERMARK_EOW_ADDR));
- assertFalse(value.equals(underTest.state(NAMESPACE_2, WATERMARK_EOW_ADDR)));
-
- assertThat(value.read(), Matchers.nullValue());
- value.add(new Instant(2000));
- assertThat(value.read(), Matchers.equalTo(new Instant(2000)));
-
- value.clear();
- assertThat(value.read(), Matchers.equalTo(null));
- assertEquals(underTest.state(NAMESPACE_1, WATERMARK_EOW_ADDR), value);
- }
-
- @Test
- public void testWatermarkStateIsEmpty() throws Exception {
- WatermarkHoldState<BoundedWindow> value =
- underTest.state(NAMESPACE_1, WATERMARK_EARLIEST_ADDR);
-
- assertThat(value.isEmpty().read(), Matchers.is(true));
- ReadableState<Boolean> readFuture = value.isEmpty();
- value.add(new Instant(1000));
- assertThat(readFuture.read(), Matchers.is(false));
-
- value.clear();
- assertThat(readFuture.read(), Matchers.is(true));
- }
-
- @Test
- public void testMergeEarliestWatermarkIntoSource() throws Exception {
- WatermarkHoldState<BoundedWindow> value1 =
- underTest.state(NAMESPACE_1, WATERMARK_EARLIEST_ADDR);
- WatermarkHoldState<BoundedWindow> value2 =
- underTest.state(NAMESPACE_2, WATERMARK_EARLIEST_ADDR);
-
- value1.add(new Instant(3000));
- value2.add(new Instant(5000));
- value1.add(new Instant(4000));
- value2.add(new Instant(2000));
-
- // Merging clears the old values and updates the merged value.
- StateMerging.mergeWatermarks(Arrays.asList(value1, value2), value1, WINDOW_1);
-
- assertThat(value1.read(), Matchers.equalTo(new Instant(2000)));
- assertThat(value2.read(), Matchers.equalTo(null));
- }
-
- @Test
- public void testMergeLatestWatermarkIntoSource() throws Exception {
- WatermarkHoldState<BoundedWindow> value1 =
- underTest.state(NAMESPACE_1, WATERMARK_LATEST_ADDR);
- WatermarkHoldState<BoundedWindow> value2 =
- underTest.state(NAMESPACE_2, WATERMARK_LATEST_ADDR);
- WatermarkHoldState<BoundedWindow> value3 =
- underTest.state(NAMESPACE_3, WATERMARK_LATEST_ADDR);
-
- value1.add(new Instant(3000));
- value2.add(new Instant(5000));
- value1.add(new Instant(4000));
- value2.add(new Instant(2000));
-
- // Merging clears the old values and updates the result value.
- StateMerging.mergeWatermarks(Arrays.asList(value1, value2), value3, WINDOW_1);
-
- // Merging clears the old values and updates the result value.
- assertThat(value3.read(), Matchers.equalTo(new Instant(5000)));
- assertThat(value1.read(), Matchers.equalTo(null));
- assertThat(value2.read(), Matchers.equalTo(null));
- }
-}
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/streaming/GroupByNullKeyTest.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/streaming/GroupByNullKeyTest.java b/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/streaming/GroupByNullKeyTest.java
deleted file mode 100644
index 663b910..0000000
--- a/runners/flink/runner/src/test/java/org/apache/beam/runners/flink/streaming/GroupByNullKeyTest.java
+++ /dev/null
@@ -1,124 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.beam.runners.flink.streaming;
-
-import com.google.common.base.Joiner;
-import java.io.Serializable;
-import java.util.Arrays;
-import org.apache.beam.runners.flink.FlinkTestPipeline;
-import org.apache.beam.sdk.Pipeline;
-import org.apache.beam.sdk.io.TextIO;
-import org.apache.beam.sdk.transforms.Create;
-import org.apache.beam.sdk.transforms.DoFn;
-import org.apache.beam.sdk.transforms.GroupByKey;
-import org.apache.beam.sdk.transforms.ParDo;
-import org.apache.beam.sdk.transforms.windowing.AfterWatermark;
-import org.apache.beam.sdk.transforms.windowing.FixedWindows;
-import org.apache.beam.sdk.transforms.windowing.Window;
-import org.apache.beam.sdk.values.KV;
-import org.apache.beam.sdk.values.PCollection;
-import org.apache.flink.streaming.util.StreamingProgramTestBase;
-import org.joda.time.Duration;
-import org.joda.time.Instant;
-
-/**
- * Test for GroupByNullKey.
- */
-public class GroupByNullKeyTest extends StreamingProgramTestBase implements Serializable {
-
-
- protected String resultPath;
-
- static final String[] EXPECTED_RESULT = new String[] {
- "k: null v: user1 user1 user1 user2 user2 user2 user2 user3"
- };
-
- public GroupByNullKeyTest(){
- }
-
- @Override
- protected void preSubmit() throws Exception {
- resultPath = getTempDirPath("result");
- }
-
- @Override
- protected void postSubmit() throws Exception {
- compareResultsByLinesInMemory(Joiner.on('\n').join(EXPECTED_RESULT), resultPath);
- }
-
- /**
- * DoFn extracting user and timestamp.
- */
- private static class ExtractUserAndTimestamp extends DoFn<KV<Integer, String>, String> {
- @ProcessElement
- public void processElement(ProcessContext c) {
- KV<Integer, String> record = c.element();
- int timestamp = record.getKey();
- String userName = record.getValue();
- if (userName != null) {
- // Sets the implicit timestamp field to be used in windowing.
- c.outputWithTimestamp(userName, new Instant(timestamp));
- }
- }
- }
-
- @Override
- protected void testProgram() throws Exception {
-
- Pipeline p = FlinkTestPipeline.createForStreaming();
-
- PCollection<String> output =
- p.apply(Create.of(Arrays.asList(
- KV.<Integer, String>of(0, "user1"),
- KV.<Integer, String>of(1, "user1"),
- KV.<Integer, String>of(2, "user1"),
- KV.<Integer, String>of(10, "user2"),
- KV.<Integer, String>of(1, "user2"),
- KV.<Integer, String>of(15000, "user2"),
- KV.<Integer, String>of(12000, "user2"),
- KV.<Integer, String>of(25000, "user3"))))
- .apply(ParDo.of(new ExtractUserAndTimestamp()))
- .apply(Window.<String>into(FixedWindows.of(Duration.standardHours(1)))
- .triggering(AfterWatermark.pastEndOfWindow())
- .withAllowedLateness(Duration.ZERO)
- .discardingFiredPanes())
-
- .apply(ParDo.of(new DoFn<String, KV<Void, String>>() {
- @ProcessElement
- public void processElement(ProcessContext c) throws Exception {
- String elem = c.element();
- c.output(KV.<Void, String>of(null, elem));
- }
- }))
- .apply(GroupByKey.<Void, String>create())
- .apply(ParDo.of(new DoFn<KV<Void, Iterable<String>>, String>() {
- @ProcessElement
- public void processElement(ProcessContext c) throws Exception {
- KV<Void, Iterable<String>> elem = c.element();
- StringBuilder str = new StringBuilder();
- str.append("k: " + elem.getKey() + " v:");
- for (String v : elem.getValue()) {
- str.append(" " + v);
- }
- c.output(str.toString());
- }
- }));
- output.apply(TextIO.Write.to(resultPath));
- p.run();
- }
-}
[41/50] [abbrv] beam git commit: This closes #2576
Posted by dh...@apache.org.
This closes #2576
Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/83193698
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/83193698
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/83193698
Branch: refs/heads/DSL_SQL
Commit: 83193698d8ea3dc9cb2a3ed8fe6b4ee6b810237c
Parents: 8a00f22 cdd2544
Author: Ismaël Mejía <ie...@apache.org>
Authored: Wed Apr 19 15:07:54 2017 +0200
Committer: Ismaël Mejía <ie...@apache.org>
Committed: Wed Apr 19 15:07:54 2017 +0200
----------------------------------------------------------------------
...PostCommit_Java_ValidatesRunner_Flink.groovy | 2 +-
runners/flink/examples/pom.xml | 130 ---
.../beam/runners/flink/examples/TFIDF.java | 455 --------
.../beam/runners/flink/examples/WordCount.java | 129 ---
.../runners/flink/examples/package-info.java | 22 -
.../flink/examples/streaming/AutoComplete.java | 400 -------
.../flink/examples/streaming/JoinExamples.java | 154 ---
.../examples/streaming/WindowedWordCount.java | 141 ---
.../flink/examples/streaming/package-info.java | 22 -
runners/flink/pom.xml | 275 ++++-
runners/flink/runner/pom.xml | 330 ------
.../flink/DefaultParallelismFactory.java | 39 -
.../flink/FlinkBatchPipelineTranslator.java | 139 ---
.../flink/FlinkBatchTransformTranslators.java | 723 ------------
.../flink/FlinkBatchTranslationContext.java | 153 ---
.../flink/FlinkDetachedRunnerResult.java | 75 --
.../FlinkPipelineExecutionEnvironment.java | 241 ----
.../runners/flink/FlinkPipelineOptions.java | 101 --
.../runners/flink/FlinkPipelineTranslator.java | 53 -
.../apache/beam/runners/flink/FlinkRunner.java | 232 ----
.../runners/flink/FlinkRunnerRegistrar.java | 62 --
.../beam/runners/flink/FlinkRunnerResult.java | 98 --
.../flink/FlinkStreamingPipelineTranslator.java | 276 -----
.../FlinkStreamingTransformTranslators.java | 1044 -----------------
.../flink/FlinkStreamingTranslationContext.java | 130 ---
.../flink/FlinkStreamingViewOverrides.java | 372 -------
.../flink/PipelineTranslationOptimizer.java | 72 --
.../beam/runners/flink/TestFlinkRunner.java | 84 --
.../beam/runners/flink/TranslationMode.java | 31 -
.../apache/beam/runners/flink/package-info.java | 22 -
.../functions/FlinkAggregatorFactory.java | 53 -
.../functions/FlinkAssignContext.java | 63 --
.../functions/FlinkAssignWindows.java | 49 -
.../functions/FlinkDoFnFunction.java | 161 ---
.../FlinkMergingNonShuffleReduceFunction.java | 228 ----
.../FlinkMergingPartialReduceFunction.java | 201 ----
.../functions/FlinkMergingReduceFunction.java | 199 ----
.../FlinkMultiOutputPruningFunction.java | 50 -
.../functions/FlinkNoOpStepContext.java | 73 --
.../functions/FlinkPartialReduceFunction.java | 172 ---
.../functions/FlinkReduceFunction.java | 173 ---
.../functions/FlinkSideInputReader.java | 80 --
.../functions/FlinkStatefulDoFnFunction.java | 198 ----
.../functions/SideInputInitializer.java | 73 --
.../translation/functions/package-info.java | 22 -
.../runners/flink/translation/package-info.java | 22 -
.../translation/types/CoderTypeInformation.java | 120 --
.../translation/types/CoderTypeSerializer.java | 132 ---
.../types/EncodedValueComparator.java | 195 ----
.../types/EncodedValueSerializer.java | 113 --
.../types/EncodedValueTypeInformation.java | 98 --
.../types/InspectableByteArrayOutputStream.java | 34 -
.../flink/translation/types/KvKeySelector.java | 50 -
.../flink/translation/types/package-info.java | 22 -
.../utils/SerializedPipelineOptions.java | 67 --
.../flink/translation/utils/package-info.java | 22 -
.../wrappers/DataInputViewWrapper.java | 58 -
.../wrappers/DataOutputViewWrapper.java | 51 -
.../SerializableFnAggregatorWrapper.java | 98 --
.../translation/wrappers/SourceInputFormat.java | 150 ---
.../translation/wrappers/SourceInputSplit.java | 52 -
.../translation/wrappers/package-info.java | 22 -
.../wrappers/streaming/DoFnOperator.java | 774 -------------
.../streaming/KvToByteBufferKeySelector.java | 56 -
.../streaming/SingletonKeyedWorkItem.java | 56 -
.../streaming/SingletonKeyedWorkItemCoder.java | 126 ---
.../streaming/SplittableDoFnOperator.java | 150 ---
.../wrappers/streaming/WindowDoFnOperator.java | 117 --
.../wrappers/streaming/WorkItemKeySelector.java | 56 -
.../streaming/io/BoundedSourceWrapper.java | 218 ----
.../streaming/io/UnboundedSocketSource.java | 249 -----
.../streaming/io/UnboundedSourceWrapper.java | 476 --------
.../wrappers/streaming/io/package-info.java | 22 -
.../wrappers/streaming/package-info.java | 22 -
.../state/FlinkBroadcastStateInternals.java | 865 --------------
.../state/FlinkKeyGroupStateInternals.java | 487 --------
.../state/FlinkSplitStateInternals.java | 260 -----
.../streaming/state/FlinkStateInternals.java | 1053 ------------------
.../state/KeyGroupCheckpointedOperator.java | 35 -
.../state/KeyGroupRestoringOperator.java | 32 -
.../wrappers/streaming/state/package-info.java | 22 -
.../runner/src/main/resources/log4j.properties | 23 -
.../flink/EncodedValueComparatorTest.java | 70 --
.../runners/flink/FlinkRunnerRegistrarTest.java | 48 -
.../beam/runners/flink/FlinkTestPipeline.java | 72 --
.../beam/runners/flink/PipelineOptionsTest.java | 184 ---
.../beam/runners/flink/ReadSourceITCase.java | 85 --
.../flink/ReadSourceStreamingITCase.java | 74 --
.../beam/runners/flink/WriteSinkITCase.java | 192 ----
.../flink/streaming/DoFnOperatorTest.java | 600 ----------
.../FlinkBroadcastStateInternalsTest.java | 245 ----
.../FlinkKeyGroupStateInternalsTest.java | 262 -----
.../streaming/FlinkSplitStateInternalsTest.java | 101 --
.../streaming/FlinkStateInternalsTest.java | 395 -------
.../flink/streaming/GroupByNullKeyTest.java | 124 ---
.../flink/streaming/TestCountingSource.java | 254 -----
.../streaming/TopWikipediaSessionsITCase.java | 133 ---
.../streaming/UnboundedSourceWrapperTest.java | 464 --------
.../runners/flink/streaming/package-info.java | 22 -
.../src/test/resources/log4j-test.properties | 27 -
.../flink/DefaultParallelismFactory.java | 39 +
.../flink/FlinkBatchPipelineTranslator.java | 139 +++
.../flink/FlinkBatchTransformTranslators.java | 723 ++++++++++++
.../flink/FlinkBatchTranslationContext.java | 153 +++
.../flink/FlinkDetachedRunnerResult.java | 75 ++
.../FlinkPipelineExecutionEnvironment.java | 241 ++++
.../runners/flink/FlinkPipelineOptions.java | 101 ++
.../runners/flink/FlinkPipelineTranslator.java | 53 +
.../apache/beam/runners/flink/FlinkRunner.java | 232 ++++
.../runners/flink/FlinkRunnerRegistrar.java | 62 ++
.../beam/runners/flink/FlinkRunnerResult.java | 98 ++
.../flink/FlinkStreamingPipelineTranslator.java | 276 +++++
.../FlinkStreamingTransformTranslators.java | 1044 +++++++++++++++++
.../flink/FlinkStreamingTranslationContext.java | 130 +++
.../flink/FlinkStreamingViewOverrides.java | 372 +++++++
.../flink/PipelineTranslationOptimizer.java | 72 ++
.../beam/runners/flink/TestFlinkRunner.java | 84 ++
.../beam/runners/flink/TranslationMode.java | 31 +
.../apache/beam/runners/flink/package-info.java | 22 +
.../functions/FlinkAggregatorFactory.java | 53 +
.../functions/FlinkAssignContext.java | 63 ++
.../functions/FlinkAssignWindows.java | 49 +
.../functions/FlinkDoFnFunction.java | 161 +++
.../FlinkMergingNonShuffleReduceFunction.java | 228 ++++
.../FlinkMergingPartialReduceFunction.java | 201 ++++
.../functions/FlinkMergingReduceFunction.java | 199 ++++
.../FlinkMultiOutputPruningFunction.java | 50 +
.../functions/FlinkNoOpStepContext.java | 73 ++
.../functions/FlinkPartialReduceFunction.java | 172 +++
.../functions/FlinkReduceFunction.java | 173 +++
.../functions/FlinkSideInputReader.java | 80 ++
.../functions/FlinkStatefulDoFnFunction.java | 198 ++++
.../functions/SideInputInitializer.java | 73 ++
.../translation/functions/package-info.java | 22 +
.../runners/flink/translation/package-info.java | 22 +
.../translation/types/CoderTypeInformation.java | 120 ++
.../translation/types/CoderTypeSerializer.java | 132 +++
.../types/EncodedValueComparator.java | 195 ++++
.../types/EncodedValueSerializer.java | 113 ++
.../types/EncodedValueTypeInformation.java | 98 ++
.../types/InspectableByteArrayOutputStream.java | 34 +
.../flink/translation/types/KvKeySelector.java | 50 +
.../flink/translation/types/package-info.java | 22 +
.../utils/SerializedPipelineOptions.java | 67 ++
.../flink/translation/utils/package-info.java | 22 +
.../wrappers/DataInputViewWrapper.java | 58 +
.../wrappers/DataOutputViewWrapper.java | 51 +
.../SerializableFnAggregatorWrapper.java | 98 ++
.../translation/wrappers/SourceInputFormat.java | 150 +++
.../translation/wrappers/SourceInputSplit.java | 52 +
.../translation/wrappers/package-info.java | 22 +
.../wrappers/streaming/DoFnOperator.java | 774 +++++++++++++
.../streaming/KvToByteBufferKeySelector.java | 56 +
.../streaming/SingletonKeyedWorkItem.java | 56 +
.../streaming/SingletonKeyedWorkItemCoder.java | 126 +++
.../streaming/SplittableDoFnOperator.java | 150 +++
.../wrappers/streaming/WindowDoFnOperator.java | 117 ++
.../wrappers/streaming/WorkItemKeySelector.java | 56 +
.../streaming/io/BoundedSourceWrapper.java | 218 ++++
.../streaming/io/UnboundedSocketSource.java | 249 +++++
.../streaming/io/UnboundedSourceWrapper.java | 476 ++++++++
.../wrappers/streaming/io/package-info.java | 22 +
.../wrappers/streaming/package-info.java | 22 +
.../state/FlinkBroadcastStateInternals.java | 865 ++++++++++++++
.../state/FlinkKeyGroupStateInternals.java | 487 ++++++++
.../state/FlinkSplitStateInternals.java | 260 +++++
.../streaming/state/FlinkStateInternals.java | 1053 ++++++++++++++++++
.../state/KeyGroupCheckpointedOperator.java | 35 +
.../state/KeyGroupRestoringOperator.java | 32 +
.../wrappers/streaming/state/package-info.java | 22 +
.../flink/src/main/resources/log4j.properties | 23 +
.../flink/EncodedValueComparatorTest.java | 70 ++
.../runners/flink/FlinkRunnerRegistrarTest.java | 48 +
.../beam/runners/flink/FlinkTestPipeline.java | 72 ++
.../beam/runners/flink/PipelineOptionsTest.java | 184 +++
.../beam/runners/flink/ReadSourceITCase.java | 85 ++
.../flink/ReadSourceStreamingITCase.java | 74 ++
.../beam/runners/flink/WriteSinkITCase.java | 192 ++++
.../flink/streaming/DoFnOperatorTest.java | 600 ++++++++++
.../FlinkBroadcastStateInternalsTest.java | 245 ++++
.../FlinkKeyGroupStateInternalsTest.java | 262 +++++
.../streaming/FlinkSplitStateInternalsTest.java | 101 ++
.../streaming/FlinkStateInternalsTest.java | 395 +++++++
.../flink/streaming/GroupByNullKeyTest.java | 124 +++
.../flink/streaming/TestCountingSource.java | 254 +++++
.../streaming/TopWikipediaSessionsITCase.java | 133 +++
.../streaming/UnboundedSourceWrapperTest.java | 464 ++++++++
.../runners/flink/streaming/package-info.java | 22 +
.../src/test/resources/log4j-test.properties | 27 +
189 files changed, 15765 insertions(+), 17293 deletions(-)
----------------------------------------------------------------------
[42/50] [abbrv] beam git commit: [BEAM-1441] Remove deprecated
ChannelFactory
Posted by dh...@apache.org.
[BEAM-1441] Remove deprecated ChannelFactory
Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/97c66784
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/97c66784
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/97c66784
Branch: refs/heads/DSL_SQL
Commit: 97c667846b566c312ceaadc66fb14fde1dfa7ebe
Parents: 8319369
Author: Sourabh Bajaj <so...@google.com>
Authored: Fri Apr 14 14:45:16 2017 -0700
Committer: chamikara@google.com <ch...@google.com>
Committed: Wed Apr 19 09:56:28 2017 -0700
----------------------------------------------------------------------
sdks/python/apache_beam/io/fileio.py | 90 -------------------------------
1 file changed, 90 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/beam/blob/97c66784/sdks/python/apache_beam/io/fileio.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/io/fileio.py b/sdks/python/apache_beam/io/fileio.py
index 8ee5198..f61289e 100644
--- a/sdks/python/apache_beam/io/fileio.py
+++ b/sdks/python/apache_beam/io/fileio.py
@@ -27,7 +27,6 @@ import time
from apache_beam.internal import util
from apache_beam.io import iobase
from apache_beam.io.filesystem import BeamIOError
-from apache_beam.io.filesystem import CompressedFile as _CompressedFile
from apache_beam.io.filesystem import CompressionTypes
from apache_beam.io.filesystems_util import get_filesystem
from apache_beam.transforms.display import DisplayDataItem
@@ -38,95 +37,6 @@ from apache_beam.utils.value_provider import check_accessible
DEFAULT_SHARD_NAME_TEMPLATE = '-SSSSS-of-NNNNN'
-# TODO(sourabhbajaj): Remove this after BFS API is used everywhere
-class ChannelFactory(object):
- @staticmethod
- def mkdir(path):
- bfs = get_filesystem(path)
- return bfs.mkdirs(path)
-
- @staticmethod
- def open(path,
- mode,
- mime_type='application/octet-stream',
- compression_type=CompressionTypes.AUTO):
- bfs = get_filesystem(path)
- if mode == 'rb':
- return bfs.open(path, mime_type, compression_type)
- elif mode == 'wb':
- return bfs.create(path, mime_type, compression_type)
-
- @staticmethod
- def is_compressed(fileobj):
- return isinstance(fileobj, _CompressedFile)
-
- @staticmethod
- def rename(src, dest):
- bfs = get_filesystem(src)
- return bfs.rename([src], [dest])
-
- @staticmethod
- def rename_batch(src_dest_pairs):
- sources = [s for s, _ in src_dest_pairs]
- destinations = [d for _, d in src_dest_pairs]
- if not sources:
- return []
- bfs = get_filesystem(sources[0])
- try:
- bfs.rename(sources, destinations)
- return []
- except BeamIOError as exp:
- return [(s, d, e) for (s, d), e in exp.exception_details.iteritems()]
-
- @staticmethod
- def copytree(src, dest):
- bfs = get_filesystem(src)
- return bfs.copy([src], [dest])
-
- @staticmethod
- def exists(path):
- bfs = get_filesystem(path)
- return bfs.exists(path)
-
- @staticmethod
- def rmdir(path):
- bfs = get_filesystem(path)
- return bfs.delete([path])
-
- @staticmethod
- def rm(path):
- bfs = get_filesystem(path)
- return bfs.delete([path])
-
- @staticmethod
- def glob(path, limit=None):
- bfs = get_filesystem(path)
- match_result = bfs.match([path], [limit])[0]
- return [f.path for f in match_result.metadata_list]
-
- @staticmethod
- def size_in_bytes(path):
- bfs = get_filesystem(path)
- match_result = bfs.match([path])[0]
- return [f.size_in_bytes for f in match_result.metadata_list][0]
-
- @staticmethod
- def size_of_files_in_glob(path, file_names=None):
- bfs = get_filesystem(path)
- match_result = bfs.match([path])[0]
- part_files = {f.path:f.size_in_bytes for f in match_result.metadata_list}
-
- if file_names is not None:
- specific_files = {}
- match_results = bfs.match(file_names)
- for match_result in match_results:
- for metadata in match_result.metadata_list:
- specific_files[metadata.path] = metadata.size_in_bytes
-
- part_files.update(specific_files)
- return part_files
-
-
class FileSink(iobase.Sink):
"""A sink to a GCS or local files.
[39/50] [abbrv] beam git commit: [BEAM-1994] Remove Flink examples
package
Posted by dh...@apache.org.
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/runner/pom.xml
----------------------------------------------------------------------
diff --git a/runners/flink/runner/pom.xml b/runners/flink/runner/pom.xml
deleted file mode 100644
index 18343ef..0000000
--- a/runners/flink/runner/pom.xml
+++ /dev/null
@@ -1,330 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
-
- <modelVersion>4.0.0</modelVersion>
-
- <parent>
- <groupId>org.apache.beam</groupId>
- <artifactId>beam-runners-flink-parent</artifactId>
- <version>0.7.0-SNAPSHOT</version>
- <relativePath>../pom.xml</relativePath>
- </parent>
-
- <artifactId>beam-runners-flink_2.10</artifactId>
-
- <name>Apache Beam :: Runners :: Flink :: Core</name>
-
- <packaging>jar</packaging>
-
- <profiles>
- <profile>
- <id>local-validates-runner-tests</id>
- <activation><activeByDefault>false</activeByDefault></activation>
- <build>
- <plugins>
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-surefire-plugin</artifactId>
- <executions>
-
- <!-- This configures the inherited validates-runner-tests
- execution to execute with a local Flink instance. -->
- <execution>
- <id>validates-runner-tests</id>
- <phase>integration-test</phase>
- <goals>
- <goal>test</goal>
- </goals>
- <configuration>
- <groups>org.apache.beam.sdk.testing.ValidatesRunner</groups>
- <excludedGroups>
- org.apache.beam.sdk.testing.FlattenWithHeterogeneousCoders,
- org.apache.beam.sdk.testing.UsesSplittableParDo,
- org.apache.beam.sdk.testing.UsesAttemptedMetrics,
- org.apache.beam.sdk.testing.UsesCommittedMetrics,
- org.apache.beam.sdk.testing.UsesTestStream
- </excludedGroups>
- <parallel>none</parallel>
- <failIfNoTests>true</failIfNoTests>
- <dependenciesToScan>
- <dependency>org.apache.beam:beam-sdks-java-core</dependency>
- </dependenciesToScan>
- <systemPropertyVariables>
- <beamTestPipelineOptions>
- [
- "--runner=TestFlinkRunner",
- "--streaming=false"
- ]
- </beamTestPipelineOptions>
- </systemPropertyVariables>
- </configuration>
- </execution>
-
- <!-- This second execution runs the tests in streaming mode -->
- <execution>
- <id>streaming-validates-runner-tests</id>
- <phase>integration-test</phase>
- <goals>
- <goal>test</goal>
- </goals>
- <configuration>
- <groups>org.apache.beam.sdk.testing.ValidatesRunner</groups>
- <excludedGroups>
- org.apache.beam.sdk.testing.FlattenWithHeterogeneousCoders,
- org.apache.beam.sdk.testing.UsesSetState,
- org.apache.beam.sdk.testing.UsesMapState,
- org.apache.beam.sdk.testing.UsesAttemptedMetrics,
- org.apache.beam.sdk.testing.UsesCommittedMetrics,
- org.apache.beam.sdk.testing.UsesTestStream,
- org.apache.beam.sdk.testing.UsesSplittableParDoWithWindowedSideInputs
- </excludedGroups>
- <parallel>none</parallel>
- <failIfNoTests>true</failIfNoTests>
- <dependenciesToScan>
- <dependency>org.apache.beam:beam-sdks-java-core</dependency>
- </dependenciesToScan>
- <systemPropertyVariables>
- <beamTestPipelineOptions>
- [
- "--runner=TestFlinkRunner",
- "--streaming=true"
- ]
- </beamTestPipelineOptions>
- </systemPropertyVariables>
- </configuration>
- </execution>
- </executions>
- </plugin>
- </plugins>
- </build>
- </profile>
- </profiles>
-
- <build>
- <plugins>
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-compiler-plugin</artifactId>
- </plugin>
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-jar-plugin</artifactId>
- </plugin>
-
- <!-- Integration Tests -->
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-failsafe-plugin</artifactId>
- </plugin>
-
- <!-- Unit Tests -->
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-surefire-plugin</artifactId>
- </plugin>
- </plugins>
- </build>
-
- <dependencies>
- <!-- Flink dependencies -->
- <dependency>
- <groupId>org.apache.flink</groupId>
- <artifactId>flink-java</artifactId>
- <version>${flink.version}</version>
- </dependency>
-
- <dependency>
- <groupId>org.apache.flink</groupId>
- <artifactId>flink-clients_2.10</artifactId>
- <version>${flink.version}</version>
- </dependency>
-
- <dependency>
- <groupId>org.apache.flink</groupId>
- <artifactId>flink-runtime_2.10</artifactId>
- <version>${flink.version}</version>
- </dependency>
-
- <!-- For testing -->
- <dependency>
- <groupId>org.apache.flink</groupId>
- <artifactId>flink-core</artifactId>
- <version>${flink.version}</version>
- <type>test-jar</type>
- <scope>test</scope>
- </dependency>
-
- <dependency>
- <groupId>org.apache.flink</groupId>
- <artifactId>flink-runtime_2.10</artifactId>
- <version>${flink.version}</version>
- <type>test-jar</type>
- <scope>test</scope>
- </dependency>
-
- <!-- Beam -->
- <dependency>
- <groupId>org.apache.beam</groupId>
- <artifactId>beam-sdks-java-core</artifactId>
- <exclusions>
- <exclusion>
- <groupId>org.slf4j</groupId>
- <artifactId>slf4j-jdk14</artifactId>
- </exclusion>
- </exclusions>
- </dependency>
-
- <dependency>
- <groupId>org.apache.beam</groupId>
- <artifactId>beam-runners-core-java</artifactId>
- <exclusions>
- <exclusion>
- <groupId>org.slf4j</groupId>
- <artifactId>slf4j-jdk14</artifactId>
- </exclusion>
- </exclusions>
- </dependency>
-
- <dependency>
- <groupId>org.apache.beam</groupId>
- <artifactId>beam-runners-core-construction-java</artifactId>
- <exclusions>
- <exclusion>
- <groupId>org.slf4j</groupId>
- <artifactId>slf4j-jdk14</artifactId>
- </exclusion>
- </exclusions>
- </dependency>
-
- <dependency>
- <groupId>com.fasterxml.jackson.core</groupId>
- <artifactId>jackson-annotations</artifactId>
- </dependency>
-
- <dependency>
- <groupId>com.fasterxml.jackson.core</groupId>
- <artifactId>jackson-databind</artifactId>
- </dependency>
-
- <dependency>
- <groupId>com.google.guava</groupId>
- <artifactId>guava</artifactId>
- </dependency>
-
- <dependency>
- <groupId>com.google.code.findbugs</groupId>
- <artifactId>jsr305</artifactId>
- </dependency>
-
- <!--
- Force an upgrade of the Apache Commons version that Flink pulls in, to support DEFLATE compression.
- -->
- <dependency>
- <groupId>org.apache.commons</groupId>
- <artifactId>commons-compress</artifactId>
- <scope>runtime</scope>
- </dependency>
-
- <!-- Test scoped -->
- <dependency>
- <groupId>com.google.apis</groupId>
- <artifactId>google-api-services-bigquery</artifactId>
- <scope>test</scope>
- </dependency>
-
- <dependency>
- <groupId>org.apache.commons</groupId>
- <artifactId>commons-lang3</artifactId>
- <scope>test</scope>
- </dependency>
-
- <dependency>
- <groupId>org.hamcrest</groupId>
- <artifactId>hamcrest-all</artifactId>
- <scope>test</scope>
- </dependency>
-
- <dependency>
- <groupId>junit</groupId>
- <artifactId>junit</artifactId>
- <scope>test</scope>
- </dependency>
-
- <dependency>
- <groupId>org.mockito</groupId>
- <artifactId>mockito-all</artifactId>
- <scope>test</scope>
- </dependency>
-
- <!-- Depend on test jar to scan for ValidatesRunner tests -->
- <dependency>
- <groupId>org.apache.beam</groupId>
- <artifactId>beam-sdks-java-core</artifactId>
- <classifier>tests</classifier>
- <scope>test</scope>
- <exclusions>
- <exclusion>
- <groupId>org.slf4j</groupId>
- <artifactId>slf4j-jdk14</artifactId>
- </exclusion>
- </exclusions>
- </dependency>
-
- <dependency>
- <groupId>org.apache.flink</groupId>
- <artifactId>flink-streaming-java_2.10</artifactId>
- <version>${flink.version}</version>
- <scope>test</scope>
- <type>test-jar</type>
- </dependency>
- <dependency>
- <groupId>org.apache.flink</groupId>
- <artifactId>flink-test-utils_2.10</artifactId>
- <version>${flink.version}</version>
- <scope>test</scope>
- <exclusions>
- <exclusion>
- <artifactId>apacheds-jdbm1</artifactId>
- <groupId>org.apache.directory.jdbm</groupId>
- </exclusion>
- </exclusions>
- </dependency>
-
- <!-- Optional Pipeline Registration -->
- <dependency>
- <groupId>com.google.auto.service</groupId>
- <artifactId>auto-service</artifactId>
- <optional>true</optional>
- </dependency>
-
- <!-- transitive test dependencies from beam-sdks-java-core -->
- <dependency>
- <groupId>com.fasterxml.jackson.dataformat</groupId>
- <artifactId>jackson-dataformat-yaml</artifactId>
- <scope>test</scope>
- </dependency>
-
- <dependency>
- <groupId>org.apache.beam</groupId>
- <artifactId>beam-sdks-common-fn-api</artifactId>
- <type>test-jar</type>
- <scope>test</scope>
- </dependency>
- </dependencies>
-</project>
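
The <groups> and <excludedGroups> entries in the surefire executions above select tests by
their JUnit category annotations. As a rough sketch of what such a test looks like on the SDK
side (the pipeline body is purely illustrative, not an actual test from the suite):

import org.apache.beam.sdk.testing.PAssert;
import org.apache.beam.sdk.testing.TestPipeline;
import org.apache.beam.sdk.testing.ValidatesRunner;
import org.apache.beam.sdk.transforms.Create;
import org.junit.Rule;
import org.junit.Test;
import org.junit.experimental.categories.Category;

public class ExampleValidatesRunnerTest {

  @Rule public final TestPipeline pipeline = TestPipeline.create();

  // Runs only when the surefire execution includes the ValidatesRunner group,
  // e.g. with beamTestPipelineOptions pointing at TestFlinkRunner as configured above.
  @Test
  @Category(ValidatesRunner.class)
  public void testCreate() {
    PAssert.that(pipeline.apply(Create.of(1, 2, 3)))
        .containsInAnyOrder(1, 2, 3);
    pipeline.run();
  }
}
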
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/DefaultParallelismFactory.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/DefaultParallelismFactory.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/DefaultParallelismFactory.java
deleted file mode 100644
index b745f0b..0000000
--- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/DefaultParallelismFactory.java
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.beam.runners.flink;
-
-import org.apache.beam.sdk.options.DefaultValueFactory;
-import org.apache.beam.sdk.options.PipelineOptions;
-import org.apache.flink.configuration.ConfigConstants;
-import org.apache.flink.configuration.GlobalConfiguration;
-
-/**
- * {@link DefaultValueFactory} for getting a default value for the parallelism option
- * on {@link FlinkPipelineOptions}.
- *
- * <p>This will return either the default value from {@link GlobalConfiguration} or {@code 1}.
- * A valid {@link GlobalConfiguration} is only available if the program is executed by the Flink
- * run scripts.
- */
-public class DefaultParallelismFactory implements DefaultValueFactory<Integer> {
- @Override
- public Integer create(PipelineOptions options) {
- return GlobalConfiguration.loadConfiguration()
- .getInteger(ConfigConstants.DEFAULT_PARALLELISM_KEY, 1);
- }
-}
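
A DefaultValueFactory like the one above is typically wired to a pipeline option through the
@Default.InstanceFactory annotation. A minimal sketch with a hypothetical options interface
(the option name and factory here are illustrative, not the actual FlinkPipelineOptions
definition):

import org.apache.beam.sdk.options.Default;
import org.apache.beam.sdk.options.DefaultValueFactory;
import org.apache.beam.sdk.options.Description;
import org.apache.beam.sdk.options.PipelineOptions;

public interface ExampleParallelismOptions extends PipelineOptions {

  // The factory is only consulted when the user does not set the option explicitly.
  @Description("Degree of parallelism to use when none is given on the command line.")
  @Default.InstanceFactory(FixedParallelismFactory.class)
  Integer getDefaultParallelism();
  void setDefaultParallelism(Integer value);

  /** Hypothetical factory returning a fixed fallback value. */
  class FixedParallelismFactory implements DefaultValueFactory<Integer> {
    @Override
    public Integer create(PipelineOptions options) {
      return 1;
    }
  }
}

PipelineOptionsFactory.as(ExampleParallelismOptions.class).getDefaultParallelism() would then
return 1 unless the option was set explicitly.
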
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/FlinkBatchPipelineTranslator.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/FlinkBatchPipelineTranslator.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/FlinkBatchPipelineTranslator.java
deleted file mode 100644
index 854b674..0000000
--- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/FlinkBatchPipelineTranslator.java
+++ /dev/null
@@ -1,139 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.beam.runners.flink;
-
-import org.apache.beam.sdk.Pipeline;
-import org.apache.beam.sdk.options.PipelineOptions;
-import org.apache.beam.sdk.runners.TransformHierarchy;
-import org.apache.beam.sdk.transforms.PTransform;
-import org.apache.flink.api.java.DataSet;
-import org.apache.flink.api.java.ExecutionEnvironment;
-import org.apache.flink.api.java.io.DiscardingOutputFormat;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-/**
- * {@link Pipeline.PipelineVisitor} for executing a {@link Pipeline} as a
- * Flink batch job.
- */
-class FlinkBatchPipelineTranslator extends FlinkPipelineTranslator {
-
- private static final Logger LOG = LoggerFactory.getLogger(FlinkBatchPipelineTranslator.class);
-
- /**
- * The necessary context in the case of a batch job.
- */
- private final FlinkBatchTranslationContext batchContext;
-
- private int depth = 0;
-
- public FlinkBatchPipelineTranslator(ExecutionEnvironment env, PipelineOptions options) {
- this.batchContext = new FlinkBatchTranslationContext(env, options);
- }
-
- @Override
- @SuppressWarnings("rawtypes, unchecked")
- public void translate(Pipeline pipeline) {
- super.translate(pipeline);
-
- // terminate dangling DataSets
- for (DataSet<?> dataSet: batchContext.getDanglingDataSets().values()) {
- dataSet.output(new DiscardingOutputFormat());
- }
- }
-
- // --------------------------------------------------------------------------------------------
- // Pipeline Visitor Methods
- // --------------------------------------------------------------------------------------------
-
- @Override
- public CompositeBehavior enterCompositeTransform(TransformHierarchy.Node node) {
- LOG.info("{} enterCompositeTransform- {}", genSpaces(this.depth), node.getFullName());
- this.depth++;
-
- BatchTransformTranslator<?> translator = getTranslator(node);
-
- if (translator != null) {
- applyBatchTransform(node.getTransform(), node, translator);
- LOG.info("{} translated- {}", genSpaces(this.depth), node.getFullName());
- return CompositeBehavior.DO_NOT_ENTER_TRANSFORM;
- } else {
- return CompositeBehavior.ENTER_TRANSFORM;
- }
- }
-
- @Override
- public void leaveCompositeTransform(TransformHierarchy.Node node) {
- this.depth--;
- LOG.info("{} leaveCompositeTransform- {}", genSpaces(this.depth), node.getFullName());
- }
-
- @Override
- public void visitPrimitiveTransform(TransformHierarchy.Node node) {
- LOG.info("{} visitPrimitiveTransform- {}", genSpaces(this.depth), node.getFullName());
-
- // get the transformation corresponding to the node we are
- // currently visiting and translate it into its Flink alternative.
- PTransform<?, ?> transform = node.getTransform();
- BatchTransformTranslator<?> translator =
- FlinkBatchTransformTranslators.getTranslator(transform);
- if (translator == null) {
- LOG.info(node.getTransform().getClass().toString());
- throw new UnsupportedOperationException("The transform " + transform
- + " is currently not supported.");
- }
- applyBatchTransform(transform, node, translator);
- }
-
- private <T extends PTransform<?, ?>> void applyBatchTransform(
- PTransform<?, ?> transform,
- TransformHierarchy.Node node,
- BatchTransformTranslator<?> translator) {
-
- @SuppressWarnings("unchecked")
- T typedTransform = (T) transform;
-
- @SuppressWarnings("unchecked")
- BatchTransformTranslator<T> typedTranslator = (BatchTransformTranslator<T>) translator;
-
- // create the applied PTransform on the batchContext
- batchContext.setCurrentTransform(node.toAppliedPTransform());
- typedTranslator.translateNode(typedTransform, batchContext);
- }
-
- /**
- * A translator of a {@link PTransform}.
- */
- public interface BatchTransformTranslator<TransformT extends PTransform> {
- void translateNode(TransformT transform, FlinkBatchTranslationContext context);
- }
-
- /**
- * Returns a translator for the given node if one exists, otherwise null.
- */
- private static BatchTransformTranslator<?> getTranslator(TransformHierarchy.Node node) {
- PTransform<?, ?> transform = node.getTransform();
-
- // Root of the graph is null
- if (transform == null) {
- return null;
- }
-
- return FlinkBatchTransformTranslators.getTranslator(transform);
- }
-}
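
The translator above follows a simple registry pattern: a map from PTransform class to a
per-transform translator, consulted while walking the transform hierarchy. A stripped-down,
self-contained sketch of that pattern, independent of the Flink-specific context classes
(all names here are illustrative):

import java.util.HashMap;
import java.util.Map;

public class TranslatorRegistrySketch {

  /** Per-transform translation callback, analogous to BatchTransformTranslator above. */
  interface Translator<T> {
    void translate(T transform);
  }

  private static final Map<Class<?>, Translator<?>> TRANSLATORS = new HashMap<>();

  static <T> void register(Class<T> transformClass, Translator<T> translator) {
    TRANSLATORS.put(transformClass, translator);
  }

  /** Returns the registered translator or null, mirroring getTranslator(node). */
  @SuppressWarnings("unchecked")
  static <T> Translator<T> lookup(T transform) {
    return transform == null ? null : (Translator<T>) TRANSLATORS.get(transform.getClass());
  }

  public static void main(String[] args) {
    register(String.class, new Translator<String>() {
      @Override
      public void translate(String transform) {
        System.out.println("translating: " + transform);
      }
    });

    // A composite node with a registered translator is translated and not entered;
    // otherwise the visitor descends into its children, as in the code above.
    Translator<String> translator = lookup("SomeCompositeTransform");
    if (translator != null) {
      translator.translate("SomeCompositeTransform");
    }
  }
}
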
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/FlinkBatchTransformTranslators.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/FlinkBatchTransformTranslators.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/FlinkBatchTransformTranslators.java
deleted file mode 100644
index ff9521c..0000000
--- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/FlinkBatchTransformTranslators.java
+++ /dev/null
@@ -1,723 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.beam.runners.flink;
-
-import static com.google.common.base.Preconditions.checkArgument;
-import static com.google.common.base.Preconditions.checkState;
-
-import com.google.common.collect.Lists;
-import com.google.common.collect.Maps;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.Map.Entry;
-import org.apache.beam.runners.flink.translation.functions.FlinkAssignWindows;
-import org.apache.beam.runners.flink.translation.functions.FlinkDoFnFunction;
-import org.apache.beam.runners.flink.translation.functions.FlinkMergingNonShuffleReduceFunction;
-import org.apache.beam.runners.flink.translation.functions.FlinkMergingPartialReduceFunction;
-import org.apache.beam.runners.flink.translation.functions.FlinkMergingReduceFunction;
-import org.apache.beam.runners.flink.translation.functions.FlinkMultiOutputPruningFunction;
-import org.apache.beam.runners.flink.translation.functions.FlinkPartialReduceFunction;
-import org.apache.beam.runners.flink.translation.functions.FlinkReduceFunction;
-import org.apache.beam.runners.flink.translation.functions.FlinkStatefulDoFnFunction;
-import org.apache.beam.runners.flink.translation.types.CoderTypeInformation;
-import org.apache.beam.runners.flink.translation.types.KvKeySelector;
-import org.apache.beam.runners.flink.translation.wrappers.SourceInputFormat;
-import org.apache.beam.sdk.coders.CannotProvideCoderException;
-import org.apache.beam.sdk.coders.Coder;
-import org.apache.beam.sdk.coders.CoderRegistry;
-import org.apache.beam.sdk.coders.KvCoder;
-import org.apache.beam.sdk.coders.ListCoder;
-import org.apache.beam.sdk.coders.VoidCoder;
-import org.apache.beam.sdk.io.BoundedSource;
-import org.apache.beam.sdk.io.Read;
-import org.apache.beam.sdk.transforms.Combine;
-import org.apache.beam.sdk.transforms.CombineFnBase;
-import org.apache.beam.sdk.transforms.DoFn;
-import org.apache.beam.sdk.transforms.Flatten;
-import org.apache.beam.sdk.transforms.GroupByKey;
-import org.apache.beam.sdk.transforms.PTransform;
-import org.apache.beam.sdk.transforms.ParDo;
-import org.apache.beam.sdk.transforms.View;
-import org.apache.beam.sdk.transforms.join.RawUnionValue;
-import org.apache.beam.sdk.transforms.join.UnionCoder;
-import org.apache.beam.sdk.transforms.reflect.DoFnSignature;
-import org.apache.beam.sdk.transforms.reflect.DoFnSignatures;
-import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
-import org.apache.beam.sdk.transforms.windowing.GlobalWindow;
-import org.apache.beam.sdk.transforms.windowing.IntervalWindow;
-import org.apache.beam.sdk.transforms.windowing.Window;
-import org.apache.beam.sdk.transforms.windowing.WindowFn;
-import org.apache.beam.sdk.util.Reshuffle;
-import org.apache.beam.sdk.util.WindowedValue;
-import org.apache.beam.sdk.util.WindowingStrategy;
-import org.apache.beam.sdk.values.KV;
-import org.apache.beam.sdk.values.PCollection;
-import org.apache.beam.sdk.values.PCollectionView;
-import org.apache.beam.sdk.values.PValue;
-import org.apache.beam.sdk.values.TupleTag;
-import org.apache.flink.api.common.functions.FilterFunction;
-import org.apache.flink.api.common.functions.FlatMapFunction;
-import org.apache.flink.api.common.typeinfo.TypeInformation;
-import org.apache.flink.api.java.DataSet;
-import org.apache.flink.api.java.operators.DataSource;
-import org.apache.flink.api.java.operators.FlatMapOperator;
-import org.apache.flink.api.java.operators.GroupCombineOperator;
-import org.apache.flink.api.java.operators.GroupReduceOperator;
-import org.apache.flink.api.java.operators.Grouping;
-import org.apache.flink.api.java.operators.MapPartitionOperator;
-import org.apache.flink.api.java.operators.SingleInputUdfOperator;
-import org.apache.flink.util.Collector;
-
-/**
- * Translators for transforming {@link PTransform PTransforms} to
- * Flink {@link DataSet DataSets}.
- */
-class FlinkBatchTransformTranslators {
-
- // --------------------------------------------------------------------------------------------
- // Transform Translator Registry
- // --------------------------------------------------------------------------------------------
-
- @SuppressWarnings("rawtypes")
- private static final Map<
- Class<? extends PTransform>,
- FlinkBatchPipelineTranslator.BatchTransformTranslator> TRANSLATORS = new HashMap<>();
-
- static {
- TRANSLATORS.put(View.CreatePCollectionView.class, new CreatePCollectionViewTranslatorBatch());
-
- TRANSLATORS.put(Combine.PerKey.class, new CombinePerKeyTranslatorBatch());
- TRANSLATORS.put(GroupByKey.class, new GroupByKeyTranslatorBatch());
- TRANSLATORS.put(Reshuffle.class, new ReshuffleTranslatorBatch());
-
- TRANSLATORS.put(Flatten.PCollections.class, new FlattenPCollectionTranslatorBatch());
-
- TRANSLATORS.put(Window.Assign.class, new WindowAssignTranslatorBatch());
-
- TRANSLATORS.put(ParDo.MultiOutput.class, new ParDoTranslatorBatch());
-
- TRANSLATORS.put(Read.Bounded.class, new ReadSourceTranslatorBatch());
- }
-
-
- static FlinkBatchPipelineTranslator.BatchTransformTranslator<?> getTranslator(
- PTransform<?, ?> transform) {
- return TRANSLATORS.get(transform.getClass());
- }
-
- private static class ReadSourceTranslatorBatch<T>
- implements FlinkBatchPipelineTranslator.BatchTransformTranslator<Read.Bounded<T>> {
-
- @Override
- public void translateNode(Read.Bounded<T> transform, FlinkBatchTranslationContext context) {
- String name = transform.getName();
- BoundedSource<T> source = transform.getSource();
- PCollection<T> output = context.getOutput(transform);
-
- TypeInformation<WindowedValue<T>> typeInformation = context.getTypeInfo(output);
-
- DataSource<WindowedValue<T>> dataSource = new DataSource<>(
- context.getExecutionEnvironment(),
- new SourceInputFormat<>(source, context.getPipelineOptions()),
- typeInformation,
- name);
-
- context.setOutputDataSet(output, dataSource);
- }
- }
-
- private static class WindowAssignTranslatorBatch<T>
- implements FlinkBatchPipelineTranslator.BatchTransformTranslator<Window.Assign<T>> {
-
- @Override
- public void translateNode(Window.Assign<T> transform, FlinkBatchTranslationContext context) {
- PValue input = context.getInput(transform);
-
- TypeInformation<WindowedValue<T>> resultTypeInfo =
- context.getTypeInfo(context.getOutput(transform));
-
- DataSet<WindowedValue<T>> inputDataSet = context.getInputDataSet(input);
-
- @SuppressWarnings("unchecked")
- final WindowingStrategy<T, ? extends BoundedWindow> windowingStrategy =
- (WindowingStrategy<T, ? extends BoundedWindow>)
- context.getOutput(transform).getWindowingStrategy();
-
- WindowFn<T, ? extends BoundedWindow> windowFn = windowingStrategy.getWindowFn();
-
- FlinkAssignWindows<T, ? extends BoundedWindow> assignWindowsFunction =
- new FlinkAssignWindows<>(windowFn);
-
- DataSet<WindowedValue<T>> resultDataSet = inputDataSet
- .flatMap(assignWindowsFunction)
- .name(context.getOutput(transform).getName())
- .returns(resultTypeInfo);
-
- context.setOutputDataSet(context.getOutput(transform), resultDataSet);
- }
- }
-
- private static class GroupByKeyTranslatorBatch<K, InputT>
- implements FlinkBatchPipelineTranslator.BatchTransformTranslator<GroupByKey<K, InputT>> {
-
- @Override
- public void translateNode(
- GroupByKey<K, InputT> transform,
- FlinkBatchTranslationContext context) {
-
- // for now, this is copied from the Combine.PerKey translator. Once we have the new runner API
- // we can replace GroupByKey with a Combine.PerKey using the Concatenate CombineFn
-
- DataSet<WindowedValue<KV<K, InputT>>> inputDataSet =
- context.getInputDataSet(context.getInput(transform));
-
- Combine.KeyedCombineFn<K, InputT, List<InputT>, List<InputT>> combineFn =
- new Concatenate<InputT>().asKeyedFn();
-
- KvCoder<K, InputT> inputCoder =
- (KvCoder<K, InputT>) context.getInput(transform).getCoder();
-
- Coder<List<InputT>> accumulatorCoder;
-
- try {
- accumulatorCoder =
- combineFn.getAccumulatorCoder(
- context.getInput(transform).getPipeline().getCoderRegistry(),
- inputCoder.getKeyCoder(),
- inputCoder.getValueCoder());
- } catch (CannotProvideCoderException e) {
- throw new RuntimeException(e);
- }
-
- WindowingStrategy<?, ?> windowingStrategy =
- context.getInput(transform).getWindowingStrategy();
-
- TypeInformation<WindowedValue<KV<K, List<InputT>>>> partialReduceTypeInfo =
- new CoderTypeInformation<>(
- WindowedValue.getFullCoder(
- KvCoder.of(inputCoder.getKeyCoder(), accumulatorCoder),
- windowingStrategy.getWindowFn().windowCoder()));
-
-
- Grouping<WindowedValue<KV<K, InputT>>> inputGrouping =
- inputDataSet.groupBy(new KvKeySelector<InputT, K>(inputCoder.getKeyCoder()));
-
- FlinkPartialReduceFunction<K, InputT, List<InputT>, ?> partialReduceFunction;
- FlinkReduceFunction<K, List<InputT>, List<InputT>, ?> reduceFunction;
-
- if (windowingStrategy.getWindowFn().isNonMerging()) {
- @SuppressWarnings("unchecked")
- WindowingStrategy<?, BoundedWindow> boundedStrategy =
- (WindowingStrategy<?, BoundedWindow>) windowingStrategy;
-
- partialReduceFunction = new FlinkPartialReduceFunction<>(
- combineFn,
- boundedStrategy,
- Collections.<PCollectionView<?>, WindowingStrategy<?, ?>>emptyMap(),
- context.getPipelineOptions());
-
- reduceFunction = new FlinkReduceFunction<>(
- combineFn,
- boundedStrategy,
- Collections.<PCollectionView<?>, WindowingStrategy<?, ?>>emptyMap(),
- context.getPipelineOptions());
-
- } else {
- if (!windowingStrategy.getWindowFn().windowCoder().equals(IntervalWindow.getCoder())) {
- throw new UnsupportedOperationException(
- "Merging WindowFn with windows other than IntervalWindow are not supported.");
- }
-
- @SuppressWarnings("unchecked")
- WindowingStrategy<?, IntervalWindow> intervalStrategy =
- (WindowingStrategy<?, IntervalWindow>) windowingStrategy;
-
- partialReduceFunction = new FlinkMergingPartialReduceFunction<>(
- combineFn,
- intervalStrategy,
- Collections.<PCollectionView<?>, WindowingStrategy<?, ?>>emptyMap(),
- context.getPipelineOptions());
-
- reduceFunction = new FlinkMergingReduceFunction<>(
- combineFn,
- intervalStrategy,
- Collections.<PCollectionView<?>, WindowingStrategy<?, ?>>emptyMap(),
- context.getPipelineOptions());
- }
-
- // Partially GroupReduce the values into the intermediate format AccumT (combine)
- GroupCombineOperator<
- WindowedValue<KV<K, InputT>>,
- WindowedValue<KV<K, List<InputT>>>> groupCombine =
- new GroupCombineOperator<>(
- inputGrouping,
- partialReduceTypeInfo,
- partialReduceFunction,
- "GroupCombine: " + transform.getName());
-
- Grouping<WindowedValue<KV<K, List<InputT>>>> intermediateGrouping =
- groupCombine.groupBy(new KvKeySelector<List<InputT>, K>(inputCoder.getKeyCoder()));
-
- // Fully reduce the values and create output format VO
- GroupReduceOperator<
- WindowedValue<KV<K, List<InputT>>>, WindowedValue<KV<K, List<InputT>>>> outputDataSet =
- new GroupReduceOperator<>(
- intermediateGrouping, partialReduceTypeInfo, reduceFunction, transform.getName());
-
- context.setOutputDataSet(context.getOutput(transform), outputDataSet);
-
- }
-
- }
-
- private static class ReshuffleTranslatorBatch<K, InputT>
- implements FlinkBatchPipelineTranslator.BatchTransformTranslator<Reshuffle<K, InputT>> {
-
- @Override
- public void translateNode(
- Reshuffle<K, InputT> transform,
- FlinkBatchTranslationContext context) {
-
- DataSet<WindowedValue<KV<K, InputT>>> inputDataSet =
- context.getInputDataSet(context.getInput(transform));
-
- context.setOutputDataSet(context.getOutput(transform), inputDataSet.rebalance());
-
- }
-
- }
-
- /**
- * Combiner that combines {@code T}s into a single {@code List<T>} containing all inputs.
- *
- * <p>For internal use to translate {@link GroupByKey}. For a large {@link PCollection} this
- * is expected to crash!
- *
- * <p>This is copied from the dataflow runner code.
- *
- * @param <T> the type of elements to concatenate.
- */
- private static class Concatenate<T> extends Combine.CombineFn<T, List<T>, List<T>> {
- @Override
- public List<T> createAccumulator() {
- return new ArrayList<>();
- }
-
- @Override
- public List<T> addInput(List<T> accumulator, T input) {
- accumulator.add(input);
- return accumulator;
- }
-
- @Override
- public List<T> mergeAccumulators(Iterable<List<T>> accumulators) {
- List<T> result = createAccumulator();
- for (List<T> accumulator : accumulators) {
- result.addAll(accumulator);
- }
- return result;
- }
-
- @Override
- public List<T> extractOutput(List<T> accumulator) {
- return accumulator;
- }
-
- @Override
- public Coder<List<T>> getAccumulatorCoder(CoderRegistry registry, Coder<T> inputCoder) {
- return ListCoder.of(inputCoder);
- }
-
- @Override
- public Coder<List<T>> getDefaultOutputCoder(CoderRegistry registry, Coder<T> inputCoder) {
- return ListCoder.of(inputCoder);
- }
- }
-
-
- private static class CombinePerKeyTranslatorBatch<K, InputT, AccumT, OutputT>
- implements FlinkBatchPipelineTranslator.BatchTransformTranslator<
- Combine.PerKey<K, InputT, OutputT>> {
-
- @Override
- @SuppressWarnings("unchecked")
- public void translateNode(
- Combine.PerKey<K, InputT, OutputT> transform,
- FlinkBatchTranslationContext context) {
- DataSet<WindowedValue<KV<K, InputT>>> inputDataSet =
- context.getInputDataSet(context.getInput(transform));
-
- CombineFnBase.PerKeyCombineFn<K, InputT, AccumT, OutputT> combineFn =
- (CombineFnBase.PerKeyCombineFn<K, InputT, AccumT, OutputT>) transform.getFn();
-
- KvCoder<K, InputT> inputCoder =
- (KvCoder<K, InputT>) context.getInput(transform).getCoder();
-
- Coder<AccumT> accumulatorCoder;
-
- try {
- accumulatorCoder =
- combineFn.getAccumulatorCoder(
- context.getInput(transform).getPipeline().getCoderRegistry(),
- inputCoder.getKeyCoder(),
- inputCoder.getValueCoder());
- } catch (CannotProvideCoderException e) {
- throw new RuntimeException(e);
- }
-
- WindowingStrategy<?, ?> windowingStrategy =
- context.getInput(transform).getWindowingStrategy();
-
- TypeInformation<WindowedValue<KV<K, AccumT>>> partialReduceTypeInfo =
- context.getTypeInfo(
- KvCoder.of(inputCoder.getKeyCoder(), accumulatorCoder),
- windowingStrategy);
-
- Grouping<WindowedValue<KV<K, InputT>>> inputGrouping =
- inputDataSet.groupBy(new KvKeySelector<InputT, K>(inputCoder.getKeyCoder()));
-
- // construct a map from side input to WindowingStrategy so that
- // the DoFn runner can map main-input windows to side input windows
- Map<PCollectionView<?>, WindowingStrategy<?, ?>> sideInputStrategies = new HashMap<>();
- for (PCollectionView<?> sideInput: transform.getSideInputs()) {
- sideInputStrategies.put(sideInput, sideInput.getWindowingStrategyInternal());
- }
-
- if (windowingStrategy.getWindowFn().isNonMerging()) {
- WindowingStrategy<?, BoundedWindow> boundedStrategy =
- (WindowingStrategy<?, BoundedWindow>) windowingStrategy;
-
- FlinkPartialReduceFunction<K, InputT, AccumT, ?> partialReduceFunction =
- new FlinkPartialReduceFunction<>(
- combineFn,
- boundedStrategy,
- sideInputStrategies,
- context.getPipelineOptions());
-
- FlinkReduceFunction<K, AccumT, OutputT, ?> reduceFunction =
- new FlinkReduceFunction<>(
- combineFn,
- boundedStrategy,
- sideInputStrategies,
- context.getPipelineOptions());
-
- // Partially GroupReduce the values into the intermediate format AccumT (combine)
- GroupCombineOperator<
- WindowedValue<KV<K, InputT>>,
- WindowedValue<KV<K, AccumT>>> groupCombine =
- new GroupCombineOperator<>(
- inputGrouping,
- partialReduceTypeInfo,
- partialReduceFunction,
- "GroupCombine: " + transform.getName());
-
- transformSideInputs(transform.getSideInputs(), groupCombine, context);
-
- TypeInformation<WindowedValue<KV<K, OutputT>>> reduceTypeInfo =
- context.getTypeInfo(context.getOutput(transform));
-
- Grouping<WindowedValue<KV<K, AccumT>>> intermediateGrouping =
- groupCombine.groupBy(new KvKeySelector<AccumT, K>(inputCoder.getKeyCoder()));
-
- // Fully reduce the values and create output format OutputT
- GroupReduceOperator<
- WindowedValue<KV<K, AccumT>>, WindowedValue<KV<K, OutputT>>> outputDataSet =
- new GroupReduceOperator<>(
- intermediateGrouping, reduceTypeInfo, reduceFunction, transform.getName());
-
- transformSideInputs(transform.getSideInputs(), outputDataSet, context);
-
- context.setOutputDataSet(context.getOutput(transform), outputDataSet);
-
- } else {
- if (!windowingStrategy.getWindowFn().windowCoder().equals(IntervalWindow.getCoder())) {
- throw new UnsupportedOperationException(
- "Merging WindowFn with windows other than IntervalWindow are not supported.");
- }
-
- // for merging windows we can't do a pre-shuffle combine step since
- // elements would not be in their correct windows for side-input access
-
- WindowingStrategy<?, IntervalWindow> intervalStrategy =
- (WindowingStrategy<?, IntervalWindow>) windowingStrategy;
-
- FlinkMergingNonShuffleReduceFunction<K, InputT, AccumT, OutputT, ?> reduceFunction =
- new FlinkMergingNonShuffleReduceFunction<>(
- combineFn,
- intervalStrategy,
- sideInputStrategies,
- context.getPipelineOptions());
-
- TypeInformation<WindowedValue<KV<K, OutputT>>> reduceTypeInfo =
- context.getTypeInfo(context.getOutput(transform));
-
- Grouping<WindowedValue<KV<K, InputT>>> grouping =
- inputDataSet.groupBy(new KvKeySelector<InputT, K>(inputCoder.getKeyCoder()));
-
- // Fully reduce the values and create output format OutputT
- GroupReduceOperator<
- WindowedValue<KV<K, InputT>>, WindowedValue<KV<K, OutputT>>> outputDataSet =
- new GroupReduceOperator<>(
- grouping, reduceTypeInfo, reduceFunction, transform.getName());
-
- transformSideInputs(transform.getSideInputs(), outputDataSet, context);
-
- context.setOutputDataSet(context.getOutput(transform), outputDataSet);
- }
-
-
- }
- }
-
- private static void rejectSplittable(DoFn<?, ?> doFn) {
- DoFnSignature signature = DoFnSignatures.getSignature(doFn.getClass());
- if (signature.processElement().isSplittable()) {
- throw new UnsupportedOperationException(
- String.format(
- "%s does not currently support splittable DoFn: %s",
- FlinkRunner.class.getSimpleName(), doFn));
- }
- }
-
- private static class ParDoTranslatorBatch<InputT, OutputT>
- implements FlinkBatchPipelineTranslator.BatchTransformTranslator<
- ParDo.MultiOutput<InputT, OutputT>> {
-
- @Override
- @SuppressWarnings("unchecked")
- public void translateNode(
- ParDo.MultiOutput<InputT, OutputT> transform,
- FlinkBatchTranslationContext context) {
- DoFn<InputT, OutputT> doFn = transform.getFn();
- rejectSplittable(doFn);
- DataSet<WindowedValue<InputT>> inputDataSet =
- context.getInputDataSet(context.getInput(transform));
-
- Map<TupleTag<?>, PValue> outputs = context.getOutputs(transform);
-
- Map<TupleTag<?>, Integer> outputMap = Maps.newHashMap();
- // put the main output at index 0, FlinkMultiOutputDoFnFunction expects this
- outputMap.put(transform.getMainOutputTag(), 0);
- int count = 1;
- for (TupleTag<?> tag : outputs.keySet()) {
- if (!outputMap.containsKey(tag)) {
- outputMap.put(tag, count++);
- }
- }
-
- // assume that the windowing strategy is the same for all outputs
- WindowingStrategy<?, ?> windowingStrategy = null;
-
- // collect all output Coders and create a UnionCoder for our tagged outputs
- List<Coder<?>> outputCoders = Lists.newArrayList();
- for (PValue taggedValue : outputs.values()) {
- checkState(
- taggedValue instanceof PCollection,
- "Within ParDo, got a non-PCollection output %s of type %s",
- taggedValue,
- taggedValue.getClass().getSimpleName());
- PCollection<?> coll = (PCollection<?>) taggedValue;
- outputCoders.add(coll.getCoder());
- windowingStrategy = coll.getWindowingStrategy();
- }
-
- if (windowingStrategy == null) {
- throw new IllegalStateException("No outputs defined.");
- }
-
- UnionCoder unionCoder = UnionCoder.of(outputCoders);
-
- TypeInformation<WindowedValue<RawUnionValue>> typeInformation =
- new CoderTypeInformation<>(
- WindowedValue.getFullCoder(
- unionCoder,
- windowingStrategy.getWindowFn().windowCoder()));
-
- List<PCollectionView<?>> sideInputs = transform.getSideInputs();
-
- // construct a map from side input to WindowingStrategy so that
- // the DoFn runner can map main-input windows to side input windows
- Map<PCollectionView<?>, WindowingStrategy<?, ?>> sideInputStrategies = new HashMap<>();
- for (PCollectionView<?> sideInput: sideInputs) {
- sideInputStrategies.put(sideInput, sideInput.getWindowingStrategyInternal());
- }
-
- SingleInputUdfOperator<WindowedValue<InputT>, WindowedValue<RawUnionValue>, ?> outputDataSet;
- DoFnSignature signature = DoFnSignatures.getSignature(transform.getFn().getClass());
- if (signature.stateDeclarations().size() > 0
- || signature.timerDeclarations().size() > 0) {
-
- // Based on the fact that the signature is stateful, DoFnSignatures ensures
- // that it is also keyed
- KvCoder<?, InputT> inputCoder =
- (KvCoder<?, InputT>) context.getInput(transform).getCoder();
-
- FlinkStatefulDoFnFunction<?, ?, OutputT> doFnWrapper = new FlinkStatefulDoFnFunction<>(
- (DoFn) doFn, windowingStrategy, sideInputStrategies, context.getPipelineOptions(),
- outputMap, transform.getMainOutputTag()
- );
-
- Grouping<WindowedValue<InputT>> grouping =
- inputDataSet.groupBy(new KvKeySelector(inputCoder.getKeyCoder()));
-
- outputDataSet =
- new GroupReduceOperator(grouping, typeInformation, doFnWrapper, transform.getName());
-
- } else {
- FlinkDoFnFunction<InputT, RawUnionValue> doFnWrapper =
- new FlinkDoFnFunction(
- doFn,
- windowingStrategy,
- sideInputStrategies,
- context.getPipelineOptions(),
- outputMap,
- transform.getMainOutputTag());
-
- outputDataSet = new MapPartitionOperator<>(
- inputDataSet, typeInformation,
- doFnWrapper, transform.getName());
-
- }
-
- transformSideInputs(sideInputs, outputDataSet, context);
-
- for (Entry<TupleTag<?>, PValue> output : outputs.entrySet()) {
- pruneOutput(
- outputDataSet,
- context,
- outputMap.get(output.getKey()),
- (PCollection) output.getValue());
- }
-
- }
-
- private <T> void pruneOutput(
- DataSet<WindowedValue<RawUnionValue>> taggedDataSet,
- FlinkBatchTranslationContext context,
- int integerTag,
- PCollection<T> collection) {
- TypeInformation<WindowedValue<T>> outputType = context.getTypeInfo(collection);
-
- FlinkMultiOutputPruningFunction<T> pruningFunction =
- new FlinkMultiOutputPruningFunction<>(integerTag);
-
- FlatMapOperator<WindowedValue<RawUnionValue>, WindowedValue<T>> pruningOperator =
- new FlatMapOperator<>(
- taggedDataSet,
- outputType,
- pruningFunction,
- collection.getName());
-
- context.setOutputDataSet(collection, pruningOperator);
- }
- }
-
- private static class FlattenPCollectionTranslatorBatch<T>
- implements FlinkBatchPipelineTranslator.BatchTransformTranslator<
- Flatten.PCollections<T>> {
-
- @Override
- @SuppressWarnings("unchecked")
- public void translateNode(
- Flatten.PCollections<T> transform,
- FlinkBatchTranslationContext context) {
-
- Map<TupleTag<?>, PValue> allInputs = context.getInputs(transform);
- DataSet<WindowedValue<T>> result = null;
-
- if (allInputs.isEmpty()) {
-
- // create an empty dummy source to satisfy downstream operations
- // we cannot create an empty source in Flink, therefore we have to
- // add the flatMap that simply never forwards the single element
- DataSource<String> dummySource =
- context.getExecutionEnvironment().fromElements("dummy");
- result = dummySource.flatMap(new FlatMapFunction<String, WindowedValue<T>>() {
- @Override
- public void flatMap(String s, Collector<WindowedValue<T>> collector) throws Exception {
- // never return anything
- }
- }).returns(
- new CoderTypeInformation<>(
- WindowedValue.getFullCoder(
- (Coder<T>) VoidCoder.of(),
- GlobalWindow.Coder.INSTANCE)));
- } else {
- for (PValue taggedPc : allInputs.values()) {
- checkArgument(
- taggedPc instanceof PCollection,
- "Got non-PCollection input to flatten: %s of type %s",
- taggedPc,
- taggedPc.getClass().getSimpleName());
- PCollection<T> collection = (PCollection<T>) taggedPc;
- DataSet<WindowedValue<T>> current = context.getInputDataSet(collection);
- if (result == null) {
- result = current;
- } else {
- result = result.union(current);
- }
- }
- }
-
- // insert a dummy filter, there seems to be a bug in Flink
- // that produces duplicate elements after the union in some cases
- // if we don't
- result = result.filter(new FilterFunction<WindowedValue<T>>() {
- @Override
- public boolean filter(WindowedValue<T> tWindowedValue) throws Exception {
- return true;
- }
- }).name("UnionFixFilter");
- context.setOutputDataSet(context.getOutput(transform), result);
- }
- }
-
- private static class CreatePCollectionViewTranslatorBatch<ElemT, ViewT>
- implements FlinkBatchPipelineTranslator.BatchTransformTranslator<
- View.CreatePCollectionView<ElemT, ViewT>> {
-
- @Override
- public void translateNode(
- View.CreatePCollectionView<ElemT, ViewT> transform,
- FlinkBatchTranslationContext context) {
- DataSet<WindowedValue<ElemT>> inputDataSet =
- context.getInputDataSet(context.getInput(transform));
-
- PCollectionView<ViewT> input = transform.getView();
-
- context.setSideInputDataSet(input, inputDataSet);
- }
- }
-
- private static void transformSideInputs(
- List<PCollectionView<?>> sideInputs,
- SingleInputUdfOperator<?, ?, ?> outputDataSet,
- FlinkBatchTranslationContext context) {
- // get corresponding Flink broadcast DataSets
- for (PCollectionView<?> input : sideInputs) {
- DataSet<?> broadcastSet = context.getSideInputDataSet(input);
- outputDataSet.withBroadcastSet(broadcastSet, input.getTagInternal().getId());
- }
- }
-
- private FlinkBatchTransformTranslators() {}
-
-}
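
The Concatenate combiner defined above is the standard "collect everything into a list"
CombineFn, and the GroupByKey translation applies it as a keyed combine. A small
self-contained sketch of the same kind of CombineFn used directly in a pipeline, against the
same CombineFn API shown in this file (class and pipeline names are illustrative; this is not
how the translator itself invokes it):

import java.util.ArrayList;
import java.util.List;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.coders.Coder;
import org.apache.beam.sdk.coders.CoderRegistry;
import org.apache.beam.sdk.coders.ListCoder;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.transforms.Combine;
import org.apache.beam.sdk.transforms.Create;
import org.apache.beam.sdk.values.KV;
import org.apache.beam.sdk.values.PCollection;

public class ConcatenateSketch {

  /** Same shape as the private Concatenate combiner above: every input ends up in one list. */
  static class ToList<T> extends Combine.CombineFn<T, List<T>, List<T>> {
    @Override public List<T> createAccumulator() { return new ArrayList<>(); }
    @Override public List<T> addInput(List<T> accumulator, T input) {
      accumulator.add(input);
      return accumulator;
    }
    @Override public List<T> mergeAccumulators(Iterable<List<T>> accumulators) {
      List<T> result = createAccumulator();
      for (List<T> accumulator : accumulators) {
        result.addAll(accumulator);
      }
      return result;
    }
    @Override public List<T> extractOutput(List<T> accumulator) { return accumulator; }
    @Override public Coder<List<T>> getAccumulatorCoder(CoderRegistry registry, Coder<T> inputCoder) {
      return ListCoder.of(inputCoder);
    }
    @Override public Coder<List<T>> getDefaultOutputCoder(CoderRegistry registry, Coder<T> inputCoder) {
      return ListCoder.of(inputCoder);
    }
  }

  public static void main(String[] args) {
    Pipeline p = Pipeline.create(PipelineOptionsFactory.fromArgs(args).create());
    PCollection<KV<String, Integer>> input =
        p.apply(Create.of(KV.of("a", 1), KV.of("a", 2), KV.of("b", 3)));

    // Per-key combine: all values for a key are gathered into a single List, which is
    // essentially what the GroupByKey translation above does for the whole PCollection.
    input.apply(Combine.<String, Integer, List<Integer>>perKey(new ToList<Integer>()));

    p.run();
  }
}
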
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/FlinkBatchTranslationContext.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/FlinkBatchTranslationContext.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/FlinkBatchTranslationContext.java
deleted file mode 100644
index 98dd0fb..0000000
--- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/FlinkBatchTranslationContext.java
+++ /dev/null
@@ -1,153 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.beam.runners.flink;
-
-import com.google.common.collect.Iterables;
-import java.util.HashMap;
-import java.util.Map;
-import org.apache.beam.runners.flink.translation.types.CoderTypeInformation;
-import org.apache.beam.sdk.coders.Coder;
-import org.apache.beam.sdk.options.PipelineOptions;
-import org.apache.beam.sdk.transforms.AppliedPTransform;
-import org.apache.beam.sdk.transforms.PTransform;
-import org.apache.beam.sdk.util.WindowedValue;
-import org.apache.beam.sdk.util.WindowingStrategy;
-import org.apache.beam.sdk.values.PCollection;
-import org.apache.beam.sdk.values.PCollectionView;
-import org.apache.beam.sdk.values.PValue;
-import org.apache.beam.sdk.values.TupleTag;
-import org.apache.flink.api.common.typeinfo.TypeInformation;
-import org.apache.flink.api.java.DataSet;
-import org.apache.flink.api.java.ExecutionEnvironment;
-
-/**
- * Helper for {@link FlinkBatchPipelineTranslator} and translators in
- * {@link FlinkBatchTransformTranslators}.
- */
-class FlinkBatchTranslationContext {
-
- private final Map<PValue, DataSet<?>> dataSets;
- private final Map<PCollectionView<?>, DataSet<?>> broadcastDataSets;
-
- /**
- * For keeping track of which DataSets don't have a successor. We
- * need to terminate these with a discarding sink because the Beam
- * model allows dangling operations.
- */
- private final Map<PValue, DataSet<?>> danglingDataSets;
-
- private final ExecutionEnvironment env;
- private final PipelineOptions options;
-
- private AppliedPTransform<?, ?, ?> currentTransform;
-
- // ------------------------------------------------------------------------
-
- public FlinkBatchTranslationContext(ExecutionEnvironment env, PipelineOptions options) {
- this.env = env;
- this.options = options;
- this.dataSets = new HashMap<>();
- this.broadcastDataSets = new HashMap<>();
-
- this.danglingDataSets = new HashMap<>();
- }
-
- // ------------------------------------------------------------------------
-
- public Map<PValue, DataSet<?>> getDanglingDataSets() {
- return danglingDataSets;
- }
-
- public ExecutionEnvironment getExecutionEnvironment() {
- return env;
- }
-
- public PipelineOptions getPipelineOptions() {
- return options;
- }
-
- @SuppressWarnings("unchecked")
- public <T> DataSet<WindowedValue<T>> getInputDataSet(PValue value) {
- // assume that the DataSet is used as an input if retrieved here
- danglingDataSets.remove(value);
- return (DataSet<WindowedValue<T>>) dataSets.get(value);
- }
-
- public <T> void setOutputDataSet(PValue value, DataSet<WindowedValue<T>> set) {
- if (!dataSets.containsKey(value)) {
- dataSets.put(value, set);
- danglingDataSets.put(value, set);
- }
- }
-
- /**
- * Sets the AppliedPTransform which carries input/output.
- * @param currentTransform the applied transform currently being translated
- */
- public void setCurrentTransform(AppliedPTransform<?, ?, ?> currentTransform) {
- this.currentTransform = currentTransform;
- }
-
- @SuppressWarnings("unchecked")
- public <T> DataSet<T> getSideInputDataSet(PCollectionView<?> value) {
- return (DataSet<T>) broadcastDataSets.get(value);
- }
-
- public <ViewT, ElemT> void setSideInputDataSet(
- PCollectionView<ViewT> value,
- DataSet<WindowedValue<ElemT>> set) {
- if (!broadcastDataSets.containsKey(value)) {
- broadcastDataSets.put(value, set);
- }
- }
-
- @SuppressWarnings("unchecked")
- public <T> TypeInformation<WindowedValue<T>> getTypeInfo(PCollection<T> collection) {
- return getTypeInfo(collection.getCoder(), collection.getWindowingStrategy());
- }
-
- @SuppressWarnings("unchecked")
- public <T> TypeInformation<WindowedValue<T>> getTypeInfo(
- Coder<T> coder,
- WindowingStrategy<?, ?> windowingStrategy) {
- WindowedValue.FullWindowedValueCoder<T> windowedValueCoder =
- WindowedValue.getFullCoder(
- coder,
- windowingStrategy.getWindowFn().windowCoder());
-
- return new CoderTypeInformation<>(windowedValueCoder);
- }
-
- Map<TupleTag<?>, PValue> getInputs(PTransform<?, ?> transform) {
- return currentTransform.getInputs();
- }
-
- @SuppressWarnings("unchecked")
- <T extends PValue> T getInput(PTransform<T, ?> transform) {
- return (T) Iterables.getOnlyElement(currentTransform.getInputs().values());
- }
-
- Map<TupleTag<?>, PValue> getOutputs(PTransform<?, ?> transform) {
- return currentTransform.getOutputs();
- }
-
- @SuppressWarnings("unchecked")
- <T extends PValue> T getOutput(PTransform<?, T> transform) {
- return (T) Iterables.getOnlyElement(currentTransform.getOutputs().values());
- }
-}
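
The context above tracks "dangling" DataSets: every output is treated as dangling until some
later transform reads it as an input, and whatever remains at the end is terminated with a
discarding sink. That bookkeeping reduces to two collections; a generic sketch of the idea,
without the Flink or Beam types (all names are illustrative):

import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

public class DanglingOutputTrackerSketch {

  private final Map<String, Object> outputs = new HashMap<>();
  private final Set<String> dangling = new HashSet<>();

  /** Record an output; until someone consumes it, it counts as dangling. */
  void setOutput(String id, Object dataSet) {
    if (!outputs.containsKey(id)) {
      outputs.put(id, dataSet);
      dangling.add(id);
    }
  }

  /** Reading an output as an input means it has a successor, so it is no longer dangling. */
  Object getInput(String id) {
    dangling.remove(id);
    return outputs.get(id);
  }

  /** Whatever is still dangling at the end would get a discarding sink attached. */
  Set<String> remainingDangling() {
    return dangling;
  }

  public static void main(String[] args) {
    DanglingOutputTrackerSketch context = new DanglingOutputTrackerSketch();
    context.setOutput("read", new Object());
    context.setOutput("pardo", new Object());
    context.getInput("read");                        // "read" now has a successor
    System.out.println(context.remainingDangling()); // prints [pardo]
  }
}
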
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/FlinkDetachedRunnerResult.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/FlinkDetachedRunnerResult.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/FlinkDetachedRunnerResult.java
deleted file mode 100644
index bf4395f..0000000
--- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/FlinkDetachedRunnerResult.java
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.beam.runners.flink;
-
-import java.io.IOException;
-
-import org.apache.beam.sdk.AggregatorRetrievalException;
-import org.apache.beam.sdk.AggregatorValues;
-import org.apache.beam.sdk.PipelineResult;
-import org.apache.beam.sdk.metrics.MetricResults;
-import org.apache.beam.sdk.transforms.Aggregator;
-import org.joda.time.Duration;
-
-
-/**
- * Result of a detached execution of a {@link org.apache.beam.sdk.Pipeline} with Flink.
- * In detached execution, results and job execution are currently unavailable.
- */
-public class FlinkDetachedRunnerResult implements PipelineResult {
-
- FlinkDetachedRunnerResult() {}
-
- @Override
- public State getState() {
- return State.UNKNOWN;
- }
-
- @Override
- public <T> AggregatorValues<T> getAggregatorValues(final Aggregator<?, T> aggregator)
- throws AggregatorRetrievalException {
- throw new AggregatorRetrievalException(
- "Accumulators can't be retrieved for detached Job executions.",
- new UnsupportedOperationException());
- }
-
- @Override
- public MetricResults metrics() {
- throw new UnsupportedOperationException("The FlinkRunner does not currently support metrics.");
- }
-
- @Override
- public State cancel() throws IOException {
- throw new UnsupportedOperationException("Cancelling is not yet supported.");
- }
-
- @Override
- public State waitUntilFinish() {
- return State.UNKNOWN;
- }
-
- @Override
- public State waitUntilFinish(Duration duration) {
- return State.UNKNOWN;
- }
-
- @Override
- public String toString() {
- return "FlinkDetachedRunnerResult{}";
- }
-}
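
From a caller's point of view the detached result above mostly means there is nothing to wait
on: the state stays UNKNOWN, metrics are unsupported, and waitUntilFinish() returns
immediately. A brief sketch of guarded caller code under that assumption (purely
illustrative):

import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.PipelineResult;
import org.apache.beam.sdk.options.PipelineOptionsFactory;

public class DetachedResultHandlingSketch {
  public static void main(String[] args) {
    Pipeline pipeline = Pipeline.create(PipelineOptionsFactory.fromArgs(args).create());
    // ... transforms would be applied here ...

    PipelineResult result = pipeline.run();

    // With a detached Flink submission the state is UNKNOWN, so only attempt to block
    // when the runner reports a real state.
    if (result.getState() != PipelineResult.State.UNKNOWN) {
      result.waitUntilFinish();
    } else {
      System.out.println("Job submitted in detached mode; no result to wait for.");
    }
  }
}
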
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/FlinkPipelineExecutionEnvironment.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/FlinkPipelineExecutionEnvironment.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/FlinkPipelineExecutionEnvironment.java
deleted file mode 100644
index ba00036..0000000
--- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/FlinkPipelineExecutionEnvironment.java
+++ /dev/null
@@ -1,241 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.beam.runners.flink;
-
-import static com.google.common.base.Preconditions.checkNotNull;
-
-import java.util.List;
-import org.apache.beam.sdk.Pipeline;
-import org.apache.flink.api.common.JobExecutionResult;
-import org.apache.flink.api.java.CollectionEnvironment;
-import org.apache.flink.api.java.ExecutionEnvironment;
-import org.apache.flink.runtime.state.AbstractStateBackend;
-import org.apache.flink.streaming.api.TimeCharacteristic;
-import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-/**
- * The class that instantiates and manages the execution of a given job.
- * Depending on if the job is a Streaming or Batch processing one, it creates
- * the adequate execution environment ({@link ExecutionEnvironment}
- * or {@link StreamExecutionEnvironment}), the necessary {@link FlinkPipelineTranslator}
- * ({@link FlinkBatchPipelineTranslator} or {@link FlinkStreamingPipelineTranslator}) to
- * transform the Beam job into a Flink one, and executes the (translated) job.
- */
-class FlinkPipelineExecutionEnvironment {
-
- private static final Logger LOG =
- LoggerFactory.getLogger(FlinkPipelineExecutionEnvironment.class);
-
- private final FlinkPipelineOptions options;
-
- /**
- * The Flink Batch execution environment. This is instantiated to either a
- * {@link org.apache.flink.api.java.CollectionEnvironment},
- * a {@link org.apache.flink.api.java.LocalEnvironment} or
- * a {@link org.apache.flink.api.java.RemoteEnvironment}, depending on the configuration
- * options.
- */
- private ExecutionEnvironment flinkBatchEnv;
-
- /**
- * The Flink Streaming execution environment. This is instantiated to either a
- * {@link org.apache.flink.streaming.api.environment.LocalStreamEnvironment} or
- * a {@link org.apache.flink.streaming.api.environment.RemoteStreamEnvironment}, depending
- * on the configuration options, and more specifically, the url of the master.
- */
- private StreamExecutionEnvironment flinkStreamEnv;
-
- /**
- * Creates a {@link FlinkPipelineExecutionEnvironment} with the user-specified parameters in the
- * provided {@link FlinkPipelineOptions}.
- *
- * @param options the user-defined pipeline options.
- * */
- FlinkPipelineExecutionEnvironment(FlinkPipelineOptions options) {
- this.options = checkNotNull(options);
- }
-
- /**
- * Depending on whether the job is a Streaming or a Batch one, this method creates
- * the necessary execution environment and pipeline translator, and translates
- * the {@link org.apache.beam.sdk.values.PCollection} program into
- * a {@link org.apache.flink.api.java.DataSet}
- * or {@link org.apache.flink.streaming.api.datastream.DataStream} one.
- * */
- public void translate(FlinkRunner flinkRunner, Pipeline pipeline) {
- this.flinkBatchEnv = null;
- this.flinkStreamEnv = null;
-
- PipelineTranslationOptimizer optimizer =
- new PipelineTranslationOptimizer(TranslationMode.BATCH, options);
-
- optimizer.translate(pipeline);
- TranslationMode translationMode = optimizer.getTranslationMode();
-
- FlinkPipelineTranslator translator;
- if (translationMode == TranslationMode.STREAMING) {
- this.flinkStreamEnv = createStreamExecutionEnvironment();
- translator = new FlinkStreamingPipelineTranslator(flinkRunner, flinkStreamEnv, options);
- } else {
- this.flinkBatchEnv = createBatchExecutionEnvironment();
- translator = new FlinkBatchPipelineTranslator(flinkBatchEnv, options);
- }
-
- translator.translate(pipeline);
- }
-
- /**
- * Launches the program execution.
- * */
- public JobExecutionResult executePipeline() throws Exception {
- final String jobName = options.getJobName();
-
- if (flinkBatchEnv != null) {
- return flinkBatchEnv.execute(jobName);
- } else if (flinkStreamEnv != null) {
- return flinkStreamEnv.execute(jobName);
- } else {
- throw new IllegalStateException("The Pipeline has not yet been translated.");
- }
- }
-
- /**
- * If the submitted job is a batch processing job, this method creates the adequate
- * Flink {@link org.apache.flink.api.java.ExecutionEnvironment} depending
- * on the user-specified options.
- */
- private ExecutionEnvironment createBatchExecutionEnvironment() {
-
- LOG.info("Creating the required Batch Execution Environment.");
-
- String masterUrl = options.getFlinkMaster();
- ExecutionEnvironment flinkBatchEnv;
-
- // depending on the master, create the right environment.
- if (masterUrl.equals("[local]")) {
- flinkBatchEnv = ExecutionEnvironment.createLocalEnvironment();
- } else if (masterUrl.equals("[collection]")) {
- flinkBatchEnv = new CollectionEnvironment();
- } else if (masterUrl.equals("[auto]")) {
- flinkBatchEnv = ExecutionEnvironment.getExecutionEnvironment();
- } else if (masterUrl.matches(".*:\\d*")) {
- String[] parts = masterUrl.split(":");
- List<String> stagingFiles = options.getFilesToStage();
- flinkBatchEnv = ExecutionEnvironment.createRemoteEnvironment(parts[0],
- Integer.parseInt(parts[1]),
- stagingFiles.toArray(new String[stagingFiles.size()]));
- } else {
- LOG.warn("Unrecognized Flink Master URL {}. Defaulting to [auto].", masterUrl);
- flinkBatchEnv = ExecutionEnvironment.getExecutionEnvironment();
- }
-
- // set the correct parallelism.
- if (options.getParallelism() != -1 && !(flinkBatchEnv instanceof CollectionEnvironment)) {
- flinkBatchEnv.setParallelism(options.getParallelism());
- }
-
- // set parallelism in the options (required by some execution code)
- options.setParallelism(flinkBatchEnv.getParallelism());
-
- if (options.getObjectReuse()) {
- flinkBatchEnv.getConfig().enableObjectReuse();
- } else {
- flinkBatchEnv.getConfig().disableObjectReuse();
- }
-
- return flinkBatchEnv;
- }
-
- /**
- * If the submitted job is a stream processing job, this method creates the adequate
- * Flink {@link org.apache.flink.streaming.api.environment.StreamExecutionEnvironment} depending
- * on the user-specified options.
- */
- private StreamExecutionEnvironment createStreamExecutionEnvironment() {
-
- LOG.info("Creating the required Streaming Environment.");
-
- String masterUrl = options.getFlinkMaster();
- StreamExecutionEnvironment flinkStreamEnv = null;
-
- // depending on the master, create the right environment.
- if (masterUrl.equals("[local]")) {
- flinkStreamEnv = StreamExecutionEnvironment.createLocalEnvironment();
- } else if (masterUrl.equals("[auto]")) {
- flinkStreamEnv = StreamExecutionEnvironment.getExecutionEnvironment();
- } else if (masterUrl.matches(".*:\\d*")) {
- String[] parts = masterUrl.split(":");
- List<String> stagingFiles = options.getFilesToStage();
- flinkStreamEnv = StreamExecutionEnvironment.createRemoteEnvironment(parts[0],
- Integer.parseInt(parts[1]), stagingFiles.toArray(new String[stagingFiles.size()]));
- } else {
- LOG.warn("Unrecognized Flink Master URL {}. Defaulting to [auto].", masterUrl);
- flinkStreamEnv = StreamExecutionEnvironment.getExecutionEnvironment();
- }
-
- // set the correct parallelism.
- if (options.getParallelism() != -1) {
- flinkStreamEnv.setParallelism(options.getParallelism());
- }
-
- // set parallelism in the options (required by some execution code)
- options.setParallelism(flinkStreamEnv.getParallelism());
-
- if (options.getObjectReuse()) {
- flinkStreamEnv.getConfig().enableObjectReuse();
- } else {
- flinkStreamEnv.getConfig().disableObjectReuse();
- }
-
- // default to event time
- flinkStreamEnv.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
-
- // for the following 2 parameters, a value of -1 means that Flink will use
- // the default values as specified in the configuration.
- int numRetries = options.getNumberOfExecutionRetries();
- if (numRetries != -1) {
- flinkStreamEnv.setNumberOfExecutionRetries(numRetries);
- }
- long retryDelay = options.getExecutionRetryDelay();
- if (retryDelay != -1) {
- flinkStreamEnv.getConfig().setExecutionRetryDelay(retryDelay);
- }
-
- // A value of -1 corresponds to disabled checkpointing (see CheckpointConfig in Flink).
- // If the value is not -1, then the validity checks are applied.
- // By default, checkpointing is disabled.
- long checkpointInterval = options.getCheckpointingInterval();
- if (checkpointInterval != -1) {
- if (checkpointInterval < 1) {
- throw new IllegalArgumentException("The checkpoint interval must be positive");
- }
- flinkStreamEnv.enableCheckpointing(checkpointInterval);
- }
-
- // State backend
- final AbstractStateBackend stateBackend = options.getStateBackend();
- if (stateBackend != null) {
- flinkStreamEnv.setStateBackend(stateBackend);
- }
-
- return flinkStreamEnv;
- }
-
-}
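For reference, the "host:port" branch removed above reduces to the following standalone sketch; the master URL and staging jar below are placeholder values, and the Flink calls are the same ones used in createBatchExecutionEnvironment:

import java.util.Arrays;
import java.util.List;
import org.apache.flink.api.java.ExecutionEnvironment;

// Minimal sketch of the remote-master handling shown above (placeholder values, not from the commit).
public class RemoteBatchEnvSketch {
  public static void main(String[] args) {
    String masterUrl = "jobmanager.example.com:6123";            // assumed cluster address
    List<String> stagingFiles = Arrays.asList("/path/to/pipeline-bundled.jar");
    ExecutionEnvironment env;
    if (masterUrl.matches(".*:\\d*")) {
      String[] parts = masterUrl.split(":");
      env = ExecutionEnvironment.createRemoteEnvironment(
          parts[0], Integer.parseInt(parts[1]),
          stagingFiles.toArray(new String[stagingFiles.size()]));
    } else {
      env = ExecutionEnvironment.getExecutionEnvironment();      // the "[auto]" fallback above
    }
    env.setParallelism(4);  // mirrors the options.getParallelism() handling above
  }
}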
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/FlinkPipelineOptions.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/FlinkPipelineOptions.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/FlinkPipelineOptions.java
deleted file mode 100644
index ef9afea..0000000
--- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/FlinkPipelineOptions.java
+++ /dev/null
@@ -1,101 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.beam.runners.flink;
-
-
-import com.fasterxml.jackson.annotation.JsonIgnore;
-import java.util.List;
-import org.apache.beam.sdk.options.ApplicationNameOptions;
-import org.apache.beam.sdk.options.Default;
-import org.apache.beam.sdk.options.Description;
-import org.apache.beam.sdk.options.PipelineOptions;
-import org.apache.beam.sdk.options.StreamingOptions;
-import org.apache.flink.runtime.state.AbstractStateBackend;
-
-/**
- * Options which can be used to configure a Flink PipelineRunner.
- */
-public interface FlinkPipelineOptions
- extends PipelineOptions, ApplicationNameOptions, StreamingOptions {
-
- /**
- * List of local files to make available to workers.
- *
- * <p>Jars are placed on the worker's classpath.
- *
- * <p>The default value is the list of jars from the main program's classpath.
- */
- @Description("Jar-Files to send to all workers and put on the classpath. "
- + "The default value is all files from the classpath.")
- @JsonIgnore
- List<String> getFilesToStage();
- void setFilesToStage(List<String> value);
-
- /**
- * The url of the Flink JobManager on which to execute pipelines. This can either be
- * the address of a cluster JobManager, in the form "host:port" or one of the special
- * Strings "[local]", "[collection]" or "[auto]". "[local]" will start a local Flink
- * Cluster in the JVM, "[collection]" will execute the pipeline on Java Collections while
- * "[auto]" will let the system decide where to execute the pipeline based on the environment.
- */
- @Description("Address of the Flink Master where the Pipeline should be executed. Can"
- + " either be of the form \"host:port\" or one of the special values [local], "
- + "[collection] or [auto].")
- String getFlinkMaster();
- void setFlinkMaster(String value);
-
- @Description("The degree of parallelism to be used when distributing operations onto workers.")
- @Default.InstanceFactory(DefaultParallelismFactory.class)
- Integer getParallelism();
- void setParallelism(Integer value);
-
- @Description("The interval between consecutive checkpoints (i.e. snapshots of the current"
- + "pipeline state used for fault tolerance).")
- @Default.Long(-1L)
- Long getCheckpointingInterval();
- void setCheckpointingInterval(Long interval);
-
- @Description("Sets the number of times that failed tasks are re-executed. "
- + "A value of zero effectively disables fault tolerance. A value of -1 indicates "
- + "that the system default value (as defined in the configuration) should be used.")
- @Default.Integer(-1)
- Integer getNumberOfExecutionRetries();
- void setNumberOfExecutionRetries(Integer retries);
-
- @Description("Sets the delay between executions. A value of {@code -1} "
- + "indicates that the default value should be used.")
- @Default.Long(-1L)
- Long getExecutionRetryDelay();
- void setExecutionRetryDelay(Long delay);
-
- @Description("Sets the behavior of reusing objects.")
- @Default.Boolean(false)
- Boolean getObjectReuse();
- void setObjectReuse(Boolean reuse);
-
- /**
- * State backend to store Beam's state during computation.
- * Note: Only applicable when executing in streaming mode.
- */
- @Description("Sets the state backend to use in streaming mode. "
- + "Otherwise the default is read from the Flink config.")
- @JsonIgnore
- AbstractStateBackend getStateBackend();
- void setStateBackend(AbstractStateBackend stateBackend);
-
-}
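A minimal configuration sketch for the interface above, assuming the usual PipelineOptionsFactory entry point; the values shown are placeholders, not recommendations:

import org.apache.beam.runners.flink.FlinkPipelineOptions;
import org.apache.beam.sdk.options.PipelineOptionsFactory;

// Hypothetical configuration sketch; option names follow the interface defined above.
public class FlinkOptionsSketch {
  public static void main(String[] args) {
    FlinkPipelineOptions options =
        PipelineOptionsFactory.fromArgs(args).as(FlinkPipelineOptions.class);
    options.setFlinkMaster("[auto]");          // or "host:port", "[local]", "[collection]"
    options.setParallelism(4);                 // -1 keeps the environment default
    options.setCheckpointingInterval(60000L);  // -1L (the default) disables checkpointing
    options.setNumberOfExecutionRetries(3);    // -1 uses the Flink configuration default
    options.setObjectReuse(false);
  }
}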
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/FlinkPipelineTranslator.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/FlinkPipelineTranslator.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/FlinkPipelineTranslator.java
deleted file mode 100644
index 65f416d..0000000
--- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/FlinkPipelineTranslator.java
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.beam.runners.flink;
-
-import org.apache.beam.sdk.Pipeline;
-
-/**
- * The role of this class is to translate the Beam operators to
- * their Flink counterparts. If we have a streaming job, this is instantiated as a
- * {@link FlinkStreamingPipelineTranslator}. In other case, i.e. for a batch job,
- * a {@link FlinkBatchPipelineTranslator} is created. Correspondingly, the
- * {@link org.apache.beam.sdk.values.PCollection}-based user-provided job is translated into
- * a {@link org.apache.flink.streaming.api.datastream.DataStream} (for streaming) or a
- * {@link org.apache.flink.api.java.DataSet} (for batch) one.
- */
-abstract class FlinkPipelineTranslator extends Pipeline.PipelineVisitor.Defaults {
-
- /**
- * Translates the pipeline by passing this class as a visitor.
- * @param pipeline The pipeline to be translated
- */
- public void translate(Pipeline pipeline) {
- pipeline.traverseTopologically(this);
- }
-
- /**
- * Utility formatting method.
- * @param n number of spaces to generate
- * @return String with "|" followed by n spaces
- */
- protected static String genSpaces(int n) {
- StringBuilder builder = new StringBuilder();
- for (int i = 0; i < n; i++) {
- builder.append("| ");
- }
- return builder.toString();
- }
-}
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/FlinkRunner.java
----------------------------------------------------------------------
diff --git a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/FlinkRunner.java b/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/FlinkRunner.java
deleted file mode 100644
index 096f030..0000000
--- a/runners/flink/runner/src/main/java/org/apache/beam/runners/flink/FlinkRunner.java
+++ /dev/null
@@ -1,232 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.beam.runners.flink;
-
-import com.google.common.base.Joiner;
-import java.io.File;
-import java.net.URISyntaxException;
-import java.net.URL;
-import java.net.URLClassLoader;
-import java.util.ArrayList;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-import java.util.SortedSet;
-import java.util.TreeSet;
-import org.apache.beam.sdk.Pipeline;
-import org.apache.beam.sdk.PipelineResult;
-import org.apache.beam.sdk.options.PipelineOptions;
-import org.apache.beam.sdk.options.PipelineOptionsValidator;
-import org.apache.beam.sdk.runners.PipelineRunner;
-import org.apache.beam.sdk.runners.TransformHierarchy;
-import org.apache.beam.sdk.transforms.PTransform;
-import org.apache.beam.sdk.transforms.View;
-import org.apache.beam.sdk.values.PValue;
-import org.apache.flink.api.common.JobExecutionResult;
-import org.apache.flink.client.program.DetachedEnvironment;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-/**
- * A {@link PipelineRunner} that executes the operations in the
- * pipeline by first translating them to a Flink Plan and then executing them either locally
- * or on a Flink cluster, depending on the configuration.
- */
-public class FlinkRunner extends PipelineRunner<PipelineResult> {
-
- private static final Logger LOG = LoggerFactory.getLogger(FlinkRunner.class);
-
- /**
- * Provided options.
- */
- private final FlinkPipelineOptions options;
-
- /**
- * Construct a runner from the provided options.
- *
- * @param options Properties which configure the runner.
- * @return The newly created runner.
- */
- public static FlinkRunner fromOptions(PipelineOptions options) {
- FlinkPipelineOptions flinkOptions =
- PipelineOptionsValidator.validate(FlinkPipelineOptions.class, options);
- ArrayList<String> missing = new ArrayList<>();
-
- if (flinkOptions.getAppName() == null) {
- missing.add("appName");
- }
- if (missing.size() > 0) {
- throw new IllegalArgumentException(
- "Missing required values: " + Joiner.on(',').join(missing));
- }
-
- if (flinkOptions.getFilesToStage() == null) {
- flinkOptions.setFilesToStage(detectClassPathResourcesToStage(
- FlinkRunner.class.getClassLoader()));
- LOG.info("PipelineOptions.filesToStage was not specified. "
- + "Defaulting to files from the classpath: will stage {} files. "
- + "Enable logging at DEBUG level to see which files will be staged.",
- flinkOptions.getFilesToStage().size());
- LOG.debug("Classpath elements: {}", flinkOptions.getFilesToStage());
- }
-
- // Set Flink Master to [auto] if no option was specified.
- if (flinkOptions.getFlinkMaster() == null) {
- flinkOptions.setFlinkMaster("[auto]");
- }
-
- return new FlinkRunner(flinkOptions);
- }
-
- private FlinkRunner(FlinkPipelineOptions options) {
- this.options = options;
- this.ptransformViewsWithNonDeterministicKeyCoders = new HashSet<>();
- }
-
- @Override
- public PipelineResult run(Pipeline pipeline) {
- logWarningIfPCollectionViewHasNonDeterministicKeyCoder(pipeline);
-
- LOG.info("Executing pipeline using FlinkRunner.");
-
- FlinkPipelineExecutionEnvironment env = new FlinkPipelineExecutionEnvironment(options);
-
- LOG.info("Translating pipeline to Flink program.");
- env.translate(this, pipeline);
-
- JobExecutionResult result;
- try {
- LOG.info("Starting execution of Flink program.");
- result = env.executePipeline();
- } catch (Exception e) {
- LOG.error("Pipeline execution failed", e);
- throw new RuntimeException("Pipeline execution failed", e);
- }
-
- if (result instanceof DetachedEnvironment.DetachedJobExecutionResult) {
- LOG.info("Pipeline submitted in Detached mode");
- return new FlinkDetachedRunnerResult();
- } else {
- LOG.info("Execution finished in {} msecs", result.getNetRuntime());
- Map<String, Object> accumulators = result.getAllAccumulatorResults();
- if (accumulators != null && !accumulators.isEmpty()) {
- LOG.info("Final aggregator values:");
-
- for (Map.Entry<String, Object> entry : result.getAllAccumulatorResults().entrySet()) {
- LOG.info("{} : {}", entry.getKey(), entry.getValue());
- }
- }
-
- return new FlinkRunnerResult(accumulators, result.getNetRuntime());
- }
- }
-
- /**
- * For testing.
- */
- public FlinkPipelineOptions getPipelineOptions() {
- return options;
- }
-
- @Override
- public String toString() {
- return "FlinkRunner#" + hashCode();
- }
-
- /**
- * Attempts to detect all the resources the class loader has access to. This does not recurse
- * to class loader parents stopping it from pulling in resources from the system class loader.
- *
- * @param classLoader The URLClassLoader to use to detect resources to stage.
- * @return A list of absolute paths to the resources the class loader uses.
- * @throws IllegalArgumentException If either the class loader is not a URLClassLoader or one
- * of the resources the class loader exposes is not a file resource.
- */
- protected static List<String> detectClassPathResourcesToStage(
- ClassLoader classLoader) {
- if (!(classLoader instanceof URLClassLoader)) {
- String message = String.format("Unable to use ClassLoader to detect classpath elements. "
- + "Current ClassLoader is %s, only URLClassLoaders are supported.", classLoader);
- LOG.error(message);
- throw new IllegalArgumentException(message);
- }
-
- List<String> files = new ArrayList<>();
- for (URL url : ((URLClassLoader) classLoader).getURLs()) {
- try {
- files.add(new File(url.toURI()).getAbsolutePath());
- } catch (IllegalArgumentException | URISyntaxException e) {
- String message = String.format("Unable to convert url (%s) to file.", url);
- LOG.error(message);
- throw new IllegalArgumentException(message, e);
- }
- }
- return files;
- }
-
- /** A set of {@link View}s with non-deterministic key coders. */
- Set<PTransform<?, ?>> ptransformViewsWithNonDeterministicKeyCoders;
-
- /**
- * Records that the {@link PTransform} requires a deterministic key coder.
- */
- void recordViewUsesNonDeterministicKeyCoder(PTransform<?, ?> ptransform) {
- ptransformViewsWithNonDeterministicKeyCoders.add(ptransform);
- }
-
- /** Outputs a warning about PCollection views without deterministic key coders. */
- private void logWarningIfPCollectionViewHasNonDeterministicKeyCoder(Pipeline pipeline) {
- // We need to wait till this point to determine the names of the transforms since only
- // at this time do we know the hierarchy of the transforms otherwise we could
- // have just recorded the full names during apply time.
- if (!ptransformViewsWithNonDeterministicKeyCoders.isEmpty()) {
- final SortedSet<String> ptransformViewNamesWithNonDeterministicKeyCoders = new TreeSet<>();
- pipeline.traverseTopologically(new Pipeline.PipelineVisitor() {
- @Override
- public void visitValue(PValue value, TransformHierarchy.Node producer) {
- }
-
- @Override
- public void visitPrimitiveTransform(TransformHierarchy.Node node) {
- if (ptransformViewsWithNonDeterministicKeyCoders.contains(node.getTransform())) {
- ptransformViewNamesWithNonDeterministicKeyCoders.add(node.getFullName());
- }
- }
-
- @Override
- public CompositeBehavior enterCompositeTransform(TransformHierarchy.Node node) {
- if (ptransformViewsWithNonDeterministicKeyCoders.contains(node.getTransform())) {
- ptransformViewNamesWithNonDeterministicKeyCoders.add(node.getFullName());
- }
- return CompositeBehavior.ENTER_TRANSFORM;
- }
-
- @Override
- public void leaveCompositeTransform(TransformHierarchy.Node node) {
- }
- });
-
- LOG.warn("Unable to use indexed implementation for View.AsMap and View.AsMultimap for {} "
- + "because the key coder is not deterministic. Falling back to singleton implementation "
- + "which may cause memory and/or performance problems. Future major versions of "
- + "the Flink runner will require deterministic key coders.",
- ptransformViewNamesWithNonDeterministicKeyCoders);
- }
- }
-}
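As a hedged usage sketch (not part of the commit), the runner above is selected like any other Beam runner and resolved through fromOptions(); transform wiring is elided:

import org.apache.beam.runners.flink.FlinkPipelineOptions;
import org.apache.beam.runners.flink.FlinkRunner;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.PipelineResult;
import org.apache.beam.sdk.options.PipelineOptionsFactory;

// Sketch only: pipeline construction is omitted.
public class RunOnFlinkSketch {
  public static void main(String[] args) {
    FlinkPipelineOptions options =
        PipelineOptionsFactory.fromArgs(args).as(FlinkPipelineOptions.class);
    options.setRunner(FlinkRunner.class);  // resolved via FlinkRunner.fromOptions(options)
    Pipeline p = Pipeline.create(options);
    // p.apply(...);                       // transforms would be applied here
    PipelineResult result = p.run();       // FlinkRunnerResult, or a detached result
  }
}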
[48/50] [abbrv] beam git commit: [BEAM-2015] Remove shared profile in
runners/pom.xml and fix Dataflow ValidatesRunner PostCommit
Posted by dh...@apache.org.
[BEAM-2015] Remove shared profile in runners/pom.xml and fix Dataflow ValidatesRunner PostCommit
Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/546aa61f
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/546aa61f
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/546aa61f
Branch: refs/heads/DSL_SQL
Commit: 546aa61f217dc59f95727970a8dbc7c4b2f76e54
Parents: 391fb77
Author: Luke Cwik <lc...@google.com>
Authored: Wed Apr 19 09:20:38 2017 -0700
Committer: Dan Halperin <dh...@google.com>
Committed: Wed Apr 19 12:07:33 2017 -0700
----------------------------------------------------------------------
runners/apex/pom.xml | 1 +
runners/direct-java/pom.xml | 1 +
runners/flink/pom.xml | 2 ++
runners/google-cloud-dataflow-java/pom.xml | 43 +++++++++++++++++++++++++
runners/pom.xml | 40 -----------------------
runners/spark/pom.xml | 1 +
6 files changed, 48 insertions(+), 40 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/beam/blob/546aa61f/runners/apex/pom.xml
----------------------------------------------------------------------
diff --git a/runners/apex/pom.xml b/runners/apex/pom.xml
index 40fc93c..f441e3d 100644
--- a/runners/apex/pom.xml
+++ b/runners/apex/pom.xml
@@ -229,6 +229,7 @@
</beamTestPipelineOptions>
</systemPropertyVariables>
<skipTests>${skipIntegrationTests}</skipTests>
+ <threadCount>4</threadCount>
</configuration>
</execution>
</executions>
http://git-wip-us.apache.org/repos/asf/beam/blob/546aa61f/runners/direct-java/pom.xml
----------------------------------------------------------------------
diff --git a/runners/direct-java/pom.xml b/runners/direct-java/pom.xml
index 03ed791..fc28fd6 100644
--- a/runners/direct-java/pom.xml
+++ b/runners/direct-java/pom.xml
@@ -81,6 +81,7 @@
]
</beamTestPipelineOptions>
</systemPropertyVariables>
+ <threadCount>4</threadCount>
</configuration>
</execution>
</executions>
http://git-wip-us.apache.org/repos/asf/beam/blob/546aa61f/runners/flink/pom.xml
----------------------------------------------------------------------
diff --git a/runners/flink/pom.xml b/runners/flink/pom.xml
index 351035e..808219b 100644
--- a/runners/flink/pom.xml
+++ b/runners/flink/pom.xml
@@ -75,6 +75,7 @@
]
</beamTestPipelineOptions>
</systemPropertyVariables>
+ <threadCount>4</threadCount>
</configuration>
</execution>
@@ -108,6 +109,7 @@
]
</beamTestPipelineOptions>
</systemPropertyVariables>
+ <threadCount>4</threadCount>
</configuration>
</execution>
</executions>
http://git-wip-us.apache.org/repos/asf/beam/blob/546aa61f/runners/google-cloud-dataflow-java/pom.xml
----------------------------------------------------------------------
diff --git a/runners/google-cloud-dataflow-java/pom.xml b/runners/google-cloud-dataflow-java/pom.xml
index e8aadb8..4cde923 100644
--- a/runners/google-cloud-dataflow-java/pom.xml
+++ b/runners/google-cloud-dataflow-java/pom.xml
@@ -38,6 +38,49 @@
<dataflow.legacy_environment_major_version>6</dataflow.legacy_environment_major_version>
</properties>
+ <profiles>
+ <!-- A profile that adds an integration test phase if and only if
+ the validatesRunnerPipelineOptions maven property has been set.
+ It should be set to a valid PipelineOptions JSON string. -->
+ <profile>
+ <id>validates-runner-tests</id>
+ <activation>
+ <property><name>validatesRunnerPipelineOptions</name></property>
+ </activation>
+ <build>
+ <pluginManagement>
+ <plugins>
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-surefire-plugin</artifactId>
+ <executions>
+ <execution>
+ <id>validates-runner-tests</id>
+ <phase>integration-test</phase>
+ <goals>
+ <goal>test</goal>
+ </goals>
+ <configuration>
+ <skip>false</skip>
+ <groups>org.apache.beam.sdk.testing.ValidatesRunner</groups>
+ <parallel>all</parallel>
+ <threadCount>4</threadCount>
+ <dependenciesToScan>
+ <dependency>org.apache.beam:beam-sdks-java-core</dependency>
+ </dependenciesToScan>
+ <systemPropertyVariables>
+ <beamTestPipelineOptions>${validatesRunnerPipelineOptions}</beamTestPipelineOptions>
+ </systemPropertyVariables>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
+ </plugins>
+ </pluginManagement>
+ </build>
+ </profile>
+ </profiles>
+
<build>
<resources>
<resource>
http://git-wip-us.apache.org/repos/asf/beam/blob/546aa61f/runners/pom.xml
----------------------------------------------------------------------
diff --git a/runners/pom.xml b/runners/pom.xml
index 150e987..8f3cabd 100644
--- a/runners/pom.xml
+++ b/runners/pom.xml
@@ -54,46 +54,6 @@
</plugins>
</build>
</profile>
-
- <!-- A profile that adds an integration test phase if and only if
- the validatesRunnerPipelineOptions maven property has been set.
- It should be set to a valid PipelineOptions JSON string. -->
- <profile>
- <id>validates-runner-tests</id>
- <activation>
- <property><name>validatesRunnerPipelineOptions</name></property>
- </activation>
- <build>
- <pluginManagement>
- <plugins>
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-surefire-plugin</artifactId>
- <executions>
- <execution>
- <id>validates-runner-tests</id>
- <phase>integration-test</phase>
- <goals>
- <goal>test</goal>
- </goals>
- <configuration>
- <groups>org.apache.beam.sdk.testing.ValidatesRunner</groups>
- <parallel>all</parallel>
- <threadCount>4</threadCount>
- <dependenciesToScan>
- <dependency>org.apache.beam:beam-sdks-java-core</dependency>
- </dependenciesToScan>
- <systemPropertyVariables>
- <beamTestPipelineOptions>${validatesRunnerPipelineOptions}</beamTestPipelineOptions>
- </systemPropertyVariables>
- </configuration>
- </execution>
- </executions>
- </plugin>
- </plugins>
- </pluginManagement>
- </build>
- </profile>
</profiles>
<build>
http://git-wip-us.apache.org/repos/asf/beam/blob/546aa61f/runners/spark/pom.xml
----------------------------------------------------------------------
diff --git a/runners/spark/pom.xml b/runners/spark/pom.xml
index 7493485..55788e6 100644
--- a/runners/spark/pom.xml
+++ b/runners/spark/pom.xml
@@ -102,6 +102,7 @@
<spark.ui.enabled>false</spark.ui.enabled>
<spark.ui.showConsoleProgress>false</spark.ui.showConsoleProgress>
</systemPropertyVariables>
+ <threadCount>4</threadCount>
</configuration>
</execution>
</executions>
[20/50] [abbrv] beam git commit: [BEAM-1914] XmlIO now complies with PTransform style guide
Posted by dh...@apache.org.
[BEAM-1914] XmlIO now complies with PTransform style guide
Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/d0c0a60c
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/d0c0a60c
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/d0c0a60c
Branch: refs/heads/DSL_SQL
Commit: d0c0a60c83a9d2a6caa29f91f89d8c0ee3b0eb93
Parents: 57929fb
Author: Eugene Kirpichov <ki...@google.com>
Authored: Mon Apr 17 16:25:42 2017 -0700
Committer: Jean-Baptiste Onofré <jb...@apache.org>
Committed: Wed Apr 19 10:34:46 2017 +0200
----------------------------------------------------------------------
.../apache/beam/sdk/io/CompressedSource.java | 4 +-
.../main/java/org/apache/beam/sdk/io/XmlIO.java | 477 +++++++++++++++++++
.../java/org/apache/beam/sdk/io/XmlSink.java | 226 ++-------
.../java/org/apache/beam/sdk/io/XmlSource.java | 191 +-------
.../sdk/transforms/display/DisplayData.java | 6 +
.../org/apache/beam/sdk/io/XmlSinkTest.java | 89 ++--
.../org/apache/beam/sdk/io/XmlSourceTest.java | 248 ++++++----
.../sdk/transforms/display/DisplayDataTest.java | 17 +
8 files changed, 740 insertions(+), 518 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/beam/blob/d0c0a60c/sdks/java/core/src/main/java/org/apache/beam/sdk/io/CompressedSource.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/CompressedSource.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/CompressedSource.java
index ecd0fd9..1d940cb 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/CompressedSource.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/CompressedSource.java
@@ -46,10 +46,10 @@ import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
* A Source that reads from compressed files. A {@code CompressedSources} wraps a delegate
* {@link FileBasedSource} that is able to read the decompressed file format.
*
- * <p>For example, use the following to read from a gzip-compressed XML file:
+ * <p>For example, use the following to read from a gzip-compressed file-based source:
*
* <pre> {@code
- * XmlSource mySource = XmlSource.from(...);
+ * FileBasedSource<T> mySource = ...;
* PCollection<T> collection = p.apply(Read.from(CompressedSource
* .from(mySource)
* .withDecompression(CompressedSource.CompressionMode.GZIP)));
http://git-wip-us.apache.org/repos/asf/beam/blob/d0c0a60c/sdks/java/core/src/main/java/org/apache/beam/sdk/io/XmlIO.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/XmlIO.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/XmlIO.java
new file mode 100644
index 0000000..a53fb86
--- /dev/null
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/XmlIO.java
@@ -0,0 +1,477 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.sdk.io;
+
+import static com.google.common.base.Preconditions.checkNotNull;
+
+import com.google.auto.value.AutoValue;
+import com.google.common.annotations.VisibleForTesting;
+import javax.annotation.Nullable;
+import javax.xml.bind.JAXBContext;
+import javax.xml.bind.JAXBException;
+import org.apache.beam.sdk.runners.PipelineRunner;
+import org.apache.beam.sdk.transforms.PTransform;
+import org.apache.beam.sdk.transforms.display.DisplayData;
+import org.apache.beam.sdk.values.PBegin;
+import org.apache.beam.sdk.values.PCollection;
+import org.apache.beam.sdk.values.PDone;
+
+/** Transforms for reading and writing XML files using JAXB mappers. */
+public class XmlIO {
+ // CHECKSTYLE.OFF: JavadocStyle
+ /**
+ * Reads XML files. This source reads one or more XML files and
+ * creates a {@link PCollection} of a given type. Please note the example given below.
+ *
+ * <p>The XML file must be of the following form, where {@code root} and {@code record} are XML
+ * element names that are defined by the user:
+ *
+ * <pre>{@code
+ * <root>
+ * <record> ... </record>
+ * <record> ... </record>
+ * <record> ... </record>
+ * ...
+ * <record> ... </record>
+ * </root>
+ * }</pre>
+ *
+ * <p>Basically, the XML document should contain a single root element with an inner list
+ * consisting entirely of record elements. The records may contain arbitrary XML content; however,
+ * that content <b>must not</b> contain the start {@code <record>} or end {@code </record>} tags.
+ * This restriction enables reading from large XML files in parallel from different offsets in the
+ * file.
+ *
+ * <p>Root and/or record elements may additionally contain an arbitrary number of XML attributes.
+ * Additionally, users must provide a class of a JAXB-annotated Java type that can be used to convert
+ * records into Java objects and vice versa using JAXB marshalling/unmarshalling mechanisms.
+ * Reading the source will generate a {@code PCollection} of the given JAXB annotated Java type.
+ * Optionally users may provide a minimum size of a bundle that should be created for the source.
+ *
+ * <p>The following example shows how to use this method in a Beam pipeline:
+ *
+ * <pre>{@code
+ * PCollection<Record> output = p.apply(XmlIO.<Record>read()
+ * .from(file.toPath().toString())
+ * .withRootElement("root")
+ * .withRecordElement("record")
+ * .withRecordClass(Record.class));
+ * }</pre>
+ *
+ * <p>Currently, only XML files that use single-byte characters are supported. Using a file that
+ * contains multi-byte characters may result in data loss or duplication.
+ *
+ * <p>To use this method:
+ *
+ * <ol>
+ * <li>Explicitly declare a dependency on org.codehaus.woodstox:stax2-api
+ * <li>Include a compatible implementation on the classpath at run-time, such as
+ * org.codehaus.woodstox:woodstox-core-asl
+ * </ol>
+ *
+ * <p>These dependencies have been declared as optional in the sdks/java/core/pom.xml file of
+ * Apache Beam.
+ *
+ * <h3>Permissions</h3>
+ * Permission requirements depend on the {@link org.apache.beam.sdk.runners.PipelineRunner
+ * PipelineRunner} that is used to execute the Beam pipeline. Please refer to the documentation of
+ * corresponding {@link PipelineRunner PipelineRunners} for more details.
+ *
+ * @param <T> Type of the objects that represent the records of the XML file. The {@code
+ * PCollection} generated by this source will be of this type.
+ */
+ // CHECKSTYLE.ON: JavadocStyle
+ public static <T> Read<T> read() {
+ return new AutoValue_XmlIO_Read.Builder<T>()
+ .setMinBundleSize(Read.DEFAULT_MIN_BUNDLE_SIZE)
+ .setCompressionType(Read.CompressionType.AUTO)
+ .build();
+ }
+
+ // CHECKSTYLE.OFF: JavadocStyle
+ /**
+ * A {@link Sink} that outputs records as XML-formatted elements. Writes a {@link PCollection} of
+ * records from JAXB-annotated classes to a single file location.
+ *
+ * <p>Given a PCollection containing records of type T that can be marshalled to XML elements,
+ * this Sink will produce a single file consisting of a single root element that contains all of
+ * the elements in the PCollection.
+ *
+ * <p>XML Sinks are created with a base filename to write to, a root element name that will be
+ * used for the root element of the output files, and a class to bind to an XML element. This
+ * class will be used in the marshalling of records in an input PCollection to their XML
+ * representation and must be able to be bound using JAXB annotations (checked at pipeline
+ * construction time).
+ *
+ * <p>XML Sinks can be written to using the {@link Write} transform:
+ *
+ * <pre>{@code
+ * p.apply(XmlIO.<Type>write()
+ * .withRecordClass(Type.class)
+ * .withRootElement(root_element)
+ * .toFilenamePrefix(output_filename));
+ * }</pre>
+ *
+ * <p>For example, consider the following class with JAXB annotations:
+ *
+ * <pre>
+ * {@literal @}XmlRootElement(name = "word_count_result")
+ * {@literal @}XmlType(propOrder = {"word", "frequency"})
+ * public class WordFrequency {
+ * private String word;
+ * private long frequency;
+ *
+ * public WordFrequency() { }
+ *
+ * public WordFrequency(String word, long frequency) {
+ * this.word = word;
+ * this.frequency = frequency;
+ * }
+ *
+ * public void setWord(String word) {
+ * this.word = word;
+ * }
+ *
+ * public void setFrequency(long frequency) {
+ * this.frequency = frequency;
+ * }
+ *
+ * public long getFrequency() {
+ * return frequency;
+ * }
+ *
+ * public String getWord() {
+ * return word;
+ * }
+ * }
+ * </pre>
+ *
+ * <p>The following will produce XML output with a root element named "words" from a PCollection
+ * of WordFrequency objects:
+ *
+ * <pre>{@code
+ * p.apply(XmlIO.<WordFrequency>write()
+ * .withRecordClass(WordFrequency.class)
+ * .withRootElement("words")
+ * .toFilenamePrefix(output_file));
+ * }</pre>
+ *
+ * <p>The output of which will look like:
+ *
+ * <pre>{@code
+ * <words>
+ *
+ * <word_count_result>
+ * <word>decreased</word>
+ * <frequency>1</frequency>
+ * </word_count_result>
+ *
+ * <word_count_result>
+ * <word>War</word>
+ * <frequency>4</frequency>
+ * </word_count_result>
+ *
+ * <word_count_result>
+ * <word>empress'</word>
+ * <frequency>14</frequency>
+ * </word_count_result>
+ *
+ * <word_count_result>
+ * <word>stoops</word>
+ * <frequency>6</frequency>
+ * </word_count_result>
+ *
+ * ...
+ * </words>
+ * }</pre>
+ */
+ // CHECKSTYLE.ON: JavadocStyle
+ public static <T> Write<T> write() {
+ return new AutoValue_XmlIO_Write.Builder<T>().build();
+ }
+
+ /** Implementation of {@link #read}. */
+ @AutoValue
+ public abstract static class Read<T> extends PTransform<PBegin, PCollection<T>> {
+ private static final int DEFAULT_MIN_BUNDLE_SIZE = 8 * 1024;
+
+ @Nullable
+ abstract String getFileOrPatternSpec();
+
+ @Nullable
+ abstract String getRootElement();
+
+ @Nullable
+ abstract String getRecordElement();
+
+ @Nullable
+ abstract Class<T> getRecordClass();
+
+ abstract CompressionType getCompressionType();
+
+ abstract long getMinBundleSize();
+
+ abstract Builder<T> toBuilder();
+
+ @AutoValue.Builder
+ abstract static class Builder<T> {
+ abstract Builder<T> setFileOrPatternSpec(String fileOrPatternSpec);
+
+ abstract Builder<T> setRootElement(String rootElement);
+
+ abstract Builder<T> setRecordElement(String recordElement);
+
+ abstract Builder<T> setRecordClass(Class<T> recordClass);
+
+ abstract Builder<T> setMinBundleSize(long minBundleSize);
+
+ abstract Builder<T> setCompressionType(CompressionType compressionType);
+
+ abstract Read<T> build();
+ }
+
+ /** Strategy for determining the compression type of XML files being read. */
+ public enum CompressionType {
+ /** Automatically determine the compression type based on filename extension. */
+ AUTO(""),
+ /** Uncompressed (i.e., may be split). */
+ UNCOMPRESSED(""),
+ /** GZipped. */
+ GZIP(".gz"),
+ /** BZipped. */
+ BZIP2(".bz2"),
+ /** Zipped. */
+ ZIP(".zip"),
+ /** Deflate compressed. */
+ DEFLATE(".deflate");
+
+ private String filenameSuffix;
+
+ CompressionType(String suffix) {
+ this.filenameSuffix = suffix;
+ }
+
+ /**
+ * Determine if a given filename matches a compression type based on its extension.
+ * @param filename the filename to match
+ * @return true iff the filename ends with the compression type's known extension.
+ */
+ public boolean matches(String filename) {
+ return filename.toLowerCase().endsWith(filenameSuffix.toLowerCase());
+ }
+ }
+
+ /**
+ * Reads a single XML file or a set of XML files defined by a Java "glob"
+ * file pattern. Each XML file should be of the form defined in {@link #read}.
+ */
+ public Read<T> from(String fileOrPatternSpec) {
+ return toBuilder().setFileOrPatternSpec(fileOrPatternSpec).build();
+ }
+
+ /**
+ * Sets name of the root element of the XML document. This will be used to create a valid
+ * starting root element when initiating a bundle of records created from an XML document. This
+ * is a required parameter.
+ */
+ public Read<T> withRootElement(String rootElement) {
+ return toBuilder().setRootElement(rootElement).build();
+ }
+
+ /**
+ * Sets name of the record element of the XML document. This will be used to determine offset of
+ * the first record of a bundle created from the XML document. This is a required parameter.
+ */
+ public Read<T> withRecordElement(String recordElement) {
+ return toBuilder().setRecordElement(recordElement).build();
+ }
+
+ /**
+ * Sets a JAXB annotated class that can be populated using a record of the provided XML file.
+ * This will be used when unmarshalling record objects from the XML file. This is a required
+ * parameter.
+ */
+ public Read<T> withRecordClass(Class<T> recordClass) {
+ return toBuilder().setRecordClass(recordClass).build();
+ }
+
+ /**
+ * Sets a parameter {@code minBundleSize} for the minimum bundle size of the source. Please
+ * refer to {@link OffsetBasedSource} for the definition of minBundleSize. This is an optional
+ * parameter.
+ */
+ public Read<T> withMinBundleSize(long minBundleSize) {
+ return toBuilder().setMinBundleSize(minBundleSize).build();
+ }
+
+ /**
+ * Decompresses all input files using the specified compression type.
+ *
+ * <p>If no compression type is specified, the default is {@link CompressionType#AUTO}.
+ * In this mode, the compression type of the file is determined by its extension.
+ * Supports .gz, .bz2, .zip and .deflate compression.
+ */
+ public Read<T> withCompressionType(CompressionType compressionType) {
+ return toBuilder().setCompressionType(compressionType).build();
+ }
+
+ @Override
+ public void validate(PBegin input) {
+ checkNotNull(
+ getRootElement(),
+ "rootElement is null. Use builder method withRootElement() to set this.");
+ checkNotNull(
+ getRecordElement(),
+ "recordElement is null. Use builder method withRecordElement() to set this.");
+ checkNotNull(
+ getRecordClass(),
+ "recordClass is null. Use builder method withRecordClass() to set this.");
+ }
+
+ @Override
+ public void populateDisplayData(DisplayData.Builder builder) {
+ builder
+ .addIfNotDefault(
+ DisplayData.item("minBundleSize", getMinBundleSize())
+ .withLabel("Minimum Bundle Size"),
+ 1L)
+ .add(DisplayData.item("filePattern", getFileOrPatternSpec()).withLabel("File Pattern"))
+ .addIfNotNull(
+ DisplayData.item("rootElement", getRootElement()).withLabel("XML Root Element"))
+ .addIfNotNull(
+ DisplayData.item("recordElement", getRecordElement()).withLabel("XML Record Element"))
+ .addIfNotNull(
+ DisplayData.item("recordClass", getRecordClass()).withLabel("XML Record Class"));
+ }
+
+ @VisibleForTesting
+ BoundedSource<T> createSource() {
+ XmlSource<T> source = new XmlSource<>(this);
+ switch (getCompressionType()) {
+ case UNCOMPRESSED:
+ return source;
+ case AUTO:
+ return CompressedSource.from(source);
+ case BZIP2:
+ return CompressedSource.from(source)
+ .withDecompression(CompressedSource.CompressionMode.BZIP2);
+ case GZIP:
+ return CompressedSource.from(source)
+ .withDecompression(CompressedSource.CompressionMode.GZIP);
+ case ZIP:
+ return CompressedSource.from(source)
+ .withDecompression(CompressedSource.CompressionMode.ZIP);
+ case DEFLATE:
+ return CompressedSource.from(source)
+ .withDecompression(CompressedSource.CompressionMode.DEFLATE);
+ default:
+ throw new IllegalArgumentException("Unknown compression type: " + getCompressionType());
+ }
+ }
+
+ @Override
+ public PCollection<T> expand(PBegin input) {
+ return input.apply(org.apache.beam.sdk.io.Read.from(createSource()));
+ }
+ }
+
+ /** Implementation of {@link #write}. */
+ @AutoValue
+ public abstract static class Write<T> extends PTransform<PCollection<T>, PDone> {
+ @Nullable
+ abstract String getFilenamePrefix();
+
+ @Nullable
+ abstract Class<T> getRecordClass();
+
+ @Nullable
+ abstract String getRootElement();
+
+ abstract Builder<T> toBuilder();
+
+ @AutoValue.Builder
+ abstract static class Builder<T> {
+ abstract Builder<T> setFilenamePrefix(String baseOutputFilename);
+
+ abstract Builder<T> setRecordClass(Class<T> recordClass);
+
+ abstract Builder<T> setRootElement(String rootElement);
+
+ abstract Write<T> build();
+ }
+
+
+ /**
+ * Writes to files with the given path prefix.
+ *
+ * <p>Output files will have the name {@literal {filenamePrefix}-0000i-of-0000n.xml} where n is
+ * the number of output bundles.
+ */
+ public Write<T> toFilenamePrefix(String filenamePrefix) {
+ return toBuilder().setFilenamePrefix(filenamePrefix).build();
+ }
+
+ /**
+ * Writes objects of the given class mapped to XML elements using JAXB.
+ *
+ * <p>The specified class must be able to be used to create a JAXB context.
+ */
+ public Write<T> withRecordClass(Class<T> recordClass) {
+ return toBuilder().setRecordClass(recordClass).build();
+ }
+
+ /**
+ * Sets the enclosing root element for the generated XML files.
+ */
+ public Write<T> withRootElement(String rootElement) {
+ return toBuilder().setRootElement(rootElement).build();
+ }
+
+ @Override
+ public void validate(PCollection<T> input) {
+ checkNotNull(getRecordClass(), "Missing a class to bind to a JAXB context.");
+ checkNotNull(getRootElement(), "Missing a root element name.");
+ checkNotNull(getFilenamePrefix(), "Missing a filename to write to.");
+ try {
+ JAXBContext.newInstance(getRecordClass());
+ } catch (JAXBException e) {
+ throw new RuntimeException("Error binding classes to a JAXB Context.", e);
+ }
+ }
+
+ @Override
+ public PDone expand(PCollection<T> input) {
+ return input.apply(org.apache.beam.sdk.io.Write.to(createSink()));
+ }
+
+ @VisibleForTesting
+ XmlSink<T> createSink() {
+ return new XmlSink<>(this);
+ }
+
+ @Override
+ public void populateDisplayData(DisplayData.Builder builder) {
+ createSink().populateFileBasedDisplayData(builder);
+ builder
+ .addIfNotNull(DisplayData.item("rootElement", getRootElement())
+ .withLabel("XML Root Element"))
+ .addIfNotNull(DisplayData.item("recordClass", getRecordClass())
+ .withLabel("XML Record Class"));
+ }
+ }
+}
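Putting the read and write Javadoc examples above together, a round-trip pipeline might look as follows; "Record" and the file paths are hypothetical placeholders, not part of the commit:

import javax.xml.bind.annotation.XmlRootElement;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.io.XmlIO;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.values.PCollection;

// Sketch assembled from the Javadoc above; Record and the paths are assumptions for illustration.
public class XmlRoundTripSketch {

  @XmlRootElement(name = "record")
  public static class Record {
    public String value;
  }

  public static void main(String[] args) {
    Pipeline p = Pipeline.create(PipelineOptionsFactory.fromArgs(args).create());
    PCollection<Record> records = p.apply(XmlIO.<Record>read()
        .from("/path/to/input-*.xml")
        .withRootElement("root")
        .withRecordElement("record")
        .withRecordClass(Record.class));
    records.apply(XmlIO.<Record>write()
        .withRecordClass(Record.class)
        .withRootElement("root")
        .toFilenamePrefix("/path/to/output/records"));
    p.run();
  }
}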
http://git-wip-us.apache.org/repos/asf/beam/blob/d0c0a60c/sdks/java/core/src/main/java/org/apache/beam/sdk/io/XmlSink.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/XmlSink.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/XmlSink.java
index 2159c8f..7700329 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/XmlSink.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/XmlSink.java
@@ -17,226 +17,58 @@
*/
package org.apache.beam.sdk.io;
-import static com.google.common.base.Preconditions.checkNotNull;
-
import java.io.OutputStream;
import java.nio.channels.Channels;
import java.nio.channels.WritableByteChannel;
import javax.xml.bind.JAXBContext;
-import javax.xml.bind.JAXBException;
import javax.xml.bind.Marshaller;
import org.apache.beam.sdk.coders.StringUtf8Coder;
-import org.apache.beam.sdk.io.FileBasedSink.FileBasedWriteOperation;
-import org.apache.beam.sdk.io.FileBasedSink.FileBasedWriter;
import org.apache.beam.sdk.options.PipelineOptions;
import org.apache.beam.sdk.transforms.display.DisplayData;
import org.apache.beam.sdk.util.CoderUtils;
-import org.apache.beam.sdk.values.PCollection;
-// CHECKSTYLE.OFF: JavadocStyle
-/**
- * A {@link Sink} that outputs records as XML-formatted elements. Writes a {@link PCollection} of
- * records from JAXB-annotated classes to a single file location.
- *
- * <p>Given a PCollection containing records of type T that can be marshalled to XML elements, this
- * Sink will produce a single file consisting of a single root element that contains all of the
- * elements in the PCollection.
- *
- * <p>XML Sinks are created with a base filename to write to, a root element name that will be used
- * for the root element of the output files, and a class to bind to an XML element. This class
- * will be used in the marshalling of records in an input PCollection to their XML representation
- * and must be able to be bound using JAXB annotations (checked at pipeline construction time).
- *
- * <p>XML Sinks can be written to using the {@link Write} transform:
- *
- * <pre>
- * p.apply(Write.to(
- * XmlSink.ofRecordClass(Type.class)
- * .withRootElementName(root_element)
- * .toFilenamePrefix(output_filename)));
- * </pre>
- *
- * <p>For example, consider the following class with JAXB annotations:
- *
- * <pre>
- * {@literal @}XmlRootElement(name = "word_count_result")
- * {@literal @}XmlType(propOrder = {"word", "frequency"})
- * public class WordFrequency {
- * private String word;
- * private long frequency;
- *
- * public WordFrequency() { }
- *
- * public WordFrequency(String word, long frequency) {
- * this.word = word;
- * this.frequency = frequency;
- * }
- *
- * public void setWord(String word) {
- * this.word = word;
- * }
- *
- * public void setFrequency(long frequency) {
- * this.frequency = frequency;
- * }
- *
- * public long getFrequency() {
- * return frequency;
- * }
- *
- * public String getWord() {
- * return word;
- * }
- * }
- * </pre>
- *
- * <p>The following will produce XML output with a root element named "words" from a PCollection of
- * WordFrequency objects:
- * <pre>
- * p.apply(Write.to(
- * XmlSink.ofRecordClass(WordFrequency.class)
- * .withRootElement("words")
- * .toFilenamePrefix(output_file)));
- * </pre>
- *
- * <p>The output of which will look like:
- * <pre>
- * {@code
- * <words>
- *
- * <word_count_result>
- * <word>decreased</word>
- * <frequency>1</frequency>
- * </word_count_result>
- *
- * <word_count_result>
- * <word>War</word>
- * <frequency>4</frequency>
- * </word_count_result>
- *
- * <word_count_result>
- * <word>empress'</word>
- * <frequency>14</frequency>
- * </word_count_result>
- *
- * <word_count_result>
- * <word>stoops</word>
- * <frequency>6</frequency>
- * </word_count_result>
- *
- * ...
- * </words>
- * }</pre>
- */
-// CHECKSTYLE.ON: JavadocStyle
-@SuppressWarnings("checkstyle:javadocstyle")
-public class XmlSink {
+/** Implementation of {@link XmlIO#write}. */
+class XmlSink<T> extends FileBasedSink<T> {
protected static final String XML_EXTENSION = "xml";
- /**
- * Returns a builder for an XmlSink. You'll need to configure the class to bind, the root
- * element name, and the output file prefix with {@link Bound#ofRecordClass}, {@link
- * Bound#withRootElement}, and {@link Bound#toFilenamePrefix}, respectively.
- */
- public static Bound<?> write() {
- return new Bound<>(null, null, null);
+ private final XmlIO.Write<T> spec;
+
+ XmlSink(XmlIO.Write<T> spec) {
+ super(spec.getFilenamePrefix(), XML_EXTENSION);
+ this.spec = spec;
}
/**
- * Returns an XmlSink that writes objects as XML entities.
- *
- * <p>Output files will have the name {@literal {baseOutputFilename}-0000i-of-0000n.xml} where n
- * is the number of output bundles.
- *
- * @param klass the class of the elements to write.
- * @param rootElementName the enclosing root element.
- * @param baseOutputFilename the output filename prefix.
+ * Validates that the root element, class to bind to a JAXB context, and filenamePrefix have
+ * been set and that the class can be bound in a JAXB context.
*/
- public static <T> Bound<T> writeOf(
- Class<T> klass, String rootElementName, String baseOutputFilename) {
- return new Bound<>(klass, rootElementName, baseOutputFilename);
+ @Override
+ public void validate(PipelineOptions options) {
+ spec.validate(null);
}
/**
- * A {@link FileBasedSink} that writes objects as XML elements.
+ * Creates an {@link XmlWriteOperation}.
*/
- public static class Bound<T> extends FileBasedSink<T> {
- final Class<T> classToBind;
- final String rootElementName;
-
- private Bound(Class<T> classToBind, String rootElementName, String baseOutputFilename) {
- super(baseOutputFilename, XML_EXTENSION);
- this.classToBind = classToBind;
- this.rootElementName = rootElementName;
- }
-
- /**
- * Returns an XmlSink that writes objects of the class specified as XML elements.
- *
- * <p>The specified class must be able to be used to create a JAXB context.
- */
- public <T> Bound<T> ofRecordClass(Class<T> classToBind) {
- return new Bound<>(classToBind, rootElementName, getBaseOutputFilenameProvider().get());
- }
-
- /**
- * Returns an XmlSink that writes to files with the given prefix.
- *
- * <p>Output files will have the name {@literal {filenamePrefix}-0000i-of-0000n.xml} where n is
- * the number of output bundles.
- */
- public Bound<T> toFilenamePrefix(String baseOutputFilename) {
- return new Bound<>(classToBind, rootElementName, baseOutputFilename);
- }
-
- /**
- * Returns an XmlSink that writes XML files with an enclosing root element of the
- * supplied name.
- */
- public Bound<T> withRootElement(String rootElementName) {
- return new Bound<>(classToBind, rootElementName, getBaseOutputFilenameProvider().get());
- }
-
- /**
- * Validates that the root element, class to bind to a JAXB context, and filenamePrefix have
- * been set and that the class can be bound in a JAXB context.
- */
- @Override
- public void validate(PipelineOptions options) {
- checkNotNull(classToBind, "Missing a class to bind to a JAXB context.");
- checkNotNull(rootElementName, "Missing a root element name.");
- checkNotNull(getBaseOutputFilenameProvider().get(), "Missing a filename to write to.");
- try {
- JAXBContext.newInstance(classToBind);
- } catch (JAXBException e) {
- throw new RuntimeException("Error binding classes to a JAXB Context.", e);
- }
- }
+ @Override
+ public XmlWriteOperation<T> createWriteOperation(PipelineOptions options) {
+ return new XmlWriteOperation<>(this);
+ }
- /**
- * Creates an {@link XmlWriteOperation}.
- */
- @Override
- public XmlWriteOperation<T> createWriteOperation(PipelineOptions options) {
- return new XmlWriteOperation<>(this);
- }
+ @Override
+ public void populateDisplayData(DisplayData.Builder builder) {
+ spec.populateDisplayData(builder);
+ }
- @Override
- public void populateDisplayData(DisplayData.Builder builder) {
- super.populateDisplayData(builder);
- builder
- .addIfNotNull(DisplayData.item("rootElement", rootElementName)
- .withLabel("XML Root Element"))
- .addIfNotNull(DisplayData.item("recordClass", classToBind)
- .withLabel("XML Record Class"));
- }
+ void populateFileBasedDisplayData(DisplayData.Builder builder) {
+ super.populateDisplayData(builder);
}
/**
* {@link Sink.WriteOperation} for XML {@link Sink}s.
*/
protected static final class XmlWriteOperation<T> extends FileBasedWriteOperation<T> {
- public XmlWriteOperation(XmlSink.Bound<T> sink) {
+ public XmlWriteOperation(XmlSink<T> sink) {
super(sink);
}
@@ -247,7 +79,7 @@ public class XmlSink {
public XmlWriter<T> createWriter(PipelineOptions options) throws Exception {
JAXBContext context;
Marshaller marshaller;
- context = JAXBContext.newInstance(getSink().classToBind);
+ context = JAXBContext.newInstance(getSink().spec.getRecordClass());
marshaller = context.createMarshaller();
marshaller.setProperty(Marshaller.JAXB_FORMATTED_OUTPUT, Boolean.TRUE);
marshaller.setProperty(Marshaller.JAXB_FRAGMENT, Boolean.TRUE);
@@ -259,8 +91,8 @@ public class XmlSink {
* Return the XmlSink.Bound for this write operation.
*/
@Override
- public XmlSink.Bound<T> getSink() {
- return (XmlSink.Bound<T>) super.getSink();
+ public XmlSink<T> getSink() {
+ return (XmlSink<T>) super.getSink();
}
}
@@ -289,7 +121,7 @@ public class XmlSink {
*/
@Override
protected void writeHeader() throws Exception {
- String rootElementName = getWriteOperation().getSink().rootElementName;
+ String rootElementName = getWriteOperation().getSink().spec.getRootElement();
os.write(CoderUtils.encodeToByteArray(StringUtf8Coder.of(), "<" + rootElementName + ">\n"));
}
@@ -298,7 +130,7 @@ public class XmlSink {
*/
@Override
protected void writeFooter() throws Exception {
- String rootElementName = getWriteOperation().getSink().rootElementName;
+ String rootElementName = getWriteOperation().getSink().spec.getRootElement();
os.write(CoderUtils.encodeToByteArray(StringUtf8Coder.of(), "\n</" + rootElementName + ">"));
}
http://git-wip-us.apache.org/repos/asf/beam/blob/d0c0a60c/sdks/java/core/src/main/java/org/apache/beam/sdk/io/XmlSource.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/XmlSource.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/XmlSource.java
index 6bf2015..7416c85 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/XmlSource.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/XmlSource.java
@@ -17,8 +17,6 @@
*/
package org.apache.beam.sdk.io;
-import static com.google.common.base.Preconditions.checkNotNull;
-
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
@@ -45,154 +43,29 @@ import javax.xml.stream.XMLStreamReader;
import org.apache.beam.sdk.coders.Coder;
import org.apache.beam.sdk.coders.JAXBCoder;
import org.apache.beam.sdk.options.PipelineOptions;
-import org.apache.beam.sdk.runners.PipelineRunner;
import org.apache.beam.sdk.transforms.display.DisplayData;
-import org.apache.beam.sdk.values.PCollection;
import org.codehaus.stax2.XMLInputFactory2;
-// CHECKSTYLE.OFF: JavadocStyle
-/**
- * A source that can be used to read XML files. This source reads one or more
- * XML files and creates a {@link PCollection} of a given type. A {@link Read} transform can be
- * created by passing an {@link XmlSource} object to {@link Read#from}. Please note the
- * example given below.
- *
- * <p>The XML file must be of the following form, where {@code root} and {@code record} are XML
- * element names that are defined by the user:
- *
- * <pre>
- * {@code
- * <root>
- * <record> ... </record>
- * <record> ... </record>
- * <record> ... </record>
- * ...
- * <record> ... </record>
- * </root>
- * }
- * </pre>
- *
- * <p>Basically, the XML document should contain a single root element with an inner list consisting
- * entirely of record elements. The records may contain arbitrary XML content; however, that content
- * <b>must not</b> contain the start {@code <record>} or end {@code </record>} tags. This
- * restriction enables reading from large XML files in parallel from different offsets in the file.
- *
- * <p>Root and/or record elements may additionally contain an arbitrary number of XML attributes.
- * Additionally, users must provide a class of a JAXB-annotated Java type that can be used to convert
- * records into Java objects and vice versa using JAXB marshalling/unmarshalling mechanisms. Reading
- * the source will generate a {@code PCollection} of the given JAXB annotated Java type.
- * Optionally users may provide a minimum size of a bundle that should be created for the source.
- *
- * <p>The following example shows how to read from {@link XmlSource} in a Beam pipeline:
- *
- * <pre>
- * {@code
- * XmlSource<String> source = XmlSource.<String>from(file.toPath().toString())
- * .withRootElement("root")
- * .withRecordElement("record")
- * .withRecordClass(Record.class);
- * PCollection<String> output = p.apply(Read.from(source));
- * }
- * </pre>
- *
- * <p>Currently, only XML files that use single-byte characters are supported. Using a file that
- * contains multi-byte characters may result in data loss or duplication.
- *
- * <p>To use {@link XmlSource}:
- * <ol>
- * <li>Explicitly declare a dependency on org.codehaus.woodstox:stax2-api</li>
- * <li>Include a compatible implementation on the classpath at run-time,
- * such as org.codehaus.woodstox:woodstox-core-asl</li>
- * </ol>
- *
- * <p>These dependencies have been declared as optional in the sdks/java/core/pom.xml file of
- * Apache Beam.
- *
- * <h3>Permissions</h3>
- * Permission requirements depend on the
- * {@link org.apache.beam.sdk.runners.PipelineRunner PipelineRunner} that is
- * used to execute the Beam pipeline. Please refer to the documentation of corresponding
- * {@link PipelineRunner PipelineRunners} for more details.
- *
- * @param <T> Type of the objects that represent the records of the XML file. The
- * {@code PCollection} generated by this source will be of this type.
- */
-// CHECKSTYLE.ON: JavadocStyle
+/** Implementation of {@link XmlIO#read}. */
public class XmlSource<T> extends FileBasedSource<T> {
private static final String XML_VERSION = "1.1";
- private static final int DEFAULT_MIN_BUNDLE_SIZE = 8 * 1024;
- private final String rootElement;
- private final String recordElement;
- private final Class<T> recordClass;
-
- /**
- * Creates an XmlSource for a single XML file or a set of XML files defined by a Java "glob" file
- * pattern. Each XML file should be of the form defined in {@link XmlSource}.
- */
- public static <T> XmlSource<T> from(String fileOrPatternSpec) {
- return new XmlSource<>(fileOrPatternSpec, DEFAULT_MIN_BUNDLE_SIZE, null, null, null);
- }
-
- /**
- * Sets name of the root element of the XML document. This will be used to create a valid starting
- * root element when initiating a bundle of records created from an XML document. This is a
- * required parameter.
- */
- public XmlSource<T> withRootElement(String rootElement) {
- return new XmlSource<>(
- getFileOrPatternSpec(), getMinBundleSize(), rootElement, recordElement, recordClass);
- }
- /**
- * Sets name of the record element of the XML document. This will be used to determine offset of
- * the first record of a bundle created from the XML document. This is a required parameter.
- */
- public XmlSource<T> withRecordElement(String recordElement) {
- return new XmlSource<>(
- getFileOrPatternSpec(), getMinBundleSize(), rootElement, recordElement, recordClass);
- }
+ private final XmlIO.Read<T> spec;
- /**
- * Sets a JAXB annotated class that can be populated using a record of the provided XML file. This
- * will be used when unmarshalling record objects from the XML file. This is a required
- * parameter.
- */
- public XmlSource<T> withRecordClass(Class<T> recordClass) {
- return new XmlSource<>(
- getFileOrPatternSpec(), getMinBundleSize(), rootElement, recordElement, recordClass);
+ XmlSource(XmlIO.Read<T> spec) {
+ super(spec.getFileOrPatternSpec(), spec.getMinBundleSize());
+ this.spec = spec;
}
- /**
- * Sets a parameter {@code minBundleSize} for the minimum bundle size of the source. Please refer
- * to {@link OffsetBasedSource} for the definition of minBundleSize. This is an optional
- * parameter.
- */
- public XmlSource<T> withMinBundleSize(long minBundleSize) {
- return new XmlSource<>(
- getFileOrPatternSpec(), minBundleSize, rootElement, recordElement, recordClass);
- }
-
- private XmlSource(String fileOrPattern, long minBundleSize, String rootElement,
- String recordElement, Class<T> recordClass) {
- super(fileOrPattern, minBundleSize);
- this.rootElement = rootElement;
- this.recordElement = recordElement;
- this.recordClass = recordClass;
- }
-
- private XmlSource(String fileOrPattern, long minBundleSize, long startOffset, long endOffset,
- String rootElement, String recordElement, Class<T> recordClass) {
- super(fileOrPattern, minBundleSize, startOffset, endOffset);
- this.rootElement = rootElement;
- this.recordElement = recordElement;
- this.recordClass = recordClass;
+ private XmlSource(XmlIO.Read<T> spec, long startOffset, long endOffset) {
+ super(spec.getFileOrPatternSpec(), spec.getMinBundleSize(), startOffset, endOffset);
+ this.spec = spec;
}
@Override
protected FileBasedSource<T> createForSubrangeOfFile(String fileName, long start, long end) {
- return new XmlSource<T>(
- fileName, getMinBundleSize(), start, end, rootElement, recordElement, recordClass);
+ return new XmlSource<T>(spec.from(fileName), start, end);
}
@Override
@@ -203,42 +76,17 @@ public class XmlSource<T> extends FileBasedSource<T> {
@Override
public void validate() {
super.validate();
- checkNotNull(
- rootElement, "rootElement is null. Use builder method withRootElement() to set this.");
- checkNotNull(
- recordElement,
- "recordElement is null. Use builder method withRecordElement() to set this.");
- checkNotNull(
- recordClass, "recordClass is null. Use builder method withRecordClass() to set this.");
+ spec.validate(null);
}
@Override
public void populateDisplayData(DisplayData.Builder builder) {
- super.populateDisplayData(builder);
- builder
- .addIfNotNull(DisplayData.item("rootElement", rootElement)
- .withLabel("XML Root Element"))
- .addIfNotNull(DisplayData.item("recordElement", recordElement)
- .withLabel("XML Record Element"))
- .addIfNotNull(DisplayData.item("recordClass", recordClass)
- .withLabel("XML Record Class"));
+ spec.populateDisplayData(builder);
}
@Override
public Coder<T> getDefaultOutputCoder() {
- return JAXBCoder.of(recordClass);
- }
-
- public String getRootElement() {
- return rootElement;
- }
-
- public String getRecordElement() {
- return recordElement;
- }
-
- public Class<T> getRecordClass() {
- return recordClass;
+ return JAXBCoder.of(spec.getRecordClass());
}
/**
@@ -289,7 +137,7 @@ public class XmlSource<T> extends FileBasedSource<T> {
// Set up a JAXB Unmarshaller that can be used to unmarshall record objects.
try {
- JAXBContext jaxbContext = JAXBContext.newInstance(getCurrentSource().recordClass);
+ JAXBContext jaxbContext = JAXBContext.newInstance(getCurrentSource().spec.getRecordClass());
jaxbUnmarshaller = jaxbContext.createUnmarshaller();
// Throw errors if validation fails. JAXB by default ignores validation errors.
@@ -334,8 +182,10 @@ public class XmlSource<T> extends FileBasedSource<T> {
// this XML parsing may fail or may produce incorrect results.
byte[] dummyStartDocumentBytes =
- ("<?xml version=\"" + XML_VERSION + "\" encoding=\"UTF-8\" ?>"
- + "<" + getCurrentSource().rootElement + ">").getBytes(StandardCharsets.UTF_8);
+ (String.format(
+ "<?xml version=\"%s\" encoding=\"UTF-8\" ?><%s>",
+ XML_VERSION, getCurrentSource().spec.getRootElement()))
+ .getBytes(StandardCharsets.UTF_8);
preambleByteBuffer.write(dummyStartDocumentBytes);
// Gets the byte offset (in the input file) of the first record in ReadableByteChannel. This
// method returns the offset and stores any bytes that should be used when creating the XML
@@ -383,7 +233,7 @@ public class XmlSource<T> extends FileBasedSource<T> {
ByteBuffer buf = ByteBuffer.allocate(BUF_SIZE);
byte[] recordStartBytes =
- ("<" + getCurrentSource().recordElement).getBytes(StandardCharsets.UTF_8);
+ ("<" + getCurrentSource().spec.getRecordElement()).getBytes(StandardCharsets.UTF_8);
outer: while (channel.read(buf) > 0) {
buf.flip();
@@ -494,7 +344,7 @@ public class XmlSource<T> extends FileBasedSource<T> {
int event = parser.next();
if (event == XMLStreamConstants.START_ELEMENT) {
String localName = parser.getLocalName();
- if (localName.equals(getCurrentSource().recordElement)) {
+ if (localName.equals(getCurrentSource().spec.getRecordElement())) {
break;
}
}
@@ -521,7 +371,8 @@ public class XmlSource<T> extends FileBasedSource<T> {
return false;
}
}
- JAXBElement<T> jb = jaxbUnmarshaller.unmarshal(parser, getCurrentSource().recordClass);
+ JAXBElement<T> jb =
+ jaxbUnmarshaller.unmarshal(parser, getCurrentSource().spec.getRecordClass());
currentRecord = jb.getValue();
return true;
} catch (JAXBException | XMLStreamException e) {
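Editor's note on the refactoring above: the removed javadoc explains that records must be JAXB-annotated so the source can unmarshal them, and the diff moves all configuration onto XmlIO.Read, which XmlSource now merely wraps. The following is a minimal sketch, not taken from the commit, of a JAXB record type and the equivalent XmlIO-based read; the WordRecord class, its field, and the file pattern are illustrative assumptions.
import javax.xml.bind.annotation.XmlAttribute;
import javax.xml.bind.annotation.XmlRootElement;
import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.io.XmlIO;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.values.PCollection;
/** Hypothetical JAXB-annotated record type; the name and field are illustrative only. */
@XmlRootElement(name = "record")
class WordRecord {
  @XmlAttribute String name;
}
class XmlReadSketch {
  public static void main(String[] args) {
    Pipeline p = Pipeline.create(PipelineOptionsFactory.create());
    // The builder mirrors the XmlIO.Read spec that XmlSource now wraps internally.
    PCollection<WordRecord> records =
        p.apply(
            XmlIO.<WordRecord>read()
                .from("/path/to/input*.xml")            // assumed file pattern
                .withRootElement("root")
                .withRecordElement("record")
                .withRecordClass(WordRecord.class));
    p.run();
  }
}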
http://git-wip-us.apache.org/repos/asf/beam/blob/d0c0a60c/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/display/DisplayData.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/display/DisplayData.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/display/DisplayData.java
index 669dc6d..3c4337b 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/display/DisplayData.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/display/DisplayData.java
@@ -778,6 +778,12 @@ public class DisplayData implements Serializable {
visitedComponents.add(subComponent);
visitedPathMap.put(path, subComponent);
Class<?> namespace = subComponent.getClass();
+ // Common case: AutoValue classes such as AutoValue_FooIO_Read. It's more useful
+ // to show the user the FooIO.Read class, which is the direct superclass of the AutoValue
+ // generated class.
+ if (namespace.getSimpleName().startsWith("AutoValue_")) {
+ namespace = namespace.getSuperclass();
+ }
Path prevPath = latestPath;
Class<?> prevNs = latestNs;
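A minimal sketch, not part of the commit, of what the namespace remapping added above does at runtime, assuming an @AutoValue abstract class Foo whose generated subclass is AutoValue_Foo (mirroring the test added further below):
// Hypothetical classes: @AutoValue abstract class Foo { ... } generates AutoValue_Foo.
Class<?> namespace = AutoValue_Foo.class;               // runtime class of the component
if (namespace.getSimpleName().startsWith("AutoValue_")) {
  namespace = namespace.getSuperclass();                // reported namespace becomes Foo
}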
http://git-wip-us.apache.org/repos/asf/beam/blob/d0c0a60c/sdks/java/core/src/test/java/org/apache/beam/sdk/io/XmlSinkTest.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/io/XmlSinkTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/io/XmlSinkTest.java
index 63b5d11..7f559d1 100644
--- a/sdks/java/core/src/test/java/org/apache/beam/sdk/io/XmlSinkTest.java
+++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/io/XmlSinkTest.java
@@ -59,7 +59,6 @@ public class XmlSinkTest {
@Rule
public ExpectedException thrown = ExpectedException.none();
- private Class<Bird> testClass = Bird.class;
private String testRootElement = "testElement";
private String testFilePrefix = "/path/to/testPrefix";
@@ -70,7 +69,12 @@ public class XmlSinkTest {
public void testXmlWriter() throws Exception {
PipelineOptions options = PipelineOptionsFactory.create();
XmlWriteOperation<Bird> writeOp =
- XmlSink.writeOf(Bird.class, "birds", testFilePrefix).createWriteOperation(options);
+ XmlIO.<Bird>write()
+ .toFilenamePrefix(testFilePrefix)
+ .withRecordClass(Bird.class)
+ .withRootElement("birds")
+ .createSink()
+ .createWriteOperation(options);
XmlWriter<Bird> writer = writeOp.createWriter(options);
List<Bird> bundle =
@@ -85,51 +89,37 @@ public class XmlSinkTest {
* Builder methods correctly initialize an XML Sink.
*/
@Test
- public void testBuildXmlSink() {
- XmlSink.Bound<Bird> sink =
- XmlSink.write()
+ public void testBuildXmlWriteTransform() {
+ XmlIO.Write<Bird> write =
+ XmlIO.<Bird>write()
.toFilenamePrefix(testFilePrefix)
- .ofRecordClass(testClass)
+ .withRecordClass(Bird.class)
.withRootElement(testRootElement);
- assertEquals(testClass, sink.classToBind);
- assertEquals(testRootElement, sink.rootElementName);
- assertEquals(testFilePrefix, sink.getBaseOutputFilenameProvider().get());
+ assertEquals(Bird.class, write.getRecordClass());
+ assertEquals(testRootElement, write.getRootElement());
+ assertEquals(testFilePrefix, write.getFilenamePrefix());
}
- /**
- * Alternate builder method correctly initializes an XML Sink.
- */
+ /** Validation ensures no fields are missing. */
@Test
- public void testBuildXmlSinkDirect() {
- XmlSink.Bound<Bird> sink =
- XmlSink.writeOf(Bird.class, testRootElement, testFilePrefix);
- assertEquals(testClass, sink.classToBind);
- assertEquals(testRootElement, sink.rootElementName);
- assertEquals(testFilePrefix, sink.getBaseOutputFilenameProvider().get());
+ public void testValidateXmlSinkMissingRecordClass() {
+ thrown.expect(NullPointerException.class);
+ XmlIO.<Bird>write()
+ .withRootElement(testRootElement)
+ .toFilenamePrefix(testFilePrefix)
+ .validate(null);
}
- /**
- * Validation ensures no fields are missing.
- */
@Test
- public void testValidateXmlSinkMissingFields() {
- XmlSink.Bound<Bird> sink;
- sink = XmlSink.writeOf(null, testRootElement, testFilePrefix);
- validateAndFailIfSucceeds(sink, NullPointerException.class);
- sink = XmlSink.writeOf(testClass, null, testFilePrefix);
- validateAndFailIfSucceeds(sink, NullPointerException.class);
- sink = XmlSink.writeOf(testClass, testRootElement, null);
- validateAndFailIfSucceeds(sink, NullPointerException.class);
+ public void testValidateXmlSinkMissingRootElement() {
+ thrown.expect(NullPointerException.class);
+ XmlIO.<Bird>write().withRecordClass(Bird.class).toFilenamePrefix(testFilePrefix).validate(null);
}
- /**
- * Call validate and fail if validation does not throw the expected exception.
- */
- private <T> void validateAndFailIfSucceeds(
- XmlSink.Bound<T> sink, Class<? extends Exception> expected) {
- thrown.expect(expected);
- PipelineOptions options = PipelineOptionsFactory.create();
- sink.validate(options);
+ @Test
+ public void testValidateXmlSinkMissingFilePrefix() {
+ thrown.expect(NullPointerException.class);
+ XmlIO.<Bird>write().withRecordClass(Bird.class).withRootElement(testRootElement).validate(null);
}
/**
@@ -138,13 +128,13 @@ public class XmlSinkTest {
@Test
public void testCreateWriteOperations() {
PipelineOptions options = PipelineOptionsFactory.create();
- XmlSink.Bound<Bird> sink =
- XmlSink.writeOf(testClass, testRootElement, testFilePrefix);
+ XmlSink<Bird> sink =
+ XmlIO.<Bird>write()
+ .withRecordClass(Bird.class)
+ .withRootElement(testRootElement)
+ .toFilenamePrefix(testFilePrefix)
+ .createSink();
XmlWriteOperation<Bird> writeOp = sink.createWriteOperation(options);
- assertEquals(testClass, writeOp.getSink().classToBind);
- assertEquals(testFilePrefix, writeOp.getSink().getBaseOutputFilenameProvider().get());
- assertEquals(testRootElement, writeOp.getSink().rootElementName);
- // assertEquals(XmlSink.XML_EXTENSION, writeOp.getSink().getFilenamePolicy().extension);
Path outputPath = new File(testFilePrefix).toPath();
Path tempPath = new File(writeOp.tempDirectory.get()).toPath();
assertEquals(outputPath.getParent(), tempPath.getParent());
@@ -159,7 +149,11 @@ public class XmlSinkTest {
public void testCreateWriter() throws Exception {
PipelineOptions options = PipelineOptionsFactory.create();
XmlWriteOperation<Bird> writeOp =
- XmlSink.writeOf(testClass, testRootElement, testFilePrefix)
+ XmlIO.<Bird>write()
+ .withRecordClass(Bird.class)
+ .withRootElement(testRootElement)
+ .toFilenamePrefix(testFilePrefix)
+ .createSink()
.createWriteOperation(options);
XmlWriter<Bird> writer = writeOp.createWriter(options);
Path outputPath = new File(testFilePrefix).toPath();
@@ -167,18 +161,17 @@ public class XmlSinkTest {
assertEquals(outputPath.getParent(), tempPath.getParent());
assertThat(
tempPath.getFileName().toString(), containsString("temp-beam-" + outputPath.getFileName()));
- assertEquals(testRootElement, writer.getWriteOperation().getSink().rootElementName);
assertNotNull(writer.marshaller);
}
@Test
public void testDisplayData() {
- XmlSink.Bound<Integer> sink = XmlSink.write()
+ XmlIO.Write<Integer> write = XmlIO.<Integer>write()
.toFilenamePrefix("foobar")
.withRootElement("bird")
- .ofRecordClass(Integer.class);
+ .withRecordClass(Integer.class);
- DisplayData displayData = DisplayData.from(sink);
+ DisplayData displayData = DisplayData.from(write);
assertThat(displayData, hasDisplayItem("fileNamePattern", "foobar-SSSSS-of-NNNNN.xml"));
assertThat(displayData, hasDisplayItem("rootElement", "bird"));
http://git-wip-us.apache.org/repos/asf/beam/blob/d0c0a60c/sdks/java/core/src/test/java/org/apache/beam/sdk/io/XmlSourceTest.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/io/XmlSourceTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/io/XmlSourceTest.java
index 5f71f30..0120b8b 100644
--- a/sdks/java/core/src/test/java/org/apache/beam/sdk/io/XmlSourceTest.java
+++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/io/XmlSourceTest.java
@@ -285,12 +285,14 @@ public class XmlSourceTest {
File file = tempFolder.newFile("trainXMLTiny");
Files.write(file.toPath(), tinyXML.getBytes(StandardCharsets.UTF_8));
- XmlSource<Train> source =
- XmlSource.<Train>from(file.toPath().toString())
+ BoundedSource<Train> source =
+ XmlIO.<Train>read()
+ .from(file.toPath().toString())
.withRootElement("trains")
.withRecordElement("train")
.withRecordClass(Train.class)
- .withMinBundleSize(1024);
+ .withMinBundleSize(1024)
+ .createSource();
List<Train> expectedResults = ImmutableList.of(
new Train("Thomas", Train.TRAIN_NUMBER_UNDEFINED, null, null),
@@ -308,12 +310,14 @@ public class XmlSourceTest {
File file = tempFolder.newFile("trainXMLTiny");
Files.write(file.toPath(), xmlWithMultiByteChars.getBytes(StandardCharsets.UTF_8));
- XmlSource<Train> source =
- XmlSource.<Train>from(file.toPath().toString())
+ BoundedSource<Train> source =
+ XmlIO.<Train>read()
+ .from(file.toPath().toString())
.withRootElement("trains")
.withRecordElement("train")
.withRecordClass(Train.class)
- .withMinBundleSize(1024);
+ .withMinBundleSize(1024)
+ .createSource();
List<Train> expectedResults = ImmutableList.of(
new Train("Thomas�", Train.TRAIN_NUMBER_UNDEFINED, null, null),
@@ -334,12 +338,14 @@ public class XmlSourceTest {
File file = tempFolder.newFile("trainXMLTiny");
Files.write(file.toPath(), xmlWithMultiByteElementName.getBytes(StandardCharsets.UTF_8));
- XmlSource<Train> source =
- XmlSource.<Train>from(file.toPath().toString())
+ BoundedSource<Train> source =
+ XmlIO.<Train>read()
+ .from(file.toPath().toString())
.withRootElement("\u0daf\u0dd4\u0db8\u0dca\u0dbb\u0dd2\u0dba\u0db1\u0dca")
.withRecordElement("\u0daf\u0dd4\u0db8\u0dca\u0dbb\u0dd2\u0dba")
.withRecordClass(Train.class)
- .withMinBundleSize(1024);
+ .withMinBundleSize(1024)
+ .createSource();
List<Train> expectedResults = ImmutableList.of(
new Train("Thomas", Train.TRAIN_NUMBER_UNDEFINED, null, null),
@@ -357,18 +363,20 @@ public class XmlSourceTest {
File file = tempFolder.newFile("trainXMLTiny");
Files.write(file.toPath(), tinyXML.getBytes(StandardCharsets.UTF_8));
- XmlSource<Train> source =
- XmlSource.<Train>from(file.toPath().toString())
+ BoundedSource<Train> source =
+ XmlIO.<Train>read()
+ .from(file.toPath().toString())
.withRootElement("trains")
.withRecordElement("train")
.withRecordClass(Train.class)
- .withMinBundleSize(10);
- List<? extends FileBasedSource<Train>> splits = source.split(50, null);
+ .withMinBundleSize(10)
+ .createSource();
+ List<? extends BoundedSource<Train>> splits = source.split(50, null);
assertTrue(splits.size() > 2);
List<Train> results = new ArrayList<>();
- for (FileBasedSource<Train> split : splits) {
+ for (BoundedSource<Train> split : splits) {
results.addAll(readEverythingFromReader(split.createReader(null)));
}
@@ -394,12 +402,14 @@ public class XmlSourceTest {
File file = tempFolder.newFile("trainXMLSmall");
Files.write(file.toPath(), trainXML.getBytes(StandardCharsets.UTF_8));
- XmlSource<Train> source =
- XmlSource.<Train>from(file.toPath().toString())
+ BoundedSource<Train> source =
+ XmlIO.<Train>read()
+ .from(file.toPath().toString())
.withRootElement("trains")
.withRecordElement("train")
.withRecordClass(Train.class)
- .withMinBundleSize(1024);
+ .withMinBundleSize(1024)
+ .createSource();
List<Train> expectedResults =
ImmutableList.of(new Train("Thomas", 1, "blue", null), new Train("Henry", 3, "green", null),
@@ -417,10 +427,12 @@ public class XmlSourceTest {
File file = tempFolder.newFile("trainXMLSmall");
Files.write(file.toPath(), trainXML.getBytes(StandardCharsets.UTF_8));
- XmlSource<Train> source =
- XmlSource.<Train>from(file.toPath().toString())
+ BoundedSource<Train> source =
+ XmlIO.<Train>read()
+ .from(file.toPath().toString())
.withRecordElement("train")
- .withRecordClass(Train.class);
+ .withRecordClass(Train.class)
+ .createSource();
exception.expect(NullPointerException.class);
exception.expectMessage(
@@ -433,10 +445,12 @@ public class XmlSourceTest {
File file = tempFolder.newFile("trainXMLSmall");
Files.write(file.toPath(), trainXML.getBytes(StandardCharsets.UTF_8));
- XmlSource<Train> source =
- XmlSource.<Train>from(file.toPath().toString())
+ BoundedSource<Train> source =
+ XmlIO.<Train>read()
+ .from(file.toPath().toString())
.withRootElement("trains")
- .withRecordClass(Train.class);
+ .withRecordClass(Train.class)
+ .createSource();
exception.expect(NullPointerException.class);
exception.expectMessage(
@@ -449,10 +463,12 @@ public class XmlSourceTest {
File file = tempFolder.newFile("trainXMLSmall");
Files.write(file.toPath(), trainXML.getBytes(StandardCharsets.UTF_8));
- XmlSource<Train> source =
- XmlSource.<Train>from(file.toPath().toString())
+ BoundedSource<Train> source =
+ XmlIO.<Train>read()
+ .from(file.toPath().toString())
.withRootElement("trains")
- .withRecordElement("train");
+ .withRecordElement("train")
+ .createSource();
exception.expect(NullPointerException.class);
exception.expectMessage(
@@ -465,11 +481,13 @@ public class XmlSourceTest {
File file = tempFolder.newFile("trainXMLSmall");
Files.write(file.toPath(), trainXML.getBytes(StandardCharsets.UTF_8));
- XmlSource<Train> source =
- XmlSource.<Train>from(file.toPath().toString())
+ BoundedSource<Train> source =
+ XmlIO.<Train>read()
+ .from(file.toPath().toString())
.withRootElement("something")
.withRecordElement("train")
- .withRecordClass(Train.class);
+ .withRecordClass(Train.class)
+ .createSource();
exception.expectMessage("Unexpected close tag </trains>; expected </something>.");
readEverythingFromReader(source.createReader(null));
@@ -480,11 +498,13 @@ public class XmlSourceTest {
File file = tempFolder.newFile("trainXMLSmall");
Files.write(file.toPath(), trainXML.getBytes(StandardCharsets.UTF_8));
- XmlSource<Train> source =
- XmlSource.<Train>from(file.toPath().toString())
+ BoundedSource<Train> source =
+ XmlIO.<Train>read()
+ .from(file.toPath().toString())
.withRootElement("trains")
.withRecordElement("something")
- .withRecordClass(Train.class);
+ .withRecordClass(Train.class)
+ .createSource();
assertEquals(readEverythingFromReader(source.createReader(null)), new ArrayList<Train>());
}
@@ -500,11 +520,13 @@ public class XmlSourceTest {
File file = tempFolder.newFile("trainXMLSmall");
Files.write(file.toPath(), trainXML.getBytes(StandardCharsets.UTF_8));
- XmlSource<WrongTrainType> source =
- XmlSource.<WrongTrainType>from(file.toPath().toString())
+ BoundedSource<WrongTrainType> source =
+ XmlIO.<WrongTrainType>read()
+ .from(file.toPath().toString())
.withRootElement("trains")
.withRecordElement("train")
- .withRecordClass(WrongTrainType.class);
+ .withRecordClass(WrongTrainType.class)
+ .createSource();
exception.expect(RuntimeException.class);
@@ -525,11 +547,13 @@ public class XmlSourceTest {
File file = tempFolder.newFile("trainXMLSmall");
Files.write(file.toPath(), trainXML.getBytes(StandardCharsets.UTF_8));
- XmlSource<Train> source =
- XmlSource.<Train>from(file.toPath().toString())
+ BoundedSource<Train> source =
+ XmlIO.<Train>read()
+ .from(file.toPath().toString())
.withRootElement("trains")
.withRecordElement("train")
- .withRecordClass(Train.class);
+ .withRecordClass(Train.class)
+ .createSource();
List<Train> expectedResults =
ImmutableList.of(new Train("Thomas", 1, "blue", null), new Train("Henry", 3, "green", null),
@@ -548,12 +572,14 @@ public class XmlSourceTest {
File file = tempFolder.newFile("trainXMLSmall");
Files.write(file.toPath(), trainXMLWithEmptyTags.getBytes(StandardCharsets.UTF_8));
- XmlSource<Train> source =
- XmlSource.<Train>from(file.toPath().toString())
+ BoundedSource<Train> source =
+ XmlIO.<Train>read()
+ .from(file.toPath().toString())
.withRootElement("trains")
.withRecordElement("train")
.withRecordClass(Train.class)
- .withMinBundleSize(1024);
+ .withMinBundleSize(1024)
+ .createSource();
List<Train> expectedResults = ImmutableList.of(new Train("Thomas", 1, "blue", null),
new Train("Henry", 3, "green", null), new Train("Toby", 7, "brown", null),
@@ -572,14 +598,15 @@ public class XmlSourceTest {
File file = tempFolder.newFile("trainXMLSmall");
Files.write(file.toPath(), trainXML.getBytes(StandardCharsets.UTF_8));
- XmlSource<Train> source =
- XmlSource.<Train>from(file.toPath().toString())
- .withRootElement("trains")
- .withRecordElement("train")
- .withRecordClass(Train.class)
- .withMinBundleSize(1024);
-
- PCollection<Train> output = p.apply("ReadFileData", Read.from(source));
+ PCollection<Train> output =
+ p.apply(
+ "ReadFileData",
+ XmlIO.<Train>read()
+ .from(file.toPath().toString())
+ .withRootElement("trains")
+ .withRecordElement("train")
+ .withRecordClass(Train.class)
+ .withMinBundleSize(1024));
List<Train> expectedResults =
ImmutableList.of(new Train("Thomas", 1, "blue", null), new Train("Henry", 3, "green", null),
@@ -595,12 +622,14 @@ public class XmlSourceTest {
File file = tempFolder.newFile("trainXMLSmall");
Files.write(file.toPath(), trainXMLWithAttributes.getBytes(StandardCharsets.UTF_8));
- XmlSource<Train> source =
- XmlSource.<Train>from(file.toPath().toString())
+ BoundedSource<Train> source =
+ XmlIO.<Train>read()
+ .from(file.toPath().toString())
.withRootElement("trains")
.withRecordElement("train")
.withRecordClass(Train.class)
- .withMinBundleSize(1024);
+ .withMinBundleSize(1024)
+ .createSource();
List<Train> expectedResults = ImmutableList.of(new Train("Thomas", 1, "blue", "small"),
new Train("Henry", 3, "green", "big"), new Train("Toby", 7, "brown", "small"),
@@ -618,12 +647,14 @@ public class XmlSourceTest {
File file = tempFolder.newFile("trainXMLSmall");
Files.write(file.toPath(), trainXMLWithSpaces.getBytes(StandardCharsets.UTF_8));
- XmlSource<Train> source =
- XmlSource.<Train>from(file.toPath().toString())
+ BoundedSource<Train> source =
+ XmlIO.<Train>read()
+ .from(file.toPath().toString())
.withRootElement("trains")
.withRecordElement("train")
.withRecordClass(Train.class)
- .withMinBundleSize(1024);
+ .withMinBundleSize(1024)
+ .createSource();
List<Train> expectedResults = ImmutableList.of(new Train("Thomas ", 1, "blue", null),
new Train("Henry", 3, "green", null), new Train("Toby", 7, " brown ", null),
@@ -642,12 +673,14 @@ public class XmlSourceTest {
List<Train> trains = generateRandomTrainList(100);
File file = createRandomTrainXML(fileName, trains);
- XmlSource<Train> source =
- XmlSource.<Train>from(file.toPath().toString())
+ BoundedSource<Train> source =
+ XmlIO.<Train>read()
+ .from(file.toPath().toString())
.withRootElement("trains")
.withRecordElement("train")
.withRecordClass(Train.class)
- .withMinBundleSize(1024);
+ .withMinBundleSize(1024)
+ .createSource();
assertThat(
trainsToStrings(trains),
@@ -662,13 +695,15 @@ public class XmlSourceTest {
List<Train> trains = generateRandomTrainList(100);
File file = createRandomTrainXML(fileName, trains);
- XmlSource<Train> source =
- XmlSource.<Train>from(file.toPath().toString())
- .withRootElement("trains")
- .withRecordElement("train")
- .withRecordClass(Train.class)
- .withMinBundleSize(1024);
- PCollection<Train> output = p.apply("ReadFileData", Read.from(source));
+ PCollection<Train> output =
+ p.apply(
+ "ReadFileData",
+ XmlIO.<Train>read()
+ .from(file.toPath().toString())
+ .withRootElement("trains")
+ .withRecordElement("train")
+ .withRecordClass(Train.class)
+ .withMinBundleSize(1024));
PAssert.that(output).containsInAnyOrder(trains);
p.run();
@@ -680,18 +715,20 @@ public class XmlSourceTest {
List<Train> trains = generateRandomTrainList(10);
File file = createRandomTrainXML(fileName, trains);
- XmlSource<Train> source =
- XmlSource.<Train>from(file.toPath().toString())
+ BoundedSource<Train> source =
+ XmlIO.<Train>read()
+ .from(file.toPath().toString())
.withRootElement("trains")
.withRecordElement("train")
.withRecordClass(Train.class)
- .withMinBundleSize(10);
- List<? extends FileBasedSource<Train>> splits = source.split(100, null);
+ .withMinBundleSize(10)
+ .createSource();
+ List<? extends BoundedSource<Train>> splits = source.split(100, null);
assertTrue(splits.size() > 2);
List<Train> results = new ArrayList<>();
- for (FileBasedSource<Train> split : splits) {
+ for (BoundedSource<Train> split : splits) {
results.addAll(readEverythingFromReader(split.createReader(null)));
}
@@ -704,19 +741,21 @@ public class XmlSourceTest {
List<Train> trains = generateRandomTrainList(100);
File file = createRandomTrainXML(fileName, trains);
- XmlSource<Train> source =
- XmlSource.<Train>from(file.toPath().toString())
+ BoundedSource<Train> source =
+ XmlIO.<Train>read()
+ .from(file.toPath().toString())
.withRootElement("trains")
.withRecordElement("train")
.withRecordClass(Train.class)
- .withMinBundleSize(10);
- List<? extends FileBasedSource<Train>> splits = source.split(256, null);
+ .withMinBundleSize(10)
+ .createSource();
+ List<? extends BoundedSource<Train>> splits = source.split(256, null);
// Not a trivial split
assertTrue(splits.size() > 2);
List<Train> results = new ArrayList<>();
- for (FileBasedSource<Train> split : splits) {
+ for (BoundedSource<Train> split : splits) {
results.addAll(readEverythingFromReader(split.createReader(null)));
}
assertThat(trainsToStrings(trains), containsInAnyOrder(trainsToStrings(results).toArray()));
@@ -729,14 +768,16 @@ public class XmlSourceTest {
List<Train> trains = generateRandomTrainList(100);
File file = createRandomTrainXML(fileName, trains);
- XmlSource<Train> fileSource =
- XmlSource.<Train>from(file.toPath().toString())
+ BoundedSource<Train> fileSource =
+ XmlIO.<Train>read()
+ .from(file.toPath().toString())
.withRootElement("trains")
.withRecordElement("train")
.withRecordClass(Train.class)
- .withMinBundleSize(10);
+ .withMinBundleSize(10)
+ .createSource();
- List<? extends FileBasedSource<Train>> splits =
+ List<? extends BoundedSource<Train>> splits =
fileSource.split(file.length() / 3, null);
for (BoundedSource<Train> splitSource : splits) {
int numItems = readEverythingFromReader(splitSource.createReader(null)).size();
@@ -771,11 +812,13 @@ public class XmlSourceTest {
File file = tempFolder.newFile("trainXMLSmall");
Files.write(file.toPath(), trainXMLWithAllFeaturesSingleByte.getBytes(StandardCharsets.UTF_8));
- XmlSource<Train> source =
- XmlSource.<Train>from(file.toPath().toString())
+ BoundedSource<Train> source =
+ XmlIO.<Train>read()
+ .from(file.toPath().toString())
.withRootElement("trains")
.withRecordElement("train")
- .withRecordClass(Train.class);
+ .withRecordClass(Train.class)
+ .createSource();
assertSplitAtFractionExhaustive(source, options);
}
@@ -788,11 +831,13 @@ public class XmlSourceTest {
File file = tempFolder.newFile("trainXMLSmall");
Files.write(file.toPath(), trainXMLWithAllFeaturesMultiByte.getBytes(StandardCharsets.UTF_8));
- XmlSource<Train> source =
- XmlSource.<Train>from(file.toPath().toString())
+ BoundedSource<Train> source =
+ XmlIO.<Train>read()
+ .from(file.toPath().toString())
.withRootElement("\u0daf\u0dd4\u0db8\u0dca\u0dbb\u0dd2\u0dba\u0db1\u0dca")
.withRecordElement("\u0daf\u0dd4\u0db8\u0dca\u0dbb\u0dd2\u0dba")
- .withRecordClass(Train.class);
+ .withRecordClass(Train.class)
+ .createSource();
assertSplitAtFractionExhaustive(source, options);
}
@@ -808,13 +853,15 @@ public class XmlSourceTest {
generateRandomTrainList(8);
createRandomTrainXML("otherfile.xml", trains1);
- XmlSource<Train> source =
- XmlSource.<Train>from(file.getParent() + "/" + "temp*.xml")
- .withRootElement("trains")
- .withRecordElement("train")
- .withRecordClass(Train.class)
- .withMinBundleSize(1024);
- PCollection<Train> output = p.apply("ReadFileData", Read.from(source));
+ PCollection<Train> output =
+ p.apply(
+ "ReadFileData",
+ XmlIO.<Train>read()
+ .from(file.getParent() + "/" + "temp*.xml")
+ .withRootElement("trains")
+ .withRecordElement("train")
+ .withRecordClass(Train.class)
+ .withMinBundleSize(1024));
List<Train> expectedResults = new ArrayList<>();
expectedResults.addAll(trains1);
@@ -827,15 +874,14 @@ public class XmlSourceTest {
@Test
public void testDisplayData() {
-
-
- XmlSource<?> source = XmlSource
- .<Integer>from("foo.xml")
- .withRootElement("bird")
- .withRecordElement("cat")
- .withMinBundleSize(1234)
- .withRecordClass(Integer.class);
- DisplayData displayData = DisplayData.from(source);
+ DisplayData displayData =
+ DisplayData.from(
+ XmlIO.<Integer>read()
+ .from("foo.xml")
+ .withRootElement("bird")
+ .withRecordElement("cat")
+ .withMinBundleSize(1234)
+ .withRecordClass(Integer.class));
assertThat(displayData, hasDisplayItem("filePattern", "foo.xml"));
assertThat(displayData, hasDisplayItem("rootElement", "bird"));
http://git-wip-us.apache.org/repos/asf/beam/blob/d0c0a60c/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/display/DisplayDataTest.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/display/DisplayDataTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/display/DisplayDataTest.java
index c617f06..9b24b69 100644
--- a/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/display/DisplayDataTest.java
+++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/display/DisplayDataTest.java
@@ -44,8 +44,10 @@ import static org.junit.Assert.fail;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
+import com.google.auto.value.AutoValue;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableMultimap;
+import com.google.common.collect.Iterables;
import com.google.common.collect.Multimap;
import com.google.common.testing.EqualsTester;
import java.io.IOException;
@@ -1299,6 +1301,21 @@ public class DisplayDataTest implements Serializable {
DisplayData.from(component);
}
+ @AutoValue
+ abstract static class Foo implements HasDisplayData {
+ @Override
+ public void populateDisplayData(Builder builder) {
+ builder.add(DisplayData.item("someKey", "someValue"));
+ }
+ }
+
+ @Test
+ public void testAutoValue() {
+ DisplayData data = DisplayData.from(new AutoValue_DisplayDataTest_Foo());
+ Item item = Iterables.getOnlyElement(data.asMap().values());
+ assertEquals(Foo.class, item.getNamespace());
+ }
+
private String quoted(Object obj) {
return String.format("\"%s\"", obj);
}
[19/50] [abbrv] beam git commit: This closes #2415
Posted by dh...@apache.org.
This closes #2415
Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/57929fb8
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/57929fb8
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/57929fb8
Branch: refs/heads/DSL_SQL
Commit: 57929fb802d0cb6a6b6c3f14819d473dc2ace113
Parents: e0df7d8 7d13061
Author: Eugene Kirpichov <ki...@google.com>
Authored: Tue Apr 18 21:13:05 2017 -0700
Committer: Eugene Kirpichov <ki...@google.com>
Committed: Tue Apr 18 21:13:05 2017 -0700
----------------------------------------------------------------------
.../apache/beam/sdk/util/IOChannelUtils.java | 9 +
.../sdk/io/gcp/bigquery/BatchLoadBigQuery.java | 180 ---
.../beam/sdk/io/gcp/bigquery/BatchLoads.java | 225 +++
.../sdk/io/gcp/bigquery/BigQueryHelpers.java | 13 +
.../beam/sdk/io/gcp/bigquery/BigQueryIO.java | 113 +-
.../io/gcp/bigquery/BigQueryTableSource.java | 4 +-
.../beam/sdk/io/gcp/bigquery/CreateTables.java | 127 ++
.../io/gcp/bigquery/GenerateShardedTable.java | 47 +
.../beam/sdk/io/gcp/bigquery/PrepareWrite.java | 81 +
.../beam/sdk/io/gcp/bigquery/ShardedKey.java | 25 +-
.../sdk/io/gcp/bigquery/StreamWithDeDup.java | 90 --
.../sdk/io/gcp/bigquery/StreamingInserts.java | 79 +
.../sdk/io/gcp/bigquery/StreamingWriteFn.java | 81 +-
.../io/gcp/bigquery/StreamingWriteTables.java | 86 ++
.../sdk/io/gcp/bigquery/TableDestination.java | 76 +
.../io/gcp/bigquery/TableDestinationCoder.java | 60 +
.../sdk/io/gcp/bigquery/TableRowWriter.java | 19 +-
.../sdk/io/gcp/bigquery/TagWithUniqueIds.java | 62 +
.../gcp/bigquery/TagWithUniqueIdsAndTable.java | 135 --
.../beam/sdk/io/gcp/bigquery/WriteBundles.java | 82 --
.../io/gcp/bigquery/WriteBundlesToFiles.java | 157 ++
.../sdk/io/gcp/bigquery/WritePartition.java | 163 +-
.../beam/sdk/io/gcp/bigquery/WriteRename.java | 71 +-
.../beam/sdk/io/gcp/bigquery/WriteTables.java | 58 +-
.../sdk/io/gcp/bigquery/BigQueryIOTest.java | 1393 +++++++-----------
.../io/gcp/bigquery/FakeBigQueryServices.java | 166 +++
.../sdk/io/gcp/bigquery/FakeDatasetService.java | 208 +++
.../sdk/io/gcp/bigquery/FakeJobService.java | 395 +++++
.../sdk/io/gcp/bigquery/TableContainer.java | 61 +
29 files changed, 2642 insertions(+), 1624 deletions(-)
----------------------------------------------------------------------
[05/50] [abbrv] beam git commit: Separates side input test and side
output test
Posted by dh...@apache.org.
Separates side input test and side output test
Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/a51bdd26
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/a51bdd26
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/a51bdd26
Branch: refs/heads/DSL_SQL
Commit: a51bdd266f9c877cb407de986a465fc9c7de76ff
Parents: a9bcc8b
Author: Eugene Kirpichov <ki...@google.com>
Authored: Sat Apr 15 16:38:35 2017 -0700
Committer: Eugene Kirpichov <ki...@google.com>
Committed: Tue Apr 18 18:02:06 2017 -0700
----------------------------------------------------------------------
.../beam/sdk/transforms/SplittableDoFnTest.java | 63 ++++++++++++++------
1 file changed, 44 insertions(+), 19 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/beam/blob/a51bdd26/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/SplittableDoFnTest.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/SplittableDoFnTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/SplittableDoFnTest.java
index 9e8c12e..30329f4 100644
--- a/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/SplittableDoFnTest.java
+++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/SplittableDoFnTest.java
@@ -22,6 +22,7 @@ import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
+import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
@@ -61,7 +62,7 @@ import org.junit.runners.JUnit4;
 * Tests for <a href="https://s.apache.org/splittable-do-fn">splittable</a> {@link DoFn} behavior.
*/
@RunWith(JUnit4.class)
-public class SplittableDoFnTest {
+public class SplittableDoFnTest implements Serializable {
static class PairStringWithIndexToLength extends DoFn<String, KV<String, Integer>> {
@ProcessElement
@@ -216,22 +217,18 @@ public class SplittableDoFnTest {
p.run();
}
- private static class SDFWithSideInputsAndOutputs extends DoFn<Integer, String> {
+ private static class SDFWithSideInput extends DoFn<Integer, String> {
private final PCollectionView<String> sideInput;
- private final TupleTag<String> additionalOutput;
- private SDFWithSideInputsAndOutputs(
- PCollectionView<String> sideInput, TupleTag<String> additionalOutput) {
+ private SDFWithSideInput(PCollectionView<String> sideInput) {
this.sideInput = sideInput;
- this.additionalOutput = additionalOutput;
}
@ProcessElement
public void process(ProcessContext c, OffsetRangeTracker tracker) {
checkState(tracker.tryClaim(tracker.currentRestriction().getFrom()));
String side = c.sideInput(sideInput);
- c.output("main:" + side + ":" + c.element());
- c.output(additionalOutput, "additional:" + side + ":" + c.element());
+ c.output(side + ":" + c.element());
}
@GetInitialRestriction
@@ -242,27 +239,55 @@ public class SplittableDoFnTest {
@Test
@Category({ValidatesRunner.class, UsesSplittableParDo.class})
- public void testSideInputsAndOutputs() throws Exception {
-
+ public void testSideInput() throws Exception {
PCollectionView<String> sideInput =
p.apply("side input", Create.of("foo")).apply(View.<String>asSingleton());
- TupleTag<String> mainOutputTag = new TupleTag<>("main");
- TupleTag<String> additionalOutputTag = new TupleTag<>("additional");
+
+ PCollection<String> res =
+ p.apply("input", Create.of(0, 1, 2))
+ .apply(ParDo.of(new SDFWithSideInput(sideInput)).withSideInputs(sideInput));
+
+ PAssert.that(res).containsInAnyOrder(Arrays.asList("foo:0", "foo:1", "foo:2"));
+
+ p.run();
+ }
+
+ private static class SDFWithAdditionalOutput extends DoFn<Integer, String> {
+ private final TupleTag<String> additionalOutput;
+
+ private SDFWithAdditionalOutput(TupleTag<String> additionalOutput) {
+ this.additionalOutput = additionalOutput;
+ }
+
+ @ProcessElement
+ public void process(ProcessContext c, OffsetRangeTracker tracker) {
+ checkState(tracker.tryClaim(tracker.currentRestriction().getFrom()));
+ c.output("main:" + c.element());
+ c.output(additionalOutput, "additional:" + c.element());
+ }
+
+ @GetInitialRestriction
+ public OffsetRange getInitialRestriction(Integer value) {
+ return new OffsetRange(0, 1);
+ }
+ }
+
+ @Test
+ @Category({ValidatesRunner.class, UsesSplittableParDo.class})
+ public void testAdditionalOutput() throws Exception {
+ TupleTag<String> mainOutputTag = new TupleTag<String>("main") {};
+ TupleTag<String> additionalOutputTag = new TupleTag<String>("additional") {};
PCollectionTuple res =
p.apply("input", Create.of(0, 1, 2))
.apply(
- ParDo.of(new SDFWithSideInputsAndOutputs(sideInput, additionalOutputTag))
- .withSideInputs(sideInput)
+ ParDo.of(new SDFWithAdditionalOutput(additionalOutputTag))
.withOutputTags(mainOutputTag, TupleTagList.of(additionalOutputTag)));
- res.get(mainOutputTag).setCoder(StringUtf8Coder.of());
- res.get(additionalOutputTag).setCoder(StringUtf8Coder.of());
PAssert.that(res.get(mainOutputTag))
- .containsInAnyOrder(Arrays.asList("main:foo:0", "main:foo:1", "main:foo:2"));
+ .containsInAnyOrder(Arrays.asList("main:0", "main:1", "main:2"));
PAssert.that(res.get(additionalOutputTag))
- .containsInAnyOrder(
- Arrays.asList("additional:foo:0", "additional:foo:1", "additional:foo:2"));
+ .containsInAnyOrder(Arrays.asList("additional:0", "additional:1", "additional:2"));
p.run();
}
[26/50] [abbrv] beam git commit: [BEAM-1994] Remove Flink examples
package
Posted by dh...@apache.org.
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/state/FlinkSplitStateInternals.java
----------------------------------------------------------------------
diff --git a/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/state/FlinkSplitStateInternals.java b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/state/FlinkSplitStateInternals.java
new file mode 100644
index 0000000..2bf0bf1
--- /dev/null
+++ b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/state/FlinkSplitStateInternals.java
@@ -0,0 +1,260 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.runners.flink.translation.wrappers.streaming.state;
+
+import com.google.common.collect.Iterators;
+import java.util.Collections;
+import org.apache.beam.runners.core.StateInternals;
+import org.apache.beam.runners.core.StateNamespace;
+import org.apache.beam.runners.core.StateTag;
+import org.apache.beam.runners.flink.translation.types.CoderTypeInformation;
+import org.apache.beam.sdk.coders.Coder;
+import org.apache.beam.sdk.transforms.Combine;
+import org.apache.beam.sdk.transforms.CombineWithContext;
+import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
+import org.apache.beam.sdk.transforms.windowing.OutputTimeFn;
+import org.apache.beam.sdk.util.state.BagState;
+import org.apache.beam.sdk.util.state.CombiningState;
+import org.apache.beam.sdk.util.state.MapState;
+import org.apache.beam.sdk.util.state.ReadableState;
+import org.apache.beam.sdk.util.state.SetState;
+import org.apache.beam.sdk.util.state.State;
+import org.apache.beam.sdk.util.state.StateContext;
+import org.apache.beam.sdk.util.state.StateContexts;
+import org.apache.beam.sdk.util.state.ValueState;
+import org.apache.beam.sdk.util.state.WatermarkHoldState;
+import org.apache.flink.api.common.ExecutionConfig;
+import org.apache.flink.api.common.state.ListStateDescriptor;
+import org.apache.flink.runtime.state.OperatorStateBackend;
+
+/**
+ * {@link StateInternals} that uses a Flink {@link OperatorStateBackend}
+ * to manage the split-distribute state.
+ *
+ * <p>Elements in ListState will be redistributed in round-robin fashion
+ * across operators when restarting with a different parallelism.
+ *
+ * <p>Note:
+ * the key and namespace are ignored;
+ * only {@link BagState} is implemented.
+ */
+public class FlinkSplitStateInternals<K> implements StateInternals<K> {
+
+ private final OperatorStateBackend stateBackend;
+
+ public FlinkSplitStateInternals(OperatorStateBackend stateBackend) {
+ this.stateBackend = stateBackend;
+ }
+
+ @Override
+ public K getKey() {
+ return null;
+ }
+
+ @Override
+ public <T extends State> T state(
+ final StateNamespace namespace,
+ StateTag<? super K, T> address) {
+
+ return state(namespace, address, StateContexts.nullContext());
+ }
+
+ @Override
+ public <T extends State> T state(
+ final StateNamespace namespace,
+ StateTag<? super K, T> address,
+ final StateContext<?> context) {
+
+ return address.bind(new StateTag.StateBinder<K>() {
+
+ @Override
+ public <T> ValueState<T> bindValue(
+ StateTag<? super K, ValueState<T>> address,
+ Coder<T> coder) {
+ throw new UnsupportedOperationException(
+ String.format("%s is not supported", ValueState.class.getSimpleName()));
+ }
+
+ @Override
+ public <T> BagState<T> bindBag(
+ StateTag<? super K, BagState<T>> address,
+ Coder<T> elemCoder) {
+
+ return new FlinkSplitBagState<>(stateBackend, address, namespace, elemCoder);
+ }
+
+ @Override
+ public <T> SetState<T> bindSet(
+ StateTag<? super K, SetState<T>> address,
+ Coder<T> elemCoder) {
+ throw new UnsupportedOperationException(
+ String.format("%s is not supported", SetState.class.getSimpleName()));
+ }
+
+ @Override
+ public <KeyT, ValueT> MapState<KeyT, ValueT> bindMap(
+ StateTag<? super K, MapState<KeyT, ValueT>> spec,
+ Coder<KeyT> mapKeyCoder, Coder<ValueT> mapValueCoder) {
+ throw new UnsupportedOperationException(
+ String.format("%s is not supported", MapState.class.getSimpleName()));
+ }
+
+ @Override
+ public <InputT, AccumT, OutputT>
+ CombiningState<InputT, AccumT, OutputT>
+ bindCombiningValue(
+ StateTag<? super K, CombiningState<InputT, AccumT, OutputT>> address,
+ Coder<AccumT> accumCoder,
+ Combine.CombineFn<InputT, AccumT, OutputT> combineFn) {
+ throw new UnsupportedOperationException("bindCombiningValue is not supported.");
+ }
+
+ @Override
+ public <InputT, AccumT, OutputT>
+ CombiningState<InputT, AccumT, OutputT> bindKeyedCombiningValue(
+ StateTag<? super K, CombiningState<InputT, AccumT, OutputT>> address,
+ Coder<AccumT> accumCoder,
+ final Combine.KeyedCombineFn<? super K, InputT, AccumT, OutputT> combineFn) {
+ throw new UnsupportedOperationException("bindKeyedCombiningValue is not supported.");
+
+ }
+
+ @Override
+ public <InputT, AccumT, OutputT>
+ CombiningState<InputT, AccumT, OutputT> bindKeyedCombiningValueWithContext(
+ StateTag<? super K, CombiningState<InputT, AccumT, OutputT>> address,
+ Coder<AccumT> accumCoder,
+ CombineWithContext.KeyedCombineFnWithContext<
+ ? super K, InputT, AccumT, OutputT> combineFn) {
+ throw new UnsupportedOperationException(
+ "bindKeyedCombiningValueWithContext is not supported.");
+ }
+
+ @Override
+ public <W extends BoundedWindow> WatermarkHoldState<W> bindWatermark(
+ StateTag<? super K, WatermarkHoldState<W>> address,
+ OutputTimeFn<? super W> outputTimeFn) {
+ throw new UnsupportedOperationException(
+ String.format("%s is not supported", CombiningState.class.getSimpleName()));
+ }
+ });
+ }
+
+ private static class FlinkSplitBagState<K, T> implements BagState<T> {
+
+ private final ListStateDescriptor<T> descriptor;
+ private OperatorStateBackend flinkStateBackend;
+ private final StateNamespace namespace;
+ private final StateTag<? super K, BagState<T>> address;
+
+ FlinkSplitBagState(
+ OperatorStateBackend flinkStateBackend,
+ StateTag<? super K, BagState<T>> address,
+ StateNamespace namespace,
+ Coder<T> coder) {
+ this.flinkStateBackend = flinkStateBackend;
+ this.namespace = namespace;
+ this.address = address;
+
+ CoderTypeInformation<T> typeInfo =
+ new CoderTypeInformation<>(coder);
+
+ descriptor = new ListStateDescriptor<>(address.getId(),
+ typeInfo.createSerializer(new ExecutionConfig()));
+ }
+
+ @Override
+ public void add(T input) {
+ try {
+ flinkStateBackend.getOperatorState(descriptor).add(input);
+ } catch (Exception e) {
+ throw new RuntimeException("Error updating state.", e);
+ }
+ }
+
+ @Override
+ public BagState<T> readLater() {
+ return this;
+ }
+
+ @Override
+ public Iterable<T> read() {
+ try {
+ Iterable<T> result = flinkStateBackend.getOperatorState(descriptor).get();
+ return result != null ? result : Collections.<T>emptyList();
+ } catch (Exception e) {
+ throw new RuntimeException("Error updating state.", e);
+ }
+ }
+
+ @Override
+ public ReadableState<Boolean> isEmpty() {
+ return new ReadableState<Boolean>() {
+ @Override
+ public Boolean read() {
+ try {
+ Iterable<T> result = flinkStateBackend.getOperatorState(descriptor).get();
+ // PartitionableListState.get() returns an empty collection when there is no element,
+ // whereas KeyedListState returns null in that case.
+ return result == null || Iterators.size(result.iterator()) == 0;
+ } catch (Exception e) {
+ throw new RuntimeException("Error reading state.", e);
+ }
+
+ }
+
+ @Override
+ public ReadableState<Boolean> readLater() {
+ return this;
+ }
+ };
+ }
+
+ @Override
+ public void clear() {
+ try {
+ flinkStateBackend.getOperatorState(descriptor).clear();
+ } catch (Exception e) {
+ throw new RuntimeException("Error reading state.", e);
+ }
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) {
+ return true;
+ }
+ if (o == null || getClass() != o.getClass()) {
+ return false;
+ }
+
+ FlinkSplitBagState<?, ?> that = (FlinkSplitBagState<?, ?>) o;
+
+ return namespace.equals(that.namespace) && address.equals(that.address);
+
+ }
+
+ @Override
+ public int hashCode() {
+ int result = namespace.hashCode();
+ result = 31 * result + address.hashCode();
+ return result;
+ }
+ }
+
+}
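The javadoc above notes that these internals ignore key and namespace and expose only BagState, backed by Flink operator ListState. Below is a minimal usage sketch, not from the commit, assuming the runners-core StateTags and StateNamespaces helpers available at this point in the codebase and an OperatorStateBackend supplied by the enclosing Flink operator.
import org.apache.beam.runners.core.StateInternals;
import org.apache.beam.runners.core.StateNamespaces;
import org.apache.beam.runners.core.StateTag;
import org.apache.beam.runners.core.StateTags;
import org.apache.beam.runners.flink.translation.wrappers.streaming.state.FlinkSplitStateInternals;
import org.apache.beam.sdk.coders.StringUtf8Coder;
import org.apache.beam.sdk.util.state.BagState;
import org.apache.flink.runtime.state.OperatorStateBackend;
class SplitStateSketch {
  // The backend is assumed to be provided by the enclosing Flink operator at runtime.
  static Iterable<String> bufferAndRead(OperatorStateBackend backend, String element) {
    StateInternals<String> internals = new FlinkSplitStateInternals<>(backend);
    StateTag<Object, BagState<String>> bufferTag = StateTags.bag("buffer", StringUtf8Coder.of());
    BagState<String> buffer = internals.state(StateNamespaces.global(), bufferTag);
    buffer.add(element);   // stored in operator ListState, redistributed on rescaling
    return buffer.read();
  }
}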
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/state/FlinkStateInternals.java
----------------------------------------------------------------------
diff --git a/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/state/FlinkStateInternals.java b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/state/FlinkStateInternals.java
new file mode 100644
index 0000000..4f961e5
--- /dev/null
+++ b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/state/FlinkStateInternals.java
@@ -0,0 +1,1053 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.runners.flink.translation.wrappers.streaming.state;
+
+import com.google.common.collect.Lists;
+import java.nio.ByteBuffer;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+import org.apache.beam.runners.core.StateInternals;
+import org.apache.beam.runners.core.StateNamespace;
+import org.apache.beam.runners.core.StateTag;
+import org.apache.beam.runners.flink.translation.types.CoderTypeInformation;
+import org.apache.beam.sdk.coders.Coder;
+import org.apache.beam.sdk.coders.CoderException;
+import org.apache.beam.sdk.coders.InstantCoder;
+import org.apache.beam.sdk.transforms.Combine;
+import org.apache.beam.sdk.transforms.CombineWithContext;
+import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
+import org.apache.beam.sdk.transforms.windowing.OutputTimeFn;
+import org.apache.beam.sdk.util.CoderUtils;
+import org.apache.beam.sdk.util.CombineContextFactory;
+import org.apache.beam.sdk.util.state.BagState;
+import org.apache.beam.sdk.util.state.CombiningState;
+import org.apache.beam.sdk.util.state.MapState;
+import org.apache.beam.sdk.util.state.ReadableState;
+import org.apache.beam.sdk.util.state.SetState;
+import org.apache.beam.sdk.util.state.State;
+import org.apache.beam.sdk.util.state.StateContext;
+import org.apache.beam.sdk.util.state.StateContexts;
+import org.apache.beam.sdk.util.state.ValueState;
+import org.apache.beam.sdk.util.state.WatermarkHoldState;
+import org.apache.flink.api.common.state.ListStateDescriptor;
+import org.apache.flink.api.common.state.ValueStateDescriptor;
+import org.apache.flink.api.common.typeutils.base.StringSerializer;
+import org.apache.flink.runtime.state.KeyedStateBackend;
+import org.joda.time.Instant;
+
+/**
+ * {@link StateInternals} that uses a Flink {@link KeyedStateBackend} to manage state.
+ *
+ * <p>Note: In the Flink streaming runner the key is always encoded
+ * using an {@link Coder} and stored in a {@link ByteBuffer}.
+ */
+public class FlinkStateInternals<K> implements StateInternals<K> {
+
+ private final KeyedStateBackend<ByteBuffer> flinkStateBackend;
+ private Coder<K> keyCoder;
+
+ // on recovery, these will not be properly set because we don't
+ // know which watermark hold states there are in the Flink State Backend
+ private final Map<String, Instant> watermarkHolds = new HashMap<>();
+
+ public FlinkStateInternals(KeyedStateBackend<ByteBuffer> flinkStateBackend, Coder<K> keyCoder) {
+ this.flinkStateBackend = flinkStateBackend;
+ this.keyCoder = keyCoder;
+ }
+
+ /**
+ * Returns the minimum over all watermark holds.
+ */
+ public Instant watermarkHold() {
+ long min = Long.MAX_VALUE;
+ for (Instant hold: watermarkHolds.values()) {
+ min = Math.min(min, hold.getMillis());
+ }
+ return new Instant(min);
+ }
+
+ @Override
+ public K getKey() {
+ ByteBuffer keyBytes = flinkStateBackend.getCurrentKey();
+ try {
+ return CoderUtils.decodeFromByteArray(keyCoder, keyBytes.array());
+ } catch (CoderException e) {
+ throw new RuntimeException("Error decoding key.", e);
+ }
+ }
+
+ @Override
+ public <T extends State> T state(
+ final StateNamespace namespace,
+ StateTag<? super K, T> address) {
+
+ return state(namespace, address, StateContexts.nullContext());
+ }
+
+ @Override
+ public <T extends State> T state(
+ final StateNamespace namespace,
+ StateTag<? super K, T> address,
+ final StateContext<?> context) {
+
+ return address.bind(new StateTag.StateBinder<K>() {
+
+ @Override
+ public <T> ValueState<T> bindValue(
+ StateTag<? super K, ValueState<T>> address,
+ Coder<T> coder) {
+
+ return new FlinkValueState<>(flinkStateBackend, address, namespace, coder);
+ }
+
+ @Override
+ public <T> BagState<T> bindBag(
+ StateTag<? super K, BagState<T>> address,
+ Coder<T> elemCoder) {
+
+ return new FlinkBagState<>(flinkStateBackend, address, namespace, elemCoder);
+ }
+
+ @Override
+ public <T> SetState<T> bindSet(
+ StateTag<? super K, SetState<T>> address,
+ Coder<T> elemCoder) {
+ throw new UnsupportedOperationException(
+ String.format("%s is not supported", SetState.class.getSimpleName()));
+ }
+
+ @Override
+ public <KeyT, ValueT> MapState<KeyT, ValueT> bindMap(
+ StateTag<? super K, MapState<KeyT, ValueT>> spec,
+ Coder<KeyT> mapKeyCoder, Coder<ValueT> mapValueCoder) {
+ throw new UnsupportedOperationException(
+ String.format("%s is not supported", MapState.class.getSimpleName()));
+ }
+
+ @Override
+ public <InputT, AccumT, OutputT>
+ CombiningState<InputT, AccumT, OutputT>
+ bindCombiningValue(
+ StateTag<? super K, CombiningState<InputT, AccumT, OutputT>> address,
+ Coder<AccumT> accumCoder,
+ Combine.CombineFn<InputT, AccumT, OutputT> combineFn) {
+
+ return new FlinkCombiningState<>(
+ flinkStateBackend, address, combineFn, namespace, accumCoder);
+ }
+
+ @Override
+ public <InputT, AccumT, OutputT>
+ CombiningState<InputT, AccumT, OutputT> bindKeyedCombiningValue(
+ StateTag<? super K, CombiningState<InputT, AccumT, OutputT>> address,
+ Coder<AccumT> accumCoder,
+ final Combine.KeyedCombineFn<? super K, InputT, AccumT, OutputT> combineFn) {
+ return new FlinkKeyedCombiningState<>(
+ flinkStateBackend,
+ address,
+ combineFn,
+ namespace,
+ accumCoder,
+ FlinkStateInternals.this);
+ }
+
+ @Override
+ public <InputT, AccumT, OutputT>
+ CombiningState<InputT, AccumT, OutputT> bindKeyedCombiningValueWithContext(
+ StateTag<? super K, CombiningState<InputT, AccumT, OutputT>> address,
+ Coder<AccumT> accumCoder,
+ CombineWithContext.KeyedCombineFnWithContext<
+ ? super K, InputT, AccumT, OutputT> combineFn) {
+ return new FlinkCombiningStateWithContext<>(
+ flinkStateBackend,
+ address,
+ combineFn,
+ namespace,
+ accumCoder,
+ FlinkStateInternals.this,
+ CombineContextFactory.createFromStateContext(context));
+ }
+
+ @Override
+ public <W extends BoundedWindow> WatermarkHoldState<W> bindWatermark(
+ StateTag<? super K, WatermarkHoldState<W>> address,
+ OutputTimeFn<? super W> outputTimeFn) {
+
+ return new FlinkWatermarkHoldState<>(
+ flinkStateBackend, FlinkStateInternals.this, address, namespace, outputTimeFn);
+ }
+ });
+ }
+
+ private static class FlinkValueState<K, T> implements ValueState<T> {
+
+ private final StateNamespace namespace;
+ private final StateTag<? super K, ValueState<T>> address;
+ private final ValueStateDescriptor<T> flinkStateDescriptor;
+ private final KeyedStateBackend<ByteBuffer> flinkStateBackend;
+
+ FlinkValueState(
+ KeyedStateBackend<ByteBuffer> flinkStateBackend,
+ StateTag<? super K, ValueState<T>> address,
+ StateNamespace namespace,
+ Coder<T> coder) {
+
+ this.namespace = namespace;
+ this.address = address;
+ this.flinkStateBackend = flinkStateBackend;
+
+ CoderTypeInformation<T> typeInfo = new CoderTypeInformation<>(coder);
+
+ flinkStateDescriptor = new ValueStateDescriptor<>(address.getId(), typeInfo, null);
+ }
+
+ @Override
+ public void write(T input) {
+ try {
+ flinkStateBackend.getPartitionedState(
+ namespace.stringKey(),
+ StringSerializer.INSTANCE,
+ flinkStateDescriptor).update(input);
+ } catch (Exception e) {
+ throw new RuntimeException("Error updating state.", e);
+ }
+ }
+
+ @Override
+ public ValueState<T> readLater() {
+ return this;
+ }
+
+ @Override
+ public T read() {
+ try {
+ return flinkStateBackend.getPartitionedState(
+ namespace.stringKey(),
+ StringSerializer.INSTANCE,
+ flinkStateDescriptor).value();
+ } catch (Exception e) {
+ throw new RuntimeException("Error reading state.", e);
+ }
+ }
+
+ @Override
+ public void clear() {
+ try {
+ flinkStateBackend.getPartitionedState(
+ namespace.stringKey(),
+ StringSerializer.INSTANCE,
+ flinkStateDescriptor).clear();
+ } catch (Exception e) {
+ throw new RuntimeException("Error clearing state.", e);
+ }
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) {
+ return true;
+ }
+ if (o == null || getClass() != o.getClass()) {
+ return false;
+ }
+
+ FlinkValueState<?, ?> that = (FlinkValueState<?, ?>) o;
+
+ return namespace.equals(that.namespace) && address.equals(that.address);
+
+ }
+
+ @Override
+ public int hashCode() {
+ int result = namespace.hashCode();
+ result = 31 * result + address.hashCode();
+ return result;
+ }
+ }
+
+ private static class FlinkBagState<K, T> implements BagState<T> {
+
+ private final StateNamespace namespace;
+ private final StateTag<? super K, BagState<T>> address;
+ private final ListStateDescriptor<T> flinkStateDescriptor;
+ private final KeyedStateBackend<ByteBuffer> flinkStateBackend;
+
+ FlinkBagState(
+ KeyedStateBackend<ByteBuffer> flinkStateBackend,
+ StateTag<? super K, BagState<T>> address,
+ StateNamespace namespace,
+ Coder<T> coder) {
+
+ this.namespace = namespace;
+ this.address = address;
+ this.flinkStateBackend = flinkStateBackend;
+
+ CoderTypeInformation<T> typeInfo = new CoderTypeInformation<>(coder);
+
+ flinkStateDescriptor = new ListStateDescriptor<>(address.getId(), typeInfo);
+ }
+
+ @Override
+ public void add(T input) {
+ try {
+ flinkStateBackend.getPartitionedState(
+ namespace.stringKey(),
+ StringSerializer.INSTANCE,
+ flinkStateDescriptor).add(input);
+ } catch (Exception e) {
+ throw new RuntimeException("Error adding to bag state.", e);
+ }
+ }
+
+ @Override
+ public BagState<T> readLater() {
+ return this;
+ }
+
+ @Override
+ public Iterable<T> read() {
+ try {
+ Iterable<T> result = flinkStateBackend.getPartitionedState(
+ namespace.stringKey(),
+ StringSerializer.INSTANCE,
+ flinkStateDescriptor).get();
+
+ return result != null ? result : Collections.<T>emptyList();
+ } catch (Exception e) {
+ throw new RuntimeException("Error reading state.", e);
+ }
+ }
+
+ @Override
+ public ReadableState<Boolean> isEmpty() {
+ return new ReadableState<Boolean>() {
+ @Override
+ public Boolean read() {
+ try {
+ Iterable<T> result = flinkStateBackend.getPartitionedState(
+ namespace.stringKey(),
+ StringSerializer.INSTANCE,
+ flinkStateDescriptor).get();
+ return result == null;
+ } catch (Exception e) {
+ throw new RuntimeException("Error reading state.", e);
+ }
+
+ }
+
+ @Override
+ public ReadableState<Boolean> readLater() {
+ return this;
+ }
+ };
+ }
+
+ @Override
+ public void clear() {
+ try {
+ flinkStateBackend.getPartitionedState(
+ namespace.stringKey(),
+ StringSerializer.INSTANCE,
+ flinkStateDescriptor).clear();
+ } catch (Exception e) {
+ throw new RuntimeException("Error clearing state.", e);
+ }
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) {
+ return true;
+ }
+ if (o == null || getClass() != o.getClass()) {
+ return false;
+ }
+
+ FlinkBagState<?, ?> that = (FlinkBagState<?, ?>) o;
+
+ return namespace.equals(that.namespace) && address.equals(that.address);
+
+ }
+
+ @Override
+ public int hashCode() {
+ int result = namespace.hashCode();
+ result = 31 * result + address.hashCode();
+ return result;
+ }
+ }
+
+ private static class FlinkCombiningState<K, InputT, AccumT, OutputT>
+ implements CombiningState<InputT, AccumT, OutputT> {
+
+ private final StateNamespace namespace;
+ private final StateTag<? super K, CombiningState<InputT, AccumT, OutputT>> address;
+ private final Combine.CombineFn<InputT, AccumT, OutputT> combineFn;
+ private final ValueStateDescriptor<AccumT> flinkStateDescriptor;
+ private final KeyedStateBackend<ByteBuffer> flinkStateBackend;
+
+ FlinkCombiningState(
+ KeyedStateBackend<ByteBuffer> flinkStateBackend,
+ StateTag<? super K, CombiningState<InputT, AccumT, OutputT>> address,
+ Combine.CombineFn<InputT, AccumT, OutputT> combineFn,
+ StateNamespace namespace,
+ Coder<AccumT> accumCoder) {
+
+ this.namespace = namespace;
+ this.address = address;
+ this.combineFn = combineFn;
+ this.flinkStateBackend = flinkStateBackend;
+
+ CoderTypeInformation<AccumT> typeInfo = new CoderTypeInformation<>(accumCoder);
+
+ flinkStateDescriptor = new ValueStateDescriptor<>(address.getId(), typeInfo, null);
+ }
+
+ @Override
+ public CombiningState<InputT, AccumT, OutputT> readLater() {
+ return this;
+ }
+
+ @Override
+ public void add(InputT value) {
+ try {
+ org.apache.flink.api.common.state.ValueState<AccumT> state =
+ flinkStateBackend.getPartitionedState(
+ namespace.stringKey(),
+ StringSerializer.INSTANCE,
+ flinkStateDescriptor);
+
+ AccumT current = state.value();
+ if (current == null) {
+ current = combineFn.createAccumulator();
+ }
+ current = combineFn.addInput(current, value);
+ state.update(current);
+ } catch (Exception e) {
+ throw new RuntimeException("Error adding to state." , e);
+ }
+ }
+
+ @Override
+ public void addAccum(AccumT accum) {
+ try {
+ org.apache.flink.api.common.state.ValueState<AccumT> state =
+ flinkStateBackend.getPartitionedState(
+ namespace.stringKey(),
+ StringSerializer.INSTANCE,
+ flinkStateDescriptor);
+
+ AccumT current = state.value();
+ if (current == null) {
+ state.update(accum);
+ } else {
+ current = combineFn.mergeAccumulators(Lists.newArrayList(current, accum));
+ state.update(current);
+ }
+ } catch (Exception e) {
+ throw new RuntimeException("Error adding to state.", e);
+ }
+ }
+
+ @Override
+ public AccumT getAccum() {
+ try {
+ return flinkStateBackend.getPartitionedState(
+ namespace.stringKey(),
+ StringSerializer.INSTANCE,
+ flinkStateDescriptor).value();
+ } catch (Exception e) {
+ throw new RuntimeException("Error reading state.", e);
+ }
+ }
+
+ @Override
+ public AccumT mergeAccumulators(Iterable<AccumT> accumulators) {
+ return combineFn.mergeAccumulators(accumulators);
+ }
+
+ @Override
+ public OutputT read() {
+ try {
+ org.apache.flink.api.common.state.ValueState<AccumT> state =
+ flinkStateBackend.getPartitionedState(
+ namespace.stringKey(),
+ StringSerializer.INSTANCE,
+ flinkStateDescriptor);
+
+ AccumT accum = state.value();
+ if (accum != null) {
+ return combineFn.extractOutput(accum);
+ } else {
+ return combineFn.extractOutput(combineFn.createAccumulator());
+ }
+ } catch (Exception e) {
+ throw new RuntimeException("Error reading state.", e);
+ }
+ }
+
+ @Override
+ public ReadableState<Boolean> isEmpty() {
+ return new ReadableState<Boolean>() {
+ @Override
+ public Boolean read() {
+ try {
+ return flinkStateBackend.getPartitionedState(
+ namespace.stringKey(),
+ StringSerializer.INSTANCE,
+ flinkStateDescriptor).value() == null;
+ } catch (Exception e) {
+ throw new RuntimeException("Error reading state.", e);
+ }
+
+ }
+
+ @Override
+ public ReadableState<Boolean> readLater() {
+ return this;
+ }
+ };
+ }
+
+ @Override
+ public void clear() {
+ try {
+ flinkStateBackend.getPartitionedState(
+ namespace.stringKey(),
+ StringSerializer.INSTANCE,
+ flinkStateDescriptor).clear();
+ } catch (Exception e) {
+ throw new RuntimeException("Error clearing state.", e);
+ }
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) {
+ return true;
+ }
+ if (o == null || getClass() != o.getClass()) {
+ return false;
+ }
+
+ FlinkCombiningState<?, ?, ?, ?> that =
+ (FlinkCombiningState<?, ?, ?, ?>) o;
+
+ return namespace.equals(that.namespace) && address.equals(that.address);
+
+ }
+
+ @Override
+ public int hashCode() {
+ int result = namespace.hashCode();
+ result = 31 * result + address.hashCode();
+ return result;
+ }
+ }
+
+ private static class FlinkKeyedCombiningState<K, InputT, AccumT, OutputT>
+ implements CombiningState<InputT, AccumT, OutputT> {
+
+ private final StateNamespace namespace;
+ private final StateTag<? super K, CombiningState<InputT, AccumT, OutputT>> address;
+ private final Combine.KeyedCombineFn<? super K, InputT, AccumT, OutputT> combineFn;
+ private final ValueStateDescriptor<AccumT> flinkStateDescriptor;
+ private final KeyedStateBackend<ByteBuffer> flinkStateBackend;
+ private final FlinkStateInternals<K> flinkStateInternals;
+
+ FlinkKeyedCombiningState(
+ KeyedStateBackend<ByteBuffer> flinkStateBackend,
+ StateTag<? super K, CombiningState<InputT, AccumT, OutputT>> address,
+ Combine.KeyedCombineFn<? super K, InputT, AccumT, OutputT> combineFn,
+ StateNamespace namespace,
+ Coder<AccumT> accumCoder,
+ FlinkStateInternals<K> flinkStateInternals) {
+
+ this.namespace = namespace;
+ this.address = address;
+ this.combineFn = combineFn;
+ this.flinkStateBackend = flinkStateBackend;
+ this.flinkStateInternals = flinkStateInternals;
+
+ CoderTypeInformation<AccumT> typeInfo = new CoderTypeInformation<>(accumCoder);
+
+ flinkStateDescriptor = new ValueStateDescriptor<>(address.getId(), typeInfo, null);
+ }
+
+ @Override
+ public CombiningState<InputT, AccumT, OutputT> readLater() {
+ return this;
+ }
+
+ @Override
+ public void add(InputT value) {
+ try {
+ org.apache.flink.api.common.state.ValueState<AccumT> state =
+ flinkStateBackend.getPartitionedState(
+ namespace.stringKey(),
+ StringSerializer.INSTANCE,
+ flinkStateDescriptor);
+
+ AccumT current = state.value();
+ if (current == null) {
+ current = combineFn.createAccumulator(flinkStateInternals.getKey());
+ }
+ current = combineFn.addInput(flinkStateInternals.getKey(), current, value);
+ state.update(current);
+ } catch (Exception e) {
+ throw new RuntimeException("Error adding to state." , e);
+ }
+ }
+
+ @Override
+ public void addAccum(AccumT accum) {
+ try {
+ org.apache.flink.api.common.state.ValueState<AccumT> state =
+ flinkStateBackend.getPartitionedState(
+ namespace.stringKey(),
+ StringSerializer.INSTANCE,
+ flinkStateDescriptor);
+
+ AccumT current = state.value();
+ if (current == null) {
+ state.update(accum);
+ } else {
+ current = combineFn.mergeAccumulators(
+ flinkStateInternals.getKey(),
+ Lists.newArrayList(current, accum));
+ state.update(current);
+ }
+ } catch (Exception e) {
+ throw new RuntimeException("Error adding to state.", e);
+ }
+ }
+
+ @Override
+ public AccumT getAccum() {
+ try {
+ return flinkStateBackend.getPartitionedState(
+ namespace.stringKey(),
+ StringSerializer.INSTANCE,
+ flinkStateDescriptor).value();
+ } catch (Exception e) {
+ throw new RuntimeException("Error reading state.", e);
+ }
+ }
+
+ @Override
+ public AccumT mergeAccumulators(Iterable<AccumT> accumulators) {
+ return combineFn.mergeAccumulators(flinkStateInternals.getKey(), accumulators);
+ }
+
+ @Override
+ public OutputT read() {
+ try {
+ org.apache.flink.api.common.state.ValueState<AccumT> state =
+ flinkStateBackend.getPartitionedState(
+ namespace.stringKey(),
+ StringSerializer.INSTANCE,
+ flinkStateDescriptor);
+
+ AccumT accum = state.value();
+ if (accum != null) {
+ return combineFn.extractOutput(flinkStateInternals.getKey(), accum);
+ } else {
+ return combineFn.extractOutput(
+ flinkStateInternals.getKey(),
+ combineFn.createAccumulator(flinkStateInternals.getKey()));
+ }
+ } catch (Exception e) {
+ throw new RuntimeException("Error reading state.", e);
+ }
+ }
+
+ @Override
+ public ReadableState<Boolean> isEmpty() {
+ return new ReadableState<Boolean>() {
+ @Override
+ public Boolean read() {
+ try {
+ return flinkStateBackend.getPartitionedState(
+ namespace.stringKey(),
+ StringSerializer.INSTANCE,
+ flinkStateDescriptor).value() == null;
+ } catch (Exception e) {
+ throw new RuntimeException("Error reading state.", e);
+ }
+
+ }
+
+ @Override
+ public ReadableState<Boolean> readLater() {
+ return this;
+ }
+ };
+ }
+
+ @Override
+ public void clear() {
+ try {
+ flinkStateBackend.getPartitionedState(
+ namespace.stringKey(),
+ StringSerializer.INSTANCE,
+ flinkStateDescriptor).clear();
+ } catch (Exception e) {
+ throw new RuntimeException("Error clearing state.", e);
+ }
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) {
+ return true;
+ }
+ if (o == null || getClass() != o.getClass()) {
+ return false;
+ }
+
+ FlinkKeyedCombiningState<?, ?, ?, ?> that =
+ (FlinkKeyedCombiningState<?, ?, ?, ?>) o;
+
+ return namespace.equals(that.namespace) && address.equals(that.address);
+
+ }
+
+ @Override
+ public int hashCode() {
+ int result = namespace.hashCode();
+ result = 31 * result + address.hashCode();
+ return result;
+ }
+ }
+
+ private static class FlinkCombiningStateWithContext<K, InputT, AccumT, OutputT>
+ implements CombiningState<InputT, AccumT, OutputT> {
+
+ private final StateNamespace namespace;
+ private final StateTag<? super K, CombiningState<InputT, AccumT, OutputT>> address;
+ private final CombineWithContext.KeyedCombineFnWithContext<
+ ? super K, InputT, AccumT, OutputT> combineFn;
+ private final ValueStateDescriptor<AccumT> flinkStateDescriptor;
+ private final KeyedStateBackend<ByteBuffer> flinkStateBackend;
+ private final FlinkStateInternals<K> flinkStateInternals;
+ private final CombineWithContext.Context context;
+
+ FlinkCombiningStateWithContext(
+ KeyedStateBackend<ByteBuffer> flinkStateBackend,
+ StateTag<? super K, CombiningState<InputT, AccumT, OutputT>> address,
+ CombineWithContext.KeyedCombineFnWithContext<
+ ? super K, InputT, AccumT, OutputT> combineFn,
+ StateNamespace namespace,
+ Coder<AccumT> accumCoder,
+ FlinkStateInternals<K> flinkStateInternals,
+ CombineWithContext.Context context) {
+
+ this.namespace = namespace;
+ this.address = address;
+ this.combineFn = combineFn;
+ this.flinkStateBackend = flinkStateBackend;
+ this.flinkStateInternals = flinkStateInternals;
+ this.context = context;
+
+ CoderTypeInformation<AccumT> typeInfo = new CoderTypeInformation<>(accumCoder);
+
+ flinkStateDescriptor = new ValueStateDescriptor<>(address.getId(), typeInfo, null);
+ }
+
+ @Override
+ public CombiningState<InputT, AccumT, OutputT> readLater() {
+ return this;
+ }
+
+ @Override
+ public void add(InputT value) {
+ try {
+ org.apache.flink.api.common.state.ValueState<AccumT> state =
+ flinkStateBackend.getPartitionedState(
+ namespace.stringKey(),
+ StringSerializer.INSTANCE,
+ flinkStateDescriptor);
+
+ AccumT current = state.value();
+ if (current == null) {
+ current = combineFn.createAccumulator(flinkStateInternals.getKey(), context);
+ }
+ current = combineFn.addInput(flinkStateInternals.getKey(), current, value, context);
+ state.update(current);
+ } catch (Exception e) {
+ throw new RuntimeException("Error adding to state." , e);
+ }
+ }
+
+ @Override
+ public void addAccum(AccumT accum) {
+ try {
+ org.apache.flink.api.common.state.ValueState<AccumT> state =
+ flinkStateBackend.getPartitionedState(
+ namespace.stringKey(),
+ StringSerializer.INSTANCE,
+ flinkStateDescriptor);
+
+ AccumT current = state.value();
+ if (current == null) {
+ state.update(accum);
+ } else {
+ current = combineFn.mergeAccumulators(
+ flinkStateInternals.getKey(),
+ Lists.newArrayList(current, accum),
+ context);
+ state.update(current);
+ }
+ } catch (Exception e) {
+ throw new RuntimeException("Error adding to state.", e);
+ }
+ }
+
+ @Override
+ public AccumT getAccum() {
+ try {
+ return flinkStateBackend.getPartitionedState(
+ namespace.stringKey(),
+ StringSerializer.INSTANCE,
+ flinkStateDescriptor).value();
+ } catch (Exception e) {
+ throw new RuntimeException("Error reading state.", e);
+ }
+ }
+
+ @Override
+ public AccumT mergeAccumulators(Iterable<AccumT> accumulators) {
+ return combineFn.mergeAccumulators(flinkStateInternals.getKey(), accumulators, context);
+ }
+
+ @Override
+ public OutputT read() {
+ try {
+ org.apache.flink.api.common.state.ValueState<AccumT> state =
+ flinkStateBackend.getPartitionedState(
+ namespace.stringKey(),
+ StringSerializer.INSTANCE,
+ flinkStateDescriptor);
+
+ AccumT accum = state.value();
+ if (accum == null) {
+ accum = combineFn.createAccumulator(flinkStateInternals.getKey(), context);
+ }
+ return combineFn.extractOutput(flinkStateInternals.getKey(), accum, context);
+ } catch (Exception e) {
+ throw new RuntimeException("Error reading state.", e);
+ }
+ }
+
+ @Override
+ public ReadableState<Boolean> isEmpty() {
+ return new ReadableState<Boolean>() {
+ @Override
+ public Boolean read() {
+ try {
+ return flinkStateBackend.getPartitionedState(
+ namespace.stringKey(),
+ StringSerializer.INSTANCE,
+ flinkStateDescriptor).value() == null;
+ } catch (Exception e) {
+ throw new RuntimeException("Error reading state.", e);
+ }
+
+ }
+
+ @Override
+ public ReadableState<Boolean> readLater() {
+ return this;
+ }
+ };
+ }
+
+ @Override
+ public void clear() {
+ try {
+ flinkStateBackend.getPartitionedState(
+ namespace.stringKey(),
+ StringSerializer.INSTANCE,
+ flinkStateDescriptor).clear();
+ } catch (Exception e) {
+ throw new RuntimeException("Error clearing state.", e);
+ }
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) {
+ return true;
+ }
+ if (o == null || getClass() != o.getClass()) {
+ return false;
+ }
+
+ FlinkCombiningStateWithContext<?, ?, ?, ?> that =
+ (FlinkCombiningStateWithContext<?, ?, ?, ?>) o;
+
+ return namespace.equals(that.namespace) && address.equals(that.address);
+
+ }
+
+ @Override
+ public int hashCode() {
+ int result = namespace.hashCode();
+ result = 31 * result + address.hashCode();
+ return result;
+ }
+ }
+
+ private static class FlinkWatermarkHoldState<K, W extends BoundedWindow>
+ implements WatermarkHoldState<W> {
+ private final StateTag<? super K, WatermarkHoldState<W>> address;
+ private final OutputTimeFn<? super W> outputTimeFn;
+ private final StateNamespace namespace;
+ private final KeyedStateBackend<ByteBuffer> flinkStateBackend;
+ private final FlinkStateInternals<K> flinkStateInternals;
+ private final ValueStateDescriptor<Instant> flinkStateDescriptor;
+
+ public FlinkWatermarkHoldState(
+ KeyedStateBackend<ByteBuffer> flinkStateBackend,
+ FlinkStateInternals<K> flinkStateInternals,
+ StateTag<? super K, WatermarkHoldState<W>> address,
+ StateNamespace namespace,
+ OutputTimeFn<? super W> outputTimeFn) {
+ this.address = address;
+ this.outputTimeFn = outputTimeFn;
+ this.namespace = namespace;
+ this.flinkStateBackend = flinkStateBackend;
+ this.flinkStateInternals = flinkStateInternals;
+
+ CoderTypeInformation<Instant> typeInfo = new CoderTypeInformation<>(InstantCoder.of());
+ flinkStateDescriptor = new ValueStateDescriptor<>(address.getId(), typeInfo, null);
+ }
+
+ @Override
+ public OutputTimeFn<? super W> getOutputTimeFn() {
+ return outputTimeFn;
+ }
+
+ @Override
+ public WatermarkHoldState<W> readLater() {
+ return this;
+ }
+
+ @Override
+ public ReadableState<Boolean> isEmpty() {
+ return new ReadableState<Boolean>() {
+ @Override
+ public Boolean read() {
+ try {
+ return flinkStateBackend.getPartitionedState(
+ namespace.stringKey(),
+ StringSerializer.INSTANCE,
+ flinkStateDescriptor).value() == null;
+ } catch (Exception e) {
+ throw new RuntimeException("Error reading state.", e);
+ }
+ }
+
+ @Override
+ public ReadableState<Boolean> readLater() {
+ return this;
+ }
+ };
+
+ }
+
+ @Override
+ public void add(Instant value) {
+ try {
+ org.apache.flink.api.common.state.ValueState<Instant> state =
+ flinkStateBackend.getPartitionedState(
+ namespace.stringKey(),
+ StringSerializer.INSTANCE,
+ flinkStateDescriptor);
+
+ Instant current = state.value();
+ if (current == null) {
+ state.update(value);
+ flinkStateInternals.watermarkHolds.put(namespace.stringKey(), value);
+ } else {
+ Instant combined = outputTimeFn.combine(current, value);
+ state.update(combined);
+ flinkStateInternals.watermarkHolds.put(namespace.stringKey(), combined);
+ }
+ } catch (Exception e) {
+ throw new RuntimeException("Error updating state.", e);
+ }
+ }
+
+ @Override
+ public Instant read() {
+ try {
+ org.apache.flink.api.common.state.ValueState<Instant> state =
+ flinkStateBackend.getPartitionedState(
+ namespace.stringKey(),
+ StringSerializer.INSTANCE,
+ flinkStateDescriptor);
+ return state.value();
+ } catch (Exception e) {
+ throw new RuntimeException("Error reading state.", e);
+ }
+ }
+
+ @Override
+ public void clear() {
+ flinkStateInternals.watermarkHolds.remove(namespace.stringKey());
+ try {
+ org.apache.flink.api.common.state.ValueState<Instant> state =
+ flinkStateBackend.getPartitionedState(
+ namespace.stringKey(),
+ StringSerializer.INSTANCE,
+ flinkStateDescriptor);
+ state.clear();
+ } catch (Exception e) {
+ throw new RuntimeException("Error reading state.", e);
+ }
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) {
+ return true;
+ }
+ if (o == null || getClass() != o.getClass()) {
+ return false;
+ }
+
+ FlinkWatermarkHoldState<?, ?> that = (FlinkWatermarkHoldState<?, ?>) o;
+
+ if (!address.equals(that.address)) {
+ return false;
+ }
+ if (!outputTimeFn.equals(that.outputTimeFn)) {
+ return false;
+ }
+ return namespace.equals(that.namespace);
+
+ }
+
+ @Override
+ public int hashCode() {
+ int result = address.hashCode();
+ result = 31 * result + outputTimeFn.hashCode();
+ result = 31 * result + namespace.hashCode();
+ return result;
+ }
+ }
+}
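As the class comment above notes, the streaming wrappers hand FlinkStateInternals its current key already encoded into a ByteBuffer. The following is a minimal, self-contained sketch of that encode/decode convention; the StringUtf8Coder and the "user-42" key are illustrative assumptions, and the decode step simply mirrors what FlinkStateInternals#getKey() does:

    import java.nio.ByteBuffer;
    import org.apache.beam.sdk.coders.StringUtf8Coder;
    import org.apache.beam.sdk.util.CoderUtils;

    public class KeyEncodingSketch {
      public static void main(String[] args) throws Exception {
        StringUtf8Coder keyCoder = StringUtf8Coder.of();
        // Encode the key into the ByteBuffer form that the Flink keyed state backend is keyed by.
        ByteBuffer encodedKey =
            ByteBuffer.wrap(CoderUtils.encodeToByteArray(keyCoder, "user-42"));
        // Decode it again, mirroring FlinkStateInternals#getKey().
        String decoded = CoderUtils.decodeFromByteArray(keyCoder, encodedKey.array());
        System.out.println(decoded); // prints: user-42
      }
    }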
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/state/KeyGroupCheckpointedOperator.java
----------------------------------------------------------------------
diff --git a/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/state/KeyGroupCheckpointedOperator.java b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/state/KeyGroupCheckpointedOperator.java
new file mode 100644
index 0000000..b38a520
--- /dev/null
+++ b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/state/KeyGroupCheckpointedOperator.java
@@ -0,0 +1,35 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.runners.flink.translation.wrappers.streaming.state;
+
+import java.io.DataOutputStream;
+
+/**
+ * This interface is used to checkpoint per-key-group state.
+ */
+public interface KeyGroupCheckpointedOperator extends KeyGroupRestoringOperator {
+ /**
+ * Snapshots the state for a given {@code keyGroupIndex}.
+ *
+ * <p>{@code AbstractStreamOperator} calls this hook from
+ * {@code AbstractStreamOperator.snapshotState()} while iterating over the key groups.
+ *
+ * @param keyGroupIndex the id of the key group to be put in the snapshot.
+ * @param out the stream to write to.
+ */
+ void snapshotKeyGroupState(int keyGroupIndex, DataOutputStream out) throws Exception;
+}
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/state/KeyGroupRestoringOperator.java
----------------------------------------------------------------------
diff --git a/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/state/KeyGroupRestoringOperator.java b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/state/KeyGroupRestoringOperator.java
new file mode 100644
index 0000000..2bdfc6e
--- /dev/null
+++ b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/state/KeyGroupRestoringOperator.java
@@ -0,0 +1,32 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.runners.flink.translation.wrappers.streaming.state;
+
+import java.io.DataInputStream;
+
+/**
+ * This interface is used to restore key-groups state.
+ */
+public interface KeyGroupRestoringOperator {
+ /**
+ * Restores the state for a given {@code keyGroupIndex}.
+ *
+ * @param keyGroupIndex the id of the key group whose state is being restored.
+ * @param in the stream to read from.
+ */
+ void restoreKeyGroupState(int keyGroupIndex, DataInputStream in) throws Exception;
+}
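Taken together, the two interfaces above let an operator take part in Flink's per-key-group checkpointing: snapshotKeyGroupState writes one key group's slice of state, and restoreKeyGroupState reads that slice back. A hypothetical sketch follows; the per-key-group event counter is an illustrative assumption rather than state the runner actually keeps, and the class is assumed to live in the same package as the interfaces:

    import java.io.DataInputStream;
    import java.io.DataOutputStream;
    import java.util.HashMap;
    import java.util.Map;

    // Hypothetical operator state: one event counter per key group.
    public class CountingKeyGroupState implements KeyGroupCheckpointedOperator {

      private final Map<Integer, Long> countsPerKeyGroup = new HashMap<>();

      void increment(int keyGroupIndex) {
        Long current = countsPerKeyGroup.get(keyGroupIndex);
        countsPerKeyGroup.put(keyGroupIndex, current == null ? 1L : current + 1L);
      }

      @Override
      public void snapshotKeyGroupState(int keyGroupIndex, DataOutputStream out) throws Exception {
        // Write only this key group's slice; absent key groups snapshot as zero.
        Long count = countsPerKeyGroup.get(keyGroupIndex);
        out.writeLong(count == null ? 0L : count);
      }

      @Override
      public void restoreKeyGroupState(int keyGroupIndex, DataInputStream in) throws Exception {
        // Read back exactly what snapshotKeyGroupState wrote for this key group.
        countsPerKeyGroup.put(keyGroupIndex, in.readLong());
      }
    }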
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/state/package-info.java
----------------------------------------------------------------------
diff --git a/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/state/package-info.java b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/state/package-info.java
new file mode 100644
index 0000000..0004e9e
--- /dev/null
+++ b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/state/package-info.java
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Internal state implementation of the Beam runner for Apache Flink.
+ */
+package org.apache.beam.runners.flink.translation.wrappers.streaming.state;
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/src/main/resources/log4j.properties
----------------------------------------------------------------------
diff --git a/runners/flink/src/main/resources/log4j.properties b/runners/flink/src/main/resources/log4j.properties
new file mode 100644
index 0000000..4b6a708
--- /dev/null
+++ b/runners/flink/src/main/resources/log4j.properties
@@ -0,0 +1,23 @@
+################################################################################
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+################################################################################
+
+log4j.rootLogger=OFF,console
+log4j.appender.console=org.apache.log4j.ConsoleAppender
+log4j.appender.console.target=System.err
+log4j.appender.console.layout=org.apache.log4j.PatternLayout
+log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{2}: %m%n
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/src/test/java/org/apache/beam/runners/flink/EncodedValueComparatorTest.java
----------------------------------------------------------------------
diff --git a/runners/flink/src/test/java/org/apache/beam/runners/flink/EncodedValueComparatorTest.java b/runners/flink/src/test/java/org/apache/beam/runners/flink/EncodedValueComparatorTest.java
new file mode 100644
index 0000000..10d6d9d
--- /dev/null
+++ b/runners/flink/src/test/java/org/apache/beam/runners/flink/EncodedValueComparatorTest.java
@@ -0,0 +1,70 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.runners.flink;
+
+import org.apache.beam.runners.flink.translation.types.EncodedValueComparator;
+import org.apache.beam.runners.flink.translation.types.EncodedValueTypeInformation;
+import org.apache.beam.sdk.coders.CoderException;
+import org.apache.beam.sdk.coders.StringUtf8Coder;
+import org.apache.beam.sdk.util.CoderUtils;
+import org.apache.flink.api.common.ExecutionConfig;
+import org.apache.flink.api.common.typeutils.ComparatorTestBase;
+import org.apache.flink.api.common.typeutils.TypeComparator;
+import org.apache.flink.api.common.typeutils.TypeSerializer;
+import org.junit.Assert;
+
+/**
+ * Test for {@link EncodedValueComparator}.
+ */
+public class EncodedValueComparatorTest extends ComparatorTestBase<byte[]> {
+
+ @Override
+ protected TypeComparator<byte[]> createComparator(boolean ascending) {
+ return new EncodedValueTypeInformation().createComparator(ascending, new ExecutionConfig());
+ }
+
+ @Override
+ protected TypeSerializer<byte[]> createSerializer() {
+ return new EncodedValueTypeInformation().createSerializer(new ExecutionConfig());
+ }
+
+ @Override
+ protected void deepEquals(String message, byte[] should, byte[] is) {
+ Assert.assertArrayEquals(message, should, is);
+ }
+
+ @Override
+ protected byte[][] getSortedTestData() {
+ StringUtf8Coder coder = StringUtf8Coder.of();
+
+ try {
+ return new byte[][]{
+ CoderUtils.encodeToByteArray(coder, ""),
+ CoderUtils.encodeToByteArray(coder, "Lorem Ipsum Dolor Omit Longer"),
+ CoderUtils.encodeToByteArray(coder, "aaaa"),
+ CoderUtils.encodeToByteArray(coder, "abcd"),
+ CoderUtils.encodeToByteArray(coder, "abce"),
+ CoderUtils.encodeToByteArray(coder, "abdd"),
+ CoderUtils.encodeToByteArray(coder, "accd"),
+ CoderUtils.encodeToByteArray(coder, "bbcd")
+ };
+ } catch (CoderException e) {
+ throw new RuntimeException("Could not encode values.", e);
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/src/test/java/org/apache/beam/runners/flink/FlinkRunnerRegistrarTest.java
----------------------------------------------------------------------
diff --git a/runners/flink/src/test/java/org/apache/beam/runners/flink/FlinkRunnerRegistrarTest.java b/runners/flink/src/test/java/org/apache/beam/runners/flink/FlinkRunnerRegistrarTest.java
new file mode 100644
index 0000000..d9d174c
--- /dev/null
+++ b/runners/flink/src/test/java/org/apache/beam/runners/flink/FlinkRunnerRegistrarTest.java
@@ -0,0 +1,48 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.beam.runners.flink;
+
+import static org.junit.Assert.assertEquals;
+
+import org.apache.beam.sdk.options.PipelineOptions;
+import org.apache.beam.sdk.options.PipelineOptionsFactory;
+import org.junit.Test;
+
+/**
+ * Tests the proper registration of the Flink runner.
+ */
+public class FlinkRunnerRegistrarTest {
+
+ @Test
+ public void testFullName() {
+ String[] args =
+ new String[] {String.format("--runner=%s", FlinkRunner.class.getName())};
+ PipelineOptions opts = PipelineOptionsFactory.fromArgs(args).create();
+ assertEquals(FlinkRunner.class, opts.getRunner());
+ }
+
+ @Test
+ public void testClassName() {
+ String[] args =
+ new String[] {String.format("--runner=%s", FlinkRunner.class.getSimpleName())};
+ PipelineOptions opts = PipelineOptionsFactory.fromArgs(args).create();
+ assertEquals(FlinkRunner.class, opts.getRunner());
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/src/test/java/org/apache/beam/runners/flink/FlinkTestPipeline.java
----------------------------------------------------------------------
diff --git a/runners/flink/src/test/java/org/apache/beam/runners/flink/FlinkTestPipeline.java b/runners/flink/src/test/java/org/apache/beam/runners/flink/FlinkTestPipeline.java
new file mode 100644
index 0000000..d6240c4
--- /dev/null
+++ b/runners/flink/src/test/java/org/apache/beam/runners/flink/FlinkTestPipeline.java
@@ -0,0 +1,72 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.runners.flink;
+
+import org.apache.beam.sdk.Pipeline;
+import org.apache.beam.sdk.PipelineResult;
+import org.apache.beam.sdk.options.PipelineOptions;
+import org.apache.beam.sdk.runners.PipelineRunner;
+
+/**
+ * {@link org.apache.beam.sdk.Pipeline} for testing Beam pipelines on the
+ * {@link FlinkRunner}.
+ */
+public class FlinkTestPipeline extends Pipeline {
+
+ /**
+ * Creates and returns a new test pipeline for batch execution.
+ *
+ * <p>Use {@link org.apache.beam.sdk.testing.PAssert} to add tests, then call
+ * {@link Pipeline#run} to execute the pipeline and check the tests.
+ */
+ public static FlinkTestPipeline createForBatch() {
+ return create(false);
+ }
+
+ /**
+ * Creates and returns a new test pipeline for streaming execution.
+ *
+ * <p>Use {@link org.apache.beam.sdk.testing.PAssert} to add tests, then call
+ * {@link Pipeline#run} to execute the pipeline and check the tests.
+ *
+ * @return The Test Pipeline
+ */
+ public static FlinkTestPipeline createForStreaming() {
+ return create(true);
+ }
+
+ /**
+ * Creates and returns a new test pipeline for streaming or batch execution.
+ *
+ * <p>Use {@link org.apache.beam.sdk.testing.PAssert} to add tests, then call
+ * {@link Pipeline#run} to execute the pipeline and check the tests.
+ *
+ * @param streaming <code>True</code> for streaming mode, <code>False</code> for batch.
+ * @return The Test Pipeline.
+ */
+ private static FlinkTestPipeline create(boolean streaming) {
+ TestFlinkRunner flinkRunner = TestFlinkRunner.create(streaming);
+ return new FlinkTestPipeline(flinkRunner, flinkRunner.getPipelineOptions());
+ }
+
+ private FlinkTestPipeline(PipelineRunner<? extends PipelineResult> runner,
+ PipelineOptions options) {
+ super(runner, options);
+ }
+}
+
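The javadoc above points at PAssert for assertions; the following is a minimal usage sketch of the batch variant, assuming the sketch lives where FlinkTestPipeline is importable and using arbitrary element values:

    import org.apache.beam.sdk.testing.PAssert;
    import org.apache.beam.sdk.transforms.Create;
    import org.apache.beam.sdk.values.PCollection;

    public class FlinkTestPipelineSketch {
      public static void main(String[] args) {
        // Build a test pipeline that executes on the TestFlinkRunner in batch mode.
        FlinkTestPipeline p = FlinkTestPipeline.createForBatch();
        PCollection<String> words = p.apply(Create.of("hello", "beam"));
        // PAssert adds assertion transforms that are checked when the pipeline runs.
        PAssert.that(words).containsInAnyOrder("hello", "beam");
        p.run();
      }
    }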
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/src/test/java/org/apache/beam/runners/flink/PipelineOptionsTest.java
----------------------------------------------------------------------
diff --git a/runners/flink/src/test/java/org/apache/beam/runners/flink/PipelineOptionsTest.java b/runners/flink/src/test/java/org/apache/beam/runners/flink/PipelineOptionsTest.java
new file mode 100644
index 0000000..06187f6
--- /dev/null
+++ b/runners/flink/src/test/java/org/apache/beam/runners/flink/PipelineOptionsTest.java
@@ -0,0 +1,184 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.runners.flink;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+
+import java.util.Collections;
+import java.util.HashMap;
+import org.apache.beam.runners.flink.translation.utils.SerializedPipelineOptions;
+import org.apache.beam.runners.flink.translation.wrappers.streaming.DoFnOperator;
+import org.apache.beam.sdk.coders.StringUtf8Coder;
+import org.apache.beam.sdk.options.Default;
+import org.apache.beam.sdk.options.Description;
+import org.apache.beam.sdk.options.PipelineOptions;
+import org.apache.beam.sdk.options.PipelineOptionsFactory;
+import org.apache.beam.sdk.transforms.DoFn;
+import org.apache.beam.sdk.transforms.windowing.GlobalWindow;
+import org.apache.beam.sdk.transforms.windowing.PaneInfo;
+import org.apache.beam.sdk.util.WindowedValue;
+import org.apache.beam.sdk.util.WindowingStrategy;
+import org.apache.beam.sdk.values.PCollectionView;
+import org.apache.beam.sdk.values.TupleTag;
+import org.apache.commons.lang3.SerializationUtils;
+import org.apache.flink.api.common.ExecutionConfig;
+import org.apache.flink.api.common.typeinfo.TypeHint;
+import org.apache.flink.api.common.typeinfo.TypeInformation;
+import org.apache.flink.runtime.state.memory.MemoryStateBackend;
+import org.apache.flink.streaming.runtime.streamrecord.StreamRecord;
+import org.apache.flink.streaming.util.OneInputStreamOperatorTestHarness;
+import org.joda.time.Instant;
+import org.junit.Assert;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+/**
+ * Tests for serialization and deserialization of {@link PipelineOptions} in {@link DoFnOperator}.
+ */
+public class PipelineOptionsTest {
+
+ /**
+ * Pipeline options.
+ */
+ public interface MyOptions extends FlinkPipelineOptions {
+ @Description("Bla bla bla")
+ @Default.String("Hello")
+ String getTestOption();
+ void setTestOption(String value);
+ }
+
+ private static MyOptions options;
+ private static SerializedPipelineOptions serializedOptions;
+
+ private static final String[] args = new String[]{"--testOption=nothing"};
+
+ @BeforeClass
+ public static void beforeTest() {
+ options = PipelineOptionsFactory.fromArgs(args).as(MyOptions.class);
+ serializedOptions = new SerializedPipelineOptions(options);
+ }
+
+ @Test
+ public void testDeserialization() {
+ MyOptions deserializedOptions = serializedOptions.getPipelineOptions().as(MyOptions.class);
+ assertEquals("nothing", deserializedOptions.getTestOption());
+ }
+
+ @Test
+ public void testIgnoredFieldSerialization() {
+ FlinkPipelineOptions options = PipelineOptionsFactory.as(FlinkPipelineOptions.class);
+ options.setStateBackend(new MemoryStateBackend());
+
+ FlinkPipelineOptions deserialized =
+ new SerializedPipelineOptions(options).getPipelineOptions().as(FlinkPipelineOptions.class);
+
+ assertNull(deserialized.getStateBackend());
+ }
+
+ @Test
+ public void testCaching() {
+ PipelineOptions deserializedOptions =
+ serializedOptions.getPipelineOptions().as(PipelineOptions.class);
+
+ assertNotNull(deserializedOptions);
+ assertTrue(deserializedOptions == serializedOptions.getPipelineOptions());
+ assertTrue(deserializedOptions == serializedOptions.getPipelineOptions());
+ assertTrue(deserializedOptions == serializedOptions.getPipelineOptions());
+ }
+
+ @Test(expected = Exception.class)
+ public void testNonNull() {
+ new SerializedPipelineOptions(null);
+ }
+
+ @Test(expected = Exception.class)
+ public void parDoBaseClassPipelineOptionsNullTest() {
+ DoFnOperator<String, String, String> doFnOperator = new DoFnOperator<>(
+ new TestDoFn(),
+ WindowedValue.getValueOnlyCoder(StringUtf8Coder.of()),
+ new TupleTag<String>("main-output"),
+ Collections.<TupleTag<?>>emptyList(),
+ new DoFnOperator.DefaultOutputManagerFactory<String>(),
+ WindowingStrategy.globalDefault(),
+ new HashMap<Integer, PCollectionView<?>>(),
+ Collections.<PCollectionView<?>>emptyList(),
+ null,
+ null);
+
+ }
+
+ /**
+ * Tests that PipelineOptions are present after serialization.
+ */
+ @Test
+ public void parDoBaseClassPipelineOptionsSerializationTest() throws Exception {
+
+ DoFnOperator<String, String, String> doFnOperator = new DoFnOperator<>(
+ new TestDoFn(),
+ WindowedValue.getValueOnlyCoder(StringUtf8Coder.of()),
+ new TupleTag<String>("main-output"),
+ Collections.<TupleTag<?>>emptyList(),
+ new DoFnOperator.DefaultOutputManagerFactory<String>(),
+ WindowingStrategy.globalDefault(),
+ new HashMap<Integer, PCollectionView<?>>(),
+ Collections.<PCollectionView<?>>emptyList(),
+ options,
+ null);
+
+ final byte[] serialized = SerializationUtils.serialize(doFnOperator);
+
+ @SuppressWarnings("unchecked")
+ DoFnOperator<Object, Object, Object> deserialized =
+ (DoFnOperator<Object, Object, Object>) SerializationUtils.deserialize(serialized);
+
+ TypeInformation<WindowedValue<Object>> typeInformation = TypeInformation.of(
+ new TypeHint<WindowedValue<Object>>() {});
+
+ OneInputStreamOperatorTestHarness<WindowedValue<Object>, Object> testHarness =
+ new OneInputStreamOperatorTestHarness<>(deserialized,
+ typeInformation.createSerializer(new ExecutionConfig()));
+
+ testHarness.open();
+
+ // execute once to access options
+ testHarness.processElement(new StreamRecord<>(
+ WindowedValue.of(
+ new Object(),
+ Instant.now(),
+ GlobalWindow.INSTANCE,
+ PaneInfo.NO_FIRING)));
+
+ testHarness.close();
+
+ }
+
+
+ private static class TestDoFn extends DoFn<String, String> {
+
+ @ProcessElement
+ public void processElement(ProcessContext c) throws Exception {
+ Assert.assertNotNull(c.getPipelineOptions());
+ Assert.assertEquals(
+ options.getTestOption(),
+ c.getPipelineOptions().as(MyOptions.class).getTestOption());
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/src/test/java/org/apache/beam/runners/flink/ReadSourceITCase.java
----------------------------------------------------------------------
diff --git a/runners/flink/src/test/java/org/apache/beam/runners/flink/ReadSourceITCase.java b/runners/flink/src/test/java/org/apache/beam/runners/flink/ReadSourceITCase.java
new file mode 100644
index 0000000..44c9017
--- /dev/null
+++ b/runners/flink/src/test/java/org/apache/beam/runners/flink/ReadSourceITCase.java
@@ -0,0 +1,85 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.runners.flink;
+
+import com.google.common.base.Joiner;
+import java.io.File;
+import java.net.URI;
+import org.apache.beam.sdk.Pipeline;
+import org.apache.beam.sdk.io.CountingInput;
+import org.apache.beam.sdk.io.TextIO;
+import org.apache.beam.sdk.transforms.DoFn;
+import org.apache.beam.sdk.transforms.ParDo;
+import org.apache.beam.sdk.values.PCollection;
+import org.apache.flink.test.util.JavaProgramTestBase;
+
+/**
+ * Reads from a bounded source in batch execution.
+ */
+public class ReadSourceITCase extends JavaProgramTestBase {
+
+ protected String resultPath;
+
+ public ReadSourceITCase(){
+ }
+
+ private static final String[] EXPECTED_RESULT = new String[] {
+ "0", "1", "2", "3", "4", "5", "6", "7", "8", "9"};
+
+ @Override
+ protected void preSubmit() throws Exception {
+ resultPath = getTempDirPath("result");
+
+ // need to create the dir, otherwise Beam sinks don't
+ // work for these tests
+
+ if (!new File(new URI(resultPath)).mkdirs()) {
+ throw new RuntimeException("Could not create output dir.");
+ }
+ }
+
+ @Override
+ protected void postSubmit() throws Exception {
+ compareResultsByLinesInMemory(Joiner.on('\n').join(EXPECTED_RESULT), resultPath);
+ }
+
+ @Override
+ protected void testProgram() throws Exception {
+ runProgram(resultPath);
+ }
+
+ private static void runProgram(String resultPath) throws Exception {
+
+ Pipeline p = FlinkTestPipeline.createForBatch();
+
+ PCollection<String> result = p
+ .apply(CountingInput.upTo(10))
+ .apply(ParDo.of(new DoFn<Long, String>() {
+ @ProcessElement
+ public void processElement(ProcessContext c) throws Exception {
+ c.output(c.element().toString());
+ }
+ }));
+
+ result.apply(TextIO.Write.to(new URI(resultPath).getPath() + "/part"));
+
+ p.run();
+ }
+}
+
+
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/src/test/java/org/apache/beam/runners/flink/ReadSourceStreamingITCase.java
----------------------------------------------------------------------
diff --git a/runners/flink/src/test/java/org/apache/beam/runners/flink/ReadSourceStreamingITCase.java b/runners/flink/src/test/java/org/apache/beam/runners/flink/ReadSourceStreamingITCase.java
new file mode 100644
index 0000000..79b7882
--- /dev/null
+++ b/runners/flink/src/test/java/org/apache/beam/runners/flink/ReadSourceStreamingITCase.java
@@ -0,0 +1,74 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.runners.flink;
+
+import com.google.common.base.Joiner;
+import org.apache.beam.sdk.Pipeline;
+import org.apache.beam.sdk.io.CountingInput;
+import org.apache.beam.sdk.io.TextIO;
+import org.apache.beam.sdk.transforms.DoFn;
+import org.apache.beam.sdk.transforms.ParDo;
+import org.apache.flink.streaming.util.StreamingProgramTestBase;
+
+/**
+ * Reads from a bounded source in streaming.
+ */
+public class ReadSourceStreamingITCase extends StreamingProgramTestBase {
+
+ protected String resultPath;
+
+ public ReadSourceStreamingITCase(){
+ }
+
+ private static final String[] EXPECTED_RESULT = new String[] {
+ "0", "1", "2", "3", "4", "5", "6", "7", "8", "9"};
+
+ @Override
+ protected void preSubmit() throws Exception {
+ resultPath = getTempDirPath("result");
+ }
+
+ @Override
+ protected void postSubmit() throws Exception {
+ compareResultsByLinesInMemory(Joiner.on('\n').join(EXPECTED_RESULT), resultPath);
+ }
+
+ @Override
+ protected void testProgram() throws Exception {
+ runProgram(resultPath);
+ }
+
+ private static void runProgram(String resultPath) {
+
+ Pipeline p = FlinkTestPipeline.createForStreaming();
+
+ p
+ .apply(CountingInput.upTo(10))
+ .apply(ParDo.of(new DoFn<Long, String>() {
+ @ProcessElement
+ public void processElement(ProcessContext c) throws Exception {
+ c.output(c.element().toString());
+ }
+ }))
+ .apply(TextIO.Write.to(resultPath));
+
+ p.run();
+ }
+}
+
+
http://git-wip-us.apache.org/repos/asf/beam/blob/cdd2544b/runners/flink/src/test/java/org/apache/beam/runners/flink/WriteSinkITCase.java
----------------------------------------------------------------------
diff --git a/runners/flink/src/test/java/org/apache/beam/runners/flink/WriteSinkITCase.java b/runners/flink/src/test/java/org/apache/beam/runners/flink/WriteSinkITCase.java
new file mode 100644
index 0000000..38b790e
--- /dev/null
+++ b/runners/flink/src/test/java/org/apache/beam/runners/flink/WriteSinkITCase.java
@@ -0,0 +1,192 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.beam.runners.flink;
+
+import static org.junit.Assert.assertNotNull;
+
+import com.google.common.base.Joiner;
+import com.google.common.collect.ImmutableList;
+import java.io.File;
+import java.io.IOException;
+import java.io.PrintWriter;
+import java.net.URI;
+import org.apache.beam.sdk.Pipeline;
+import org.apache.beam.sdk.coders.Coder;
+import org.apache.beam.sdk.coders.StringUtf8Coder;
+import org.apache.beam.sdk.io.Sink;
+import org.apache.beam.sdk.io.Write;
+import org.apache.beam.sdk.options.PipelineOptions;
+import org.apache.beam.sdk.transforms.Create;
+import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
+import org.apache.beam.sdk.transforms.windowing.PaneInfo;
+import org.apache.flink.core.fs.FileSystem;
+import org.apache.flink.core.fs.Path;
+import org.apache.flink.test.util.JavaProgramTestBase;
+
+/**
+ * Tests the translation of custom Write sinks.
+ */
+public class WriteSinkITCase extends JavaProgramTestBase {
+
+ protected String resultPath;
+
+ public WriteSinkITCase(){
+ }
+
+ static final String[] EXPECTED_RESULT = new String[] {
+ "Joe red 3", "Mary blue 4", "Max yellow 23"};
+
+ @Override
+ protected void preSubmit() throws Exception {
+ resultPath = getTempDirPath("result-" + System.nanoTime());
+ }
+
+ @Override
+ protected void postSubmit() throws Exception {
+ compareResultsByLinesInMemory(Joiner.on('\n').join(EXPECTED_RESULT), resultPath);
+ }
+
+ @Override
+ protected void testProgram() throws Exception {
+ runProgram(resultPath);
+ }
+
+ @Override
+ public void stopCluster() throws Exception {
+ try {
+ super.stopCluster();
+ } catch (final IOException ioe) {
+ if (ioe.getMessage().startsWith("Unable to delete file")) {
+ // this is fine for the test itself; the OS is just interfering during the cleanup phase
+ }
+ }
+ }
+
+ private static void runProgram(String resultPath) {
+ Pipeline p = FlinkTestPipeline.createForBatch();
+
+ p.apply(Create.of(ImmutableList.copyOf(EXPECTED_RESULT))).setCoder(StringUtf8Coder.of())
+ .apply("CustomSink", Write.to(new MyCustomSink(resultPath)));
+
+ p.run();
+ }
+
+ /**
+ * Simple custom sink which writes to a file.
+ */
+ private static class MyCustomSink extends Sink<String> {
+
+ private final String resultPath;
+
+ public MyCustomSink(String resultPath) {
+ this.resultPath = resultPath;
+ }
+
+ @Override
+ public void validate(PipelineOptions options) {
+ assertNotNull(options);
+ }
+
+ @Override
+ public WriteOperation<String, ?> createWriteOperation(PipelineOptions options) {
+ return new MyWriteOperation();
+ }
+
+ private class MyWriteOperation extends WriteOperation<String, String> {
+
+ @Override
+ public Coder<String> getWriterResultCoder() {
+ return StringUtf8Coder.of();
+ }
+
+ @Override
+ public void initialize(PipelineOptions options) throws Exception {
+
+ }
+
+ @Override
+ public void setWindowedWrites(boolean windowedWrites) {
+
+ }
+
+ @Override
+ public void finalize(Iterable<String> writerResults, PipelineOptions options)
+ throws Exception {
+
+ }
+
+ @Override
+ public Writer<String, String> createWriter(PipelineOptions options) throws Exception {
+ return new MyWriter();
+ }
+
+ @Override
+ public Sink<String> getSink() {
+ return MyCustomSink.this;
+ }
+
+ /**
+ * Simple Writer which writes to a file.
+ */
+ private class MyWriter extends Writer<String, String> {
+
+ private PrintWriter internalWriter;
+
+ @Override
+ public final void openWindowed(String uId,
+ BoundedWindow window,
+ PaneInfo paneInfo,
+ int shard,
+ int numShards) throws Exception {
+ throw new UnsupportedOperationException("Windowed writes not supported.");
+ }
+
+ @Override
+ public final void openUnwindowed(String uId, int shard, int numShards) throws Exception {
+ Path path = new Path(resultPath + "/" + uId);
+ FileSystem.get(new URI("file:///")).create(path, false);
+ internalWriter = new PrintWriter(new File(path.toUri()));
+ }
+
+ @Override
+ public void cleanup() throws Exception {
+
+ }
+
+ @Override
+ public void write(String value) throws Exception {
+ internalWriter.println(value);
+ }
+
+ @Override
+ public String close() throws Exception {
+ internalWriter.close();
+ return resultPath;
+ }
+
+ @Override
+ public WriteOperation<String, String> getWriteOperation() {
+ return MyWriteOperation.this;
+ }
+ }
+ }
+ }
+
+}
+