Posted to commits@beam.apache.org by jb...@apache.org on 2017/07/20 19:52:57 UTC

[01/50] [abbrv] beam git commit: [BEAM-2578] Fix DebuggingWordCountTest on Windows platform

Repository: beam
Updated Branches:
  refs/heads/DSL_SQL 152115e89 -> ada24c059


[BEAM-2578] Fix DebuggingWordCountTest on Windows platform


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/53ce5829
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/53ce5829
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/53ce5829

Branch: refs/heads/DSL_SQL
Commit: 53ce58296b8a4a8359caf1593289f0881e85172d
Parents: 36c55eb
Author: eralmas7 <er...@yahoo.com>
Authored: Mon Jul 10 23:14:24 2017 +0530
Committer: Jean-Baptiste Onofré <jb...@apache.org>
Committed: Sun Jul 16 21:28:15 2017 +0200

----------------------------------------------------------------------
 .../org/apache/beam/examples/DebuggingWordCountTest.java | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/53ce5829/examples/java/src/test/java/org/apache/beam/examples/DebuggingWordCountTest.java
----------------------------------------------------------------------
diff --git a/examples/java/src/test/java/org/apache/beam/examples/DebuggingWordCountTest.java b/examples/java/src/test/java/org/apache/beam/examples/DebuggingWordCountTest.java
index 054277a..be48a99 100644
--- a/examples/java/src/test/java/org/apache/beam/examples/DebuggingWordCountTest.java
+++ b/examples/java/src/test/java/org/apache/beam/examples/DebuggingWordCountTest.java
@@ -35,6 +35,13 @@ import org.junit.runners.JUnit4;
 public class DebuggingWordCountTest {
   @Rule public TemporaryFolder tmpFolder = new TemporaryFolder();
 
+  private String getFilePath(String filePath) {
+      if (filePath.contains(":")) {
+          return filePath.replace("\\", "/").split(":")[1];
+      }
+      return filePath;
+  }
+
   @Test
   public void testDebuggingWordCount() throws Exception {
     File inputFile = tmpFolder.newFile();
@@ -45,8 +52,8 @@ public class DebuggingWordCountTest {
         StandardCharsets.UTF_8);
     WordCountOptions options =
         TestPipeline.testingPipelineOptions().as(WordCountOptions.class);
-    options.setInputFile(inputFile.getAbsolutePath());
-    options.setOutput(outputFile.getAbsolutePath());
+    options.setInputFile(getFilePath(inputFile.getAbsolutePath()));
+    options.setOutput(getFilePath(outputFile.getAbsolutePath()));
     DebuggingWordCount.main(TestPipeline.convertToArgs(options));
   }
 }

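For context only (not part of the commit): a minimal standalone sketch of what the new getFilePath helper does. On Windows, File.getAbsolutePath() returns a drive-letter path with backslashes, which the helper turns into a forward-slash path without the drive prefix before it is handed to the pipeline options. The class name and sample paths below are illustrative.

    public class PathNormalizationSketch {
      // Mirrors the helper added to DebuggingWordCountTest above.
      private static String getFilePath(String filePath) {
        if (filePath.contains(":")) {
          return filePath.replace("\\", "/").split(":")[1];
        }
        return filePath;
      }

      public static void main(String[] args) {
        // Prints "/Temp/beam/words.txt": backslashes become slashes, "C:" is dropped.
        System.out.println(getFilePath("C:\\Temp\\beam\\words.txt"));
        // Non-Windows paths contain no ':' and pass through unchanged.
        System.out.println(getFilePath("/tmp/beam/words.txt"));
      }
    }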

[31/50] [abbrv] beam git commit: This closes #3475: [BEAM-2544] Fix flaky AvroIOTest

Posted by jb...@apache.org.
This closes #3475: [BEAM-2544] Fix flaky AvroIOTest


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/be5b9347
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/be5b9347
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/be5b9347

Branch: refs/heads/DSL_SQL
Commit: be5b9347bc44de1f042c76e1ba3f47a13772c72b
Parents: dd9e866 911edba
Author: Eugene Kirpichov <ki...@google.com>
Authored: Tue Jul 18 15:49:54 2017 -0700
Committer: Eugene Kirpichov <ki...@google.com>
Committed: Tue Jul 18 15:49:54 2017 -0700

----------------------------------------------------------------------
 .../java/org/apache/beam/sdk/io/AvroIOTest.java | 46 +++++++++++---------
 1 file changed, 25 insertions(+), 21 deletions(-)
----------------------------------------------------------------------



[47/50] [abbrv] beam git commit: This closes #3585

Posted by jb...@apache.org.
This closes #3585


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/c8e3744a
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/c8e3744a
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/c8e3744a

Branch: refs/heads/DSL_SQL
Commit: c8e3744adc6dfdfcfd32221bbbd05ed5b2511c81
Parents: 2e51bde 0a5157e
Author: chamikara@google.com <ch...@google.com>
Authored: Thu Jul 20 10:18:55 2017 -0700
Committer: chamikara@google.com <ch...@google.com>
Committed: Thu Jul 20 10:18:55 2017 -0700

----------------------------------------------------------------------
 .../io/gcp/datastore/v1/datastoreio.py          | 84 ++++++++++++++---
 .../io/gcp/datastore/v1/datastoreio_test.py     | 53 +++++++++--
 .../apache_beam/io/gcp/datastore/v1/helper.py   | 35 ++++++--
 .../apache_beam/io/gcp/datastore/v1/util.py     | 95 ++++++++++++++++++++
 .../io/gcp/datastore/v1/util_test.py            | 67 ++++++++++++++
 5 files changed, 310 insertions(+), 24 deletions(-)
----------------------------------------------------------------------



[23/50] [abbrv] beam git commit: Change PR template from 1234 to XXX

Posted by jb...@apache.org.
Change PR template from 1234 to XXX


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/b827f656
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/b827f656
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/b827f656

Branch: refs/heads/DSL_SQL
Commit: b827f65622f9cd9203803b76935aac422c179803
Parents: d2201f9
Author: Sourabh Bajaj <so...@google.com>
Authored: Tue Jul 18 10:30:44 2017 -0700
Committer: Sourabh Bajaj <so...@google.com>
Committed: Tue Jul 18 10:30:44 2017 -0700

----------------------------------------------------------------------
 .github/PULL_REQUEST_TEMPLATE.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/b827f656/.github/PULL_REQUEST_TEMPLATE.md
----------------------------------------------------------------------
diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md
index eeee750..bd361b7 100644
--- a/.github/PULL_REQUEST_TEMPLATE.md
+++ b/.github/PULL_REQUEST_TEMPLATE.md
@@ -2,7 +2,7 @@ Follow this checklist to help us incorporate your contribution quickly and easil
 
  - [ ] Make sure there is a [JIRA issue](https://issues.apache.org/jira/projects/BEAM/issues/) filed for the change (usually before you start working on it).  Trivial changes like typos do not require a JIRA issue.  Your pull request should address just this issue, without pulling in other changes.
  - [ ] Each commit in the pull request should have a meaningful subject line and body.
- - [ ] Format the pull request title like `[BEAM-1234] Fixes bug in ApproximateQuantiles`, where you replace `BEAM-1234` with the appropriate JIRA issue.
+ - [ ] Format the pull request title like `[BEAM-XXX] Fixes bug in ApproximateQuantiles`, where you replace `BEAM-XXX` with the appropriate JIRA issue.
  - [ ] Write a pull request description that is detailed enough to understand what the pull request does, how, and why.
  - [ ] Run `mvn clean verify` to make sure basic checks pass. A more thorough check will be performed on your pull request automatically.
  - [ ] If this contribution is large, please file an Apache [Individual Contributor License Agreement](https://www.apache.org/licenses/icla.pdf).


[24/50] [abbrv] beam git commit: This closes #3587: Change PR template from 1234 to XXX

Posted by jb...@apache.org.
This closes #3587: Change PR template from 1234 to XXX


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/5a0b74c9
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/5a0b74c9
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/5a0b74c9

Branch: refs/heads/DSL_SQL
Commit: 5a0b74c9b8654cd034a55145a60e666c579caab6
Parents: d2201f9 b827f65
Author: Eugene Kirpichov <ki...@google.com>
Authored: Tue Jul 18 11:18:53 2017 -0700
Committer: Eugene Kirpichov <ki...@google.com>
Committed: Tue Jul 18 11:18:53 2017 -0700

----------------------------------------------------------------------
 .github/PULL_REQUEST_TEMPLATE.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------



[27/50] [abbrv] beam git commit: This closes #3589

Posted by jb...@apache.org.
This closes #3589


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/2d5b6d74
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/2d5b6d74
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/2d5b6d74

Branch: refs/heads/DSL_SQL
Commit: 2d5b6d745cec07fc59c77eacad7bb90880a0946a
Parents: 2c2d8a3 d14cef0
Author: Ahmet Altay <al...@google.com>
Authored: Tue Jul 18 13:11:34 2017 -0700
Committer: Ahmet Altay <al...@google.com>
Committed: Tue Jul 18 13:11:34 2017 -0700

----------------------------------------------------------------------
 README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
----------------------------------------------------------------------



[07/50] [abbrv] beam git commit: Fix split package in SDK harness

Posted by jb...@apache.org.
Fix split package in SDK harness

The Java SDK harness defined classes both in its own namespace,
org.apache.beam.fn.harness, and in the org.apache.beam.runners.core namespace,
resulting in a split package across multiple jars.

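For illustration only (not from the commit message): a split package arises when two jars ship classes in the same Java package. A simplified before/after sketch, using one class this commit actually moves (BeamFnDataReadRunner) and one pre-existing runners-core class referenced later in this diff (DoFnRunner); jar names are abbreviated.

    // Before this commit (simplified):
    //   runners-core jar  -> org/apache/beam/runners/core/DoFnRunner.class
    //   SDK harness jar   -> org/apache/beam/runners/core/BeamFnDataReadRunner.class
    //   => the package org.apache.beam.runners.core is split across two jars.
    //
    // After this commit:
    //   SDK harness jar   -> org/apache/beam/fn/harness/BeamFnDataReadRunner.class
    //   => each jar owns its own package.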

Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/f1b4700f
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/f1b4700f
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/f1b4700f

Branch: refs/heads/DSL_SQL
Commit: f1b4700f32c5ea39559145d6f5db3909439f6c80
Parents: 7e4719c
Author: Kenneth Knowles <kl...@google.com>
Authored: Mon Jul 17 13:46:46 2017 -0700
Committer: Kenneth Knowles <kl...@google.com>
Committed: Mon Jul 17 13:46:46 2017 -0700

----------------------------------------------------------------------
 .../beam/fn/harness/BeamFnDataReadRunner.java   | 173 ++++++
 .../beam/fn/harness/BeamFnDataWriteRunner.java  | 159 ++++++
 .../beam/fn/harness/BoundedSourceRunner.java    | 167 ++++++
 .../apache/beam/fn/harness/FnApiDoFnRunner.java | 548 +++++++++++++++++++
 .../fn/harness/PTransformRunnerFactory.java     |  81 +++
 .../harness/control/ProcessBundleHandler.java   |   4 +-
 .../beam/runners/core/BeamFnDataReadRunner.java | 173 ------
 .../runners/core/BeamFnDataWriteRunner.java     | 159 ------
 .../beam/runners/core/BoundedSourceRunner.java  | 167 ------
 .../beam/runners/core/FnApiDoFnRunner.java      | 547 ------------------
 .../runners/core/PTransformRunnerFactory.java   |  81 ---
 .../apache/beam/runners/core/package-info.java  |  22 -
 .../fn/harness/BeamFnDataReadRunnerTest.java    | 281 ++++++++++
 .../fn/harness/BeamFnDataWriteRunnerTest.java   | 269 +++++++++
 .../fn/harness/BoundedSourceRunnerTest.java     | 187 +++++++
 .../beam/fn/harness/FnApiDoFnRunnerTest.java    | 210 +++++++
 .../control/ProcessBundleHandlerTest.java       |   2 +-
 .../runners/core/BeamFnDataReadRunnerTest.java  | 281 ----------
 .../runners/core/BeamFnDataWriteRunnerTest.java | 269 ---------
 .../runners/core/BoundedSourceRunnerTest.java   | 187 -------
 .../beam/runners/core/FnApiDoFnRunnerTest.java  | 210 -------
 21 files changed, 2078 insertions(+), 2099 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/f1b4700f/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/BeamFnDataReadRunner.java
----------------------------------------------------------------------
diff --git a/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/BeamFnDataReadRunner.java b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/BeamFnDataReadRunner.java
new file mode 100644
index 0000000..e2c17b0
--- /dev/null
+++ b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/BeamFnDataReadRunner.java
@@ -0,0 +1,173 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.beam.fn.harness;
+
+import static com.google.common.collect.Iterables.getOnlyElement;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.google.auto.service.AutoService;
+import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.Multimap;
+import com.google.protobuf.BytesValue;
+import java.io.IOException;
+import java.util.Collection;
+import java.util.Map;
+import java.util.concurrent.CompletableFuture;
+import java.util.function.Consumer;
+import java.util.function.Supplier;
+import org.apache.beam.fn.harness.data.BeamFnDataClient;
+import org.apache.beam.fn.harness.fn.ThrowingConsumer;
+import org.apache.beam.fn.harness.fn.ThrowingRunnable;
+import org.apache.beam.fn.v1.BeamFnApi;
+import org.apache.beam.runners.dataflow.util.CloudObject;
+import org.apache.beam.runners.dataflow.util.CloudObjects;
+import org.apache.beam.sdk.coders.Coder;
+import org.apache.beam.sdk.common.runner.v1.RunnerApi;
+import org.apache.beam.sdk.options.PipelineOptions;
+import org.apache.beam.sdk.util.WindowedValue;
+import org.apache.beam.sdk.values.KV;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Registers as a consumer for data over the Beam Fn API. Multiplexes any received data
+ * to all consumers in the specified output map.
+ *
+ * <p>Can be re-used serially across {@link org.apache.beam.fn.v1.BeamFnApi.ProcessBundleRequest}s.
+ * For each request, call {@link #registerInputLocation()} to start and call
+ * {@link #blockTillReadFinishes()} to finish.
+ */
+public class BeamFnDataReadRunner<OutputT> {
+
+  private static final Logger LOG = LoggerFactory.getLogger(BeamFnDataReadRunner.class);
+  private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
+  private static final String URN = "urn:org.apache.beam:source:runner:0.1";
+
+  /** A registrar which provides a factory to handle reading from the Fn Api Data Plane. */
+  @AutoService(PTransformRunnerFactory.Registrar.class)
+  public static class Registrar implements
+      PTransformRunnerFactory.Registrar {
+
+    @Override
+    public Map<String, PTransformRunnerFactory> getPTransformRunnerFactories() {
+      return ImmutableMap.of(URN, new Factory());
+    }
+  }
+
+  /** A factory for {@link BeamFnDataReadRunner}s. */
+  static class Factory<OutputT>
+      implements PTransformRunnerFactory<BeamFnDataReadRunner<OutputT>> {
+
+    @Override
+    public BeamFnDataReadRunner<OutputT> createRunnerForPTransform(
+        PipelineOptions pipelineOptions,
+        BeamFnDataClient beamFnDataClient,
+        String pTransformId,
+        RunnerApi.PTransform pTransform,
+        Supplier<String> processBundleInstructionId,
+        Map<String, RunnerApi.PCollection> pCollections,
+        Map<String, RunnerApi.Coder> coders,
+        Multimap<String, ThrowingConsumer<WindowedValue<?>>> pCollectionIdsToConsumers,
+        Consumer<ThrowingRunnable> addStartFunction,
+        Consumer<ThrowingRunnable> addFinishFunction) throws IOException {
+
+      BeamFnApi.Target target = BeamFnApi.Target.newBuilder()
+          .setPrimitiveTransformReference(pTransformId)
+          .setName(getOnlyElement(pTransform.getOutputsMap().keySet()))
+          .build();
+      RunnerApi.Coder coderSpec = coders.get(pCollections.get(
+          getOnlyElement(pTransform.getOutputsMap().values())).getCoderId());
+      Collection<ThrowingConsumer<WindowedValue<OutputT>>> consumers =
+          (Collection) pCollectionIdsToConsumers.get(
+              getOnlyElement(pTransform.getOutputsMap().values()));
+
+      BeamFnDataReadRunner<OutputT> runner = new BeamFnDataReadRunner<>(
+          pTransform.getSpec(),
+          processBundleInstructionId,
+          target,
+          coderSpec,
+          beamFnDataClient,
+          consumers);
+      addStartFunction.accept(runner::registerInputLocation);
+      addFinishFunction.accept(runner::blockTillReadFinishes);
+      return runner;
+    }
+  }
+
+  private final BeamFnApi.ApiServiceDescriptor apiServiceDescriptor;
+  private final Collection<ThrowingConsumer<WindowedValue<OutputT>>> consumers;
+  private final Supplier<String> processBundleInstructionIdSupplier;
+  private final BeamFnDataClient beamFnDataClientFactory;
+  private final Coder<WindowedValue<OutputT>> coder;
+  private final BeamFnApi.Target inputTarget;
+
+  private CompletableFuture<Void> readFuture;
+
+  BeamFnDataReadRunner(
+      RunnerApi.FunctionSpec functionSpec,
+      Supplier<String> processBundleInstructionIdSupplier,
+      BeamFnApi.Target inputTarget,
+      RunnerApi.Coder coderSpec,
+      BeamFnDataClient beamFnDataClientFactory,
+      Collection<ThrowingConsumer<WindowedValue<OutputT>>> consumers)
+          throws IOException {
+    this.apiServiceDescriptor = functionSpec.getParameter().unpack(BeamFnApi.RemoteGrpcPort.class)
+        .getApiServiceDescriptor();
+    this.inputTarget = inputTarget;
+    this.processBundleInstructionIdSupplier = processBundleInstructionIdSupplier;
+    this.beamFnDataClientFactory = beamFnDataClientFactory;
+    this.consumers = consumers;
+
+    @SuppressWarnings("unchecked")
+    Coder<WindowedValue<OutputT>> coder =
+        (Coder<WindowedValue<OutputT>>)
+            CloudObjects.coderFromCloudObject(
+                CloudObject.fromSpec(
+                    OBJECT_MAPPER.readValue(
+                        coderSpec
+                            .getSpec()
+                            .getSpec()
+                            .getParameter()
+                            .unpack(BytesValue.class)
+                            .getValue()
+                            .newInput(),
+                        Map.class)));
+    this.coder = coder;
+  }
+
+  public void registerInputLocation() {
+    this.readFuture = beamFnDataClientFactory.forInboundConsumer(
+        apiServiceDescriptor,
+        KV.of(processBundleInstructionIdSupplier.get(), inputTarget),
+        coder,
+        this::multiplexToConsumers);
+  }
+
+  public void blockTillReadFinishes() throws Exception {
+    LOG.debug("Waiting for process bundle instruction {} and target {} to close.",
+        processBundleInstructionIdSupplier.get(), inputTarget);
+    readFuture.get();
+  }
+
+  private void multiplexToConsumers(WindowedValue<OutputT> value) throws Exception {
+    for (ThrowingConsumer<WindowedValue<OutputT>> consumer : consumers) {
+      consumer.accept(value);
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/beam/blob/f1b4700f/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/BeamFnDataWriteRunner.java
----------------------------------------------------------------------
diff --git a/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/BeamFnDataWriteRunner.java b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/BeamFnDataWriteRunner.java
new file mode 100644
index 0000000..eec4dfd
--- /dev/null
+++ b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/BeamFnDataWriteRunner.java
@@ -0,0 +1,159 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.beam.fn.harness;
+
+import static com.google.common.collect.Iterables.getOnlyElement;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.google.auto.service.AutoService;
+import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.Multimap;
+import com.google.protobuf.BytesValue;
+import java.io.IOException;
+import java.util.Map;
+import java.util.function.Consumer;
+import java.util.function.Supplier;
+import org.apache.beam.fn.harness.data.BeamFnDataClient;
+import org.apache.beam.fn.harness.fn.CloseableThrowingConsumer;
+import org.apache.beam.fn.harness.fn.ThrowingConsumer;
+import org.apache.beam.fn.harness.fn.ThrowingRunnable;
+import org.apache.beam.fn.v1.BeamFnApi;
+import org.apache.beam.runners.dataflow.util.CloudObject;
+import org.apache.beam.runners.dataflow.util.CloudObjects;
+import org.apache.beam.sdk.coders.Coder;
+import org.apache.beam.sdk.common.runner.v1.RunnerApi;
+import org.apache.beam.sdk.options.PipelineOptions;
+import org.apache.beam.sdk.util.WindowedValue;
+import org.apache.beam.sdk.values.KV;
+
+/**
+ * Registers as a consumer with the Beam Fn Data Api. Consumes elements and encodes them for
+ * transmission.
+ *
+ * <p>Can be re-used serially across {@link org.apache.beam.fn.v1.BeamFnApi.ProcessBundleRequest}s.
+ * For each request, call {@link #registerForOutput()} to start and call {@link #close()} to finish.
+ */
+public class BeamFnDataWriteRunner<InputT> {
+
+  private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
+  private static final String URN = "urn:org.apache.beam:sink:runner:0.1";
+
+  /** A registrar which provides a factory to handle writing to the Fn Api Data Plane. */
+  @AutoService(PTransformRunnerFactory.Registrar.class)
+  public static class Registrar implements
+      PTransformRunnerFactory.Registrar {
+
+    @Override
+    public Map<String, PTransformRunnerFactory> getPTransformRunnerFactories() {
+      return ImmutableMap.of(URN, new Factory());
+    }
+  }
+
+  /** A factory for {@link BeamFnDataWriteRunner}s. */
+  static class Factory<InputT>
+      implements PTransformRunnerFactory<BeamFnDataWriteRunner<InputT>> {
+
+    @Override
+    public BeamFnDataWriteRunner<InputT> createRunnerForPTransform(
+        PipelineOptions pipelineOptions,
+        BeamFnDataClient beamFnDataClient,
+        String pTransformId,
+        RunnerApi.PTransform pTransform,
+        Supplier<String> processBundleInstructionId,
+        Map<String, RunnerApi.PCollection> pCollections,
+        Map<String, RunnerApi.Coder> coders,
+        Multimap<String, ThrowingConsumer<WindowedValue<?>>> pCollectionIdsToConsumers,
+        Consumer<ThrowingRunnable> addStartFunction,
+        Consumer<ThrowingRunnable> addFinishFunction) throws IOException {
+      BeamFnApi.Target target = BeamFnApi.Target.newBuilder()
+          .setPrimitiveTransformReference(pTransformId)
+          .setName(getOnlyElement(pTransform.getInputsMap().keySet()))
+          .build();
+      RunnerApi.Coder coderSpec = coders.get(
+          pCollections.get(getOnlyElement(pTransform.getInputsMap().values())).getCoderId());
+      BeamFnDataWriteRunner<InputT> runner =
+          new BeamFnDataWriteRunner<>(
+              pTransform.getSpec(),
+              processBundleInstructionId,
+              target,
+              coderSpec,
+              beamFnDataClient);
+      addStartFunction.accept(runner::registerForOutput);
+      pCollectionIdsToConsumers.put(
+          getOnlyElement(pTransform.getInputsMap().values()),
+          (ThrowingConsumer)
+              (ThrowingConsumer<WindowedValue<InputT>>) runner::consume);
+      addFinishFunction.accept(runner::close);
+      return runner;
+    }
+  }
+
+  private final BeamFnApi.ApiServiceDescriptor apiServiceDescriptor;
+  private final BeamFnApi.Target outputTarget;
+  private final Coder<WindowedValue<InputT>> coder;
+  private final BeamFnDataClient beamFnDataClientFactory;
+  private final Supplier<String> processBundleInstructionIdSupplier;
+
+  private CloseableThrowingConsumer<WindowedValue<InputT>> consumer;
+
+  BeamFnDataWriteRunner(
+      RunnerApi.FunctionSpec functionSpec,
+      Supplier<String> processBundleInstructionIdSupplier,
+      BeamFnApi.Target outputTarget,
+      RunnerApi.Coder coderSpec,
+      BeamFnDataClient beamFnDataClientFactory)
+          throws IOException {
+    this.apiServiceDescriptor = functionSpec.getParameter().unpack(BeamFnApi.RemoteGrpcPort.class)
+        .getApiServiceDescriptor();
+    this.beamFnDataClientFactory = beamFnDataClientFactory;
+    this.processBundleInstructionIdSupplier = processBundleInstructionIdSupplier;
+    this.outputTarget = outputTarget;
+
+    @SuppressWarnings("unchecked")
+    Coder<WindowedValue<InputT>> coder =
+        (Coder<WindowedValue<InputT>>)
+            CloudObjects.coderFromCloudObject(
+                CloudObject.fromSpec(
+                    OBJECT_MAPPER.readValue(
+                        coderSpec
+                            .getSpec()
+                            .getSpec()
+                            .getParameter()
+                            .unpack(BytesValue.class)
+                            .getValue()
+                            .newInput(),
+                        Map.class)));
+    this.coder = coder;
+  }
+
+  public void registerForOutput() {
+    consumer = beamFnDataClientFactory.forOutboundConsumer(
+        apiServiceDescriptor,
+        KV.of(processBundleInstructionIdSupplier.get(), outputTarget),
+        coder);
+  }
+
+  public void close() throws Exception {
+    consumer.close();
+  }
+
+  public void consume(WindowedValue<InputT> value) throws Exception {
+    consumer.accept(value);
+  }
+}

http://git-wip-us.apache.org/repos/asf/beam/blob/f1b4700f/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/BoundedSourceRunner.java
----------------------------------------------------------------------
diff --git a/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/BoundedSourceRunner.java b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/BoundedSourceRunner.java
new file mode 100644
index 0000000..977e803
--- /dev/null
+++ b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/BoundedSourceRunner.java
@@ -0,0 +1,167 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.beam.fn.harness;
+
+import com.google.auto.service.AutoService;
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.Multimap;
+import com.google.protobuf.BytesValue;
+import com.google.protobuf.InvalidProtocolBufferException;
+import java.io.IOException;
+import java.util.Collection;
+import java.util.Map;
+import java.util.function.Consumer;
+import java.util.function.Supplier;
+import org.apache.beam.fn.harness.data.BeamFnDataClient;
+import org.apache.beam.fn.harness.fn.ThrowingConsumer;
+import org.apache.beam.fn.harness.fn.ThrowingRunnable;
+import org.apache.beam.sdk.common.runner.v1.RunnerApi;
+import org.apache.beam.sdk.io.BoundedSource;
+import org.apache.beam.sdk.io.Source.Reader;
+import org.apache.beam.sdk.options.PipelineOptions;
+import org.apache.beam.sdk.util.SerializableUtils;
+import org.apache.beam.sdk.util.WindowedValue;
+
+/**
+ * A runner which creates {@link Reader}s for each {@link BoundedSource} sent as an input and
+ * executes the {@link Reader}s read loop.
+ */
+public class BoundedSourceRunner<InputT extends BoundedSource<OutputT>, OutputT> {
+
+  private static final String URN = "urn:org.apache.beam:source:java:0.1";
+
+  /** A registrar which provides a factory to handle Java {@link BoundedSource}s. */
+  @AutoService(PTransformRunnerFactory.Registrar.class)
+  public static class Registrar implements
+      PTransformRunnerFactory.Registrar {
+
+    @Override
+    public Map<String, PTransformRunnerFactory> getPTransformRunnerFactories() {
+      return ImmutableMap.of(URN, new Factory());
+    }
+  }
+
+  /** A factory for {@link BoundedSourceRunner}. */
+  static class Factory<InputT extends BoundedSource<OutputT>, OutputT>
+      implements PTransformRunnerFactory<BoundedSourceRunner<InputT, OutputT>> {
+    @Override
+    public BoundedSourceRunner<InputT, OutputT> createRunnerForPTransform(
+        PipelineOptions pipelineOptions,
+        BeamFnDataClient beamFnDataClient,
+        String pTransformId,
+        RunnerApi.PTransform pTransform,
+        Supplier<String> processBundleInstructionId,
+        Map<String, RunnerApi.PCollection> pCollections,
+        Map<String, RunnerApi.Coder> coders,
+        Multimap<String, ThrowingConsumer<WindowedValue<?>>> pCollectionIdsToConsumers,
+        Consumer<ThrowingRunnable> addStartFunction,
+        Consumer<ThrowingRunnable> addFinishFunction) {
+
+      ImmutableList.Builder<ThrowingConsumer<WindowedValue<?>>> consumers = ImmutableList.builder();
+      for (String pCollectionId : pTransform.getOutputsMap().values()) {
+        consumers.addAll(pCollectionIdsToConsumers.get(pCollectionId));
+      }
+
+      @SuppressWarnings({"rawtypes", "unchecked"})
+      BoundedSourceRunner<InputT, OutputT> runner = new BoundedSourceRunner(
+          pipelineOptions,
+          pTransform.getSpec(),
+          consumers.build());
+
+      // TODO: Remove and replace with source being sent across gRPC port
+      addStartFunction.accept(runner::start);
+
+      ThrowingConsumer runReadLoop =
+          (ThrowingConsumer<WindowedValue<InputT>>) runner::runReadLoop;
+      for (String pCollectionId : pTransform.getInputsMap().values()) {
+        pCollectionIdsToConsumers.put(
+            pCollectionId,
+            runReadLoop);
+      }
+
+      return runner;
+    }
+  }
+
+  private final PipelineOptions pipelineOptions;
+  private final RunnerApi.FunctionSpec definition;
+  private final Collection<ThrowingConsumer<WindowedValue<OutputT>>> consumers;
+
+  BoundedSourceRunner(
+      PipelineOptions pipelineOptions,
+      RunnerApi.FunctionSpec definition,
+      Collection<ThrowingConsumer<WindowedValue<OutputT>>> consumers) {
+    this.pipelineOptions = pipelineOptions;
+    this.definition = definition;
+    this.consumers = consumers;
+  }
+
+  /**
+   * The runner harness is meant to send the source over the Beam Fn Data API which would be
+   * consumed by the {@link #runReadLoop}. Drop this method once the runner harness sends the
+   * source instead of unpacking it from the data block of the function specification.
+   */
+  @Deprecated
+  public void start() throws Exception {
+    try {
+      // The representation here is defined as the java serialized representation of the
+      // bounded source object packed into a protobuf Any using a protobuf BytesValue wrapper.
+      byte[] bytes = definition.getParameter().unpack(BytesValue.class).getValue().toByteArray();
+      @SuppressWarnings("unchecked")
+      InputT boundedSource =
+          (InputT) SerializableUtils.deserializeFromByteArray(bytes, definition.toString());
+      runReadLoop(WindowedValue.valueInGlobalWindow(boundedSource));
+    } catch (InvalidProtocolBufferException e) {
+      throw new IOException(
+          String.format("Failed to decode %s, expected %s",
+              definition.getParameter().getTypeUrl(), BytesValue.getDescriptor().getFullName()),
+          e);
+    }
+  }
+
+  /**
+   * Creates a {@link Reader} for each {@link BoundedSource} and executes the {@link Reader}s
+   * read loop. See {@link Reader} for further details of the read loop.
+   *
+   * <p>Propagates any exceptions caused during reading or processing via a consumer to the
+   * caller.
+   */
+  public void runReadLoop(WindowedValue<InputT> value) throws Exception {
+    try (Reader<OutputT> reader = value.getValue().createReader(pipelineOptions)) {
+      if (!reader.start()) {
+        // Reader has no data, immediately return
+        return;
+      }
+      do {
+        // TODO: Should this use the input window as the window for all the outputs?
+        WindowedValue<OutputT> nextValue = WindowedValue.timestampedValueInGlobalWindow(
+            reader.getCurrent(), reader.getCurrentTimestamp());
+        for (ThrowingConsumer<WindowedValue<OutputT>> consumer : consumers) {
+          consumer.accept(nextValue);
+        }
+      } while (reader.advance());
+    }
+  }
+
+  @Override
+  public String toString() {
+    return definition.toString();
+  }
+}

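For orientation only (not part of the commit): a minimal standalone sketch of the same start()/advance()/getCurrent() loop that runReadLoop above drives, exercised here against CountingSource from the Beam Java SDK. The class name is illustrative and assumes the SDK is on the classpath.

    import org.apache.beam.sdk.io.BoundedSource;
    import org.apache.beam.sdk.io.CountingSource;
    import org.apache.beam.sdk.io.Source.Reader;
    import org.apache.beam.sdk.options.PipelineOptions;
    import org.apache.beam.sdk.options.PipelineOptionsFactory;

    public class ReadLoopSketch {
      public static void main(String[] args) throws Exception {
        PipelineOptions options = PipelineOptionsFactory.create();
        BoundedSource<Long> source = CountingSource.upTo(5);
        // Reader is AutoCloseable, so try-with-resources closes it, as in runReadLoop.
        try (Reader<Long> reader = source.createReader(options)) {
          // start() positions the reader on the first element (if any); advance() moves on.
          for (boolean more = reader.start(); more; more = reader.advance()) {
            System.out.println(reader.getCurrent());  // prints 0 through 4
          }
        }
      }
    }
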
http://git-wip-us.apache.org/repos/asf/beam/blob/f1b4700f/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/FnApiDoFnRunner.java
----------------------------------------------------------------------
diff --git a/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/FnApiDoFnRunner.java b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/FnApiDoFnRunner.java
new file mode 100644
index 0000000..97bd71c
--- /dev/null
+++ b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/FnApiDoFnRunner.java
@@ -0,0 +1,548 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.fn.harness;
+
+import static com.google.common.base.Preconditions.checkArgument;
+
+import com.google.auto.service.AutoService;
+import com.google.common.collect.Collections2;
+import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.ImmutableMultimap;
+import com.google.common.collect.Multimap;
+import com.google.protobuf.ByteString;
+import com.google.protobuf.BytesValue;
+import com.google.protobuf.InvalidProtocolBufferException;
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.Objects;
+import java.util.function.Consumer;
+import java.util.function.Supplier;
+import org.apache.beam.fn.harness.data.BeamFnDataClient;
+import org.apache.beam.fn.harness.fn.ThrowingConsumer;
+import org.apache.beam.fn.harness.fn.ThrowingRunnable;
+import org.apache.beam.runners.core.DoFnRunner;
+import org.apache.beam.runners.core.construction.ParDoTranslation;
+import org.apache.beam.runners.dataflow.util.DoFnInfo;
+import org.apache.beam.sdk.common.runner.v1.RunnerApi;
+import org.apache.beam.sdk.options.PipelineOptions;
+import org.apache.beam.sdk.state.State;
+import org.apache.beam.sdk.state.TimeDomain;
+import org.apache.beam.sdk.state.Timer;
+import org.apache.beam.sdk.transforms.DoFn;
+import org.apache.beam.sdk.transforms.DoFn.OnTimerContext;
+import org.apache.beam.sdk.transforms.DoFn.ProcessContext;
+import org.apache.beam.sdk.transforms.reflect.DoFnInvoker;
+import org.apache.beam.sdk.transforms.reflect.DoFnInvokers;
+import org.apache.beam.sdk.transforms.splittabledofn.RestrictionTracker;
+import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
+import org.apache.beam.sdk.transforms.windowing.PaneInfo;
+import org.apache.beam.sdk.util.SerializableUtils;
+import org.apache.beam.sdk.util.UserCodeException;
+import org.apache.beam.sdk.util.WindowedValue;
+import org.apache.beam.sdk.values.PCollectionView;
+import org.apache.beam.sdk.values.TupleTag;
+import org.apache.beam.sdk.values.WindowingStrategy;
+import org.joda.time.Instant;
+
+/**
+ * A {@link DoFnRunner} specific to integrating with the Fn Api. This is to remove the layers
+ * of abstraction caused by StateInternals/TimerInternals since they model state and timer
+ * concepts differently.
+ */
+public class FnApiDoFnRunner<InputT, OutputT> implements DoFnRunner<InputT, OutputT> {
+  /**
+   * A registrar which provides a factory to handle Java {@link DoFn}s.
+   */
+  @AutoService(PTransformRunnerFactory.Registrar.class)
+  public static class Registrar implements
+      PTransformRunnerFactory.Registrar {
+
+    @Override
+    public Map<String, PTransformRunnerFactory> getPTransformRunnerFactories() {
+      return ImmutableMap.of(ParDoTranslation.CUSTOM_JAVA_DO_FN_URN, new Factory());
+    }
+  }
+
+  /** A factory for {@link FnApiDoFnRunner}. */
+  static class Factory<InputT, OutputT>
+      implements PTransformRunnerFactory<DoFnRunner<InputT, OutputT>> {
+
+    @Override
+    public DoFnRunner<InputT, OutputT> createRunnerForPTransform(
+        PipelineOptions pipelineOptions,
+        BeamFnDataClient beamFnDataClient,
+        String pTransformId,
+        RunnerApi.PTransform pTransform,
+        Supplier<String> processBundleInstructionId,
+        Map<String, RunnerApi.PCollection> pCollections,
+        Map<String, RunnerApi.Coder> coders,
+        Multimap<String, ThrowingConsumer<WindowedValue<?>>> pCollectionIdsToConsumers,
+        Consumer<ThrowingRunnable> addStartFunction,
+        Consumer<ThrowingRunnable> addFinishFunction) {
+
+      // For every output PCollection, create a map from output name to Consumer
+      ImmutableMap.Builder<String, Collection<ThrowingConsumer<WindowedValue<?>>>>
+          outputMapBuilder = ImmutableMap.builder();
+      for (Map.Entry<String, String> entry : pTransform.getOutputsMap().entrySet()) {
+        outputMapBuilder.put(
+            entry.getKey(),
+            pCollectionIdsToConsumers.get(entry.getValue()));
+      }
+      ImmutableMap<String, Collection<ThrowingConsumer<WindowedValue<?>>>> outputMap =
+          outputMapBuilder.build();
+
+      // Get the DoFnInfo from the serialized blob.
+      ByteString serializedFn;
+      try {
+        serializedFn = pTransform.getSpec().getParameter().unpack(BytesValue.class).getValue();
+      } catch (InvalidProtocolBufferException e) {
+        throw new IllegalArgumentException(
+            String.format("Unable to unwrap DoFn %s", pTransform.getSpec()), e);
+      }
+      @SuppressWarnings({"unchecked", "rawtypes"})
+      DoFnInfo<InputT, OutputT> doFnInfo = (DoFnInfo) SerializableUtils.deserializeFromByteArray(
+          serializedFn.toByteArray(), "DoFnInfo");
+
+      // Verify that the DoFnInfo tag to output map matches the output map on the PTransform.
+      checkArgument(
+          Objects.equals(
+              new HashSet<>(Collections2.transform(outputMap.keySet(), Long::parseLong)),
+              doFnInfo.getOutputMap().keySet()),
+          "Unexpected mismatch between transform output map %s and DoFnInfo output map %s.",
+          outputMap.keySet(),
+          doFnInfo.getOutputMap());
+
+      ImmutableMultimap.Builder<TupleTag<?>,
+          ThrowingConsumer<WindowedValue<?>>> tagToOutputMapBuilder =
+          ImmutableMultimap.builder();
+      for (Map.Entry<Long, TupleTag<?>> entry : doFnInfo.getOutputMap().entrySet()) {
+        @SuppressWarnings({"unchecked", "rawtypes"})
+        Collection<ThrowingConsumer<WindowedValue<?>>> consumers =
+            outputMap.get(Long.toString(entry.getKey()));
+        tagToOutputMapBuilder.putAll(entry.getValue(), consumers);
+      }
+
+      ImmutableMultimap<TupleTag<?>, ThrowingConsumer<WindowedValue<?>>> tagToOutputMap =
+          tagToOutputMapBuilder.build();
+
+      @SuppressWarnings({"unchecked", "rawtypes"})
+      DoFnRunner<InputT, OutputT> runner = new FnApiDoFnRunner<>(
+          pipelineOptions,
+          doFnInfo.getDoFn(),
+          (Collection<ThrowingConsumer<WindowedValue<OutputT>>>) (Collection)
+              tagToOutputMap.get(doFnInfo.getOutputMap().get(doFnInfo.getMainOutput())),
+          tagToOutputMap,
+          doFnInfo.getWindowingStrategy());
+
+      // Register the appropriate handlers.
+      addStartFunction.accept(runner::startBundle);
+      for (String pcollectionId : pTransform.getInputsMap().values()) {
+        pCollectionIdsToConsumers.put(
+            pcollectionId,
+            (ThrowingConsumer) (ThrowingConsumer<WindowedValue<InputT>>) runner::processElement);
+      }
+      addFinishFunction.accept(runner::finishBundle);
+      return runner;
+    }
+  }
+
+  //////////////////////////////////////////////////////////////////////////////////////////////////
+
+  private final PipelineOptions pipelineOptions;
+  private final DoFn<InputT, OutputT> doFn;
+  private final Collection<ThrowingConsumer<WindowedValue<OutputT>>> mainOutputConsumers;
+  private final Multimap<TupleTag<?>, ThrowingConsumer<WindowedValue<?>>> outputMap;
+  private final DoFnInvoker<InputT, OutputT> doFnInvoker;
+  private final StartBundleContext startBundleContext;
+  private final ProcessBundleContext processBundleContext;
+  private final FinishBundleContext finishBundleContext;
+
+  /**
+   * The lifetime of this member is only valid during {@link #processElement(WindowedValue)}.
+   */
+  private WindowedValue<InputT> currentElement;
+
+  /**
+   * The lifetime of this member is only valid during {@link #processElement(WindowedValue)}.
+   */
+  private BoundedWindow currentWindow;
+
+  FnApiDoFnRunner(
+      PipelineOptions pipelineOptions,
+      DoFn<InputT, OutputT> doFn,
+      Collection<ThrowingConsumer<WindowedValue<OutputT>>> mainOutputConsumers,
+      Multimap<TupleTag<?>, ThrowingConsumer<WindowedValue<?>>> outputMap,
+      WindowingStrategy windowingStrategy) {
+    this.pipelineOptions = pipelineOptions;
+    this.doFn = doFn;
+    this.mainOutputConsumers = mainOutputConsumers;
+    this.outputMap = outputMap;
+    this.doFnInvoker = DoFnInvokers.invokerFor(doFn);
+    this.startBundleContext = new StartBundleContext();
+    this.processBundleContext = new ProcessBundleContext();
+    this.finishBundleContext = new FinishBundleContext();
+  }
+
+  @Override
+  public void startBundle() {
+    doFnInvoker.invokeStartBundle(startBundleContext);
+  }
+
+  @Override
+  public void processElement(WindowedValue<InputT> elem) {
+    currentElement = elem;
+    try {
+      Iterator<BoundedWindow> windowIterator =
+          (Iterator<BoundedWindow>) elem.getWindows().iterator();
+      while (windowIterator.hasNext()) {
+        currentWindow = windowIterator.next();
+        doFnInvoker.invokeProcessElement(processBundleContext);
+      }
+    } finally {
+      currentElement = null;
+      currentWindow = null;
+    }
+  }
+
+  @Override
+  public void onTimer(
+      String timerId,
+      BoundedWindow window,
+      Instant timestamp,
+      TimeDomain timeDomain) {
+    throw new UnsupportedOperationException("TODO: Add support for timers");
+  }
+
+  @Override
+  public void finishBundle() {
+    doFnInvoker.invokeFinishBundle(finishBundleContext);
+  }
+
+  /**
+   * Outputs the given element to the specified set of consumers wrapping any exceptions.
+   */
+  private <T> void outputTo(
+      Collection<ThrowingConsumer<WindowedValue<T>>> consumers,
+      WindowedValue<T> output) {
+    Iterator<ThrowingConsumer<WindowedValue<T>>> consumerIterator;
+    try {
+      for (ThrowingConsumer<WindowedValue<T>> consumer : consumers) {
+        consumer.accept(output);
+      }
+    } catch (Throwable t) {
+      throw UserCodeException.wrap(t);
+    }
+  }
+
+  /**
+   * Provides arguments for a {@link DoFnInvoker} for {@link DoFn.StartBundle @StartBundle}.
+   */
+  private class StartBundleContext
+      extends DoFn<InputT, OutputT>.StartBundleContext
+      implements DoFnInvoker.ArgumentProvider<InputT, OutputT> {
+
+    private StartBundleContext() {
+      doFn.super();
+    }
+
+    @Override
+    public PipelineOptions getPipelineOptions() {
+      return pipelineOptions;
+    }
+
+    @Override
+    public PipelineOptions pipelineOptions() {
+      return pipelineOptions;
+    }
+
+    @Override
+    public BoundedWindow window() {
+      throw new UnsupportedOperationException(
+          "Cannot access window outside of @ProcessElement and @OnTimer methods.");
+    }
+
+    @Override
+    public DoFn<InputT, OutputT>.StartBundleContext startBundleContext(
+        DoFn<InputT, OutputT> doFn) {
+      return this;
+    }
+
+    @Override
+    public DoFn<InputT, OutputT>.FinishBundleContext finishBundleContext(
+        DoFn<InputT, OutputT> doFn) {
+      throw new UnsupportedOperationException(
+          "Cannot access FinishBundleContext outside of @FinishBundle method.");
+    }
+
+    @Override
+    public DoFn<InputT, OutputT>.ProcessContext processContext(DoFn<InputT, OutputT> doFn) {
+      throw new UnsupportedOperationException(
+          "Cannot access ProcessContext outside of @ProcessElement method.");
+    }
+
+    @Override
+    public DoFn<InputT, OutputT>.OnTimerContext onTimerContext(DoFn<InputT, OutputT> doFn) {
+      throw new UnsupportedOperationException(
+          "Cannot access OnTimerContext outside of @OnTimer methods.");
+    }
+
+    @Override
+    public RestrictionTracker<?> restrictionTracker() {
+      throw new UnsupportedOperationException(
+          "Cannot access RestrictionTracker outside of @ProcessElement method.");
+    }
+
+    @Override
+    public State state(String stateId) {
+      throw new UnsupportedOperationException(
+          "Cannot access state outside of @ProcessElement and @OnTimer methods.");
+    }
+
+    @Override
+    public Timer timer(String timerId) {
+      throw new UnsupportedOperationException(
+          "Cannot access timers outside of @ProcessElement and @OnTimer methods.");
+    }
+  }
+
+  /**
+   * Provides arguments for a {@link DoFnInvoker} for {@link DoFn.ProcessElement @ProcessElement}.
+   */
+  private class ProcessBundleContext
+      extends DoFn<InputT, OutputT>.ProcessContext
+      implements DoFnInvoker.ArgumentProvider<InputT, OutputT> {
+
+    private ProcessBundleContext() {
+      doFn.super();
+    }
+
+    @Override
+    public BoundedWindow window() {
+      return currentWindow;
+    }
+
+    @Override
+    public DoFn.StartBundleContext startBundleContext(DoFn<InputT, OutputT> doFn) {
+      throw new UnsupportedOperationException(
+          "Cannot access StartBundleContext outside of @StartBundle method.");
+    }
+
+    @Override
+    public DoFn.FinishBundleContext finishBundleContext(DoFn<InputT, OutputT> doFn) {
+      throw new UnsupportedOperationException(
+          "Cannot access FinishBundleContext outside of @FinishBundle method.");
+    }
+
+    @Override
+    public ProcessContext processContext(DoFn<InputT, OutputT> doFn) {
+      return this;
+    }
+
+    @Override
+    public OnTimerContext onTimerContext(DoFn<InputT, OutputT> doFn) {
+      throw new UnsupportedOperationException("TODO: Add support for timers");
+    }
+
+    @Override
+    public RestrictionTracker<?> restrictionTracker() {
+      throw new UnsupportedOperationException("TODO: Add support for SplittableDoFn");
+    }
+
+    @Override
+    public State state(String stateId) {
+      throw new UnsupportedOperationException("TODO: Add support for state");
+    }
+
+    @Override
+    public Timer timer(String timerId) {
+      throw new UnsupportedOperationException("TODO: Add support for timers");
+    }
+
+    @Override
+    public PipelineOptions getPipelineOptions() {
+      return pipelineOptions;
+    }
+
+    @Override
+    public PipelineOptions pipelineOptions() {
+      return pipelineOptions;
+    }
+
+    @Override
+    public void output(OutputT output) {
+      outputTo(mainOutputConsumers,
+          WindowedValue.of(
+              output,
+              currentElement.getTimestamp(),
+              currentWindow,
+              currentElement.getPane()));
+    }
+
+    @Override
+    public void outputWithTimestamp(OutputT output, Instant timestamp) {
+      outputTo(mainOutputConsumers,
+          WindowedValue.of(
+              output,
+              timestamp,
+              currentWindow,
+              currentElement.getPane()));
+    }
+
+    @Override
+    public <T> void output(TupleTag<T> tag, T output) {
+      Collection<ThrowingConsumer<WindowedValue<T>>> consumers = (Collection) outputMap.get(tag);
+      if (consumers == null) {
+        throw new IllegalArgumentException(String.format("Unknown output tag %s", tag));
+      }
+      outputTo(consumers,
+          WindowedValue.of(
+              output,
+              currentElement.getTimestamp(),
+              currentWindow,
+              currentElement.getPane()));
+    }
+
+    @Override
+    public <T> void outputWithTimestamp(TupleTag<T> tag, T output, Instant timestamp) {
+      Collection<ThrowingConsumer<WindowedValue<T>>> consumers = (Collection) outputMap.get(tag);
+      if (consumers == null) {
+        throw new IllegalArgumentException(String.format("Unknown output tag %s", tag));
+      }
+      outputTo(consumers,
+          WindowedValue.of(
+              output,
+              timestamp,
+              currentWindow,
+              currentElement.getPane()));
+    }
+
+    @Override
+    public InputT element() {
+      return currentElement.getValue();
+    }
+
+    @Override
+    public <T> T sideInput(PCollectionView<T> view) {
+      throw new UnsupportedOperationException("TODO: Support side inputs");
+    }
+
+    @Override
+    public Instant timestamp() {
+      return currentElement.getTimestamp();
+    }
+
+    @Override
+    public PaneInfo pane() {
+      return currentElement.getPane();
+    }
+
+    @Override
+    public void updateWatermark(Instant watermark) {
+      throw new UnsupportedOperationException("TODO: Add support for SplittableDoFn");
+    }
+  }
+
+  /**
+   * Provides arguments for a {@link DoFnInvoker} for {@link DoFn.FinishBundle @FinishBundle}.
+   */
+  private class FinishBundleContext
+      extends DoFn<InputT, OutputT>.FinishBundleContext
+      implements DoFnInvoker.ArgumentProvider<InputT, OutputT> {
+
+    private FinishBundleContext() {
+      doFn.super();
+    }
+
+    @Override
+    public PipelineOptions getPipelineOptions() {
+      return pipelineOptions;
+    }
+
+    @Override
+    public PipelineOptions pipelineOptions() {
+      return pipelineOptions;
+    }
+
+    @Override
+    public BoundedWindow window() {
+      throw new UnsupportedOperationException(
+          "Cannot access window outside of @ProcessElement and @OnTimer methods.");
+    }
+
+    @Override
+    public DoFn<InputT, OutputT>.StartBundleContext startBundleContext(
+        DoFn<InputT, OutputT> doFn) {
+      throw new UnsupportedOperationException(
+          "Cannot access StartBundleContext outside of @StartBundle method.");
+    }
+
+    @Override
+    public DoFn<InputT, OutputT>.FinishBundleContext finishBundleContext(
+        DoFn<InputT, OutputT> doFn) {
+      return this;
+    }
+
+    @Override
+    public DoFn<InputT, OutputT>.ProcessContext processContext(DoFn<InputT, OutputT> doFn) {
+      throw new UnsupportedOperationException(
+          "Cannot access ProcessContext outside of @ProcessElement method.");
+    }
+
+    @Override
+    public DoFn<InputT, OutputT>.OnTimerContext onTimerContext(DoFn<InputT, OutputT> doFn) {
+      throw new UnsupportedOperationException(
+          "Cannot access OnTimerContext outside of @OnTimer methods.");
+    }
+
+    @Override
+    public RestrictionTracker<?> restrictionTracker() {
+      throw new UnsupportedOperationException(
+          "Cannot access RestrictionTracker outside of @ProcessElement method.");
+    }
+
+    @Override
+    public State state(String stateId) {
+      throw new UnsupportedOperationException(
+          "Cannot access state outside of @ProcessElement and @OnTimer methods.");
+    }
+
+    @Override
+    public Timer timer(String timerId) {
+      throw new UnsupportedOperationException(
+          "Cannot access timers outside of @ProcessElement and @OnTimer methods.");
+    }
+
+    @Override
+    public void output(OutputT output, Instant timestamp, BoundedWindow window) {
+      outputTo(mainOutputConsumers,
+          WindowedValue.of(output, timestamp, window, PaneInfo.NO_FIRING));
+    }
+
+    @Override
+    public <T> void output(TupleTag<T> tag, T output, Instant timestamp, BoundedWindow window) {
+      Collection<ThrowingConsumer<WindowedValue<T>>> consumers = (Collection) outputMap.get(tag);
+      if (consumers == null) {
+        throw new IllegalArgumentException(String.format("Unknown output tag %s", tag));
+      }
+      outputTo(consumers,
+          WindowedValue.of(output, timestamp, window, PaneInfo.NO_FIRING));
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/beam/blob/f1b4700f/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/PTransformRunnerFactory.java
----------------------------------------------------------------------
diff --git a/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/PTransformRunnerFactory.java b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/PTransformRunnerFactory.java
new file mode 100644
index 0000000..7cf0610
--- /dev/null
+++ b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/PTransformRunnerFactory.java
@@ -0,0 +1,81 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.fn.harness;
+
+import com.google.common.collect.Multimap;
+import java.io.IOException;
+import java.util.Map;
+import java.util.function.Consumer;
+import java.util.function.Supplier;
+import org.apache.beam.fn.harness.data.BeamFnDataClient;
+import org.apache.beam.fn.harness.fn.ThrowingConsumer;
+import org.apache.beam.fn.harness.fn.ThrowingRunnable;
+import org.apache.beam.sdk.common.runner.v1.RunnerApi;
+import org.apache.beam.sdk.options.PipelineOptions;
+import org.apache.beam.sdk.util.WindowedValue;
+
+/**
+ * A factory able to instantiate an appropriate handler for a given PTransform.
+ */
+public interface PTransformRunnerFactory<T> {
+
+  /**
+   * Creates and returns a handler for a given PTransform. Note that the handler must support
+   * processing multiple bundles. The handler will be discarded if an error is thrown during
+   * element processing, or during execution of start/finish.
+   *
+   * @param pipelineOptions Pipeline options
+   * @param beamFnDataClient
+   * @param pTransformId The id of the PTransform.
+   * @param pTransform The PTransform definition.
+   * @param processBundleInstructionId A supplier containing the active process bundle instruction
+   * id.
+   * @param pCollections A mapping from PCollection id to PCollection definition.
+   * @param coders A mapping from coder id to coder definition.
+   * @param pCollectionIdsToConsumers A mapping from PCollection id to a collection of consumers.
+   * Note that if this handler is a consumer, it should register itself within this multimap under
+   * the appropriate PCollection ids. Also note that all output consumers needed by this PTransform
+   * (based on the values of {@link RunnerApi.PTransform#getOutputsMap()}) will already have been
+   * registered within this multimap.
+   * @param addStartFunction A consumer to register a start bundle handler with.
+   * @param addFinishFunction A consumer to register a finish bundle handler with.
+   */
+  T createRunnerForPTransform(
+      PipelineOptions pipelineOptions,
+      BeamFnDataClient beamFnDataClient,
+      String pTransformId,
+      RunnerApi.PTransform pTransform,
+      Supplier<String> processBundleInstructionId,
+      Map<String, RunnerApi.PCollection> pCollections,
+      Map<String, RunnerApi.Coder> coders,
+      Multimap<String, ThrowingConsumer<WindowedValue<?>>> pCollectionIdsToConsumers,
+      Consumer<ThrowingRunnable> addStartFunction,
+      Consumer<ThrowingRunnable> addFinishFunction) throws IOException;
+
+  /**
+   * A registrar which can return a mapping from {@link RunnerApi.FunctionSpec#getUrn()} to
+   * a factory capable of instantiating an appropriate handler.
+   */
+  interface Registrar {
+    /**
+     * Returns a mapping from {@link RunnerApi.FunctionSpec#getUrn()} to a factory capable of
+     * instantiating an appropriate handler.
+     */
+    Map<String, PTransformRunnerFactory> getPTransformRunnerFactories();
+  }
+}
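
For readers new to this interface, here is a minimal sketch of how a factory and registrar could be wired together. The URN "urn:example:noop:0.1", the NoOpRunner class, and its forward-everything behaviour are illustrative assumptions, not code from this commit; the shape simply mirrors the Registrar/Factory pairs of the runners shown further down in this email.

import com.google.auto.service.AutoService;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Multimap;
import java.util.Collection;
import java.util.Map;
import java.util.function.Consumer;
import java.util.function.Supplier;
import org.apache.beam.fn.harness.PTransformRunnerFactory;
import org.apache.beam.fn.harness.data.BeamFnDataClient;
import org.apache.beam.fn.harness.fn.ThrowingConsumer;
import org.apache.beam.fn.harness.fn.ThrowingRunnable;
import org.apache.beam.sdk.common.runner.v1.RunnerApi;
import org.apache.beam.sdk.options.PipelineOptions;
import org.apache.beam.sdk.util.WindowedValue;

/** Hypothetical runner that forwards every input element unchanged to its output consumers. */
public class NoOpRunner {

  // Assumed URN for illustration only; not a URN defined by Beam.
  private static final String URN = "urn:example:noop:0.1";

  /** Advertises the factory so the harness can discover it via {@code @AutoService}. */
  @AutoService(PTransformRunnerFactory.Registrar.class)
  public static class Registrar implements PTransformRunnerFactory.Registrar {
    @Override
    public Map<String, PTransformRunnerFactory> getPTransformRunnerFactories() {
      return ImmutableMap.of(URN, new Factory());
    }
  }

  /** Builds a runner and hooks it into the consumer multimap supplied by the harness. */
  static class Factory implements PTransformRunnerFactory<NoOpRunner> {
    @Override
    public NoOpRunner createRunnerForPTransform(
        PipelineOptions pipelineOptions,
        BeamFnDataClient beamFnDataClient,
        String pTransformId,
        RunnerApi.PTransform pTransform,
        Supplier<String> processBundleInstructionId,
        Map<String, RunnerApi.PCollection> pCollections,
        Map<String, RunnerApi.Coder> coders,
        Multimap<String, ThrowingConsumer<WindowedValue<?>>> pCollectionIdsToConsumers,
        Consumer<ThrowingRunnable> addStartFunction,
        Consumer<ThrowingRunnable> addFinishFunction) {
      // The harness has already registered the consumers of this transform's output PCollections.
      ImmutableList.Builder<ThrowingConsumer<WindowedValue<?>>> outputs = ImmutableList.builder();
      for (String outputPCollectionId : pTransform.getOutputsMap().values()) {
        outputs.addAll(pCollectionIdsToConsumers.get(outputPCollectionId));
      }
      NoOpRunner runner = new NoOpRunner(outputs.build());
      // Register the runner as a consumer of each of its input PCollections.
      for (String inputPCollectionId : pTransform.getInputsMap().values()) {
        pCollectionIdsToConsumers.put(inputPCollectionId, runner::forward);
      }
      return runner;
    }
  }

  private final Collection<ThrowingConsumer<WindowedValue<?>>> outputs;

  private NoOpRunner(Collection<ThrowingConsumer<WindowedValue<?>>> outputs) {
    this.outputs = outputs;
  }

  /** Forwards a single element, unchanged, to every downstream consumer. */
  private void forward(WindowedValue<?> element) throws Exception {
    for (ThrowingConsumer<WindowedValue<?>> consumer : outputs) {
      consumer.accept(element);
    }
  }
}

ProcessBundleHandler (whose imports change in the next hunk) is the component that consumes these registrars; the data-plane and source runners removed below follow the same Registrar/Factory shape.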

http://git-wip-us.apache.org/repos/asf/beam/blob/f1b4700f/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/control/ProcessBundleHandler.java
----------------------------------------------------------------------
diff --git a/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/control/ProcessBundleHandler.java b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/control/ProcessBundleHandler.java
index 2a9cef8..1e73570 100644
--- a/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/control/ProcessBundleHandler.java
+++ b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/control/ProcessBundleHandler.java
@@ -34,12 +34,12 @@ import java.util.Set;
 import java.util.function.Consumer;
 import java.util.function.Function;
 import java.util.function.Supplier;
+import org.apache.beam.fn.harness.PTransformRunnerFactory;
+import org.apache.beam.fn.harness.PTransformRunnerFactory.Registrar;
 import org.apache.beam.fn.harness.data.BeamFnDataClient;
 import org.apache.beam.fn.harness.fn.ThrowingConsumer;
 import org.apache.beam.fn.harness.fn.ThrowingRunnable;
 import org.apache.beam.fn.v1.BeamFnApi;
-import org.apache.beam.runners.core.PTransformRunnerFactory;
-import org.apache.beam.runners.core.PTransformRunnerFactory.Registrar;
 import org.apache.beam.sdk.common.runner.v1.RunnerApi;
 import org.apache.beam.sdk.options.PipelineOptions;
 import org.apache.beam.sdk.util.WindowedValue;

http://git-wip-us.apache.org/repos/asf/beam/blob/f1b4700f/sdks/java/harness/src/main/java/org/apache/beam/runners/core/BeamFnDataReadRunner.java
----------------------------------------------------------------------
diff --git a/sdks/java/harness/src/main/java/org/apache/beam/runners/core/BeamFnDataReadRunner.java b/sdks/java/harness/src/main/java/org/apache/beam/runners/core/BeamFnDataReadRunner.java
deleted file mode 100644
index 9339347..0000000
--- a/sdks/java/harness/src/main/java/org/apache/beam/runners/core/BeamFnDataReadRunner.java
+++ /dev/null
@@ -1,173 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.beam.runners.core;
-
-import static com.google.common.collect.Iterables.getOnlyElement;
-
-import com.fasterxml.jackson.databind.ObjectMapper;
-import com.google.auto.service.AutoService;
-import com.google.common.collect.ImmutableMap;
-import com.google.common.collect.Multimap;
-import com.google.protobuf.BytesValue;
-import java.io.IOException;
-import java.util.Collection;
-import java.util.Map;
-import java.util.concurrent.CompletableFuture;
-import java.util.function.Consumer;
-import java.util.function.Supplier;
-import org.apache.beam.fn.harness.data.BeamFnDataClient;
-import org.apache.beam.fn.harness.fn.ThrowingConsumer;
-import org.apache.beam.fn.harness.fn.ThrowingRunnable;
-import org.apache.beam.fn.v1.BeamFnApi;
-import org.apache.beam.runners.dataflow.util.CloudObject;
-import org.apache.beam.runners.dataflow.util.CloudObjects;
-import org.apache.beam.sdk.coders.Coder;
-import org.apache.beam.sdk.common.runner.v1.RunnerApi;
-import org.apache.beam.sdk.options.PipelineOptions;
-import org.apache.beam.sdk.util.WindowedValue;
-import org.apache.beam.sdk.values.KV;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-/**
- * Registers as a consumer for data over the Beam Fn API. Multiplexes any received data
- * to all consumers in the specified output map.
- *
- * <p>Can be re-used serially across {@link org.apache.beam.fn.v1.BeamFnApi.ProcessBundleRequest}s.
- * For each request, call {@link #registerInputLocation()} to start and call
- * {@link #blockTillReadFinishes()} to finish.
- */
-public class BeamFnDataReadRunner<OutputT> {
-
-  private static final Logger LOG = LoggerFactory.getLogger(BeamFnDataReadRunner.class);
-  private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
-  private static final String URN = "urn:org.apache.beam:source:runner:0.1";
-
-  /** A registrar which provides a factory to handle reading from the Fn Api Data Plane. */
-  @AutoService(PTransformRunnerFactory.Registrar.class)
-  public static class Registrar implements
-      PTransformRunnerFactory.Registrar {
-
-    @Override
-    public Map<String, PTransformRunnerFactory> getPTransformRunnerFactories() {
-      return ImmutableMap.of(URN, new Factory());
-    }
-  }
-
-  /** A factory for {@link BeamFnDataReadRunner}s. */
-  static class Factory<OutputT>
-      implements PTransformRunnerFactory<BeamFnDataReadRunner<OutputT>> {
-
-    @Override
-    public BeamFnDataReadRunner<OutputT> createRunnerForPTransform(
-        PipelineOptions pipelineOptions,
-        BeamFnDataClient beamFnDataClient,
-        String pTransformId,
-        RunnerApi.PTransform pTransform,
-        Supplier<String> processBundleInstructionId,
-        Map<String, RunnerApi.PCollection> pCollections,
-        Map<String, RunnerApi.Coder> coders,
-        Multimap<String, ThrowingConsumer<WindowedValue<?>>> pCollectionIdsToConsumers,
-        Consumer<ThrowingRunnable> addStartFunction,
-        Consumer<ThrowingRunnable> addFinishFunction) throws IOException {
-
-      BeamFnApi.Target target = BeamFnApi.Target.newBuilder()
-          .setPrimitiveTransformReference(pTransformId)
-          .setName(getOnlyElement(pTransform.getOutputsMap().keySet()))
-          .build();
-      RunnerApi.Coder coderSpec = coders.get(pCollections.get(
-          getOnlyElement(pTransform.getOutputsMap().values())).getCoderId());
-      Collection<ThrowingConsumer<WindowedValue<OutputT>>> consumers =
-          (Collection) pCollectionIdsToConsumers.get(
-              getOnlyElement(pTransform.getOutputsMap().values()));
-
-      BeamFnDataReadRunner<OutputT> runner = new BeamFnDataReadRunner<>(
-          pTransform.getSpec(),
-          processBundleInstructionId,
-          target,
-          coderSpec,
-          beamFnDataClient,
-          consumers);
-      addStartFunction.accept(runner::registerInputLocation);
-      addFinishFunction.accept(runner::blockTillReadFinishes);
-      return runner;
-    }
-  }
-
-  private final BeamFnApi.ApiServiceDescriptor apiServiceDescriptor;
-  private final Collection<ThrowingConsumer<WindowedValue<OutputT>>> consumers;
-  private final Supplier<String> processBundleInstructionIdSupplier;
-  private final BeamFnDataClient beamFnDataClientFactory;
-  private final Coder<WindowedValue<OutputT>> coder;
-  private final BeamFnApi.Target inputTarget;
-
-  private CompletableFuture<Void> readFuture;
-
-  BeamFnDataReadRunner(
-      RunnerApi.FunctionSpec functionSpec,
-      Supplier<String> processBundleInstructionIdSupplier,
-      BeamFnApi.Target inputTarget,
-      RunnerApi.Coder coderSpec,
-      BeamFnDataClient beamFnDataClientFactory,
-      Collection<ThrowingConsumer<WindowedValue<OutputT>>> consumers)
-          throws IOException {
-    this.apiServiceDescriptor = functionSpec.getParameter().unpack(BeamFnApi.RemoteGrpcPort.class)
-        .getApiServiceDescriptor();
-    this.inputTarget = inputTarget;
-    this.processBundleInstructionIdSupplier = processBundleInstructionIdSupplier;
-    this.beamFnDataClientFactory = beamFnDataClientFactory;
-    this.consumers = consumers;
-
-    @SuppressWarnings("unchecked")
-    Coder<WindowedValue<OutputT>> coder =
-        (Coder<WindowedValue<OutputT>>)
-            CloudObjects.coderFromCloudObject(
-                CloudObject.fromSpec(
-                    OBJECT_MAPPER.readValue(
-                        coderSpec
-                            .getSpec()
-                            .getSpec()
-                            .getParameter()
-                            .unpack(BytesValue.class)
-                            .getValue()
-                            .newInput(),
-                        Map.class)));
-    this.coder = coder;
-  }
-
-  public void registerInputLocation() {
-    this.readFuture = beamFnDataClientFactory.forInboundConsumer(
-        apiServiceDescriptor,
-        KV.of(processBundleInstructionIdSupplier.get(), inputTarget),
-        coder,
-        this::multiplexToConsumers);
-  }
-
-  public void blockTillReadFinishes() throws Exception {
-    LOG.debug("Waiting for process bundle instruction {} and target {} to close.",
-        processBundleInstructionIdSupplier.get(), inputTarget);
-    readFuture.get();
-  }
-
-  private void multiplexToConsumers(WindowedValue<OutputT> value) throws Exception {
-    for (ThrowingConsumer<WindowedValue<OutputT>> consumer : consumers) {
-      consumer.accept(value);
-    }
-  }
-}
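
As a reading aid, the per-bundle lifecycle this removed class documents (registerInputLocation() at bundle start, blockTillReadFinishes() at bundle finish) can be sketched as below. The driver method is hypothetical, and the import uses the pre-move package name as it appears in this deleted file.

import org.apache.beam.runners.core.BeamFnDataReadRunner;

/** Hypothetical driver illustrating the per-bundle lifecycle described in the javadoc above. */
class ReadRunnerLifecycleSketch {
  static void runOneBundle(BeamFnDataReadRunner<?> reader) throws Exception {
    reader.registerInputLocation();  // bundle start: begin receiving data for the current instruction id
    // Elements arriving over the data plane are multiplexed to the registered consumers asynchronously.
    reader.blockTillReadFinishes();  // bundle finish: block until the inbound stream for this bundle closes
  }
}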

http://git-wip-us.apache.org/repos/asf/beam/blob/f1b4700f/sdks/java/harness/src/main/java/org/apache/beam/runners/core/BeamFnDataWriteRunner.java
----------------------------------------------------------------------
diff --git a/sdks/java/harness/src/main/java/org/apache/beam/runners/core/BeamFnDataWriteRunner.java b/sdks/java/harness/src/main/java/org/apache/beam/runners/core/BeamFnDataWriteRunner.java
deleted file mode 100644
index c2a996b..0000000
--- a/sdks/java/harness/src/main/java/org/apache/beam/runners/core/BeamFnDataWriteRunner.java
+++ /dev/null
@@ -1,159 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.beam.runners.core;
-
-import static com.google.common.collect.Iterables.getOnlyElement;
-
-import com.fasterxml.jackson.databind.ObjectMapper;
-import com.google.auto.service.AutoService;
-import com.google.common.collect.ImmutableMap;
-import com.google.common.collect.Multimap;
-import com.google.protobuf.BytesValue;
-import java.io.IOException;
-import java.util.Map;
-import java.util.function.Consumer;
-import java.util.function.Supplier;
-import org.apache.beam.fn.harness.data.BeamFnDataClient;
-import org.apache.beam.fn.harness.fn.CloseableThrowingConsumer;
-import org.apache.beam.fn.harness.fn.ThrowingConsumer;
-import org.apache.beam.fn.harness.fn.ThrowingRunnable;
-import org.apache.beam.fn.v1.BeamFnApi;
-import org.apache.beam.runners.dataflow.util.CloudObject;
-import org.apache.beam.runners.dataflow.util.CloudObjects;
-import org.apache.beam.sdk.coders.Coder;
-import org.apache.beam.sdk.common.runner.v1.RunnerApi;
-import org.apache.beam.sdk.options.PipelineOptions;
-import org.apache.beam.sdk.util.WindowedValue;
-import org.apache.beam.sdk.values.KV;
-
-/**
- * Registers as a consumer with the Beam Fn Data Api. Consumes elements and encodes them for
- * transmission.
- *
- * <p>Can be re-used serially across {@link org.apache.beam.fn.v1.BeamFnApi.ProcessBundleRequest}s.
- * For each request, call {@link #registerForOutput()} to start and call {@link #close()} to finish.
- */
-public class BeamFnDataWriteRunner<InputT> {
-
-  private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
-  private static final String URN = "urn:org.apache.beam:sink:runner:0.1";
-
-  /** A registrar which provides a factory to handle writing to the Fn Api Data Plane. */
-  @AutoService(PTransformRunnerFactory.Registrar.class)
-  public static class Registrar implements
-      PTransformRunnerFactory.Registrar {
-
-    @Override
-    public Map<String, PTransformRunnerFactory> getPTransformRunnerFactories() {
-      return ImmutableMap.of(URN, new Factory());
-    }
-  }
-
-  /** A factory for {@link BeamFnDataWriteRunner}s. */
-  static class Factory<InputT>
-      implements PTransformRunnerFactory<BeamFnDataWriteRunner<InputT>> {
-
-    @Override
-    public BeamFnDataWriteRunner<InputT> createRunnerForPTransform(
-        PipelineOptions pipelineOptions,
-        BeamFnDataClient beamFnDataClient,
-        String pTransformId,
-        RunnerApi.PTransform pTransform,
-        Supplier<String> processBundleInstructionId,
-        Map<String, RunnerApi.PCollection> pCollections,
-        Map<String, RunnerApi.Coder> coders,
-        Multimap<String, ThrowingConsumer<WindowedValue<?>>> pCollectionIdsToConsumers,
-        Consumer<ThrowingRunnable> addStartFunction,
-        Consumer<ThrowingRunnable> addFinishFunction) throws IOException {
-      BeamFnApi.Target target = BeamFnApi.Target.newBuilder()
-          .setPrimitiveTransformReference(pTransformId)
-          .setName(getOnlyElement(pTransform.getInputsMap().keySet()))
-          .build();
-      RunnerApi.Coder coderSpec = coders.get(
-          pCollections.get(getOnlyElement(pTransform.getInputsMap().values())).getCoderId());
-      BeamFnDataWriteRunner<InputT> runner =
-          new BeamFnDataWriteRunner<>(
-              pTransform.getSpec(),
-              processBundleInstructionId,
-              target,
-              coderSpec,
-              beamFnDataClient);
-      addStartFunction.accept(runner::registerForOutput);
-      pCollectionIdsToConsumers.put(
-          getOnlyElement(pTransform.getInputsMap().values()),
-          (ThrowingConsumer)
-              (ThrowingConsumer<WindowedValue<InputT>>) runner::consume);
-      addFinishFunction.accept(runner::close);
-      return runner;
-    }
-  }
-
-  private final BeamFnApi.ApiServiceDescriptor apiServiceDescriptor;
-  private final BeamFnApi.Target outputTarget;
-  private final Coder<WindowedValue<InputT>> coder;
-  private final BeamFnDataClient beamFnDataClientFactory;
-  private final Supplier<String> processBundleInstructionIdSupplier;
-
-  private CloseableThrowingConsumer<WindowedValue<InputT>> consumer;
-
-  BeamFnDataWriteRunner(
-      RunnerApi.FunctionSpec functionSpec,
-      Supplier<String> processBundleInstructionIdSupplier,
-      BeamFnApi.Target outputTarget,
-      RunnerApi.Coder coderSpec,
-      BeamFnDataClient beamFnDataClientFactory)
-          throws IOException {
-    this.apiServiceDescriptor = functionSpec.getParameter().unpack(BeamFnApi.RemoteGrpcPort.class)
-        .getApiServiceDescriptor();
-    this.beamFnDataClientFactory = beamFnDataClientFactory;
-    this.processBundleInstructionIdSupplier = processBundleInstructionIdSupplier;
-    this.outputTarget = outputTarget;
-
-    @SuppressWarnings("unchecked")
-    Coder<WindowedValue<InputT>> coder =
-        (Coder<WindowedValue<InputT>>)
-            CloudObjects.coderFromCloudObject(
-                CloudObject.fromSpec(
-                    OBJECT_MAPPER.readValue(
-                        coderSpec
-                            .getSpec()
-                            .getSpec()
-                            .getParameter()
-                            .unpack(BytesValue.class)
-                            .getValue()
-                            .newInput(),
-                        Map.class)));
-    this.coder = coder;
-  }
-
-  public void registerForOutput() {
-    consumer = beamFnDataClientFactory.forOutboundConsumer(
-        apiServiceDescriptor,
-        KV.of(processBundleInstructionIdSupplier.get(), outputTarget),
-        coder);
-  }
-
-  public void close() throws Exception {
-    consumer.close();
-  }
-
-  public void consume(WindowedValue<InputT> value) throws Exception {
-    consumer.accept(value);
-  }
-}
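
The removed write-side runner has the complementary lifecycle: open the outbound stream, consume each element, then close. A hedged sketch follows, with a hypothetical driver method and the pre-move package name.

import org.apache.beam.runners.core.BeamFnDataWriteRunner;
import org.apache.beam.sdk.util.WindowedValue;

/** Hypothetical driver illustrating the per-bundle lifecycle described in the javadoc above. */
class WriteRunnerLifecycleSketch {
  static <T> void runOneBundle(BeamFnDataWriteRunner<T> writer, Iterable<WindowedValue<T>> bundle)
      throws Exception {
    writer.registerForOutput();  // bundle start: open the outbound stream for the current instruction id
    for (WindowedValue<T> element : bundle) {
      writer.consume(element);   // encode and transmit each element
    }
    writer.close();              // bundle finish: flush and close the outbound stream
  }
}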

http://git-wip-us.apache.org/repos/asf/beam/blob/f1b4700f/sdks/java/harness/src/main/java/org/apache/beam/runners/core/BoundedSourceRunner.java
----------------------------------------------------------------------
diff --git a/sdks/java/harness/src/main/java/org/apache/beam/runners/core/BoundedSourceRunner.java b/sdks/java/harness/src/main/java/org/apache/beam/runners/core/BoundedSourceRunner.java
deleted file mode 100644
index 3338c3a..0000000
--- a/sdks/java/harness/src/main/java/org/apache/beam/runners/core/BoundedSourceRunner.java
+++ /dev/null
@@ -1,167 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.beam.runners.core;
-
-import com.google.auto.service.AutoService;
-import com.google.common.collect.ImmutableList;
-import com.google.common.collect.ImmutableMap;
-import com.google.common.collect.Multimap;
-import com.google.protobuf.BytesValue;
-import com.google.protobuf.InvalidProtocolBufferException;
-import java.io.IOException;
-import java.util.Collection;
-import java.util.Map;
-import java.util.function.Consumer;
-import java.util.function.Supplier;
-import org.apache.beam.fn.harness.data.BeamFnDataClient;
-import org.apache.beam.fn.harness.fn.ThrowingConsumer;
-import org.apache.beam.fn.harness.fn.ThrowingRunnable;
-import org.apache.beam.sdk.common.runner.v1.RunnerApi;
-import org.apache.beam.sdk.io.BoundedSource;
-import org.apache.beam.sdk.io.Source.Reader;
-import org.apache.beam.sdk.options.PipelineOptions;
-import org.apache.beam.sdk.util.SerializableUtils;
-import org.apache.beam.sdk.util.WindowedValue;
-
-/**
- * A runner which creates {@link Reader}s for each {@link BoundedSource} sent as an input and
- * executes the {@link Reader}s read loop.
- */
-public class BoundedSourceRunner<InputT extends BoundedSource<OutputT>, OutputT> {
-
-  private static final String URN = "urn:org.apache.beam:source:java:0.1";
-
-  /** A registrar which provides a factory to handle Java {@link BoundedSource}s. */
-  @AutoService(PTransformRunnerFactory.Registrar.class)
-  public static class Registrar implements
-      PTransformRunnerFactory.Registrar {
-
-    @Override
-    public Map<String, PTransformRunnerFactory> getPTransformRunnerFactories() {
-      return ImmutableMap.of(URN, new Factory());
-    }
-  }
-
-  /** A factory for {@link BoundedSourceRunner}. */
-  static class Factory<InputT extends BoundedSource<OutputT>, OutputT>
-      implements PTransformRunnerFactory<BoundedSourceRunner<InputT, OutputT>> {
-    @Override
-    public BoundedSourceRunner<InputT, OutputT> createRunnerForPTransform(
-        PipelineOptions pipelineOptions,
-        BeamFnDataClient beamFnDataClient,
-        String pTransformId,
-        RunnerApi.PTransform pTransform,
-        Supplier<String> processBundleInstructionId,
-        Map<String, RunnerApi.PCollection> pCollections,
-        Map<String, RunnerApi.Coder> coders,
-        Multimap<String, ThrowingConsumer<WindowedValue<?>>> pCollectionIdsToConsumers,
-        Consumer<ThrowingRunnable> addStartFunction,
-        Consumer<ThrowingRunnable> addFinishFunction) {
-
-      ImmutableList.Builder<ThrowingConsumer<WindowedValue<?>>> consumers = ImmutableList.builder();
-      for (String pCollectionId : pTransform.getOutputsMap().values()) {
-        consumers.addAll(pCollectionIdsToConsumers.get(pCollectionId));
-      }
-
-      @SuppressWarnings({"rawtypes", "unchecked"})
-      BoundedSourceRunner<InputT, OutputT> runner = new BoundedSourceRunner(
-          pipelineOptions,
-          pTransform.getSpec(),
-          consumers.build());
-
-      // TODO: Remove and replace with source being sent across gRPC port
-      addStartFunction.accept(runner::start);
-
-      ThrowingConsumer runReadLoop =
-          (ThrowingConsumer<WindowedValue<InputT>>) runner::runReadLoop;
-      for (String pCollectionId : pTransform.getInputsMap().values()) {
-        pCollectionIdsToConsumers.put(
-            pCollectionId,
-            runReadLoop);
-      }
-
-      return runner;
-    }
-  }
-
-  private final PipelineOptions pipelineOptions;
-  private final RunnerApi.FunctionSpec definition;
-  private final Collection<ThrowingConsumer<WindowedValue<OutputT>>> consumers;
-
-  BoundedSourceRunner(
-      PipelineOptions pipelineOptions,
-      RunnerApi.FunctionSpec definition,
-      Collection<ThrowingConsumer<WindowedValue<OutputT>>> consumers) {
-    this.pipelineOptions = pipelineOptions;
-    this.definition = definition;
-    this.consumers = consumers;
-  }
-
-  /**
-   * The runner harness is meant to send the source over the Beam Fn Data API which would be
-   * consumed by the {@link #runReadLoop}. Drop this method once the runner harness sends the
-   * source instead of unpacking it from the data block of the function specification.
-   */
-  @Deprecated
-  public void start() throws Exception {
-    try {
-      // The representation here is defined as the java serialized representation of the
-      // bounded source object packed into a protobuf Any using a protobuf BytesValue wrapper.
-      byte[] bytes = definition.getParameter().unpack(BytesValue.class).getValue().toByteArray();
-      @SuppressWarnings("unchecked")
-      InputT boundedSource =
-          (InputT) SerializableUtils.deserializeFromByteArray(bytes, definition.toString());
-      runReadLoop(WindowedValue.valueInGlobalWindow(boundedSource));
-    } catch (InvalidProtocolBufferException e) {
-      throw new IOException(
-          String.format("Failed to decode %s, expected %s",
-              definition.getParameter().getTypeUrl(), BytesValue.getDescriptor().getFullName()),
-          e);
-    }
-  }
-
-  /**
-   * Creates a {@link Reader} for each {@link BoundedSource} and executes the {@link Reader}s
-   * read loop. See {@link Reader} for further details of the read loop.
-   *
-   * <p>Propagates any exceptions caused during reading or processing via a consumer to the
-   * caller.
-   */
-  public void runReadLoop(WindowedValue<InputT> value) throws Exception {
-    try (Reader<OutputT> reader = value.getValue().createReader(pipelineOptions)) {
-      if (!reader.start()) {
-        // Reader has no data, immediately return
-        return;
-      }
-      do {
-        // TODO: Should this use the input window as the window for all the outputs?
-        WindowedValue<OutputT> nextValue = WindowedValue.timestampedValueInGlobalWindow(
-            reader.getCurrent(), reader.getCurrentTimestamp());
-        for (ThrowingConsumer<WindowedValue<OutputT>> consumer : consumers) {
-          consumer.accept(nextValue);
-        }
-      } while (reader.advance());
-    }
-  }
-
-  @Override
-  public String toString() {
-    return definition.toString();
-  }
-}


[41/50] [abbrv] beam git commit: [BEAM-2642] Update Google Auth to 0.7.1

Posted by jb...@apache.org.
[BEAM-2642] Update Google Auth to 0.7.1

This closes #3596


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/4d1db226
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/4d1db226
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/4d1db226

Branch: refs/heads/DSL_SQL
Commit: 4d1db2265298af324372e5212ec06cd10b4f4908
Parents: a6f460f 51427a6
Author: Luke Cwik <lc...@google.com>
Authored: Wed Jul 19 13:09:13 2017 -0700
Committer: Luke Cwik <lc...@google.com>
Committed: Wed Jul 19 13:09:13 2017 -0700

----------------------------------------------------------------------
 pom.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------



[19/50] [abbrv] beam git commit: Splits large TextIOTest into TextIOReadTest and TextIOWriteTest

Posted by jb...@apache.org.
Splits large TextIOTest into TextIOReadTest and TextIOWriteTest


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/d495d151
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/d495d151
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/d495d151

Branch: refs/heads/DSL_SQL
Commit: d495d1511fe86a2199eb247df95ff0c876803c67
Parents: 0f06eb2
Author: Eugene Kirpichov <ki...@google.com>
Authored: Fri Jun 23 18:01:53 2017 -0700
Committer: Eugene Kirpichov <ki...@google.com>
Committed: Mon Jul 17 17:08:00 2017 -0700

----------------------------------------------------------------------
 .../org/apache/beam/sdk/io/TextIOReadTest.java  |  847 +++++++++++
 .../java/org/apache/beam/sdk/io/TextIOTest.java | 1353 +-----------------
 .../org/apache/beam/sdk/io/TextIOWriteTest.java |  604 ++++++++
 3 files changed, 1460 insertions(+), 1344 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/d495d151/sdks/java/core/src/test/java/org/apache/beam/sdk/io/TextIOReadTest.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/io/TextIOReadTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/io/TextIOReadTest.java
new file mode 100644
index 0000000..8b53111
--- /dev/null
+++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/io/TextIOReadTest.java
@@ -0,0 +1,847 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.sdk.io;
+
+import static org.apache.beam.sdk.TestUtils.LINES_ARRAY;
+import static org.apache.beam.sdk.TestUtils.NO_LINES_ARRAY;
+import static org.apache.beam.sdk.io.TextIO.CompressionType.AUTO;
+import static org.apache.beam.sdk.io.TextIO.CompressionType.BZIP2;
+import static org.apache.beam.sdk.io.TextIO.CompressionType.DEFLATE;
+import static org.apache.beam.sdk.io.TextIO.CompressionType.GZIP;
+import static org.apache.beam.sdk.io.TextIO.CompressionType.UNCOMPRESSED;
+import static org.apache.beam.sdk.io.TextIO.CompressionType.ZIP;
+import static org.apache.beam.sdk.transforms.display.DisplayDataMatchers.hasDisplayItem;
+import static org.apache.beam.sdk.transforms.display.DisplayDataMatchers.hasValue;
+import static org.hamcrest.Matchers.containsInAnyOrder;
+import static org.hamcrest.Matchers.equalTo;
+import static org.hamcrest.Matchers.greaterThan;
+import static org.hamcrest.Matchers.hasItem;
+import static org.hamcrest.Matchers.hasSize;
+import static org.hamcrest.Matchers.startsWith;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertThat;
+import static org.junit.Assert.assertTrue;
+
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.Iterables;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.io.PrintStream;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.FileVisitResult;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.SimpleFileVisitor;
+import java.nio.file.attribute.BasicFileAttributes;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+import java.util.Set;
+import java.util.zip.GZIPOutputStream;
+import java.util.zip.ZipEntry;
+import java.util.zip.ZipOutputStream;
+import org.apache.beam.sdk.coders.StringUtf8Coder;
+import org.apache.beam.sdk.io.BoundedSource.BoundedReader;
+import org.apache.beam.sdk.io.TextIO.CompressionType;
+import org.apache.beam.sdk.options.PipelineOptions;
+import org.apache.beam.sdk.options.PipelineOptionsFactory;
+import org.apache.beam.sdk.options.ValueProvider;
+import org.apache.beam.sdk.testing.NeedsRunner;
+import org.apache.beam.sdk.testing.PAssert;
+import org.apache.beam.sdk.testing.SourceTestUtils;
+import org.apache.beam.sdk.testing.TestPipeline;
+import org.apache.beam.sdk.testing.ValidatesRunner;
+import org.apache.beam.sdk.transforms.Create;
+import org.apache.beam.sdk.transforms.display.DisplayData;
+import org.apache.beam.sdk.transforms.display.DisplayDataEvaluator;
+import org.apache.beam.sdk.util.CoderUtils;
+import org.apache.beam.sdk.values.PCollection;
+import org.apache.commons.compress.compressors.bzip2.BZip2CompressorOutputStream;
+import org.apache.commons.compress.compressors.deflate.DeflateCompressorOutputStream;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.junit.rules.ExpectedException;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+/** Tests for {@link TextIO.Read}. */
+@RunWith(JUnit4.class)
+public class TextIOReadTest {
+  private static final List<String> EMPTY = Collections.emptyList();
+  private static final List<String> TINY =
+      Arrays.asList("Irritable eagle", "Optimistic jay", "Fanciful hawk");
+  private static final List<String> LARGE = makeLines(1000);
+
+  private static Path tempFolder;
+  private static File emptyTxt;
+  private static File tinyTxt;
+  private static File largeTxt;
+  private static File emptyGz;
+  private static File tinyGz;
+  private static File largeGz;
+  private static File emptyBzip2;
+  private static File tinyBzip2;
+  private static File largeBzip2;
+  private static File emptyZip;
+  private static File tinyZip;
+  private static File largeZip;
+  private static File emptyDeflate;
+  private static File tinyDeflate;
+  private static File largeDeflate;
+
+  @Rule public TestPipeline p = TestPipeline.create();
+
+  @Rule public ExpectedException expectedException = ExpectedException.none();
+
+  private static File writeToFile(List<String> lines, String filename, CompressionType compression)
+      throws IOException {
+    File file = tempFolder.resolve(filename).toFile();
+    OutputStream output = new FileOutputStream(file);
+    switch (compression) {
+      case UNCOMPRESSED:
+        break;
+      case GZIP:
+        output = new GZIPOutputStream(output);
+        break;
+      case BZIP2:
+        output = new BZip2CompressorOutputStream(output);
+        break;
+      case ZIP:
+        ZipOutputStream zipOutput = new ZipOutputStream(output);
+        zipOutput.putNextEntry(new ZipEntry("entry"));
+        output = zipOutput;
+        break;
+      case DEFLATE:
+        output = new DeflateCompressorOutputStream(output);
+        break;
+      default:
+        throw new UnsupportedOperationException(compression.toString());
+    }
+    writeToStreamAndClose(lines, output);
+    return file;
+  }
+
+  @BeforeClass
+  public static void setupClass() throws IOException {
+    tempFolder = Files.createTempDirectory("TextIOTest");
+    // empty files
+    emptyTxt = writeToFile(EMPTY, "empty.txt", CompressionType.UNCOMPRESSED);
+    emptyGz = writeToFile(EMPTY, "empty.gz", GZIP);
+    emptyBzip2 = writeToFile(EMPTY, "empty.bz2", BZIP2);
+    emptyZip = writeToFile(EMPTY, "empty.zip", ZIP);
+    emptyDeflate = writeToFile(EMPTY, "empty.deflate", DEFLATE);
+    // tiny files
+    tinyTxt = writeToFile(TINY, "tiny.txt", CompressionType.UNCOMPRESSED);
+    tinyGz = writeToFile(TINY, "tiny.gz", GZIP);
+    tinyBzip2 = writeToFile(TINY, "tiny.bz2", BZIP2);
+    tinyZip = writeToFile(TINY, "tiny.zip", ZIP);
+    tinyDeflate = writeToFile(TINY, "tiny.deflate", DEFLATE);
+    // large files
+    largeTxt = writeToFile(LARGE, "large.txt", CompressionType.UNCOMPRESSED);
+    largeGz = writeToFile(LARGE, "large.gz", GZIP);
+    largeBzip2 = writeToFile(LARGE, "large.bz2", BZIP2);
+    largeZip = writeToFile(LARGE, "large.zip", ZIP);
+    largeDeflate = writeToFile(LARGE, "large.deflate", DEFLATE);
+  }
+
+  @AfterClass
+  public static void teardownClass() throws IOException {
+    Files.walkFileTree(
+        tempFolder,
+        new SimpleFileVisitor<Path>() {
+          @Override
+          public FileVisitResult visitFile(Path file, BasicFileAttributes attrs)
+              throws IOException {
+            Files.delete(file);
+            return FileVisitResult.CONTINUE;
+          }
+
+          @Override
+          public FileVisitResult postVisitDirectory(Path dir, IOException exc) throws IOException {
+            Files.delete(dir);
+            return FileVisitResult.CONTINUE;
+          }
+        });
+  }
+
+  private void runTestRead(String[] expected) throws Exception {
+    File tmpFile = Files.createTempFile(tempFolder, "file", "txt").toFile();
+    String filename = tmpFile.getPath();
+
+    try (PrintStream writer = new PrintStream(new FileOutputStream(tmpFile))) {
+      for (String elem : expected) {
+        byte[] encodedElem = CoderUtils.encodeToByteArray(StringUtf8Coder.of(), elem);
+        String line = new String(encodedElem);
+        writer.println(line);
+      }
+    }
+
+    TextIO.Read read = TextIO.read().from(filename);
+
+    PCollection<String> output = p.apply(read);
+
+    PAssert.that(output).containsInAnyOrder(expected);
+    p.run();
+  }
+
+  @Test
+  @Category(NeedsRunner.class)
+  public void testReadStrings() throws Exception {
+    runTestRead(LINES_ARRAY);
+  }
+
+  @Test
+  @Category(NeedsRunner.class)
+  public void testReadEmptyStrings() throws Exception {
+    runTestRead(NO_LINES_ARRAY);
+  }
+
+  @Test
+  public void testReadNamed() throws Exception {
+    p.enableAbandonedNodeEnforcement(false);
+
+    assertEquals("TextIO.Read/Read.out", p.apply(TextIO.read().from("somefile")).getName());
+    assertEquals(
+        "MyRead/Read.out", p.apply("MyRead", TextIO.read().from(emptyTxt.getPath())).getName());
+  }
+
+  @Test
+  public void testReadDisplayData() {
+    TextIO.Read read = TextIO.read().from("foo.*").withCompressionType(BZIP2);
+
+    DisplayData displayData = DisplayData.from(read);
+
+    assertThat(displayData, hasDisplayItem("filePattern", "foo.*"));
+    assertThat(displayData, hasDisplayItem("compressionType", BZIP2.toString()));
+  }
+
+  @Test
+  @Category(ValidatesRunner.class)
+  public void testPrimitiveReadDisplayData() {
+    DisplayDataEvaluator evaluator = DisplayDataEvaluator.create();
+
+    TextIO.Read read = TextIO.read().from("foobar");
+
+    Set<DisplayData> displayData = evaluator.displayDataForPrimitiveSourceTransforms(read);
+    assertThat(
+        "TextIO.Read should include the file prefix in its primitive display data",
+        displayData,
+        hasItem(hasDisplayItem(hasValue(startsWith("foobar")))));
+  }
+
+  /** Options for testing. */
+  public interface RuntimeTestOptions extends PipelineOptions {
+    ValueProvider<String> getInput();
+    void setInput(ValueProvider<String> value);
+  }
+
+  @Test
+  public void testRuntimeOptionsNotCalledInApply() throws Exception {
+    p.enableAbandonedNodeEnforcement(false);
+
+    RuntimeTestOptions options =
+        PipelineOptionsFactory.as(RuntimeTestOptions.class);
+
+    p.apply(TextIO.read().from(options.getInput()));
+  }
+
+  @Test
+  public void testCompressionTypeIsSet() throws Exception {
+    TextIO.Read read = TextIO.read().from("/tmp/test");
+    assertEquals(AUTO, read.getCompressionType());
+    read = TextIO.read().from("/tmp/test").withCompressionType(GZIP);
+    assertEquals(GZIP, read.getCompressionType());
+  }
+
+  /**
+   * Helper that writes the given lines (adding a newline in between) to a stream, then closes the
+   * stream.
+   */
+  private static void writeToStreamAndClose(List<String> lines, OutputStream outputStream) {
+    try (PrintStream writer = new PrintStream(outputStream)) {
+      for (String line : lines) {
+        writer.println(line);
+      }
+    }
+  }
+
+  /**
+   * Helper method that runs TextIO.read().from(filename).withCompressionType(compressionType) and
+   * TextIO.readAll().withCompressionType(compressionType) applied to the single filename,
+   * and asserts that the results match the given expected output.
+   */
+  private void assertReadingCompressedFileMatchesExpected(
+      File file, CompressionType compressionType, List<String> expected) {
+
+    TextIO.Read read = TextIO.read().from(file.getPath()).withCompressionType(compressionType);
+    PAssert.that(p.apply("Read_" + file + "_" + compressionType.toString(), read))
+        .containsInAnyOrder(expected);
+
+    TextIO.ReadAll readAll =
+        TextIO.readAll().withCompressionType(compressionType).withDesiredBundleSizeBytes(10);
+    PAssert.that(
+            p.apply("Create_" + file, Create.of(file.getPath()))
+                .apply("Read_" + compressionType.toString(), readAll))
+        .containsInAnyOrder(expected);
+    p.run();
+  }
+
+  /** Helper to make a list of compressible strings: element i is "word" + i, for i in [0, n). */
+  private static List<String> makeLines(int n) {
+    List<String> ret = new ArrayList<>();
+    for (int i = 0; i < n; ++i) {
+      ret.add("word" + i);
+    }
+    return ret;
+  }
+
+  /** Tests reading from a small, gzipped file with no .gz extension but GZIP compression set. */
+  @Test
+  @Category(NeedsRunner.class)
+  public void testSmallCompressedGzipReadNoExtension() throws Exception {
+    File smallGzNoExtension = writeToFile(TINY, "tiny_gz_no_extension", GZIP);
+    assertReadingCompressedFileMatchesExpected(smallGzNoExtension, GZIP, TINY);
+  }
+
+  /**
+   * Tests reading from a small, uncompressed file with .gz extension. This must work in AUTO or
+   * GZIP modes. This is needed because some network file systems / HTTP clients will transparently
+   * decompress gzipped content.
+   */
+  @Test
+  @Category(NeedsRunner.class)
+  public void testSmallCompressedGzipReadActuallyUncompressed() throws Exception {
+    File smallGzNotCompressed =
+        writeToFile(TINY, "tiny_uncompressed.gz", CompressionType.UNCOMPRESSED);
+    // Should work with GZIP compression set.
+    assertReadingCompressedFileMatchesExpected(smallGzNotCompressed, GZIP, TINY);
+    // Should also work with AUTO mode set.
+    assertReadingCompressedFileMatchesExpected(smallGzNotCompressed, AUTO, TINY);
+  }
+
+  /** Tests reading from a small, bzip2ed file with no .bz2 extension but BZIP2 compression set. */
+  @Test
+  @Category(NeedsRunner.class)
+  public void testSmallCompressedBzip2ReadNoExtension() throws Exception {
+    File smallBz2NoExtension = writeToFile(TINY, "tiny_bz2_no_extension", BZIP2);
+    assertReadingCompressedFileMatchesExpected(smallBz2NoExtension, BZIP2, TINY);
+  }
+
+  /**
+   * Create a zip file with the given lines.
+   *
+   * @param expected A list to populate with the lines written into the zip file.
+   * @param filename Optional zip file name (can be null).
+   * @param fieldsEntries Fields to write in zip entries.
+   * @return The zip filename.
+   * @throws Exception In case of a failure during zip file creation.
+   */
+  private String createZipFile(List<String> expected, String filename, String[]... fieldsEntries)
+      throws Exception {
+    File tmpFile = tempFolder.resolve(filename).toFile();
+    String tmpFileName = tmpFile.getPath();
+
+    ZipOutputStream out = new ZipOutputStream(new FileOutputStream(tmpFile));
+    PrintStream writer = new PrintStream(out, true /* auto-flush on write */);
+
+    int index = 0;
+    for (String[] entry : fieldsEntries) {
+      out.putNextEntry(new ZipEntry(Integer.toString(index)));
+      for (String field : entry) {
+        writer.println(field);
+        expected.add(field);
+      }
+      out.closeEntry();
+      index++;
+    }
+
+    writer.close();
+    out.close();
+
+    return tmpFileName;
+  }
+
+  @Test
+  @Category(NeedsRunner.class)
+  public void testTxtRead() throws Exception {
+    // Files with non-compressed extensions should work in AUTO and UNCOMPRESSED modes.
+    for (CompressionType type : new CompressionType[] {AUTO, UNCOMPRESSED}) {
+      assertReadingCompressedFileMatchesExpected(emptyTxt, type, EMPTY);
+      assertReadingCompressedFileMatchesExpected(tinyTxt, type, TINY);
+      assertReadingCompressedFileMatchesExpected(largeTxt, type, LARGE);
+    }
+  }
+
+  @Test
+  @Category(NeedsRunner.class)
+  public void testGzipCompressedRead() throws Exception {
+    // Files with the right extensions should work in AUTO and GZIP modes.
+    for (CompressionType type : new CompressionType[] {AUTO, GZIP}) {
+      assertReadingCompressedFileMatchesExpected(emptyGz, type, EMPTY);
+      assertReadingCompressedFileMatchesExpected(tinyGz, type, TINY);
+      assertReadingCompressedFileMatchesExpected(largeGz, type, LARGE);
+    }
+
+    // Sanity check that we're properly testing compression.
+    assertThat(largeTxt.length(), greaterThan(largeGz.length()));
+
+    // GZIP files with non-gz extension should work in GZIP mode.
+    File gzFile = writeToFile(TINY, "tiny_gz_no_extension", GZIP);
+    assertReadingCompressedFileMatchesExpected(gzFile, GZIP, TINY);
+  }
+
+  @Test
+  @Category(NeedsRunner.class)
+  public void testBzip2CompressedRead() throws Exception {
+    // Files with the right extensions should work in AUTO and BZIP2 modes.
+    for (CompressionType type : new CompressionType[] {AUTO, BZIP2}) {
+      assertReadingCompressedFileMatchesExpected(emptyBzip2, type, EMPTY);
+      assertReadingCompressedFileMatchesExpected(tinyBzip2, type, TINY);
+      assertReadingCompressedFileMatchesExpected(largeBzip2, type, LARGE);
+    }
+
+    // Sanity check that we're properly testing compression.
+    assertThat(largeTxt.length(), greaterThan(largeBzip2.length()));
+
+    // BZ2 files with non-bz2 extension should work in BZIP2 mode.
+    File bz2File = writeToFile(TINY, "tiny_bz2_no_extension", BZIP2);
+    assertReadingCompressedFileMatchesExpected(bz2File, BZIP2, TINY);
+  }
+
+  @Test
+  @Category(NeedsRunner.class)
+  public void testZipCompressedRead() throws Exception {
+    // Files with the right extensions should work in AUTO and ZIP modes.
+    for (CompressionType type : new CompressionType[] {AUTO, ZIP}) {
+      assertReadingCompressedFileMatchesExpected(emptyZip, type, EMPTY);
+      assertReadingCompressedFileMatchesExpected(tinyZip, type, TINY);
+      assertReadingCompressedFileMatchesExpected(largeZip, type, LARGE);
+    }
+
+    // Sanity check that we're properly testing compression.
+    assertThat(largeTxt.length(), greaterThan(largeZip.length()));
+
+    // Zip files with non-zip extension should work in ZIP mode.
+    File zipFile = writeToFile(TINY, "tiny_zip_no_extension", ZIP);
+    assertReadingCompressedFileMatchesExpected(zipFile, ZIP, TINY);
+  }
+
+  @Test
+  @Category(NeedsRunner.class)
+  public void testDeflateCompressedRead() throws Exception {
+    // Files with the right extensions should work in AUTO and DEFLATE modes.
+    for (CompressionType type : new CompressionType[] {AUTO, DEFLATE}) {
+      assertReadingCompressedFileMatchesExpected(emptyDeflate, type, EMPTY);
+      assertReadingCompressedFileMatchesExpected(tinyDeflate, type, TINY);
+      assertReadingCompressedFileMatchesExpected(largeDeflate, type, LARGE);
+    }
+
+    // Sanity check that we're properly testing compression.
+    assertThat(largeTxt.length(), greaterThan(largeDeflate.length()));
+
+    // Deflate files with non-deflate extension should work in DEFLATE mode.
+    File deflateFile = writeToFile(TINY, "tiny_deflate_no_extension", DEFLATE);
+    assertReadingCompressedFileMatchesExpected(deflateFile, DEFLATE, TINY);
+  }
+
+  /**
+   * Tests a zip file with no entries. This is a corner case not tested elsewhere as the default
+   * test zip files have a single entry.
+   */
+  @Test
+  @Category(NeedsRunner.class)
+  public void testZipCompressedReadWithNoEntries() throws Exception {
+    String filename = createZipFile(new ArrayList<String>(), "empty zip file");
+    assertReadingCompressedFileMatchesExpected(new File(filename), CompressionType.ZIP, EMPTY);
+  }
+
+  /**
+   * Tests a zip file with multiple entries. This is a corner case not tested elsewhere as the
+   * default test zip files have a single entry.
+   */
+  @Test
+  @Category(NeedsRunner.class)
+  public void testZipCompressedReadWithMultiEntriesFile() throws Exception {
+    String[] entry0 = new String[] {"first", "second", "three"};
+    String[] entry1 = new String[] {"four", "five", "six"};
+    String[] entry2 = new String[] {"seven", "eight", "nine"};
+
+    List<String> expected = new ArrayList<>();
+
+    String filename = createZipFile(expected, "multiple entries", entry0, entry1, entry2);
+    assertReadingCompressedFileMatchesExpected(new File(filename), CompressionType.ZIP, expected);
+  }
+
+  /**
+   * Read a ZIP compressed file containing data, multiple empty entries, and then more data. We
+   * expect just the data back.
+   */
+  @Test
+  @Category(NeedsRunner.class)
+  public void testZipCompressedReadWithComplexEmptyAndPresentEntries() throws Exception {
+    String filename =
+        createZipFile(
+            new ArrayList<String>(),
+            "complex empty and present entries",
+            new String[] {"cat"},
+            new String[] {},
+            new String[] {},
+            new String[] {"dog"});
+
+    assertReadingCompressedFileMatchesExpected(
+        new File(filename), CompressionType.ZIP, Arrays.asList("cat", "dog"));
+  }
+
+  @Test
+  public void testTextIOGetName() {
+    assertEquals("TextIO.Read", TextIO.read().from("somefile").getName());
+    assertEquals("TextIO.Read", TextIO.read().from("somefile").toString());
+  }
+
+  @Test
+  public void testProgressEmptyFile() throws IOException {
+    try (BoundedReader<String> reader =
+        prepareSource(new byte[0]).createReader(PipelineOptionsFactory.create())) {
+      // Check preconditions before starting.
+      assertEquals(0.0, reader.getFractionConsumed(), 1e-6);
+      assertEquals(0, reader.getSplitPointsConsumed());
+      assertEquals(BoundedReader.SPLIT_POINTS_UNKNOWN, reader.getSplitPointsRemaining());
+
+      // Assert empty
+      assertFalse(reader.start());
+
+      // Check postconditions after finishing
+      assertEquals(1.0, reader.getFractionConsumed(), 1e-6);
+      assertEquals(0, reader.getSplitPointsConsumed());
+      assertEquals(0, reader.getSplitPointsRemaining());
+    }
+  }
+
+  @Test
+  public void testProgressTextFile() throws IOException {
+    String file = "line1\nline2\nline3";
+    try (BoundedReader<String> reader =
+        prepareSource(file.getBytes()).createReader(PipelineOptionsFactory.create())) {
+      // Check preconditions before starting
+      assertEquals(0.0, reader.getFractionConsumed(), 1e-6);
+      assertEquals(0, reader.getSplitPointsConsumed());
+      assertEquals(BoundedReader.SPLIT_POINTS_UNKNOWN, reader.getSplitPointsRemaining());
+
+      // Line 1
+      assertTrue(reader.start());
+      assertEquals(0, reader.getSplitPointsConsumed());
+      assertEquals(BoundedReader.SPLIT_POINTS_UNKNOWN, reader.getSplitPointsRemaining());
+
+      // Line 2
+      assertTrue(reader.advance());
+      assertEquals(1, reader.getSplitPointsConsumed());
+      assertEquals(BoundedReader.SPLIT_POINTS_UNKNOWN, reader.getSplitPointsRemaining());
+
+      // Line 3
+      assertTrue(reader.advance());
+      assertEquals(2, reader.getSplitPointsConsumed());
+      assertEquals(1, reader.getSplitPointsRemaining());
+
+      // Check postconditions after finishing
+      assertFalse(reader.advance());
+      assertEquals(1.0, reader.getFractionConsumed(), 1e-6);
+      assertEquals(3, reader.getSplitPointsConsumed());
+      assertEquals(0, reader.getSplitPointsRemaining());
+    }
+  }
+
+  @Test
+  public void testProgressAfterSplitting() throws IOException {
+    String file = "line1\nline2\nline3";
+    BoundedSource<String> source = prepareSource(file.getBytes());
+    BoundedSource<String> remainder;
+
+    // Create the remainder, verifying properties pre- and post-splitting.
+    try (BoundedReader<String> readerOrig = source.createReader(PipelineOptionsFactory.create())) {
+      // Preconditions.
+      assertEquals(0.0, readerOrig.getFractionConsumed(), 1e-6);
+      assertEquals(0, readerOrig.getSplitPointsConsumed());
+      assertEquals(BoundedReader.SPLIT_POINTS_UNKNOWN, readerOrig.getSplitPointsRemaining());
+
+      // First record, before splitting.
+      assertTrue(readerOrig.start());
+      assertEquals(0, readerOrig.getSplitPointsConsumed());
+      assertEquals(BoundedReader.SPLIT_POINTS_UNKNOWN, readerOrig.getSplitPointsRemaining());
+
+      // Split. 0.1 is in line1, so should now be able to detect last record.
+      remainder = readerOrig.splitAtFraction(0.1);
+      System.err.println(readerOrig.getCurrentSource());
+      assertNotNull(remainder);
+
+      // First record, after splitting.
+      assertEquals(0, readerOrig.getSplitPointsConsumed());
+      assertEquals(1, readerOrig.getSplitPointsRemaining());
+
+      // Finish and postconditions.
+      assertFalse(readerOrig.advance());
+      assertEquals(1.0, readerOrig.getFractionConsumed(), 1e-6);
+      assertEquals(1, readerOrig.getSplitPointsConsumed());
+      assertEquals(0, readerOrig.getSplitPointsRemaining());
+    }
+
+    // Check the properties of the remainder.
+    try (BoundedReader<String> reader = remainder.createReader(PipelineOptionsFactory.create())) {
+      // Preconditions.
+      assertEquals(0.0, reader.getFractionConsumed(), 1e-6);
+      assertEquals(0, reader.getSplitPointsConsumed());
+      assertEquals(BoundedReader.SPLIT_POINTS_UNKNOWN, reader.getSplitPointsRemaining());
+
+      // First record should be line 2.
+      assertTrue(reader.start());
+      assertEquals(0, reader.getSplitPointsConsumed());
+      assertEquals(BoundedReader.SPLIT_POINTS_UNKNOWN, reader.getSplitPointsRemaining());
+
+      // Second record is line 3
+      assertTrue(reader.advance());
+      assertEquals(1, reader.getSplitPointsConsumed());
+      assertEquals(1, reader.getSplitPointsRemaining());
+
+      // Check postconditions after finishing
+      assertFalse(reader.advance());
+      assertEquals(1.0, reader.getFractionConsumed(), 1e-6);
+      assertEquals(2, reader.getSplitPointsConsumed());
+      assertEquals(0, reader.getSplitPointsRemaining());
+    }
+  }
+
+  @Test
+  public void testReadEmptyLines() throws Exception {
+    runTestReadWithData("\n\n\n".getBytes(StandardCharsets.UTF_8), ImmutableList.of("", "", ""));
+  }
+
+  @Test
+  public void testReadFileWithLineFeedDelimiter() throws Exception {
+    runTestReadWithData(
+        "asdf\nhjkl\nxyz\n".getBytes(StandardCharsets.UTF_8),
+        ImmutableList.of("asdf", "hjkl", "xyz"));
+  }
+
+  @Test
+  public void testReadFileWithCarriageReturnDelimiter() throws Exception {
+    runTestReadWithData(
+        "asdf\rhjkl\rxyz\r".getBytes(StandardCharsets.UTF_8),
+        ImmutableList.of("asdf", "hjkl", "xyz"));
+  }
+
+  @Test
+  public void testReadFileWithCarriageReturnAndLineFeedDelimiter() throws Exception {
+    runTestReadWithData(
+        "asdf\r\nhjkl\r\nxyz\r\n".getBytes(StandardCharsets.UTF_8),
+        ImmutableList.of("asdf", "hjkl", "xyz"));
+  }
+
+  @Test
+  public void testReadFileWithMixedDelimiters() throws Exception {
+    runTestReadWithData(
+        "asdf\rhjkl\r\nxyz\n".getBytes(StandardCharsets.UTF_8),
+        ImmutableList.of("asdf", "hjkl", "xyz"));
+  }
+
+  @Test
+  public void testReadFileWithLineFeedDelimiterAndNonEmptyBytesAtEnd() throws Exception {
+    runTestReadWithData(
+        "asdf\nhjkl\nxyz".getBytes(StandardCharsets.UTF_8),
+        ImmutableList.of("asdf", "hjkl", "xyz"));
+  }
+
+  @Test
+  public void testReadFileWithCarriageReturnDelimiterAndNonEmptyBytesAtEnd() throws Exception {
+    runTestReadWithData(
+        "asdf\rhjkl\rxyz".getBytes(StandardCharsets.UTF_8),
+        ImmutableList.of("asdf", "hjkl", "xyz"));
+  }
+
+  @Test
+  public void testReadFileWithCarriageReturnAndLineFeedDelimiterAndNonEmptyBytesAtEnd()
+      throws Exception {
+    runTestReadWithData(
+        "asdf\r\nhjkl\r\nxyz".getBytes(StandardCharsets.UTF_8),
+        ImmutableList.of("asdf", "hjkl", "xyz"));
+  }
+
+  @Test
+  public void testReadFileWithMixedDelimitersAndNonEmptyBytesAtEnd() throws Exception {
+    runTestReadWithData(
+        "asdf\rhjkl\r\nxyz".getBytes(StandardCharsets.UTF_8),
+        ImmutableList.of("asdf", "hjkl", "xyz"));
+  }
+
+  private void runTestReadWithData(byte[] data, List<String> expectedResults) throws Exception {
+    TextSource source = prepareSource(data);
+    List<String> actual = SourceTestUtils.readFromSource(source, PipelineOptionsFactory.create());
+    assertThat(actual, containsInAnyOrder(new ArrayList<>(expectedResults).toArray(new String[0])));
+  }
+
+  @Test
+  public void testSplittingSourceWithEmptyLines() throws Exception {
+    TextSource source = prepareSource("\n\n\n".getBytes(StandardCharsets.UTF_8));
+    SourceTestUtils.assertSplitAtFractionExhaustive(source, PipelineOptionsFactory.create());
+  }
+
+  @Test
+  public void testSplittingSourceWithLineFeedDelimiter() throws Exception {
+    TextSource source = prepareSource("asdf\nhjkl\nxyz\n".getBytes(StandardCharsets.UTF_8));
+    SourceTestUtils.assertSplitAtFractionExhaustive(source, PipelineOptionsFactory.create());
+  }
+
+  @Test
+  public void testSplittingSourceWithCarriageReturnDelimiter() throws Exception {
+    TextSource source = prepareSource("asdf\rhjkl\rxyz\r".getBytes(StandardCharsets.UTF_8));
+    SourceTestUtils.assertSplitAtFractionExhaustive(source, PipelineOptionsFactory.create());
+  }
+
+  @Test
+  public void testSplittingSourceWithCarriageReturnAndLineFeedDelimiter() throws Exception {
+    TextSource source = prepareSource("asdf\r\nhjkl\r\nxyz\r\n".getBytes(StandardCharsets.UTF_8));
+    SourceTestUtils.assertSplitAtFractionExhaustive(source, PipelineOptionsFactory.create());
+  }
+
+  @Test
+  public void testSplittingSourceWithMixedDelimiters() throws Exception {
+    TextSource source = prepareSource("asdf\rhjkl\r\nxyz\n".getBytes(StandardCharsets.UTF_8));
+    SourceTestUtils.assertSplitAtFractionExhaustive(source, PipelineOptionsFactory.create());
+  }
+
+  @Test
+  public void testSplittingSourceWithLineFeedDelimiterAndNonEmptyBytesAtEnd() throws Exception {
+    TextSource source = prepareSource("asdf\nhjkl\nxyz".getBytes(StandardCharsets.UTF_8));
+    SourceTestUtils.assertSplitAtFractionExhaustive(source, PipelineOptionsFactory.create());
+  }
+
+  @Test
+  public void testSplittingSourceWithCarriageReturnDelimiterAndNonEmptyBytesAtEnd()
+      throws Exception {
+    TextSource source = prepareSource("asdf\rhjkl\rxyz".getBytes(StandardCharsets.UTF_8));
+    SourceTestUtils.assertSplitAtFractionExhaustive(source, PipelineOptionsFactory.create());
+  }
+
+  @Test
+  public void testSplittingSourceWithCarriageReturnAndLineFeedDelimiterAndNonEmptyBytesAtEnd()
+      throws Exception {
+    TextSource source = prepareSource("asdf\r\nhjkl\r\nxyz".getBytes(StandardCharsets.UTF_8));
+    SourceTestUtils.assertSplitAtFractionExhaustive(source, PipelineOptionsFactory.create());
+  }
+
+  @Test
+  public void testSplittingSourceWithMixedDelimitersAndNonEmptyBytesAtEnd() throws Exception {
+    TextSource source = prepareSource("asdf\rhjkl\r\nxyz".getBytes(StandardCharsets.UTF_8));
+    SourceTestUtils.assertSplitAtFractionExhaustive(source, PipelineOptionsFactory.create());
+  }
+
+  private TextSource prepareSource(byte[] data) throws IOException {
+    Path path = Files.createTempFile(tempFolder, "tempfile", "ext");
+    Files.write(path, data);
+    return new TextSource(ValueProvider.StaticValueProvider.of(path.toString()));
+  }
+
+  @Test
+  public void testInitialSplitAutoModeTxt() throws Exception {
+    PipelineOptions options = TestPipeline.testingPipelineOptions();
+    long desiredBundleSize = 1000;
+
+    // Sanity check: file is at least 2 bundles long.
+    assertThat(largeTxt.length(), greaterThan(2 * desiredBundleSize));
+
+    FileBasedSource<String> source = TextIO.read().from(largeTxt.getPath()).getSource();
+    List<? extends FileBasedSource<String>> splits = source.split(desiredBundleSize, options);
+
+    // At least 2 splits and they are equal to reading the whole file.
+    assertThat(splits, hasSize(greaterThan(1)));
+    SourceTestUtils.assertSourcesEqualReferenceSource(source, splits, options);
+  }
+
+  @Test
+  public void testInitialSplitAutoModeGz() throws Exception {
+    long desiredBundleSize = 1000;
+    PipelineOptions options = TestPipeline.testingPipelineOptions();
+
+    // Sanity check: file is at least 2 bundles long.
+    assertThat(largeGz.length(), greaterThan(2 * desiredBundleSize));
+
+    FileBasedSource<String> source = TextIO.read().from(largeGz.getPath()).getSource();
+    List<? extends FileBasedSource<String>> splits = source.split(desiredBundleSize, options);
+
+    // Exactly 1 split, even in AUTO mode, since it is a gzip file.
+    assertThat(splits, hasSize(equalTo(1)));
+    SourceTestUtils.assertSourcesEqualReferenceSource(source, splits, options);
+  }
+
+  @Test
+  public void testInitialSplitGzipModeTxt() throws Exception {
+    PipelineOptions options = TestPipeline.testingPipelineOptions();
+    long desiredBundleSize = 1000;
+
+    // Sanity check: file is at least 2 bundles long.
+    assertThat(largeTxt.length(), greaterThan(2 * desiredBundleSize));
+
+    FileBasedSource<String> source =
+        TextIO.read().from(largeTxt.getPath()).withCompressionType(GZIP).getSource();
+    List<? extends FileBasedSource<String>> splits = source.split(desiredBundleSize, options);
+
+    // Exactly 1 split, even though this is a splittable text file, since GZIP mode is used.
+    assertThat(splits, hasSize(equalTo(1)));
+    SourceTestUtils.assertSourcesEqualReferenceSource(source, splits, options);
+  }
+
+  @Test
+  public void testInitialSplitGzipModeGz() throws Exception {
+    PipelineOptions options = TestPipeline.testingPipelineOptions();
+    long desiredBundleSize = 1000;
+
+    // Sanity check: file is at least 2 bundles long.
+    assertThat(largeGz.length(), greaterThan(2 * desiredBundleSize));
+
+    FileBasedSource<String> source =
+        TextIO.read().from(largeGz.getPath()).withCompressionType(GZIP).getSource();
+    List<? extends FileBasedSource<String>> splits = source.split(desiredBundleSize, options);
+
+    // Exactly 1 split when using the .gz extension and GZIP mode.
+    assertThat(splits, hasSize(equalTo(1)));
+    SourceTestUtils.assertSourcesEqualReferenceSource(source, splits, options);
+  }
+
+  @Test
+  @Category(NeedsRunner.class)
+  public void testReadAll() throws IOException {
+    writeToFile(TINY, "readAllTiny1.zip", ZIP);
+    writeToFile(TINY, "readAllTiny2.zip", ZIP);
+    writeToFile(LARGE, "readAllLarge1.zip", ZIP);
+    writeToFile(LARGE, "readAllLarge2.zip", ZIP);
+    PCollection<String> lines =
+        p.apply(
+                Create.of(
+                    tempFolder.resolve("readAllTiny*").toString(),
+                    tempFolder.resolve("readAllLarge*").toString()))
+            .apply(TextIO.readAll().withCompressionType(AUTO));
+    PAssert.that(lines).containsInAnyOrder(Iterables.concat(TINY, TINY, LARGE, LARGE));
+    p.run();
+  }
+}
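
The testReadAll case above exercises TextIO.readAll(), which reads a PCollection of file patterns rather than a single pattern supplied at construction time. A minimal sketch of the same usage outside the test harness follows; the pipeline wiring, class name, and file paths are illustrative assumptions, and only the Create/TextIO calls mirror the test.

import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.io.TextIO;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.transforms.Create;
import org.apache.beam.sdk.values.PCollection;

public class ReadAllSketch {
  public static void main(String[] args) {
    Pipeline p = Pipeline.create(PipelineOptionsFactory.fromArgs(args).create());
    // The input is a PCollection of glob patterns; in AUTO mode the compression
    // is detected per matched file from its extension.
    PCollection<String> lines =
        p.apply(Create.of("/tmp/input/part-*.txt", "/tmp/archive/logs-*.gz"))
            .apply(TextIO.readAll().withCompressionType(TextIO.CompressionType.AUTO));
    p.run().waitUntilFinish();
  }
}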

http://git-wip-us.apache.org/repos/asf/beam/blob/d495d151/sdks/java/core/src/test/java/org/apache/beam/sdk/io/TextIOTest.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/io/TextIOTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/io/TextIOTest.java
index a6be4fb..4fe1c56 100644
--- a/sdks/java/core/src/test/java/org/apache/beam/sdk/io/TextIOTest.java
+++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/io/TextIOTest.java
@@ -17,1350 +17,15 @@
  */
 package org.apache.beam.sdk.io;
 
-import static com.google.common.base.MoreObjects.firstNonNull;
-import static org.apache.beam.sdk.TestUtils.LINES2_ARRAY;
-import static org.apache.beam.sdk.TestUtils.LINES_ARRAY;
-import static org.apache.beam.sdk.TestUtils.NO_LINES_ARRAY;
-import static org.apache.beam.sdk.io.TextIO.CompressionType.AUTO;
-import static org.apache.beam.sdk.io.TextIO.CompressionType.BZIP2;
-import static org.apache.beam.sdk.io.TextIO.CompressionType.DEFLATE;
-import static org.apache.beam.sdk.io.TextIO.CompressionType.GZIP;
-import static org.apache.beam.sdk.io.TextIO.CompressionType.UNCOMPRESSED;
-import static org.apache.beam.sdk.io.TextIO.CompressionType.ZIP;
-import static org.apache.beam.sdk.transforms.display.DisplayDataMatchers.hasDisplayItem;
-import static org.apache.beam.sdk.transforms.display.DisplayDataMatchers.hasValue;
-import static org.hamcrest.Matchers.containsInAnyOrder;
-import static org.hamcrest.Matchers.equalTo;
-import static org.hamcrest.Matchers.greaterThan;
-import static org.hamcrest.Matchers.hasItem;
-import static org.hamcrest.Matchers.hasSize;
-import static org.hamcrest.Matchers.startsWith;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertNotNull;
-import static org.junit.Assert.assertThat;
-import static org.junit.Assert.assertTrue;
-
-import com.google.common.base.Function;
-import com.google.common.base.Functions;
-import com.google.common.base.Predicate;
-import com.google.common.base.Predicates;
-import com.google.common.collect.FluentIterable;
-import com.google.common.collect.ImmutableList;
-import com.google.common.collect.Iterables;
-import com.google.common.collect.Lists;
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.FileOutputStream;
-import java.io.FileReader;
-import java.io.IOException;
-import java.io.OutputStream;
-import java.io.PrintStream;
-import java.nio.charset.StandardCharsets;
-import java.nio.file.FileVisitResult;
-import java.nio.file.Files;
-import java.nio.file.Path;
-import java.nio.file.SimpleFileVisitor;
-import java.nio.file.attribute.BasicFileAttributes;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collections;
-import java.util.List;
-import java.util.Set;
-import java.util.zip.GZIPOutputStream;
-import java.util.zip.ZipEntry;
-import java.util.zip.ZipOutputStream;
-import javax.annotation.Nullable;
-import org.apache.beam.sdk.coders.AvroCoder;
-import org.apache.beam.sdk.coders.Coder;
-import org.apache.beam.sdk.coders.DefaultCoder;
-import org.apache.beam.sdk.coders.StringUtf8Coder;
-import org.apache.beam.sdk.io.BoundedSource.BoundedReader;
-import org.apache.beam.sdk.io.DefaultFilenamePolicy.Params;
-import org.apache.beam.sdk.io.FileBasedSink.DynamicDestinations;
-import org.apache.beam.sdk.io.FileBasedSink.FilenamePolicy;
-import org.apache.beam.sdk.io.FileBasedSink.WritableByteChannelFactory;
-import org.apache.beam.sdk.io.TextIO.CompressionType;
-import org.apache.beam.sdk.io.fs.MatchResult;
-import org.apache.beam.sdk.io.fs.MatchResult.Metadata;
-import org.apache.beam.sdk.io.fs.ResolveOptions.StandardResolveOptions;
-import org.apache.beam.sdk.io.fs.ResourceId;
-import org.apache.beam.sdk.options.PipelineOptions;
-import org.apache.beam.sdk.options.PipelineOptionsFactory;
-import org.apache.beam.sdk.options.ValueProvider;
-import org.apache.beam.sdk.options.ValueProvider.StaticValueProvider;
-import org.apache.beam.sdk.testing.NeedsRunner;
-import org.apache.beam.sdk.testing.PAssert;
-import org.apache.beam.sdk.testing.SourceTestUtils;
-import org.apache.beam.sdk.testing.TestPipeline;
-import org.apache.beam.sdk.testing.ValidatesRunner;
-import org.apache.beam.sdk.transforms.Create;
-import org.apache.beam.sdk.transforms.SerializableFunction;
-import org.apache.beam.sdk.transforms.display.DisplayData;
-import org.apache.beam.sdk.transforms.display.DisplayDataEvaluator;
-import org.apache.beam.sdk.util.CoderUtils;
-import org.apache.beam.sdk.values.PCollection;
-import org.apache.commons.compress.compressors.bzip2.BZip2CompressorOutputStream;
-import org.apache.commons.compress.compressors.deflate.DeflateCompressorOutputStream;
-import org.junit.AfterClass;
-import org.junit.BeforeClass;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.experimental.categories.Category;
-import org.junit.rules.ExpectedException;
 import org.junit.runner.RunWith;
-import org.junit.runners.JUnit4;
-
-/**
- * Tests for {@link TextIO} {@link TextIO.Read} and {@link TextIO.Write} transforms.
- */
-// TODO: Change the tests to use ValidatesRunner instead of NeedsRunner
-@RunWith(JUnit4.class)
-@SuppressWarnings("unchecked")
+import org.junit.runners.Suite;
+
+/** Tests for {@link TextIO} transforms. */
+@RunWith(Suite.class)
+@Suite.SuiteClasses({
+  TextIOReadTest.class,
+  TextIOWriteTest.class
+})
 public class TextIOTest {
-  private static final String MY_HEADER = "myHeader";
-  private static final String MY_FOOTER = "myFooter";
-  private static final List<String> EMPTY = Collections.emptyList();
-  private static final List<String> TINY =
-      Arrays.asList("Irritable eagle", "Optimistic jay", "Fanciful hawk");
-  private static final List<String> LARGE = makeLines(1000);
-
-  private static Path tempFolder;
-  private static File emptyTxt;
-  private static File tinyTxt;
-  private static File largeTxt;
-  private static File emptyGz;
-  private static File tinyGz;
-  private static File largeGz;
-  private static File emptyBzip2;
-  private static File tinyBzip2;
-  private static File largeBzip2;
-  private static File emptyZip;
-  private static File tinyZip;
-  private static File largeZip;
-  private static File emptyDeflate;
-  private static File tinyDeflate;
-  private static File largeDeflate;
-
-  @Rule
-  public TestPipeline p = TestPipeline.create();
-
-  @Rule
-  public ExpectedException expectedException = ExpectedException.none();
-
-  private static File writeToFile(List<String> lines, String filename, CompressionType compression)
-      throws IOException {
-    File file = tempFolder.resolve(filename).toFile();
-    OutputStream output = new FileOutputStream(file);
-    switch (compression) {
-      case UNCOMPRESSED:
-        break;
-      case GZIP:
-        output = new GZIPOutputStream(output);
-        break;
-      case BZIP2:
-        output = new BZip2CompressorOutputStream(output);
-        break;
-      case ZIP:
-        ZipOutputStream zipOutput = new ZipOutputStream(output);
-        zipOutput.putNextEntry(new ZipEntry("entry"));
-        output = zipOutput;
-        break;
-      case DEFLATE:
-        output = new DeflateCompressorOutputStream(output);
-        break;
-      default:
-        throw new UnsupportedOperationException(compression.toString());
-    }
-    writeToStreamAndClose(lines, output);
-    return file;
-  }
-
-  @BeforeClass
-  public static void setupClass() throws IOException {
-    tempFolder = Files.createTempDirectory("TextIOTest");
-    // empty files
-    emptyTxt = writeToFile(EMPTY, "empty.txt", CompressionType.UNCOMPRESSED);
-    emptyGz = writeToFile(EMPTY, "empty.gz", GZIP);
-    emptyBzip2 = writeToFile(EMPTY, "empty.bz2", BZIP2);
-    emptyZip = writeToFile(EMPTY, "empty.zip", ZIP);
-    emptyDeflate = writeToFile(EMPTY, "empty.deflate", DEFLATE);
-    // tiny files
-    tinyTxt = writeToFile(TINY, "tiny.txt", CompressionType.UNCOMPRESSED);
-    tinyGz = writeToFile(TINY, "tiny.gz", GZIP);
-    tinyBzip2 = writeToFile(TINY, "tiny.bz2", BZIP2);
-    tinyZip = writeToFile(TINY, "tiny.zip", ZIP);
-    tinyDeflate = writeToFile(TINY, "tiny.deflate", DEFLATE);
-    // large files
-    largeTxt = writeToFile(LARGE, "large.txt", CompressionType.UNCOMPRESSED);
-    largeGz = writeToFile(LARGE, "large.gz", GZIP);
-    largeBzip2 = writeToFile(LARGE, "large.bz2", BZIP2);
-    largeZip = writeToFile(LARGE, "large.zip", ZIP);
-    largeDeflate = writeToFile(LARGE, "large.deflate", DEFLATE);
-  }
-
-  @AfterClass
-  public static void teardownClass() throws IOException {
-    Files.walkFileTree(tempFolder, new SimpleFileVisitor<Path>() {
-      @Override
-      public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException {
-        Files.delete(file);
-        return FileVisitResult.CONTINUE;
-      }
-
-      @Override
-      public FileVisitResult postVisitDirectory(Path dir, IOException exc) throws IOException {
-        Files.delete(dir);
-        return FileVisitResult.CONTINUE;
-      }
-    });
-  }
-
-  private void runTestRead(String[] expected) throws Exception {
-    File tmpFile = Files.createTempFile(tempFolder, "file", "txt").toFile();
-    String filename = tmpFile.getPath();
-
-    try (PrintStream writer = new PrintStream(new FileOutputStream(tmpFile))) {
-      for (String elem : expected) {
-        byte[] encodedElem = CoderUtils.encodeToByteArray(StringUtf8Coder.of(), elem);
-        String line = new String(encodedElem);
-        writer.println(line);
-      }
-    }
-
-    TextIO.Read read = TextIO.read().from(filename);
-
-    PCollection<String> output = p.apply(read);
-
-    PAssert.that(output).containsInAnyOrder(expected);
-    p.run();
-  }
-
-  @Test
-  @Category(NeedsRunner.class)
-  public void testReadStrings() throws Exception {
-    runTestRead(LINES_ARRAY);
-  }
-
-  @Test
-  @Category(NeedsRunner.class)
-  public void testReadEmptyStrings() throws Exception {
-    runTestRead(NO_LINES_ARRAY);
-  }
-
-  @Test
-  public void testReadNamed() throws Exception {
-    p.enableAbandonedNodeEnforcement(false);
-
-    assertEquals(
-        "TextIO.Read/Read.out",
-        p.apply(TextIO.read().from("somefile")).getName());
-    assertEquals(
-        "MyRead/Read.out",
-        p.apply("MyRead", TextIO.read().from(emptyTxt.getPath())).getName());
-  }
-
-  @Test
-  public void testReadDisplayData() {
-    TextIO.Read read = TextIO.read()
-        .from("foo.*")
-        .withCompressionType(BZIP2);
-
-    DisplayData displayData = DisplayData.from(read);
-
-    assertThat(displayData, hasDisplayItem("filePattern", "foo.*"));
-    assertThat(displayData, hasDisplayItem("compressionType", BZIP2.toString()));
-  }
-
-  @Test
-  @Category(ValidatesRunner.class)
-  public void testPrimitiveReadDisplayData() {
-    DisplayDataEvaluator evaluator = DisplayDataEvaluator.create();
-
-    TextIO.Read read = TextIO.read()
-        .from("foobar");
-
-    Set<DisplayData> displayData = evaluator.displayDataForPrimitiveSourceTransforms(read);
-    assertThat("TextIO.Read should include the file prefix in its primitive display data",
-        displayData, hasItem(hasDisplayItem(hasValue(startsWith("foobar")))));
-  }
-
-  static class TestDynamicDestinations extends DynamicDestinations<String, String> {
-    ResourceId baseDir;
-
-    TestDynamicDestinations(ResourceId baseDir) {
-      this.baseDir = baseDir;
-    }
-
-    @Override
-    public String getDestination(String element) {
-      // Destination is based on first character of string.
-      return element.substring(0, 1);
-    }
-
-    @Override
-    public String getDefaultDestination() {
-      return "";
-    }
-
-    @Nullable
-    @Override
-    public Coder<String> getDestinationCoder() {
-      return StringUtf8Coder.of();
-    }
-
-    @Override
-    public FilenamePolicy getFilenamePolicy(String destination) {
-      return DefaultFilenamePolicy.fromStandardParameters(
-          StaticValueProvider.of(
-              baseDir.resolve("file_" + destination + ".txt", StandardResolveOptions.RESOLVE_FILE)),
-          null,
-          null,
-          false);
-    }
-  }
-
-  class StartsWith implements Predicate<String> {
-    String prefix;
-
-    StartsWith(String prefix) {
-      this.prefix = prefix;
-    }
-
-    @Override
-    public boolean apply(@Nullable String input) {
-      return input.startsWith(prefix);
-    }
-  }
-
-  @Test
-  @Category(NeedsRunner.class)
-  public void testDynamicDestinations() throws Exception {
-    ResourceId baseDir =
-        FileSystems.matchNewResource(
-            Files.createTempDirectory(tempFolder, "testDynamicDestinations").toString(), true);
-
-    List<String> elements = Lists.newArrayList("aaaa", "aaab", "baaa", "baab", "caaa", "caab");
-    PCollection<String> input = p.apply(Create.of(elements).withCoder(StringUtf8Coder.of()));
-    input.apply(
-        TextIO.write()
-            .to(new TestDynamicDestinations(baseDir))
-            .withTempDirectory(FileSystems.matchNewResource(baseDir.toString(), true)));
-    p.run();
-
-    assertOutputFiles(
-        Iterables.toArray(Iterables.filter(elements, new StartsWith("a")), String.class),
-        null,
-        null,
-        0,
-        baseDir.resolve("file_a.txt", StandardResolveOptions.RESOLVE_FILE),
-        DefaultFilenamePolicy.DEFAULT_UNWINDOWED_SHARD_TEMPLATE);
-    assertOutputFiles(
-        Iterables.toArray(Iterables.filter(elements, new StartsWith("b")), String.class),
-        null,
-        null,
-        0,
-        baseDir.resolve("file_b.txt", StandardResolveOptions.RESOLVE_FILE),
-        DefaultFilenamePolicy.DEFAULT_UNWINDOWED_SHARD_TEMPLATE);
-    assertOutputFiles(
-        Iterables.toArray(Iterables.filter(elements, new StartsWith("c")), String.class),
-        null,
-        null,
-        0,
-        baseDir.resolve("file_c.txt", StandardResolveOptions.RESOLVE_FILE),
-        DefaultFilenamePolicy.DEFAULT_UNWINDOWED_SHARD_TEMPLATE);
-  }
-
-  @DefaultCoder(AvroCoder.class)
-  private static class UserWriteType {
-    String destination;
-    String metadata;
-
-    UserWriteType() {
-      this.destination = "";
-      this.metadata = "";
-    }
-
-    UserWriteType(String destination, String metadata) {
-      this.destination = destination;
-      this.metadata = metadata;
-    }
-
-    @Override
-    public String toString() {
-      return String.format("destination: %s metadata : %s", destination, metadata);
-    }
-  }
-
-  private static class SerializeUserWrite implements SerializableFunction<UserWriteType, String> {
-    @Override
-    public String apply(UserWriteType input) {
-      return input.toString();
-    }
-  }
-
-  private static class UserWriteDestination implements SerializableFunction<UserWriteType, Params> {
-    private ResourceId baseDir;
-
-    UserWriteDestination(ResourceId baseDir) {
-      this.baseDir = baseDir;
-    }
-
-    @Override
-    public Params apply(UserWriteType input) {
-      return new Params()
-          .withBaseFilename(
-              baseDir.resolve(
-                  "file_" + input.destination.substring(0, 1) + ".txt",
-                  StandardResolveOptions.RESOLVE_FILE));
-    }
-  }
-
-  private static class ExtractWriteDestination implements Function<UserWriteType, String> {
-    @Override
-    public String apply(@Nullable UserWriteType input) {
-      return input.destination;
-    }
-  }
-
-  @Test
-  @Category(NeedsRunner.class)
-  public void testDynamicDefaultFilenamePolicy() throws Exception {
-    ResourceId baseDir =
-        FileSystems.matchNewResource(
-            Files.createTempDirectory(tempFolder, "testDynamicDestinations").toString(), true);
-
-    List<UserWriteType> elements =
-        Lists.newArrayList(
-            new UserWriteType("aaaa", "first"),
-            new UserWriteType("aaab", "second"),
-            new UserWriteType("baaa", "third"),
-            new UserWriteType("baab", "fourth"),
-            new UserWriteType("caaa", "fifth"),
-            new UserWriteType("caab", "sixth"));
-    PCollection<UserWriteType> input = p.apply(Create.of(elements));
-    input.apply(
-        TextIO.writeCustomType(new SerializeUserWrite())
-            .to(new UserWriteDestination(baseDir), new Params())
-            .withTempDirectory(FileSystems.matchNewResource(baseDir.toString(), true)));
-    p.run();
-
-    String[] aElements =
-        Iterables.toArray(
-            Iterables.transform(
-                Iterables.filter(
-                    elements,
-                    Predicates.compose(new StartsWith("a"), new ExtractWriteDestination())),
-                Functions.toStringFunction()),
-            String.class);
-    String[] bElements =
-        Iterables.toArray(
-            Iterables.transform(
-                Iterables.filter(
-                    elements,
-                    Predicates.compose(new StartsWith("b"), new ExtractWriteDestination())),
-                Functions.toStringFunction()),
-            String.class);
-    String[] cElements =
-        Iterables.toArray(
-            Iterables.transform(
-                Iterables.filter(
-                    elements,
-                    Predicates.compose(new StartsWith("c"), new ExtractWriteDestination())),
-                Functions.toStringFunction()),
-            String.class);
-    assertOutputFiles(
-        aElements,
-        null,
-        null,
-        0,
-        baseDir.resolve("file_a.txt", StandardResolveOptions.RESOLVE_FILE),
-        DefaultFilenamePolicy.DEFAULT_UNWINDOWED_SHARD_TEMPLATE);
-    assertOutputFiles(
-        bElements,
-        null,
-        null,
-        0,
-        baseDir.resolve("file_b.txt", StandardResolveOptions.RESOLVE_FILE),
-        DefaultFilenamePolicy.DEFAULT_UNWINDOWED_SHARD_TEMPLATE);
-    assertOutputFiles(
-        cElements,
-        null,
-        null,
-        0,
-        baseDir.resolve("file_c.txt", StandardResolveOptions.RESOLVE_FILE),
-        DefaultFilenamePolicy.DEFAULT_UNWINDOWED_SHARD_TEMPLATE);
-  }
-
-  private void runTestWrite(String[] elems) throws Exception {
-    runTestWrite(elems, null, null, 1);
-  }
-
-  private void runTestWrite(String[] elems, int numShards) throws Exception {
-    runTestWrite(elems, null, null, numShards);
-  }
-
-  private void runTestWrite(String[] elems, String header, String footer)
-      throws Exception {
-    runTestWrite(elems, header, footer, 1);
-  }
-
-  private void runTestWrite(
-      String[] elems, String header, String footer, int numShards) throws Exception {
-    String outputName = "file.txt";
-    Path baseDir = Files.createTempDirectory(tempFolder, "testwrite");
-    ResourceId baseFilename =
-        FileBasedSink.convertToFileResourceIfPossible(baseDir.resolve(outputName).toString());
-
-    PCollection<String> input =
-        p.apply(Create.of(Arrays.asList(elems)).withCoder(StringUtf8Coder.of()));
-
-    TextIO.Write write =
-        TextIO.write().to(baseFilename)
-            .withHeader(header)
-            .withFooter(footer);
-
-    if (numShards == 1) {
-      write = write.withoutSharding();
-    } else if (numShards > 0) {
-      write = write.withNumShards(numShards).withShardNameTemplate(ShardNameTemplate.INDEX_OF_MAX);
-    }
-
-    input.apply(write);
-
-    p.run();
-
-    assertOutputFiles(
-        elems,
-        header,
-        footer,
-        numShards,
-        baseFilename,
-        firstNonNull(
-            write.inner.getShardTemplate(),
-            DefaultFilenamePolicy.DEFAULT_UNWINDOWED_SHARD_TEMPLATE));
-  }
-
-  public static void assertOutputFiles(
-      String[] elems,
-      final String header,
-      final String footer,
-      int numShards,
-      ResourceId outputPrefix,
-      String shardNameTemplate)
-      throws Exception {
-    List<File> expectedFiles = new ArrayList<>();
-    if (numShards == 0) {
-      String pattern = outputPrefix.toString() + "*";
-      List<MatchResult> matches = FileSystems.match(Collections.singletonList(pattern));
-      for (Metadata expectedFile : Iterables.getOnlyElement(matches).metadata()) {
-        expectedFiles.add(new File(expectedFile.resourceId().toString()));
-      }
-    } else {
-      for (int i = 0; i < numShards; i++) {
-        expectedFiles.add(
-            new File(
-                DefaultFilenamePolicy.constructName(
-                        outputPrefix, shardNameTemplate, "", i, numShards, null, null)
-                    .toString()));
-      }
-    }
-
-    List<List<String>> actual = new ArrayList<>();
-
-    for (File tmpFile : expectedFiles) {
-      try (BufferedReader reader = new BufferedReader(new FileReader(tmpFile))) {
-        List<String> currentFile = new ArrayList<>();
-        for (;;) {
-          String line = reader.readLine();
-          if (line == null) {
-            break;
-          }
-          currentFile.add(line);
-        }
-        actual.add(currentFile);
-      }
-    }
-
-    List<String> expectedElements = new ArrayList<>(elems.length);
-    for (String elem : elems) {
-      byte[] encodedElem = CoderUtils.encodeToByteArray(StringUtf8Coder.of(), elem);
-      String line = new String(encodedElem);
-      expectedElements.add(line);
-    }
-
-    List<String> actualElements =
-        Lists.newArrayList(
-            Iterables.concat(
-                FluentIterable
-                    .from(actual)
-                    .transform(removeHeaderAndFooter(header, footer))
-                    .toList()));
-
-    assertThat(actualElements, containsInAnyOrder(expectedElements.toArray()));
-
-    assertTrue(Iterables.all(actual, haveProperHeaderAndFooter(header, footer)));
-  }
-
-  private static Function<List<String>, List<String>> removeHeaderAndFooter(final String header,
-      final String footer) {
-    return new Function<List<String>, List<String>>() {
-      @Nullable
-      @Override
-      public List<String> apply(List<String> lines) {
-        ArrayList<String> newLines = Lists.newArrayList(lines);
-        if (header != null) {
-          newLines.remove(0);
-        }
-        if (footer != null) {
-          int last = newLines.size() - 1;
-          newLines.remove(last);
-        }
-        return newLines;
-      }
-    };
-  }
-
-  private static Predicate<List<String>> haveProperHeaderAndFooter(final String header,
-      final String footer) {
-    return new Predicate<List<String>>() {
-      @Override
-      public boolean apply(List<String> fileLines) {
-        int last = fileLines.size() - 1;
-        return (header == null || fileLines.get(0).equals(header))
-            && (footer == null || fileLines.get(last).equals(footer));
-      }
-    };
-  }
-
-  @Test
-  @Category(NeedsRunner.class)
-  public void testWriteStrings() throws Exception {
-    runTestWrite(LINES_ARRAY);
-  }
-
-  @Test
-  @Category(NeedsRunner.class)
-  public void testWriteEmptyStringsNoSharding() throws Exception {
-    runTestWrite(NO_LINES_ARRAY, 0);
-  }
-
-  @Test
-  @Category(NeedsRunner.class)
-  public void testWriteEmptyStrings() throws Exception {
-    runTestWrite(NO_LINES_ARRAY);
-  }
-
-  @Test
-  @Category(NeedsRunner.class)
-  public void testShardedWrite() throws Exception {
-    runTestWrite(LINES_ARRAY, 5);
-  }
-
-  @Test
-  @Category(NeedsRunner.class)
-  public void testWriteWithHeader() throws Exception {
-    runTestWrite(LINES_ARRAY, MY_HEADER, null);
-  }
-
-  @Test
-  @Category(NeedsRunner.class)
-  public void testWriteWithFooter() throws Exception {
-    runTestWrite(LINES_ARRAY, null, MY_FOOTER);
-  }
-
-  @Test
-  @Category(NeedsRunner.class)
-  public void testWriteWithHeaderAndFooter() throws Exception {
-    runTestWrite(LINES_ARRAY, MY_HEADER, MY_FOOTER);
-  }
-
-  @Test
-  @Category(NeedsRunner.class)
-  public void testWriteWithWritableByteChannelFactory() throws Exception {
-    Coder<String> coder = StringUtf8Coder.of();
-    String outputName = "file.txt";
-    ResourceId baseDir =
-        FileSystems.matchNewResource(
-            Files.createTempDirectory(tempFolder, "testwrite").toString(), true);
-
-    PCollection<String> input = p.apply(Create.of(Arrays.asList(LINES2_ARRAY)).withCoder(coder));
-
-    final WritableByteChannelFactory writableByteChannelFactory =
-        new DrunkWritableByteChannelFactory();
-    TextIO.Write write =
-        TextIO.write()
-            .to(baseDir.resolve(outputName, StandardResolveOptions.RESOLVE_FILE).toString())
-            .withoutSharding()
-            .withWritableByteChannelFactory(writableByteChannelFactory);
-    DisplayData displayData = DisplayData.from(write);
-    assertThat(displayData, hasDisplayItem("writableByteChannelFactory", "DRUNK"));
-
-    input.apply(write);
-
-    p.run();
-
-    final List<String> drunkElems = new ArrayList<>(LINES2_ARRAY.length * 2 + 2);
-    for (String elem : LINES2_ARRAY) {
-      drunkElems.add(elem);
-      drunkElems.add(elem);
-    }
-    assertOutputFiles(
-        drunkElems.toArray(new String[0]),
-        null,
-        null,
-        1,
-        baseDir.resolve(
-            outputName + writableByteChannelFactory.getSuggestedFilenameSuffix(),
-            StandardResolveOptions.RESOLVE_FILE),
-        write.inner.getShardTemplate());
-  }
-
-  @Test
-  public void testWriteDisplayData() {
-    TextIO.Write write = TextIO.write()
-        .to("/foo")
-        .withSuffix("bar")
-        .withShardNameTemplate("-SS-of-NN-")
-        .withNumShards(100)
-        .withFooter("myFooter")
-        .withHeader("myHeader");
-
-    DisplayData displayData = DisplayData.from(write);
-
-    assertThat(displayData, hasDisplayItem("filePrefix", "/foo"));
-    assertThat(displayData, hasDisplayItem("fileSuffix", "bar"));
-    assertThat(displayData, hasDisplayItem("fileHeader", "myHeader"));
-    assertThat(displayData, hasDisplayItem("fileFooter", "myFooter"));
-    assertThat(displayData, hasDisplayItem("shardNameTemplate", "-SS-of-NN-"));
-    assertThat(displayData, hasDisplayItem("numShards", 100));
-    assertThat(displayData, hasDisplayItem("writableByteChannelFactory", "UNCOMPRESSED"));
-  }
-
-  @Test
-  public void testWriteDisplayDataValidateThenHeader() {
-    TextIO.Write write = TextIO.write()
-        .to("foo")
-        .withHeader("myHeader");
-
-    DisplayData displayData = DisplayData.from(write);
-
-    assertThat(displayData, hasDisplayItem("fileHeader", "myHeader"));
-  }
-
-  @Test
-  public void testWriteDisplayDataValidateThenFooter() {
-    TextIO.Write write = TextIO.write()
-        .to("foo")
-        .withFooter("myFooter");
-
-    DisplayData displayData = DisplayData.from(write);
-
-    assertThat(displayData, hasDisplayItem("fileFooter", "myFooter"));
-  }
-
-  /** Options for testing. */
-  public interface RuntimeTestOptions extends PipelineOptions {
-    ValueProvider<String> getInput();
-    void setInput(ValueProvider<String> value);
-
-    ValueProvider<String> getOutput();
-    void setOutput(ValueProvider<String> value);
-  }
-
-  @Test
-  public void testRuntimeOptionsNotCalledInApply() throws Exception {
-    p.enableAbandonedNodeEnforcement(false);
-
-    RuntimeTestOptions options = PipelineOptionsFactory.as(RuntimeTestOptions.class);
-
-    p
-        .apply(TextIO.read().from(options.getInput()))
-        .apply(TextIO.write().to(options.getOutput()));
-  }
-
-  @Test
-  public void testCompressionTypeIsSet() throws Exception {
-    TextIO.Read read = TextIO.read().from("/tmp/test");
-    assertEquals(AUTO, read.getCompressionType());
-    read = TextIO.read().from("/tmp/test").withCompressionType(GZIP);
-    assertEquals(GZIP, read.getCompressionType());
-  }
-
-  /**
-   * Helper that writes the given lines (adding a newline in between) to a stream, then closes the
-   * stream.
-   */
-  private static void writeToStreamAndClose(List<String> lines, OutputStream outputStream) {
-    try (PrintStream writer = new PrintStream(outputStream)) {
-      for (String line : lines) {
-        writer.println(line);
-      }
-    }
-  }
-
-  /**
-   * Helper method that runs TextIO.read().from(filename).withCompressionType(compressionType) and
-   * TextIO.readAll().withCompressionType(compressionType) applied to the single filename,
-   * and asserts that the results match the given expected output.
-   */
-  private void assertReadingCompressedFileMatchesExpected(
-      File file, CompressionType compressionType, List<String> expected) {
-
-    TextIO.Read read = TextIO.read().from(file.getPath()).withCompressionType(compressionType);
-    PAssert.that(p.apply("Read_" + file + "_" + compressionType.toString(), read))
-        .containsInAnyOrder(expected);
-
-    TextIO.ReadAll readAll =
-        TextIO.readAll().withCompressionType(compressionType).withDesiredBundleSizeBytes(10);
-    PAssert.that(
-            p.apply("Create_" + file, Create.of(file.getPath()))
-                .apply("Read_" + compressionType.toString(), readAll))
-        .containsInAnyOrder(expected);
-    p.run();
-  }
-
-  /**
-   * Helper to make an array of compressible strings. Returns ["word"i] for i in range(0,n).
-   */
-  private static List<String> makeLines(int n) {
-    List<String> ret = new ArrayList<>();
-    for (int i = 0; i < n; ++i) {
-      ret.add("word" + i);
-    }
-    return ret;
-  }
-
-  /**
-   * Tests reading from a small, gzipped file with no .gz extension but GZIP compression set.
-   */
-  @Test
-  @Category(NeedsRunner.class)
-  public void testSmallCompressedGzipReadNoExtension() throws Exception {
-    File smallGzNoExtension = writeToFile(TINY, "tiny_gz_no_extension", GZIP);
-    assertReadingCompressedFileMatchesExpected(smallGzNoExtension, GZIP, TINY);
-  }
-
-  /**
-   * Tests reading from a small, uncompressed file with .gz extension. This must work in AUTO or
-   * GZIP modes. This is needed because some network file systems / HTTP clients will transparently
-   * decompress gzipped content.
-   */
-  @Test
-  @Category(NeedsRunner.class)
-  public void testSmallCompressedGzipReadActuallyUncompressed() throws Exception {
-    File smallGzNotCompressed =
-        writeToFile(TINY, "tiny_uncompressed.gz", CompressionType.UNCOMPRESSED);
-    // Should work with GZIP compression set.
-    assertReadingCompressedFileMatchesExpected(smallGzNotCompressed, GZIP, TINY);
-    // Should also work with AUTO mode set.
-    assertReadingCompressedFileMatchesExpected(smallGzNotCompressed, AUTO, TINY);
-  }
-
-  /**
-   * Tests reading from a small, bzip2ed file with no .bz2 extension but BZIP2 compression set.
-   */
-  @Test
-  @Category(NeedsRunner.class)
-  public void testSmallCompressedBzip2ReadNoExtension() throws Exception {
-    File smallBz2NoExtension = writeToFile(TINY, "tiny_bz2_no_extension", BZIP2);
-    assertReadingCompressedFileMatchesExpected(smallBz2NoExtension, BZIP2, TINY);
-  }
-
-  /**
-   * Create a zip file with the given lines.
-   *
-   * @param expected A list of expected lines, populated in the zip file.
-   * @param filename Optionally zip file name (can be null).
-   * @param fieldsEntries Fields to write in zip entries.
-   * @return The zip filename.
-   * @throws Exception In case of a failure during zip file creation.
-   */
-  private String createZipFile(List<String> expected, String filename, String[]... fieldsEntries)
-      throws Exception {
-    File tmpFile = tempFolder.resolve(filename).toFile();
-    String tmpFileName = tmpFile.getPath();
-
-    ZipOutputStream out = new ZipOutputStream(new FileOutputStream(tmpFile));
-    PrintStream writer = new PrintStream(out, true /* auto-flush on write */);
-
-    int index = 0;
-    for (String[] entry : fieldsEntries) {
-      out.putNextEntry(new ZipEntry(Integer.toString(index)));
-      for (String field : entry) {
-        writer.println(field);
-        expected.add(field);
-      }
-      out.closeEntry();
-      index++;
-    }
-
-    writer.close();
-    out.close();
-
-    return tmpFileName;
-  }
-
-  @Test
-  @Category(NeedsRunner.class)
-  public void testTxtRead() throws Exception {
-    // Files with non-compressed extensions should work in AUTO and UNCOMPRESSED modes.
-    for (CompressionType type : new CompressionType[]{AUTO, UNCOMPRESSED}) {
-      assertReadingCompressedFileMatchesExpected(emptyTxt, type, EMPTY);
-      assertReadingCompressedFileMatchesExpected(tinyTxt, type, TINY);
-      assertReadingCompressedFileMatchesExpected(largeTxt, type, LARGE);
-    }
-  }
-
-  @Test
-  @Category(NeedsRunner.class)
-  public void testGzipCompressedRead() throws Exception {
-    // Files with the right extensions should work in AUTO and GZIP modes.
-    for (CompressionType type : new CompressionType[]{AUTO, GZIP}) {
-      assertReadingCompressedFileMatchesExpected(emptyGz, type, EMPTY);
-      assertReadingCompressedFileMatchesExpected(tinyGz, type, TINY);
-      assertReadingCompressedFileMatchesExpected(largeGz, type, LARGE);
-    }
-
-    // Sanity check that we're properly testing compression.
-    assertThat(largeTxt.length(), greaterThan(largeGz.length()));
-
-    // GZIP files with non-gz extension should work in GZIP mode.
-    File gzFile = writeToFile(TINY, "tiny_gz_no_extension", GZIP);
-    assertReadingCompressedFileMatchesExpected(gzFile, GZIP, TINY);
-  }
-
-  @Test
-  @Category(NeedsRunner.class)
-  public void testBzip2CompressedRead() throws Exception {
-    // Files with the right extensions should work in AUTO and BZIP2 modes.
-    for (CompressionType type : new CompressionType[]{AUTO, BZIP2}) {
-      assertReadingCompressedFileMatchesExpected(emptyBzip2, type, EMPTY);
-      assertReadingCompressedFileMatchesExpected(tinyBzip2, type, TINY);
-      assertReadingCompressedFileMatchesExpected(largeBzip2, type, LARGE);
-    }
-
-    // Sanity check that we're properly testing compression.
-    assertThat(largeTxt.length(), greaterThan(largeBzip2.length()));
-
-    // BZ2 files with non-bz2 extension should work in BZIP2 mode.
-    File bz2File = writeToFile(TINY, "tiny_bz2_no_extension", BZIP2);
-    assertReadingCompressedFileMatchesExpected(bz2File, BZIP2, TINY);
-  }
-
-  @Test
-  @Category(NeedsRunner.class)
-  public void testZipCompressedRead() throws Exception {
-    // Files with the right extensions should work in AUTO and ZIP modes.
-    for (CompressionType type : new CompressionType[]{AUTO, ZIP}) {
-      assertReadingCompressedFileMatchesExpected(emptyZip, type, EMPTY);
-      assertReadingCompressedFileMatchesExpected(tinyZip, type, TINY);
-      assertReadingCompressedFileMatchesExpected(largeZip, type, LARGE);
-    }
-
-    // Sanity check that we're properly testing compression.
-    assertThat(largeTxt.length(), greaterThan(largeZip.length()));
-
-    // Zip files with non-zip extension should work in ZIP mode.
-    File zipFile = writeToFile(TINY, "tiny_zip_no_extension", ZIP);
-    assertReadingCompressedFileMatchesExpected(zipFile, ZIP, TINY);
-  }
-
-  @Test
-  @Category(NeedsRunner.class)
-  public void testDeflateCompressedRead() throws Exception {
-    // Files with the right extensions should work in AUTO and ZIP modes.
-    for (CompressionType type : new CompressionType[]{AUTO, DEFLATE}) {
-      assertReadingCompressedFileMatchesExpected(emptyDeflate, type, EMPTY);
-      assertReadingCompressedFileMatchesExpected(tinyDeflate, type, TINY);
-      assertReadingCompressedFileMatchesExpected(largeDeflate, type, LARGE);
-    }
-
-    // Sanity check that we're properly testing compression.
-    assertThat(largeTxt.length(), greaterThan(largeDeflate.length()));
-
-    // Deflate files with non-deflate extension should work in DEFLATE mode.
-    File deflateFile = writeToFile(TINY, "tiny_deflate_no_extension", DEFLATE);
-    assertReadingCompressedFileMatchesExpected(deflateFile, DEFLATE, TINY);
-  }
-
-  /**
-   * Tests a zip file with no entries. This is a corner case not tested elsewhere as the default
-   * test zip files have a single entry.
-   */
-  @Test
-  @Category(NeedsRunner.class)
-  public void testZipCompressedReadWithNoEntries() throws Exception {
-    String filename = createZipFile(new ArrayList<String>(), "empty zip file");
-    assertReadingCompressedFileMatchesExpected(new File(filename), CompressionType.ZIP, EMPTY);
-  }
-
-  /**
-   * Tests a zip file with multiple entries. This is a corner case not tested elsewhere as the
-   * default test zip files have a single entry.
-   */
-  @Test
-  @Category(NeedsRunner.class)
-  public void testZipCompressedReadWithMultiEntriesFile() throws Exception {
-    String[] entry0 = new String[]{"first", "second", "three"};
-    String[] entry1 = new String[]{"four", "five", "six"};
-    String[] entry2 = new String[]{"seven", "eight", "nine"};
-
-    List<String> expected = new ArrayList<>();
-
-    String filename = createZipFile(expected, "multiple entries", entry0, entry1, entry2);
-    assertReadingCompressedFileMatchesExpected(
-        new File(filename), CompressionType.ZIP, expected);
-  }
-
-  /**
-   * Read a ZIP compressed file containing data, multiple empty entries, and then more data. We
-   * expect just the data back.
-   */
-  @Test
-  @Category(NeedsRunner.class)
-  public void testZipCompressedReadWithComplexEmptyAndPresentEntries() throws Exception {
-    String filename = createZipFile(
-        new ArrayList<String>(),
-        "complex empty and present entries",
-        new String[]{"cat"},
-        new String[]{},
-        new String[]{},
-        new String[]{"dog"});
-
-    assertReadingCompressedFileMatchesExpected(
-        new File(filename), CompressionType.ZIP, Arrays.asList("cat", "dog"));
-  }
-
-  @Test
-  public void testTextIOGetName() {
-    assertEquals("TextIO.Read", TextIO.read().from("somefile").getName());
-    assertEquals("TextIO.Write", TextIO.write().to("somefile").getName());
-    assertEquals("TextIO.Read", TextIO.read().from("somefile").toString());
-  }
-
-  @Test
-  public void testProgressEmptyFile() throws IOException {
-    try (BoundedReader<String> reader =
-        prepareSource(new byte[0]).createReader(PipelineOptionsFactory.create())) {
-      // Check preconditions before starting.
-      assertEquals(0.0, reader.getFractionConsumed(), 1e-6);
-      assertEquals(0, reader.getSplitPointsConsumed());
-      assertEquals(BoundedReader.SPLIT_POINTS_UNKNOWN, reader.getSplitPointsRemaining());
-
-      // Assert empty
-      assertFalse(reader.start());
-
-      // Check postconditions after finishing
-      assertEquals(1.0, reader.getFractionConsumed(), 1e-6);
-      assertEquals(0, reader.getSplitPointsConsumed());
-      assertEquals(0, reader.getSplitPointsRemaining());
-    }
-  }
-
-  @Test
-  public void testProgressTextFile() throws IOException {
-    String file = "line1\nline2\nline3";
-    try (BoundedReader<String> reader =
-        prepareSource(file.getBytes()).createReader(PipelineOptionsFactory.create())) {
-      // Check preconditions before starting
-      assertEquals(0.0, reader.getFractionConsumed(), 1e-6);
-      assertEquals(0, reader.getSplitPointsConsumed());
-      assertEquals(BoundedReader.SPLIT_POINTS_UNKNOWN, reader.getSplitPointsRemaining());
-
-      // Line 1
-      assertTrue(reader.start());
-      assertEquals(0, reader.getSplitPointsConsumed());
-      assertEquals(BoundedReader.SPLIT_POINTS_UNKNOWN, reader.getSplitPointsRemaining());
-
-      // Line 2
-      assertTrue(reader.advance());
-      assertEquals(1, reader.getSplitPointsConsumed());
-      assertEquals(BoundedReader.SPLIT_POINTS_UNKNOWN, reader.getSplitPointsRemaining());
-
-      // Line 3
-      assertTrue(reader.advance());
-      assertEquals(2, reader.getSplitPointsConsumed());
-      assertEquals(1, reader.getSplitPointsRemaining());
-
-      // Check postconditions after finishing
-      assertFalse(reader.advance());
-      assertEquals(1.0, reader.getFractionConsumed(), 1e-6);
-      assertEquals(3, reader.getSplitPointsConsumed());
-      assertEquals(0, reader.getSplitPointsRemaining());
-    }
-  }
-
-  @Test
-  public void testProgressAfterSplitting() throws IOException {
-    String file = "line1\nline2\nline3";
-    BoundedSource<String> source = prepareSource(file.getBytes());
-    BoundedSource<String> remainder;
-
-    // Create the remainder, verifying properties pre- and post-splitting.
-    try (BoundedReader<String> readerOrig = source.createReader(PipelineOptionsFactory.create())) {
-      // Preconditions.
-      assertEquals(0.0, readerOrig.getFractionConsumed(), 1e-6);
-      assertEquals(0, readerOrig.getSplitPointsConsumed());
-      assertEquals(BoundedReader.SPLIT_POINTS_UNKNOWN, readerOrig.getSplitPointsRemaining());
-
-      // First record, before splitting.
-      assertTrue(readerOrig.start());
-      assertEquals(0, readerOrig.getSplitPointsConsumed());
-      assertEquals(BoundedReader.SPLIT_POINTS_UNKNOWN, readerOrig.getSplitPointsRemaining());
-
-      // Split. 0.1 is in line1, so should now be able to detect last record.
-      remainder = readerOrig.splitAtFraction(0.1);
-      System.err.println(readerOrig.getCurrentSource());
-      assertNotNull(remainder);
-
-      // First record, after splitting.
-      assertEquals(0, readerOrig.getSplitPointsConsumed());
-      assertEquals(1, readerOrig.getSplitPointsRemaining());
-
-      // Finish and postconditions.
-      assertFalse(readerOrig.advance());
-      assertEquals(1.0, readerOrig.getFractionConsumed(), 1e-6);
-      assertEquals(1, readerOrig.getSplitPointsConsumed());
-      assertEquals(0, readerOrig.getSplitPointsRemaining());
-    }
-
-    // Check the properties of the remainder.
-    try (BoundedReader<String> reader = remainder.createReader(PipelineOptionsFactory.create())) {
-      // Preconditions.
-      assertEquals(0.0, reader.getFractionConsumed(), 1e-6);
-      assertEquals(0, reader.getSplitPointsConsumed());
-      assertEquals(BoundedReader.SPLIT_POINTS_UNKNOWN, reader.getSplitPointsRemaining());
-
-      // First record should be line 2.
-      assertTrue(reader.start());
-      assertEquals(0, reader.getSplitPointsConsumed());
-      assertEquals(BoundedReader.SPLIT_POINTS_UNKNOWN, reader.getSplitPointsRemaining());
-
-      // Second record is line 3
-      assertTrue(reader.advance());
-      assertEquals(1, reader.getSplitPointsConsumed());
-      assertEquals(1, reader.getSplitPointsRemaining());
-
-      // Check postconditions after finishing
-      assertFalse(reader.advance());
-      assertEquals(1.0, reader.getFractionConsumed(), 1e-6);
-      assertEquals(2, reader.getSplitPointsConsumed());
-      assertEquals(0, reader.getSplitPointsRemaining());
-    }
-  }
-
-  @Test
-  public void testReadEmptyLines() throws Exception {
-    runTestReadWithData("\n\n\n".getBytes(StandardCharsets.UTF_8),
-        ImmutableList.of("", "", ""));
-  }
-
-  @Test
-  public void testReadFileWithLineFeedDelimiter() throws Exception {
-    runTestReadWithData("asdf\nhjkl\nxyz\n".getBytes(StandardCharsets.UTF_8),
-        ImmutableList.of("asdf", "hjkl", "xyz"));
-  }
-
-  @Test
-  public void testReadFileWithCarriageReturnDelimiter() throws Exception {
-    runTestReadWithData("asdf\rhjkl\rxyz\r".getBytes(StandardCharsets.UTF_8),
-        ImmutableList.of("asdf", "hjkl", "xyz"));
-  }
-
-  @Test
-  public void testReadFileWithCarriageReturnAndLineFeedDelimiter() throws Exception {
-    runTestReadWithData("asdf\r\nhjkl\r\nxyz\r\n".getBytes(StandardCharsets.UTF_8),
-        ImmutableList.of("asdf", "hjkl", "xyz"));
-  }
-
-  @Test
-  public void testReadFileWithMixedDelimiters() throws Exception {
-    runTestReadWithData("asdf\rhjkl\r\nxyz\n".getBytes(StandardCharsets.UTF_8),
-        ImmutableList.of("asdf", "hjkl", "xyz"));
-  }
-
-  @Test
-  public void testReadFileWithLineFeedDelimiterAndNonEmptyBytesAtEnd() throws Exception {
-    runTestReadWithData("asdf\nhjkl\nxyz".getBytes(StandardCharsets.UTF_8),
-        ImmutableList.of("asdf", "hjkl", "xyz"));
-  }
-
-  @Test
-  public void testReadFileWithCarriageReturnDelimiterAndNonEmptyBytesAtEnd() throws Exception {
-    runTestReadWithData("asdf\rhjkl\rxyz".getBytes(StandardCharsets.UTF_8),
-        ImmutableList.of("asdf", "hjkl", "xyz"));
-  }
-
-  @Test
-  public void testReadFileWithCarriageReturnAndLineFeedDelimiterAndNonEmptyBytesAtEnd()
-      throws Exception {
-    runTestReadWithData("asdf\r\nhjkl\r\nxyz".getBytes(StandardCharsets.UTF_8),
-        ImmutableList.of("asdf", "hjkl", "xyz"));
-  }
-
-  @Test
-  public void testReadFileWithMixedDelimitersAndNonEmptyBytesAtEnd() throws Exception {
-    runTestReadWithData("asdf\rhjkl\r\nxyz".getBytes(StandardCharsets.UTF_8),
-        ImmutableList.of("asdf", "hjkl", "xyz"));
-  }
-
-  private void runTestReadWithData(byte[] data, List<String> expectedResults) throws Exception {
-    TextSource source = prepareSource(data);
-    List<String> actual = SourceTestUtils.readFromSource(source, PipelineOptionsFactory.create());
-    assertThat(actual, containsInAnyOrder(new ArrayList<>(expectedResults).toArray(new String[0])));
-  }
-
-  @Test
-  public void testSplittingSourceWithEmptyLines() throws Exception {
-    TextSource source = prepareSource("\n\n\n".getBytes(StandardCharsets.UTF_8));
-    SourceTestUtils.assertSplitAtFractionExhaustive(source, PipelineOptionsFactory.create());
-  }
-
-  @Test
-  public void testSplittingSourceWithLineFeedDelimiter() throws Exception {
-    TextSource source = prepareSource("asdf\nhjkl\nxyz\n".getBytes(StandardCharsets.UTF_8));
-    SourceTestUtils.assertSplitAtFractionExhaustive(source, PipelineOptionsFactory.create());
-  }
-
-  @Test
-  public void testSplittingSourceWithCarriageReturnDelimiter() throws Exception {
-    TextSource source = prepareSource("asdf\rhjkl\rxyz\r".getBytes(StandardCharsets.UTF_8));
-    SourceTestUtils.assertSplitAtFractionExhaustive(source, PipelineOptionsFactory.create());
-  }
-
-  @Test
-  public void testSplittingSourceWithCarriageReturnAndLineFeedDelimiter() throws Exception {
-    TextSource source = prepareSource(
-        "asdf\r\nhjkl\r\nxyz\r\n".getBytes(StandardCharsets.UTF_8));
-    SourceTestUtils.assertSplitAtFractionExhaustive(source, PipelineOptionsFactory.create());
-  }
-
-  @Test
-  public void testSplittingSourceWithMixedDelimiters() throws Exception {
-    TextSource source = prepareSource(
-        "asdf\rhjkl\r\nxyz\n".getBytes(StandardCharsets.UTF_8));
-    SourceTestUtils.assertSplitAtFractionExhaustive(source, PipelineOptionsFactory.create());
-  }
-
-  @Test
-  public void testSplittingSourceWithLineFeedDelimiterAndNonEmptyBytesAtEnd() throws Exception {
-    TextSource source = prepareSource("asdf\nhjkl\nxyz".getBytes(StandardCharsets.UTF_8));
-    SourceTestUtils.assertSplitAtFractionExhaustive(source, PipelineOptionsFactory.create());
-  }
-
-  @Test
-  public void testSplittingSourceWithCarriageReturnDelimiterAndNonEmptyBytesAtEnd()
-      throws Exception {
-    TextSource source = prepareSource("asdf\rhjkl\rxyz".getBytes(StandardCharsets.UTF_8));
-    SourceTestUtils.assertSplitAtFractionExhaustive(source, PipelineOptionsFactory.create());
-  }
-
-  @Test
-  public void testSplittingSourceWithCarriageReturnAndLineFeedDelimiterAndNonEmptyBytesAtEnd()
-      throws Exception {
-    TextSource source = prepareSource(
-        "asdf\r\nhjkl\r\nxyz".getBytes(StandardCharsets.UTF_8));
-    SourceTestUtils.assertSplitAtFractionExhaustive(source, PipelineOptionsFactory.create());
-  }
-
-  @Test
-  public void testSplittingSourceWithMixedDelimitersAndNonEmptyBytesAtEnd() throws Exception {
-    TextSource source = prepareSource("asdf\rhjkl\r\nxyz".getBytes(StandardCharsets.UTF_8));
-    SourceTestUtils.assertSplitAtFractionExhaustive(source, PipelineOptionsFactory.create());
-  }
-
-  private TextSource prepareSource(byte[] data) throws IOException {
-    Path path = Files.createTempFile(tempFolder, "tempfile", "ext");
-    Files.write(path, data);
-    return new TextSource(ValueProvider.StaticValueProvider.of(path.toString()));
-  }
-
-  @Test
-  public void testInitialSplitAutoModeTxt() throws Exception {
-    PipelineOptions options = TestPipeline.testingPipelineOptions();
-    long desiredBundleSize = 1000;
-
-    // Sanity check: file is at least 2 bundles long.
-    assertThat(largeTxt.length(), greaterThan(2 * desiredBundleSize));
-
-    FileBasedSource<String> source = TextIO.read().from(largeTxt.getPath()).getSource();
-    List<? extends FileBasedSource<String>> splits =
-        source.split(desiredBundleSize, options);
-
-    // At least 2 splits and they are equal to reading the whole file.
-    assertThat(splits, hasSize(greaterThan(1)));
-    SourceTestUtils.assertSourcesEqualReferenceSource(source, splits, options);
-  }
-
-  @Test
-  public void testInitialSplitAutoModeGz() throws Exception {
-    long desiredBundleSize = 1000;
-    PipelineOptions options = TestPipeline.testingPipelineOptions();
-
-    // Sanity check: file is at least 2 bundles long.
-    assertThat(largeGz.length(), greaterThan(2 * desiredBundleSize));
-
-    FileBasedSource<String> source = TextIO.read().from(largeGz.getPath()).getSource();
-    List<? extends FileBasedSource<String>> splits =
-        source.split(desiredBundleSize, options);
-
-    // Exactly 1 split, even in AUTO mode, since it is a gzip file.
-    assertThat(splits, hasSize(equalTo(1)));
-    SourceTestUtils.assertSourcesEqualReferenceSource(source, splits, options);
-  }
-
-  @Test
-  public void testInitialSplitGzipModeTxt() throws Exception {
-    PipelineOptions options = TestPipeline.testingPipelineOptions();
-    long desiredBundleSize = 1000;
-
-    // Sanity check: file is at least 2 bundles long.
-    assertThat(largeTxt.length(), greaterThan(2 * desiredBundleSize));
-
-    FileBasedSource<String> source =
-        TextIO.read().from(largeTxt.getPath()).withCompressionType(GZIP).getSource();
-    List<? extends FileBasedSource<String>> splits =
-        source.split(desiredBundleSize, options);
-
-    // Exactly 1 split, even though splittable text file, since using GZIP mode.
-    assertThat(splits, hasSize(equalTo(1)));
-    SourceTestUtils.assertSourcesEqualReferenceSource(source, splits, options);
-  }
-
-  @Test
-  public void testInitialSplitGzipModeGz() throws Exception {
-    PipelineOptions options = TestPipeline.testingPipelineOptions();
-    long desiredBundleSize = 1000;
-
-    // Sanity check: file is at least 2 bundles long.
-    assertThat(largeGz.length(), greaterThan(2 * desiredBundleSize));
-
-    FileBasedSource<String> source =
-        TextIO.read().from(largeGz.getPath()).withCompressionType(GZIP).getSource();
-    List<? extends FileBasedSource<String>> splits =
-        source.split(desiredBundleSize, options);
-
-    // Exactly 1 split using .gz extension and using GZIP mode.
-    assertThat(splits, hasSize(equalTo(1)));
-    SourceTestUtils.assertSourcesEqualReferenceSource(source, splits, options);
-  }
-
-
-  @Test
-  @Category(NeedsRunner.class)
-  public void testReadAll() throws IOException {
-    writeToFile(TINY, "readAllTiny1.zip", ZIP);
-    writeToFile(TINY, "readAllTiny2.zip", ZIP);
-    writeToFile(LARGE, "readAllLarge1.zip", ZIP);
-    writeToFile(LARGE, "readAllLarge2.zip", ZIP);
-    PCollection<String> lines =
-        p.apply(
-                Create.of(
-                    tempFolder.resolve("readAllTiny*").toString(),
-                    tempFolder.resolve("readAllLarge*").toString()))
-            .apply(TextIO.readAll().withCompressionType(AUTO));
-    PAssert.that(lines).containsInAnyOrder(Iterables.concat(TINY, TINY, LARGE, LARGE));
-    p.run();
-  }
+   // Empty.
 }


[35/50] [abbrv] beam git commit: This closes #3582

Posted by jb...@apache.org.
This closes #3582


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/d5101750
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/d5101750
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/d5101750

Branch: refs/heads/DSL_SQL
Commit: d5101750e76460b4ad057103069abbd3833bce96
Parents: 0d927ef bdf5bd6
Author: Pei He <pe...@apache.org>
Authored: Wed Jul 19 11:31:32 2017 +0800
Committer: Pei He <pe...@apache.org>
Committed: Wed Jul 19 11:31:32 2017 +0800

----------------------------------------------------------------------
 .../apache/beam/sdk/testing/TestPipeline.java   | 63 ++++----------------
 .../beam/sdk/testing/TestPipelineTest.java      | 38 +-----------
 2 files changed, 13 insertions(+), 88 deletions(-)
----------------------------------------------------------------------



[10/50] [abbrv] beam git commit: Closes #3578

Posted by jb...@apache.org.
Closes #3578


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/02905c27
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/02905c27
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/02905c27

Branch: refs/heads/DSL_SQL
Commit: 02905c27bfc59aa90ebe9c929fa060e705ff2fc3
Parents: 532256e e7059e5
Author: Robert Bradshaw <ro...@google.com>
Authored: Mon Jul 17 15:08:02 2017 -0700
Committer: Robert Bradshaw <ro...@google.com>
Committed: Mon Jul 17 15:08:02 2017 -0700

----------------------------------------------------------------------
 .../apache_beam/examples/snippets/snippets.py   |  2 +-
 sdks/python/apache_beam/transforms/core.py      |  2 +-
 sdks/python/apache_beam/transforms/trigger.py   | 21 +++++++++++++++-----
 3 files changed, 18 insertions(+), 7 deletions(-)
----------------------------------------------------------------------



[14/50] [abbrv] beam git commit: Adjust pull request template for Jenkins and mergebot world

Posted by jb...@apache.org.
Adjust pull request template for Jenkins and mergebot world

Adds details about making a good series of commits, and removes advice telling
the contributor to do things that Jenkins now does automatically.


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/4c6fa39f
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/4c6fa39f
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/4c6fa39f

Branch: refs/heads/DSL_SQL
Commit: 4c6fa39f619709ff127ca8418121ad91afa2041b
Parents: 7e4719c
Author: Kenneth Knowles <kl...@google.com>
Authored: Mon Jul 17 13:06:26 2017 -0700
Committer: Kenneth Knowles <kl...@google.com>
Committed: Mon Jul 17 15:59:39 2017 -0700

----------------------------------------------------------------------
 .github/PULL_REQUEST_TEMPLATE.md | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/4c6fa39f/.github/PULL_REQUEST_TEMPLATE.md
----------------------------------------------------------------------
diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md
index 868edd1..eeee750 100644
--- a/.github/PULL_REQUEST_TEMPLATE.md
+++ b/.github/PULL_REQUEST_TEMPLATE.md
@@ -1,12 +1,10 @@
-Be sure to do all of the following to help us incorporate your contribution
-quickly and easily:
+Follow this checklist to help us incorporate your contribution quickly and easily:
 
- - [ ] Make sure the PR title is formatted like:
-   `[BEAM-<Jira issue #>] Description of pull request`
- - [ ] Make sure tests pass via `mvn clean verify`.
- - [ ] Replace `<Jira issue #>` in the title with the actual Jira issue
-       number, if there is one.
- - [ ] If this contribution is large, please file an Apache
-       [Individual Contributor License Agreement](https://www.apache.org/licenses/icla.pdf).
+ - [ ] Make sure there is a [JIRA issue](https://issues.apache.org/jira/projects/BEAM/issues/) filed for the change (usually before you start working on it).  Trivial changes like typos do not require a JIRA issue.  Your pull request should address just this issue, without pulling in other changes.
+ - [ ] Each commit in the pull request should have a meaningful subject line and body.
+ - [ ] Format the pull request title like `[BEAM-1234] Fixes bug in ApproximateQuantiles`, where you replace `BEAM-1234` with the appropriate JIRA issue.
+ - [ ] Write a pull request description that is detailed enough to understand what the pull request does, how, and why.
+ - [ ] Run `mvn clean verify` to make sure basic checks pass. A more thorough check will be performed on your pull request automatically.
+ - [ ] If this contribution is large, please file an Apache [Individual Contributor License Agreement](https://www.apache.org/licenses/icla.pdf).
 
 ---


[39/50] [abbrv] beam git commit: This closes #3531: [BEAM-2306] Fail build when @Deprecated is used without @deprecated javadoc

Posted by jb...@apache.org.
This closes #3531: [BEAM-2306] Fail build when @Deprecated is used without @deprecated javadoc

  [BEAM-2306] Add checkstyle check to fail the build when @Deprecated is used without @deprecated javadoc (or vice versa).
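
  For reference, a hypothetical method that satisfies the new check: the @Deprecated
  annotation and the @deprecated Javadoc tag must appear together. The names below are
  illustrative only and do not come from the Beam codebase.

      /**
       * Returns the legacy name.
       *
       * @deprecated use {@link #getDisplayName()} instead; this accessor will be removed.
       */
      @Deprecated
      public String getLegacyName() {
        return legacyName;
      }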


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/a6f460fe
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/a6f460fe
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/a6f460fe

Branch: refs/heads/DSL_SQL
Commit: a6f460fe3b760aafbc748ae18956f0f2c1fedfad
Parents: 7fde976 d290114
Author: Kenneth Knowles <kl...@google.com>
Authored: Wed Jul 19 09:03:48 2017 -0700
Committer: Kenneth Knowles <kl...@google.com>
Committed: Wed Jul 19 09:03:48 2017 -0700

----------------------------------------------------------------------
 .../construction/CreatePCollectionViewTranslation.java  | 11 ++++++++++-
 .../core/construction/PTransformTranslation.java        |  4 ++++
 .../beam/runners/core/InMemoryTimerInternals.java       |  9 +++++++++
 .../java/org/apache/beam/runners/core/StateTags.java    |  3 +++
 .../beam/runners/direct/DirectTimerInternals.java       |  9 +++++++++
 .../translation/wrappers/streaming/DoFnOperator.java    |  9 +++++++++
 .../apache/beam/runners/dataflow/DataflowRunner.java    |  3 ++-
 .../options/DataflowPipelineWorkerPoolOptions.java      |  3 +++
 .../build-tools/src/main/resources/beam/checkstyle.xml  |  8 ++++++++
 .../src/main/java/org/apache/beam/sdk/coders/Coder.java | 12 +++++++++++-
 .../java/org/apache/beam/sdk/coders/CoderRegistry.java  |  9 +++++++++
 .../main/java/org/apache/beam/sdk/io/AvroSource.java    |  6 ------
 .../main/java/org/apache/beam/sdk/testing/PAssert.java  |  5 +++--
 .../java/org/apache/beam/sdk/testing/StreamingIT.java   |  4 ++++
 .../java/org/apache/beam/sdk/transforms/Combine.java    |  1 -
 .../main/java/org/apache/beam/sdk/transforms/DoFn.java  |  3 +++
 .../main/java/org/apache/beam/sdk/transforms/View.java  |  2 +-
 .../beam/sdk/transforms/reflect/DoFnInvokers.java       |  9 ---------
 .../java/org/apache/beam/sdk/util/IdentityWindowFn.java |  1 -
 .../org/apache/beam/sdk/values/PCollectionViews.java    |  1 -
 .../main/java/org/apache/beam/sdk/values/PValue.java    |  4 ++--
 .../org/apache/beam/sdk/coders/DefaultCoderTest.java    |  3 ++-
 .../org/apache/beam/fn/harness/BoundedSourceRunner.java |  6 +++---
 23 files changed, 95 insertions(+), 30 deletions(-)
----------------------------------------------------------------------



[40/50] [abbrv] beam git commit: [BEAM-2642] Update Google Auth to 0.7.1

Posted by jb...@apache.org.
[BEAM-2642] Update Google Auth to 0.7.1

Google auth library versions 0.6.1 and 0.7.1 pull in the same versions of their
transitive dependencies, so the upgrade does not change the dependency tree.


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/51427a6e
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/51427a6e
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/51427a6e

Branch: refs/heads/DSL_SQL
Commit: 51427a6e3fd9fc3f311ee4df076745db9d67af7a
Parents: a6f460f
Author: Luke Cwik <lc...@google.com>
Authored: Wed Jul 19 11:36:35 2017 -0700
Committer: Luke Cwik <lc...@google.com>
Committed: Wed Jul 19 11:36:35 2017 -0700

----------------------------------------------------------------------
 pom.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/51427a6e/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index d27d367..f2d0dde 100644
--- a/pom.xml
+++ b/pom.xml
@@ -119,7 +119,7 @@
     <google-api-common.version>1.0.0-rc2</google-api-common.version>
     <google-auto-service.version>1.0-rc2</google-auto-service.version>
     <google-auto-value.version>1.4.1</google-auto-value.version>
-    <google-auth.version>0.6.1</google-auth.version>
+    <google-auth.version>0.7.1</google-auth.version>
     <google-clients.version>1.22.0</google-clients.version>
     <google-cloud-bigdataoss.version>1.4.5</google-cloud-bigdataoss.version>
     <google-cloud-core.version>1.0.2</google-cloud-core.version>


[15/50] [abbrv] beam git commit: This closes #3575: Adjust pull request template for Jenkins and mergebot world

Posted by jb...@apache.org.
This closes #3575: Adjust pull request template for Jenkins and mergebot world


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/04d364d3
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/04d364d3
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/04d364d3

Branch: refs/heads/DSL_SQL
Commit: 04d364d31959f044c7ccc7b9fc52884f4ae501d7
Parents: 1996869 4c6fa39
Author: Kenneth Knowles <kl...@google.com>
Authored: Mon Jul 17 16:00:09 2017 -0700
Committer: Kenneth Knowles <kl...@google.com>
Committed: Mon Jul 17 16:00:09 2017 -0700

----------------------------------------------------------------------
 .github/PULL_REQUEST_TEMPLATE.md | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)
----------------------------------------------------------------------



[36/50] [abbrv] beam git commit: [BEAM-2532] Memoizes TableSchema in BigQuerySourceBase

Posted by jb...@apache.org.
[BEAM-2532] Memoizes TableSchema in BigQuerySourceBase

Memoizes the parsed TableSchema instead of re-parsing the JSON schema for every record.
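
For context, a minimal self-contained sketch of the Guava memoization idiom the change
relies on; the parse function and input string below are stand-ins, not Beam code.

    import com.google.common.base.Function;
    import com.google.common.base.Supplier;
    import com.google.common.base.Suppliers;

    public class MemoizeDemo {
      public static void main(String[] args) {
        Function<String, Integer> expensiveParse =
            new Function<String, Integer>() {
              @Override
              public Integer apply(String json) {
                System.out.println("parsing...");  // printed only once
                return json.length();              // stands in for real schema parsing
              }
            };
        Supplier<Integer> schema =
            Suppliers.memoize(
                Suppliers.compose(expensiveParse, Suppliers.ofInstance("{\"fields\":[]}")));
        schema.get();  // parses on first access
        schema.get();  // served from the memoized value, no second parse
      }
    }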


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/e86c004d
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/e86c004d
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/e86c004d

Branch: refs/heads/DSL_SQL
Commit: e86c004de5d4b5f8bd0c3c53207cf3c1760f5d8e
Parents: d510175
Author: Neville Li <ne...@spotify.com>
Authored: Tue Jul 18 09:07:21 2017 -0400
Committer: Eugene Kirpichov <ki...@google.com>
Committed: Tue Jul 18 22:28:57 2017 -0700

----------------------------------------------------------------------
 .../sdk/io/gcp/bigquery/BigQuerySourceBase.java  | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/e86c004d/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQuerySourceBase.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQuerySourceBase.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQuerySourceBase.java
index 2de60a2..2b1eafe 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQuerySourceBase.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/bigquery/BigQuerySourceBase.java
@@ -29,11 +29,16 @@ import com.google.api.services.bigquery.model.JobReference;
 import com.google.api.services.bigquery.model.TableReference;
 import com.google.api.services.bigquery.model.TableRow;
 import com.google.api.services.bigquery.model.TableSchema;
+import com.google.common.base.Function;
+import com.google.common.base.Supplier;
+import com.google.common.base.Suppliers;
 import com.google.common.collect.ImmutableList;
 import com.google.common.collect.Lists;
 import java.io.IOException;
+import java.io.Serializable;
 import java.util.List;
 import java.util.NoSuchElementException;
+import javax.annotation.Nullable;
 import org.apache.avro.generic.GenericRecord;
 import org.apache.beam.sdk.coders.Coder;
 import org.apache.beam.sdk.io.AvroSource;
@@ -168,10 +173,12 @@ abstract class BigQuerySourceBase extends BoundedSource<TableRow> {
 
     SerializableFunction<GenericRecord, TableRow> function =
         new SerializableFunction<GenericRecord, TableRow>() {
+          private Supplier<TableSchema> schema = Suppliers.memoize(
+              Suppliers.compose(new TableSchemaFunction(), Suppliers.ofInstance(jsonSchema)));
+
           @Override
           public TableRow apply(GenericRecord input) {
-            return BigQueryAvroUtils.convertGenericRecordToTableRow(
-                input, BigQueryHelpers.fromJsonString(jsonSchema, TableSchema.class));
+            return BigQueryAvroUtils.convertGenericRecordToTableRow(input, schema.get());
           }};
 
     List<BoundedSource<TableRow>> avroSources = Lists.newArrayList();
@@ -182,6 +189,14 @@ abstract class BigQuerySourceBase extends BoundedSource<TableRow> {
     return ImmutableList.copyOf(avroSources);
   }
 
+  private static class TableSchemaFunction implements Serializable, Function<String, TableSchema> {
+    @Nullable
+    @Override
+    public TableSchema apply(@Nullable String input) {
+      return BigQueryHelpers.fromJsonString(input, TableSchema.class);
+    }
+  }
+
   protected static class BigQueryReader extends BoundedReader<TableRow> {
     private final BigQuerySourceBase source;
     private final BigQueryServices.BigQueryJsonReader reader;


[12/50] [abbrv] beam git commit: [BEAM-933] Fix and enable findbugs in Java examples

Posted by jb...@apache.org.
[BEAM-933] Fix and enable findbugs in Java examples
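
One of the FindBugs complaints fixed below concerns TrafficRoutes.StationSpeed, which
defined compareTo() while inheriting Object.equals(). A minimal standalone sketch of the
consistent pattern the fix follows; the class and field names are illustrative, not from
the Beam codebase.

    import java.util.Objects;

    class Reading implements Comparable<Reading> {
      private final Long timestamp;

      Reading(Long timestamp) {
        this.timestamp = timestamp;
      }

      @Override
      public int compareTo(Reading other) {
        return Long.compare(this.timestamp, other.timestamp);
      }

      // equals() agrees with compareTo(): both look only at the timestamp.
      @Override
      public boolean equals(Object object) {
        if (object == null || object.getClass() != getClass()) {
          return false;
        }
        return Objects.equals(this.timestamp, ((Reading) object).timestamp);
      }

      @Override
      public int hashCode() {
        return timestamp.hashCode();
      }
    }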


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/f6daad4f
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/f6daad4f
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/f6daad4f

Branch: refs/heads/DSL_SQL
Commit: f6daad4fc95cb633794c60254c6c335602f1df31
Parents: 02905c2
Author: eralmas7 <er...@yahoo.com>
Authored: Sun Jul 9 11:50:52 2017 +0530
Committer: Kenneth Knowles <kl...@google.com>
Committed: Mon Jul 17 15:52:08 2017 -0700

----------------------------------------------------------------------
 examples/java/pom.xml                           | 12 ----------
 .../apache/beam/examples/complete/TfIdf.java    |  3 ++-
 .../examples/complete/TopWikipediaSessions.java | 24 ++++++++++----------
 .../beam/examples/complete/TrafficRoutes.java   | 19 ++++++++++++++++
 .../beam/examples/cookbook/TriggerExample.java  |  6 +++--
 5 files changed, 37 insertions(+), 27 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/f6daad4f/examples/java/pom.xml
----------------------------------------------------------------------
diff --git a/examples/java/pom.xml b/examples/java/pom.xml
index ae64a79..12fe06f 100644
--- a/examples/java/pom.xml
+++ b/examples/java/pom.xml
@@ -365,18 +365,6 @@
   </profiles>
 
   <build>
-    <pluginManagement>
-      <plugins>
-        <!-- BEAM-933 -->
-        <plugin>
-          <groupId>org.codehaus.mojo</groupId>
-          <artifactId>findbugs-maven-plugin</artifactId>
-          <configuration>
-            <skip>true</skip>
-          </configuration>
-        </plugin>
-      </plugins>
-    </pluginManagement>
 
     <plugins>
       <plugin>

http://git-wip-us.apache.org/repos/asf/beam/blob/f6daad4f/examples/java/src/main/java/org/apache/beam/examples/complete/TfIdf.java
----------------------------------------------------------------------
diff --git a/examples/java/src/main/java/org/apache/beam/examples/complete/TfIdf.java b/examples/java/src/main/java/org/apache/beam/examples/complete/TfIdf.java
index 7552b94..435ffab 100644
--- a/examples/java/src/main/java/org/apache/beam/examples/complete/TfIdf.java
+++ b/examples/java/src/main/java/org/apache/beam/examples/complete/TfIdf.java
@@ -17,6 +17,7 @@
  */
 package org.apache.beam.examples.complete;
 
+import com.google.common.base.Optional;
 import java.io.File;
 import java.io.IOException;
 import java.net.URI;
@@ -121,7 +122,7 @@ public class TfIdf {
     Set<URI> uris = new HashSet<>();
     if (absoluteUri.getScheme().equals("file")) {
       File directory = new File(absoluteUri);
-      for (String entry : directory.list()) {
+      for (String entry : Optional.fromNullable(directory.list()).or(new String[] {})) {
         File path = new File(directory, entry);
         uris.add(path.toURI());
       }

http://git-wip-us.apache.org/repos/asf/beam/blob/f6daad4f/examples/java/src/main/java/org/apache/beam/examples/complete/TopWikipediaSessions.java
----------------------------------------------------------------------
diff --git a/examples/java/src/main/java/org/apache/beam/examples/complete/TopWikipediaSessions.java b/examples/java/src/main/java/org/apache/beam/examples/complete/TopWikipediaSessions.java
index 478e2dc..3691e53 100644
--- a/examples/java/src/main/java/org/apache/beam/examples/complete/TopWikipediaSessions.java
+++ b/examples/java/src/main/java/org/apache/beam/examples/complete/TopWikipediaSessions.java
@@ -162,17 +162,18 @@ public class TopWikipediaSessions {
     public PCollection<String> expand(PCollection<TableRow> input) {
       return input
           .apply(ParDo.of(new ExtractUserAndTimestamp()))
-
-          .apply("SampleUsers", ParDo.of(
-              new DoFn<String, String>() {
-                @ProcessElement
-                public void processElement(ProcessContext c) {
-                  if (Math.abs(c.element().hashCode()) <= Integer.MAX_VALUE * samplingThreshold) {
-                    c.output(c.element());
-                  }
-                }
-              }))
-
+          .apply(
+              "SampleUsers",
+              ParDo.of(
+                  new DoFn<String, String>() {
+                    @ProcessElement
+                    public void processElement(ProcessContext c) {
+                      if (Math.abs((long) c.element().hashCode())
+                          <= Integer.MAX_VALUE * samplingThreshold) {
+                        c.output(c.element());
+                      }
+                    }
+                  }))
           .apply(new ComputeSessions())
           .apply("SessionsToStrings", ParDo.of(new SessionsToStringsDoFn()))
           .apply(new TopPerMonth())
@@ -191,7 +192,6 @@ public class TopWikipediaSessions {
     @Default.String(EXPORTED_WIKI_TABLE)
     String getInput();
     void setInput(String value);
-
     @Description("File to output results to")
     @Validation.Required
     String getOutput();

http://git-wip-us.apache.org/repos/asf/beam/blob/f6daad4f/examples/java/src/main/java/org/apache/beam/examples/complete/TrafficRoutes.java
----------------------------------------------------------------------
diff --git a/examples/java/src/main/java/org/apache/beam/examples/complete/TrafficRoutes.java b/examples/java/src/main/java/org/apache/beam/examples/complete/TrafficRoutes.java
index c9ba18c..fb16eb4 100644
--- a/examples/java/src/main/java/org/apache/beam/examples/complete/TrafficRoutes.java
+++ b/examples/java/src/main/java/org/apache/beam/examples/complete/TrafficRoutes.java
@@ -29,6 +29,8 @@ import java.util.HashMap;
 import java.util.Hashtable;
 import java.util.List;
 import java.util.Map;
+import java.util.Objects;
+
 import org.apache.avro.reflect.Nullable;
 import org.apache.beam.examples.common.ExampleBigQueryTableOptions;
 import org.apache.beam.examples.common.ExampleOptions;
@@ -112,6 +114,23 @@ public class TrafficRoutes {
     public int compareTo(StationSpeed other) {
       return Long.compare(this.timestamp, other.timestamp);
     }
+
+    @Override
+    public boolean equals(Object object) {
+      if (object == null) {
+        return false;
+      }
+      if (object.getClass() != getClass()) {
+        return false;
+      }
+      StationSpeed otherStationSpeed = (StationSpeed) object;
+      return Objects.equals(this.timestamp, otherStationSpeed.timestamp);
+    }
+
+    @Override
+    public int hashCode() {
+      return this.timestamp.hashCode();
+    }
   }
 
   /**

http://git-wip-us.apache.org/repos/asf/beam/blob/f6daad4f/examples/java/src/main/java/org/apache/beam/examples/cookbook/TriggerExample.java
----------------------------------------------------------------------
diff --git a/examples/java/src/main/java/org/apache/beam/examples/cookbook/TriggerExample.java b/examples/java/src/main/java/org/apache/beam/examples/cookbook/TriggerExample.java
index e7596aa..651c242 100644
--- a/examples/java/src/main/java/org/apache/beam/examples/cookbook/TriggerExample.java
+++ b/examples/java/src/main/java/org/apache/beam/examples/cookbook/TriggerExample.java
@@ -23,6 +23,7 @@ import com.google.api.services.bigquery.model.TableRow;
 import com.google.api.services.bigquery.model.TableSchema;
 import java.util.ArrayList;
 import java.util.List;
+import java.util.Random;
 import java.util.concurrent.TimeUnit;
 import org.apache.beam.examples.common.ExampleBigQueryTableOptions;
 import org.apache.beam.examples.common.ExampleOptions;
@@ -476,9 +477,10 @@ public class TriggerExample {
     @ProcessElement
     public void processElement(ProcessContext c) throws Exception {
       Instant timestamp = Instant.now();
-      if (Math.random() < THRESHOLD){
+      Random random = new Random();
+      if (random.nextDouble() < THRESHOLD){
         int range = MAX_DELAY - MIN_DELAY;
-        int delayInMinutes = (int) (Math.random() * range) + MIN_DELAY;
+        int delayInMinutes = random.nextInt(range) + MIN_DELAY;
         long delayInMillis = TimeUnit.MINUTES.toMillis(delayInMinutes);
         timestamp = new Instant(timestamp.getMillis() - delayInMillis);
       }


[20/50] [abbrv] beam git commit: This closes #3442: Splits large TextIOTest into TextIOReadTest and TextIOWriteTest

Posted by jb...@apache.org.
This closes #3442: Splits large TextIOTest into TextIOReadTest and TextIOWriteTest


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/7c363181
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/7c363181
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/7c363181

Branch: refs/heads/DSL_SQL
Commit: 7c3631810a604ba58ec16c3b3aa9a346bd6d9f17
Parents: 0f06eb2 d495d15
Author: Kenneth Knowles <kl...@google.com>
Authored: Mon Jul 17 19:43:20 2017 -0700
Committer: Kenneth Knowles <kl...@google.com>
Committed: Mon Jul 17 19:43:20 2017 -0700

----------------------------------------------------------------------
 .../org/apache/beam/sdk/io/TextIOReadTest.java  |  847 +++++++++++
 .../java/org/apache/beam/sdk/io/TextIOTest.java | 1353 +-----------------
 .../org/apache/beam/sdk/io/TextIOWriteTest.java |  604 ++++++++
 3 files changed, 1460 insertions(+), 1344 deletions(-)
----------------------------------------------------------------------



[11/50] [abbrv] beam git commit: [BEAM-1502] GroupByKey should not return bare lists in DirectRunner.

Posted by jb...@apache.org.
[BEAM-1502] GroupByKey should not return bare lists in DirectRunner.

Returning bare lists encourages expectations that other runners, where grouped values
are arbitrary iterables, do not satisfy.


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/e7059e5c
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/e7059e5c
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/e7059e5c

Branch: refs/heads/DSL_SQL
Commit: e7059e5cb3cd07855582641798c58fc3cf5cd682
Parents: 532256e
Author: Robert Bradshaw <ro...@google.com>
Authored: Mon Jul 17 13:44:40 2017 -0700
Committer: Robert Bradshaw <ro...@google.com>
Committed: Mon Jul 17 15:08:02 2017 -0700

----------------------------------------------------------------------
 .../apache_beam/examples/snippets/snippets.py   |  2 +-
 sdks/python/apache_beam/transforms/core.py      |  2 +-
 sdks/python/apache_beam/transforms/trigger.py   | 21 +++++++++++++++-----
 3 files changed, 18 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/e7059e5c/sdks/python/apache_beam/examples/snippets/snippets.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/examples/snippets/snippets.py b/sdks/python/apache_beam/examples/snippets/snippets.py
index 3a5f9b1..27b8120 100644
--- a/sdks/python/apache_beam/examples/snippets/snippets.py
+++ b/sdks/python/apache_beam/examples/snippets/snippets.py
@@ -1136,7 +1136,7 @@ def model_group_by_key(contents, output_path):
     grouped_words = words_and_counts | beam.GroupByKey()
     # [END model_group_by_key_transform]
     (grouped_words
-     | 'count words' >> beam.Map(lambda (word, counts): (word, len(counts)))
+     | 'count words' >> beam.Map(lambda (word, counts): (word, sum(counts)))
      | beam.io.WriteToText(output_path))
 
 

http://git-wip-us.apache.org/repos/asf/beam/blob/e7059e5c/sdks/python/apache_beam/transforms/core.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/transforms/core.py b/sdks/python/apache_beam/transforms/core.py
index 8018219..92b8737 100644
--- a/sdks/python/apache_beam/transforms/core.py
+++ b/sdks/python/apache_beam/transforms/core.py
@@ -1017,7 +1017,7 @@ class CombineValuesDoFn(DoFn):
            self.combinefn.apply(element[1], *args, **kwargs))]
 
     # Add the elements into three accumulators (for testing of merge).
-    elements = element[1]
+    elements = list(element[1])
     accumulators = []
     for k in range(3):
       if len(elements) <= k:

http://git-wip-us.apache.org/repos/asf/beam/blob/e7059e5c/sdks/python/apache_beam/transforms/trigger.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/transforms/trigger.py b/sdks/python/apache_beam/transforms/trigger.py
index f77fa1a..c1fbfc5 100644
--- a/sdks/python/apache_beam/transforms/trigger.py
+++ b/sdks/python/apache_beam/transforms/trigger.py
@@ -24,6 +24,7 @@ from abc import ABCMeta
 from abc import abstractmethod
 import collections
 import copy
+import itertools
 
 from apache_beam.coders import observable
 from apache_beam.transforms import combiners
@@ -878,6 +879,17 @@ class _UnwindowedValues(observable.ObservableMixin):
   def __reduce__(self):
     return list, (list(self),)
 
+  def __eq__(self, other):
+    if isinstance(other, collections.Iterable):
+      return all(
+          a == b
+          for a, b in itertools.izip_longest(self, other, fillvalue=object()))
+    else:
+      return NotImplemented
+
+  def __ne__(self, other):
+    return not self == other
+
 
 class DefaultGlobalBatchTriggerDriver(TriggerDriver):
   """Breaks a bundles into window (pane)s according to the default triggering.
@@ -888,11 +900,10 @@ class DefaultGlobalBatchTriggerDriver(TriggerDriver):
     pass
 
   def process_elements(self, state, windowed_values, unused_output_watermark):
-    if isinstance(windowed_values, list):
-      unwindowed = [wv.value for wv in windowed_values]
-    else:
-      unwindowed = _UnwindowedValues(windowed_values)
-    yield WindowedValue(unwindowed, MIN_TIMESTAMP, self.GLOBAL_WINDOW_TUPLE)
+    yield WindowedValue(
+        _UnwindowedValues(windowed_values),
+        MIN_TIMESTAMP,
+        self.GLOBAL_WINDOW_TUPLE)
 
   def process_timer(self, window_id, name, time_domain, timestamp, state):
     raise TypeError('Triggers never set or called for batch default windowing.')


[09/50] [abbrv] beam git commit: Improving labeling of side inputs for Dataflow

Posted by jb...@apache.org.
Improving labeling of side inputs for Dataflow


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/7257507d
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/7257507d
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/7257507d

Branch: refs/heads/DSL_SQL
Commit: 7257507d939271a91287837c20fcdde37dc1ddeb
Parents: 7e4719c
Author: Pablo <pa...@google.com>
Authored: Fri Jul 7 13:49:47 2017 -0700
Committer: Robert Bradshaw <ro...@google.com>
Committed: Mon Jul 17 14:33:01 2017 -0700

----------------------------------------------------------------------
 .../runners/dataflow/dataflow_runner.py           | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/7257507d/sdks/python/apache_beam/runners/dataflow/dataflow_runner.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/runners/dataflow/dataflow_runner.py b/sdks/python/apache_beam/runners/dataflow/dataflow_runner.py
index 059e139..89c18d4 100644
--- a/sdks/python/apache_beam/runners/dataflow/dataflow_runner.py
+++ b/sdks/python/apache_beam/runners/dataflow/dataflow_runner.py
@@ -21,6 +21,7 @@ The runner will create a JSON description of the job graph and then submit it
 to the Dataflow Service for remote execution by a worker.
 """
 
+from collections import defaultdict
 import logging
 import threading
 import time
@@ -485,11 +486,24 @@ class DataflowRunner(PipelineRunner):
     si_dict = {}
     # We must call self._cache.get_pvalue exactly once due to refcounting.
     si_labels = {}
+    full_label_counts = defaultdict(int)
     lookup_label = lambda side_pval: si_labels[side_pval]
     for side_pval in transform_node.side_inputs:
       assert isinstance(side_pval, AsSideInput)
-      si_label = 'SideInput-' + self._get_unique_step_name()
-      si_full_label = '%s/%s' % (transform_node.full_label, si_label)
+      step_number = self._get_unique_step_name()
+      si_label = 'SideInput-' + step_number
+      pcollection_label = '%s.%s' % (
+          side_pval.pvalue.producer.full_label.split('/')[-1],
+          side_pval.pvalue.tag if side_pval.pvalue.tag else 'out')
+      si_full_label = '%s/%s(%s.%s)' % (transform_node.full_label,
+                                        side_pval.__class__.__name__,
+                                        pcollection_label,
+                                        full_label_counts[pcollection_label])
+
+      # Count the number of times the same PCollection is a side input
+      # to the same ParDo.
+      full_label_counts[pcollection_label] += 1
+
       self._add_singleton_step(
           si_label, si_full_label, side_pval.pvalue.tag,
           self._cache.get_pvalue(side_pval.pvalue))


[49/50] [abbrv] beam git commit: This closes #3591: [BEAM-1542] Introduced SpannerIO.readAll

Posted by jb...@apache.org.
This closes #3591: [BEAM-1542] Introduced SpannerIO.readAll


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/afeba371
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/afeba371
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/afeba371

Branch: refs/heads/DSL_SQL
Commit: afeba3715c806b53115f8f7994eb7bc207c68932
Parents: c8e3744 95e9c28
Author: Eugene Kirpichov <ki...@google.com>
Authored: Thu Jul 20 10:59:14 2017 -0700
Committer: Eugene Kirpichov <ki...@google.com>
Committed: Thu Jul 20 10:59:14 2017 -0700

----------------------------------------------------------------------
 .../sdk/io/gcp/spanner/NaiveSpannerReadFn.java  |  35 ++--
 .../beam/sdk/io/gcp/spanner/ReadOperation.java  |  96 ++++++++++
 .../beam/sdk/io/gcp/spanner/SpannerIO.java      | 187 ++++++++++++++-----
 .../sdk/io/gcp/spanner/SpannerIOReadTest.java   | 145 +++++++++-----
 4 files changed, 353 insertions(+), 110 deletions(-)
----------------------------------------------------------------------



[48/50] [abbrv] beam git commit: Introduces SpannerIO.readAll()

Posted by jb...@apache.org.
Introduces SpannerIO.readAll()
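
A hypothetical usage sketch of the new API, assuming an existing Pipeline p, the usual
imports, and illustrative project, table and column names: it builds a PCollection of
ReadOperations and executes them with a single readAll().

    SpannerConfig config = SpannerConfig.create()
        .withProjectId("my-project")
        .withInstanceId("my-instance")
        .withDatabaseId("my-database");

    PCollection<ReadOperation> operations = p.apply(Create.of(
        ReadOperation.create().withTable("users").withColumns("id", "name"),
        ReadOperation.create().withQuery("SELECT id, name FROM orders")));

    // Each ReadOperation is executed against Cloud Spanner and the results are
    // flattened into a single PCollection<Struct>.
    PCollection<Struct> rows =
        operations.apply(SpannerIO.readAll().withSpannerConfig(config));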


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/95e9c28c
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/95e9c28c
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/95e9c28c

Branch: refs/heads/DSL_SQL
Commit: 95e9c28ca4da5bac31f3d768595693e43b464c1c
Parents: c8e3744
Author: Mairbek Khadikov <ma...@google.com>
Authored: Tue Jul 18 16:23:58 2017 -0700
Committer: Eugene Kirpichov <ki...@google.com>
Committed: Thu Jul 20 10:58:51 2017 -0700

----------------------------------------------------------------------
 .../sdk/io/gcp/spanner/NaiveSpannerReadFn.java  |  35 ++--
 .../beam/sdk/io/gcp/spanner/ReadOperation.java  |  96 ++++++++++
 .../beam/sdk/io/gcp/spanner/SpannerIO.java      | 187 ++++++++++++++-----
 .../sdk/io/gcp/spanner/SpannerIOReadTest.java   | 145 +++++++++-----
 4 files changed, 353 insertions(+), 110 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/95e9c28c/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/spanner/NaiveSpannerReadFn.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/spanner/NaiveSpannerReadFn.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/spanner/NaiveSpannerReadFn.java
index d193b95..92b3fe3 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/spanner/NaiveSpannerReadFn.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/spanner/NaiveSpannerReadFn.java
@@ -22,44 +22,53 @@ import com.google.cloud.spanner.ResultSet;
 import com.google.cloud.spanner.Struct;
 import com.google.cloud.spanner.TimestampBound;
 import com.google.common.annotations.VisibleForTesting;
+import javax.annotation.Nullable;
+import org.apache.beam.sdk.values.PCollectionView;
 
 /** A simplest read function implementation. Parallelism support is coming. */
 @VisibleForTesting
-class NaiveSpannerReadFn extends AbstractSpannerFn<Object, Struct> {
-  private final SpannerIO.Read config;
+class NaiveSpannerReadFn extends AbstractSpannerFn<ReadOperation, Struct> {
+  private final SpannerConfig config;
+  @Nullable private final PCollectionView<Transaction> transaction;
 
-  NaiveSpannerReadFn(SpannerIO.Read config) {
+  NaiveSpannerReadFn(SpannerConfig config, @Nullable PCollectionView<Transaction> transaction) {
     this.config = config;
+    this.transaction = transaction;
+  }
+
+  NaiveSpannerReadFn(SpannerConfig config) {
+    this(config, null);
   }
 
   SpannerConfig getSpannerConfig() {
-    return config.getSpannerConfig();
+    return config;
   }
 
   @ProcessElement
   public void processElement(ProcessContext c) throws Exception {
     TimestampBound timestampBound = TimestampBound.strong();
-    if (config.getTransaction() != null) {
-      Transaction transaction = c.sideInput(config.getTransaction());
+    if (transaction != null) {
+      Transaction transaction = c.sideInput(this.transaction);
       timestampBound = TimestampBound.ofReadTimestamp(transaction.timestamp());
     }
+    ReadOperation op = c.element();
     try (ReadOnlyTransaction readOnlyTransaction =
         databaseClient().readOnlyTransaction(timestampBound)) {
-      ResultSet resultSet = execute(readOnlyTransaction);
+      ResultSet resultSet = execute(op, readOnlyTransaction);
       while (resultSet.next()) {
         c.output(resultSet.getCurrentRowAsStruct());
       }
     }
   }
 
-  private ResultSet execute(ReadOnlyTransaction readOnlyTransaction) {
-    if (config.getQuery() != null) {
-      return readOnlyTransaction.executeQuery(config.getQuery());
+  private ResultSet execute(ReadOperation op, ReadOnlyTransaction readOnlyTransaction) {
+    if (op.getQuery() != null) {
+      return readOnlyTransaction.executeQuery(op.getQuery());
     }
-    if (config.getIndex() != null) {
+    if (op.getIndex() != null) {
       return readOnlyTransaction.readUsingIndex(
-          config.getTable(), config.getIndex(), config.getKeySet(), config.getColumns());
+          op.getTable(), op.getIndex(), op.getKeySet(), op.getColumns());
     }
-    return readOnlyTransaction.read(config.getTable(), config.getKeySet(), config.getColumns());
+    return readOnlyTransaction.read(op.getTable(), op.getKeySet(), op.getColumns());
   }
 }

http://git-wip-us.apache.org/repos/asf/beam/blob/95e9c28c/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/spanner/ReadOperation.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/spanner/ReadOperation.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/spanner/ReadOperation.java
new file mode 100644
index 0000000..3b2bb6b
--- /dev/null
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/spanner/ReadOperation.java
@@ -0,0 +1,96 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.sdk.io.gcp.spanner;
+
+import com.google.auto.value.AutoValue;
+import com.google.cloud.spanner.KeySet;
+import com.google.cloud.spanner.Statement;
+import java.io.Serializable;
+import java.util.Arrays;
+import java.util.List;
+import javax.annotation.Nullable;
+
+/** Encapsulates a spanner read operation. */
+@AutoValue
+public abstract class ReadOperation implements Serializable {
+
+  public static ReadOperation create() {
+    return new AutoValue_ReadOperation.Builder().setKeySet(KeySet.all()).build();
+  }
+
+  @Nullable
+  public abstract Statement getQuery();
+
+  @Nullable
+  public abstract String getTable();
+
+  @Nullable
+  public abstract String getIndex();
+
+  @Nullable
+  public abstract List<String> getColumns();
+
+  @Nullable
+  public abstract KeySet getKeySet();
+
+  @AutoValue.Builder
+  abstract static class Builder {
+
+    abstract Builder setQuery(Statement statement);
+
+    abstract Builder setTable(String table);
+
+    abstract Builder setIndex(String index);
+
+    abstract Builder setColumns(List<String> columns);
+
+    abstract Builder setKeySet(KeySet keySet);
+
+    abstract ReadOperation build();
+  }
+
+  abstract Builder toBuilder();
+
+  public ReadOperation withTable(String table) {
+    return toBuilder().setTable(table).build();
+  }
+
+  public ReadOperation withColumns(String... columns) {
+    return withColumns(Arrays.asList(columns));
+  }
+
+  public ReadOperation withColumns(List<String> columns) {
+    return toBuilder().setColumns(columns).build();
+  }
+
+  public ReadOperation withQuery(Statement statement) {
+    return toBuilder().setQuery(statement).build();
+  }
+
+  public ReadOperation withQuery(String sql) {
+    return withQuery(Statement.of(sql));
+  }
+
+  public ReadOperation withKeySet(KeySet keySet) {
+    return toBuilder().setKeySet(keySet).build();
+  }
+
+  public ReadOperation withIndex(String index) {
+    return toBuilder().setIndex(index).build();
+  }
+}

http://git-wip-us.apache.org/repos/asf/beam/blob/95e9c28c/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/spanner/SpannerIO.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/spanner/SpannerIO.java b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/spanner/SpannerIO.java
index a247d4c..e5c9c05 100644
--- a/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/spanner/SpannerIO.java
+++ b/sdks/java/io/google-cloud-platform/src/main/java/org/apache/beam/sdk/io/gcp/spanner/SpannerIO.java
@@ -31,12 +31,11 @@ import com.google.cloud.spanner.Statement;
 import com.google.cloud.spanner.Struct;
 import com.google.cloud.spanner.TimestampBound;
 import com.google.common.annotations.VisibleForTesting;
-
 import java.util.Arrays;
 import java.util.Collections;
 import java.util.List;
+import java.util.UUID;
 import javax.annotation.Nullable;
-
 import org.apache.beam.sdk.annotations.Experimental;
 import org.apache.beam.sdk.options.PipelineOptions;
 import org.apache.beam.sdk.options.ValueProvider;
@@ -44,7 +43,11 @@ import org.apache.beam.sdk.transforms.Create;
 import org.apache.beam.sdk.transforms.DoFn;
 import org.apache.beam.sdk.transforms.PTransform;
 import org.apache.beam.sdk.transforms.ParDo;
+import org.apache.beam.sdk.transforms.Reshuffle;
+import org.apache.beam.sdk.transforms.SerializableFunction;
+import org.apache.beam.sdk.transforms.Values;
 import org.apache.beam.sdk.transforms.View;
+import org.apache.beam.sdk.transforms.WithKeys;
 import org.apache.beam.sdk.transforms.display.DisplayData;
 import org.apache.beam.sdk.values.PBegin;
 import org.apache.beam.sdk.values.PCollection;
@@ -172,7 +175,18 @@ public class SpannerIO {
     return new AutoValue_SpannerIO_Read.Builder()
         .setSpannerConfig(SpannerConfig.create())
         .setTimestampBound(TimestampBound.strong())
-        .setKeySet(KeySet.all())
+        .setReadOperation(ReadOperation.create())
+        .build();
+  }
+
+  /**
+   * A {@link PTransform} that works like {@link #read}, but executes read operations coming from a
+   * {@link PCollection}.
+   */
+  @Experimental(Experimental.Kind.SOURCE_SINK)
+  public static ReadAll readAll() {
+    return new AutoValue_SpannerIO_ReadAll.Builder()
+        .setSpannerConfig(SpannerConfig.create())
         .build();
   }
 
@@ -202,34 +216,113 @@ public class SpannerIO {
         .build();
   }
 
-  /**
-   * A {@link PTransform} that reads data from Google Cloud Spanner.
-   *
-   * @see SpannerIO
-   */
+  /** Implementation of {@link #readAll}. */
   @Experimental(Experimental.Kind.SOURCE_SINK)
   @AutoValue
-  public abstract static class Read extends PTransform<PBegin, PCollection<Struct>> {
+  public abstract static class ReadAll
+      extends PTransform<PCollection<ReadOperation>, PCollection<Struct>> {
 
     abstract SpannerConfig getSpannerConfig();
 
     @Nullable
-    abstract TimestampBound getTimestampBound();
+    abstract PCollectionView<Transaction> getTransaction();
 
-    @Nullable
-    abstract Statement getQuery();
+    abstract Builder toBuilder();
 
-    @Nullable
-    abstract String getTable();
+    @AutoValue.Builder
+    abstract static class Builder {
+      abstract Builder setSpannerConfig(SpannerConfig spannerConfig);
 
-    @Nullable
-    abstract String getIndex();
+      abstract Builder setTransaction(PCollectionView<Transaction> transaction);
 
-    @Nullable
-    abstract List<String> getColumns();
+      abstract ReadAll build();
+    }
+
+    /** Specifies the Cloud Spanner configuration. */
+    public ReadAll withSpannerConfig(SpannerConfig spannerConfig) {
+      return toBuilder().setSpannerConfig(spannerConfig).build();
+    }
+
+    /** Specifies the Cloud Spanner project. */
+    public ReadAll withProjectId(String projectId) {
+      return withProjectId(ValueProvider.StaticValueProvider.of(projectId));
+    }
+
+    /** Specifies the Cloud Spanner project. */
+    public ReadAll withProjectId(ValueProvider<String> projectId) {
+      SpannerConfig config = getSpannerConfig();
+      return withSpannerConfig(config.withProjectId(projectId));
+    }
+
+    /** Specifies the Cloud Spanner instance. */
+    public ReadAll withInstanceId(String instanceId) {
+      return withInstanceId(ValueProvider.StaticValueProvider.of(instanceId));
+    }
+
+    /** Specifies the Cloud Spanner instance. */
+    public ReadAll withInstanceId(ValueProvider<String> instanceId) {
+      SpannerConfig config = getSpannerConfig();
+      return withSpannerConfig(config.withInstanceId(instanceId));
+    }
+
+    /** Specifies the Cloud Spanner database. */
+    public ReadAll withDatabaseId(String databaseId) {
+      return withDatabaseId(ValueProvider.StaticValueProvider.of(databaseId));
+    }
+
+    /** Specifies the Cloud Spanner database. */
+    public ReadAll withDatabaseId(ValueProvider<String> databaseId) {
+      SpannerConfig config = getSpannerConfig();
+      return withSpannerConfig(config.withDatabaseId(databaseId));
+    }
+
+    @VisibleForTesting
+    ReadAll withServiceFactory(ServiceFactory<Spanner, SpannerOptions> serviceFactory) {
+      SpannerConfig config = getSpannerConfig();
+      return withSpannerConfig(config.withServiceFactory(serviceFactory));
+    }
+
+    public ReadAll withTransaction(PCollectionView<Transaction> transaction) {
+      return toBuilder().setTransaction(transaction).build();
+    }
+
+    @Override
+    public PCollection<Struct> expand(PCollection<ReadOperation> input) {
+      PCollection<ReadOperation> reshuffled =
+          input
+              .apply(
+                  "Pair with random key",
+                  WithKeys.of(
+                      new SerializableFunction<ReadOperation, String>() {
+                        @Override
+                        public String apply(ReadOperation ignored) {
+                          return UUID.randomUUID().toString();
+                        }
+                      }))
+              .apply("Reshuffle", Reshuffle.<String, ReadOperation>of())
+              .apply("Strip keys", Values.<ReadOperation>create());
+      List<PCollectionView<Transaction>> sideInputs =
+          getTransaction() == null
+              ? Collections.<PCollectionView<Transaction>>emptyList()
+              : Collections.singletonList(getTransaction());
+      return reshuffled.apply(
+          "Execute queries",
+          ParDo.of(new NaiveSpannerReadFn(getSpannerConfig(), getTransaction()))
+              .withSideInputs(sideInputs));
+    }
+  }
+
+  /** Implementation of {@link #read}. */
+  @Experimental(Experimental.Kind.SOURCE_SINK)
+  @AutoValue
+  public abstract static class Read extends PTransform<PBegin, PCollection<Struct>> {
+
+    abstract SpannerConfig getSpannerConfig();
+
+    abstract ReadOperation getReadOperation();
 
     @Nullable
-    abstract KeySet getKeySet();
+    abstract TimestampBound getTimestampBound();
 
     @Nullable
     abstract PCollectionView<Transaction> getTransaction();
@@ -241,17 +334,9 @@ public class SpannerIO {
 
       abstract Builder setSpannerConfig(SpannerConfig spannerConfig);
 
-      abstract Builder setTimestampBound(TimestampBound timestampBound);
-
-      abstract Builder setQuery(Statement statement);
-
-      abstract Builder setTable(String table);
-
-      abstract Builder setIndex(String index);
+      abstract Builder setReadOperation(ReadOperation readOperation);
 
-      abstract Builder setColumns(List<String> columns);
-
-      abstract Builder setKeySet(KeySet keySet);
+      abstract Builder setTimestampBound(TimestampBound timestampBound);
 
       abstract Builder setTransaction(PCollectionView<Transaction> transaction);
 
@@ -315,7 +400,11 @@ public class SpannerIO {
     }
 
     public Read withTable(String table) {
-      return toBuilder().setTable(table).build();
+      return withReadOperation(getReadOperation().withTable(table));
+    }
+
+    public Read withReadOperation(ReadOperation operation) {
+      return toBuilder().setReadOperation(operation).build();
     }
 
     public Read withColumns(String... columns) {
@@ -323,11 +412,11 @@ public class SpannerIO {
     }
 
     public Read withColumns(List<String> columns) {
-      return toBuilder().setColumns(columns).build();
+      return withReadOperation(getReadOperation().withColumns(columns));
     }
 
     public Read withQuery(Statement statement) {
-      return toBuilder().setQuery(statement).build();
+      return withReadOperation(getReadOperation().withQuery(statement));
     }
 
     public Read withQuery(String sql) {
@@ -335,14 +424,13 @@ public class SpannerIO {
     }
 
     public Read withKeySet(KeySet keySet) {
-      return toBuilder().setKeySet(keySet).build();
+      return withReadOperation(getReadOperation().withKeySet(keySet));
     }
 
     public Read withIndex(String index) {
-      return toBuilder().setIndex(index).build();
+      return withReadOperation(getReadOperation().withIndex(index));
     }
 
-
     @Override
     public void validate(PipelineOptions options) {
       getSpannerConfig().validate(options);
@@ -351,16 +439,16 @@ public class SpannerIO {
           "SpannerIO.read() runs in a read only transaction and requires timestamp to be set "
               + "with withTimestampBound or withTimestamp method");
 
-      if (getQuery() != null) {
+      if (getReadOperation().getQuery() != null) {
         // TODO: validate query?
-      } else if (getTable() != null) {
+      } else if (getReadOperation().getTable() != null) {
         // Assume read
         checkNotNull(
-            getColumns(),
+            getReadOperation().getColumns(),
             "For a read operation SpannerIO.read() requires a list of "
                 + "columns to set with withColumns method");
         checkArgument(
-            !getColumns().isEmpty(),
+            !getReadOperation().getColumns().isEmpty(),
             "For a read operation SpannerIO.read() requires a"
                 + " list of columns to set with withColumns method");
       } else {
@@ -371,18 +459,17 @@ public class SpannerIO {
 
     @Override
     public PCollection<Struct> expand(PBegin input) {
-      Read config = this;
-      List<PCollectionView<Transaction>> sideInputs = Collections.emptyList();
-      if (getTimestampBound() != null) {
-        PCollectionView<Transaction> transaction =
-            input.apply(createTransaction().withSpannerConfig(getSpannerConfig()));
-        config = config.withTransaction(transaction);
-        sideInputs = Collections.singletonList(transaction);
+      PCollectionView<Transaction> transaction = getTransaction();
+      if (transaction == null && getTimestampBound() != null) {
+        transaction =
+            input.apply(
+                createTransaction()
+                    .withTimestampBound(getTimestampBound())
+                    .withSpannerConfig(getSpannerConfig()));
       }
-      return input
-          .apply(Create.of(1))
-          .apply(
-              "Execute query", ParDo.of(new NaiveSpannerReadFn(config)).withSideInputs(sideInputs));
+      ReadAll readAll =
+          readAll().withSpannerConfig(getSpannerConfig()).withTransaction(transaction);
+      return input.apply(Create.of(getReadOperation())).apply("Execute query", readAll);
     }
   }
 

http://git-wip-us.apache.org/repos/asf/beam/blob/95e9c28c/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/spanner/SpannerIOReadTest.java
----------------------------------------------------------------------
diff --git a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/spanner/SpannerIOReadTest.java b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/spanner/SpannerIOReadTest.java
index 5ba2da0..6eb1a33 100644
--- a/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/spanner/SpannerIOReadTest.java
+++ b/sdks/java/io/google-cloud-platform/src/test/java/org/apache/beam/sdk/io/gcp/spanner/SpannerIOReadTest.java
@@ -39,6 +39,7 @@ import java.util.List;
 import org.apache.beam.sdk.testing.NeedsRunner;
 import org.apache.beam.sdk.testing.PAssert;
 import org.apache.beam.sdk.testing.TestPipeline;
+import org.apache.beam.sdk.transforms.Create;
 import org.apache.beam.sdk.transforms.DoFnTester;
 import org.apache.beam.sdk.values.PCollection;
 import org.apache.beam.sdk.values.PCollectionView;
@@ -55,6 +56,7 @@ import org.mockito.Mockito;
 /** Unit tests for {@link SpannerIO}. */
 @RunWith(JUnit4.class)
 public class SpannerIOReadTest implements Serializable {
+
   @Rule
   public final transient TestPipeline pipeline = TestPipeline.create();
   @Rule
@@ -63,12 +65,16 @@ public class SpannerIOReadTest implements Serializable {
   private FakeServiceFactory serviceFactory;
   private ReadOnlyTransaction mockTx;
 
-  private Type fakeType = Type.struct(Type.StructField.of("id", Type.int64()),
-      Type.StructField.of("name", Type.string()));
+  private static final Type FAKE_TYPE =
+      Type.struct(
+          Type.StructField.of("id", Type.int64()), Type.StructField.of("name", Type.string()));
 
-  private List<Struct> fakeRows = Arrays.asList(
-      Struct.newBuilder().add("id", Value.int64(1)).add("name", Value.string("Alice")).build(),
-      Struct.newBuilder().add("id", Value.int64(2)).add("name", Value.string("Bob")).build());
+  private static final List<Struct> FAKE_ROWS =
+      Arrays.asList(
+          Struct.newBuilder().add("id", Value.int64(1)).add("name", Value.string("Alice")).build(),
+          Struct.newBuilder().add("id", Value.int64(2)).add("name", Value.string("Bob")).build(),
+          Struct.newBuilder().add("id", Value.int64(3)).add("name", Value.string("Carl")).build(),
+          Struct.newBuilder().add("id", Value.int64(4)).add("name", Value.string("Dan")).build());
 
   @Before
   @SuppressWarnings("unchecked")
@@ -153,20 +159,19 @@ public class SpannerIOReadTest implements Serializable {
             .withProjectId("test")
             .withInstanceId("123")
             .withDatabaseId("aaa")
-            .withTimestamp(Timestamp.now())
             .withQuery("SELECT * FROM users")
             .withServiceFactory(serviceFactory);
 
-    NaiveSpannerReadFn readFn = new NaiveSpannerReadFn(read);
-    DoFnTester<Object, Struct> fnTester = DoFnTester.of(readFn);
+    NaiveSpannerReadFn readFn = new NaiveSpannerReadFn(read.getSpannerConfig());
+    DoFnTester<ReadOperation, Struct> fnTester = DoFnTester.of(readFn);
 
     when(serviceFactory.mockDatabaseClient().readOnlyTransaction(any(TimestampBound.class)))
         .thenReturn(mockTx);
     when(mockTx.executeQuery(any(Statement.class)))
-        .thenReturn(ResultSets.forRows(fakeType, fakeRows));
+        .thenReturn(ResultSets.forRows(FAKE_TYPE, FAKE_ROWS));
 
-    List<Struct> result = fnTester.processBundle(1);
-    assertThat(result, Matchers.<Struct>iterableWithSize(2));
+    List<Struct> result = fnTester.processBundle(read.getReadOperation());
+    assertThat(result, Matchers.containsInAnyOrder(FAKE_ROWS.toArray()));
 
     verify(serviceFactory.mockDatabaseClient()).readOnlyTransaction(TimestampBound
         .strong());
@@ -180,21 +185,20 @@ public class SpannerIOReadTest implements Serializable {
             .withProjectId("test")
             .withInstanceId("123")
             .withDatabaseId("aaa")
-            .withTimestamp(Timestamp.now())
             .withTable("users")
             .withColumns("id", "name")
             .withServiceFactory(serviceFactory);
 
-    NaiveSpannerReadFn readFn = new NaiveSpannerReadFn(read);
-    DoFnTester<Object, Struct> fnTester = DoFnTester.of(readFn);
+    NaiveSpannerReadFn readFn = new NaiveSpannerReadFn(read.getSpannerConfig());
+    DoFnTester<ReadOperation, Struct> fnTester = DoFnTester.of(readFn);
 
     when(serviceFactory.mockDatabaseClient().readOnlyTransaction(any(TimestampBound.class)))
         .thenReturn(mockTx);
     when(mockTx.read("users", KeySet.all(), Arrays.asList("id", "name")))
-        .thenReturn(ResultSets.forRows(fakeType, fakeRows));
+        .thenReturn(ResultSets.forRows(FAKE_TYPE, FAKE_ROWS));
 
-    List<Struct> result = fnTester.processBundle(1);
-    assertThat(result, Matchers.<Struct>iterableWithSize(2));
+    List<Struct> result = fnTester.processBundle(read.getReadOperation());
+    assertThat(result, Matchers.containsInAnyOrder(FAKE_ROWS.toArray()));
 
     verify(serviceFactory.mockDatabaseClient()).readOnlyTransaction(TimestampBound.strong());
     verify(mockTx).read("users", KeySet.all(), Arrays.asList("id", "name"));
@@ -213,16 +217,16 @@ public class SpannerIOReadTest implements Serializable {
             .withIndex("theindex")
             .withServiceFactory(serviceFactory);
 
-    NaiveSpannerReadFn readFn = new NaiveSpannerReadFn(read);
-    DoFnTester<Object, Struct> fnTester = DoFnTester.of(readFn);
+    NaiveSpannerReadFn readFn = new NaiveSpannerReadFn(read.getSpannerConfig());
+    DoFnTester<ReadOperation, Struct> fnTester = DoFnTester.of(readFn);
 
     when(serviceFactory.mockDatabaseClient().readOnlyTransaction(any(TimestampBound.class)))
         .thenReturn(mockTx);
     when(mockTx.readUsingIndex("users", "theindex", KeySet.all(), Arrays.asList("id", "name")))
-        .thenReturn(ResultSets.forRows(fakeType, fakeRows));
+        .thenReturn(ResultSets.forRows(FAKE_TYPE, FAKE_ROWS));
 
-    List<Struct> result = fnTester.processBundle(1);
-    assertThat(result, Matchers.<Struct>iterableWithSize(2));
+    List<Struct> result = fnTester.processBundle(read.getReadOperation());
+    assertThat(result, Matchers.containsInAnyOrder(FAKE_ROWS.toArray()));
 
     verify(serviceFactory.mockDatabaseClient()).readOnlyTransaction(TimestampBound.strong());
     verify(mockTx).readUsingIndex("users", "theindex", KeySet.all(), Arrays.asList("id", "name"));
@@ -233,30 +237,32 @@ public class SpannerIOReadTest implements Serializable {
   public void readPipeline() throws Exception {
     Timestamp timestamp = Timestamp.ofTimeMicroseconds(12345);
 
-    PCollectionView<Transaction> tx = pipeline
-        .apply("tx", SpannerIO.createTransaction()
+    SpannerConfig spannerConfig =
+        SpannerConfig.create()
             .withProjectId("test")
             .withInstanceId("123")
             .withDatabaseId("aaa")
-            .withServiceFactory(serviceFactory));
-
-    PCollection<Struct> one = pipeline.apply("read q", SpannerIO.read()
-        .withProjectId("test")
-        .withInstanceId("123")
-        .withDatabaseId("aaa")
-        .withTimestamp(Timestamp.now())
-        .withQuery("SELECT * FROM users")
-        .withServiceFactory(serviceFactory)
-        .withTransaction(tx));
-    PCollection<Struct> two = pipeline.apply("read r", SpannerIO.read()
-        .withProjectId("test")
-        .withInstanceId("123")
-        .withDatabaseId("aaa")
-        .withTimestamp(Timestamp.now())
-        .withTable("users")
-        .withColumns("id", "name")
-        .withServiceFactory(serviceFactory)
-        .withTransaction(tx));
+            .withServiceFactory(serviceFactory);
+
+    PCollectionView<Transaction> tx =
+        pipeline.apply("tx", SpannerIO.createTransaction().withSpannerConfig(spannerConfig));
+
+    PCollection<Struct> one =
+        pipeline.apply(
+            "read q",
+            SpannerIO.read()
+                .withSpannerConfig(spannerConfig)
+                .withQuery("SELECT * FROM users")
+                .withTransaction(tx));
+    PCollection<Struct> two =
+        pipeline.apply(
+            "read r",
+            SpannerIO.read()
+                .withSpannerConfig(spannerConfig)
+                .withTimestamp(Timestamp.now())
+                .withTable("users")
+                .withColumns("id", "name")
+                .withTransaction(tx));
 
     when(serviceFactory.mockDatabaseClient().readOnlyTransaction(any(TimestampBound.class)))
         .thenReturn(mockTx);
@@ -265,13 +271,58 @@ public class SpannerIOReadTest implements Serializable {
         Collections.<Struct>emptyList()));
 
     when(mockTx.executeQuery(Statement.of("SELECT * FROM users")))
-        .thenReturn(ResultSets.forRows(fakeType, fakeRows));
+        .thenReturn(ResultSets.forRows(FAKE_TYPE, FAKE_ROWS));
+    when(mockTx.read("users", KeySet.all(), Arrays.asList("id", "name")))
+        .thenReturn(ResultSets.forRows(FAKE_TYPE, FAKE_ROWS));
+    when(mockTx.getReadTimestamp()).thenReturn(timestamp);
+
+    PAssert.that(one).containsInAnyOrder(FAKE_ROWS);
+    PAssert.that(two).containsInAnyOrder(FAKE_ROWS);
+
+    pipeline.run();
+
+    verify(serviceFactory.mockDatabaseClient(), times(2))
+        .readOnlyTransaction(TimestampBound.ofReadTimestamp(timestamp));
+  }
+
+  @Test
+  @Category(NeedsRunner.class)
+  public void readAllPipeline() throws Exception {
+    Timestamp timestamp = Timestamp.ofTimeMicroseconds(12345);
+
+    SpannerConfig spannerConfig =
+        SpannerConfig.create()
+            .withProjectId("test")
+            .withInstanceId("123")
+            .withDatabaseId("aaa")
+            .withServiceFactory(serviceFactory);
+
+    PCollectionView<Transaction> tx =
+        pipeline.apply("tx", SpannerIO.createTransaction().withSpannerConfig(spannerConfig));
+
+    PCollection<ReadOperation> reads =
+        pipeline.apply(
+            Create.of(
+                ReadOperation.create().withQuery("SELECT * FROM users"),
+                ReadOperation.create().withTable("users").withColumns("id", "name")));
+
+    PCollection<Struct> one =
+        reads.apply(
+            "read all", SpannerIO.readAll().withSpannerConfig(spannerConfig).withTransaction(tx));
+
+    when(serviceFactory.mockDatabaseClient().readOnlyTransaction(any(TimestampBound.class)))
+        .thenReturn(mockTx);
+
+    when(mockTx.executeQuery(Statement.of("SELECT 1")))
+        .thenReturn(ResultSets.forRows(Type.struct(), Collections.<Struct>emptyList()));
+
+    when(mockTx.executeQuery(Statement.of("SELECT * FROM users")))
+        .thenReturn(ResultSets.forRows(FAKE_TYPE, FAKE_ROWS.subList(0, 2)));
     when(mockTx.read("users", KeySet.all(), Arrays.asList("id", "name")))
-        .thenReturn(ResultSets.forRows(fakeType, fakeRows));
+        .thenReturn(ResultSets.forRows(FAKE_TYPE, FAKE_ROWS.subList(2, 4)));
     when(mockTx.getReadTimestamp()).thenReturn(timestamp);
 
-    PAssert.that(one).containsInAnyOrder(fakeRows);
-    PAssert.that(two).containsInAnyOrder(fakeRows);
+    PAssert.that(one).containsInAnyOrder(FAKE_ROWS);
 
     pipeline.run();
 


[32/50] [abbrv] beam git commit: Add GroupByKey tests for Multiple & Merging windows

Posted by jb...@apache.org.
Add GroupByKey tests for Multiple & Merging windows

This gives explicit coverage to a GroupByKey where the elements are in
multiple windows, or in merging windows.
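
Here is a small back-of-the-envelope sketch in plain Python of why a single
element can land in several windows, which is exactly what the new assertions
exercise (illustrative only; the function name and printed values are not part
of the Beam API or of this change):

# SlidingWindows of size 5ms, sliding every 3ms: each element falls into
# ceil(size / period) = 2 windows.
def sliding_windows(t, size=5, period=3):
  """Return the [start, end) interval of every window containing t."""
  start = t - (t % period)           # latest window start at or before t
  windows = []
  while start > t - size:            # [start, start + size) still covers t
    windows.append((start, start + size))
    start -= period
  return list(reversed(windows))

print(sliding_windows(1))   # [(-3, 2), (0, 5)]  -> "foo"@1 is in two windows
print(sliding_windows(4))   # [(0, 5), (3, 8)]   -> "foo"@4 is in two windows

# Merging (session) windows with a 4ms gap: each element opens a proto-window
# [t, t + 4); overlapping proto-windows merge, so "foo"@1 and "foo"@4 end up
# grouped together in [1, 8), while "foo"@9 stays alone in [9, 13).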


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/1e947045
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/1e947045
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/1e947045

Branch: refs/heads/DSL_SQL
Commit: 1e947045a54bd59b449fd56f8f5f50879b6d9c4c
Parents: be5b934
Author: Thomas Groh <tg...@google.com>
Authored: Mon Jul 17 13:38:11 2017 -0700
Committer: Thomas Groh <tg...@google.com>
Committed: Tue Jul 18 17:52:55 2017 -0700

----------------------------------------------------------------------
 .../beam/sdk/transforms/GroupByKeyTest.java     | 156 +++++++++++++++----
 1 file changed, 122 insertions(+), 34 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/1e947045/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/GroupByKeyTest.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/GroupByKeyTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/GroupByKeyTest.java
index 4b5d5f5..8fcb4c0 100644
--- a/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/GroupByKeyTest.java
+++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/GroupByKeyTest.java
@@ -23,18 +23,20 @@ import static org.hamcrest.CoreMatchers.equalTo;
 import static org.hamcrest.CoreMatchers.hasItem;
 import static org.hamcrest.Matchers.empty;
 import static org.hamcrest.collection.IsIterableContainingInAnyOrder.containsInAnyOrder;
-import static org.hamcrest.core.Is.is;
 import static org.junit.Assert.assertThat;
 
 import com.google.common.base.Function;
+import com.google.common.collect.ImmutableList;
 import com.google.common.collect.Iterables;
 import java.io.DataInputStream;
 import java.io.DataOutputStream;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.OutputStream;
+import java.io.Serializable;
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.Collection;
 import java.util.List;
 import java.util.Map;
 import java.util.concurrent.ThreadLocalRandom;
@@ -56,9 +58,12 @@ import org.apache.beam.sdk.testing.ValidatesRunner;
 import org.apache.beam.sdk.transforms.display.DisplayData;
 import org.apache.beam.sdk.transforms.windowing.AfterProcessingTime;
 import org.apache.beam.sdk.transforms.windowing.FixedWindows;
+import org.apache.beam.sdk.transforms.windowing.GlobalWindow;
+import org.apache.beam.sdk.transforms.windowing.IntervalWindow;
 import org.apache.beam.sdk.transforms.windowing.InvalidWindows;
 import org.apache.beam.sdk.transforms.windowing.Repeatedly;
 import org.apache.beam.sdk.transforms.windowing.Sessions;
+import org.apache.beam.sdk.transforms.windowing.SlidingWindows;
 import org.apache.beam.sdk.transforms.windowing.TimestampCombiner;
 import org.apache.beam.sdk.transforms.windowing.Window;
 import org.apache.beam.sdk.values.KV;
@@ -67,6 +72,7 @@ import org.apache.beam.sdk.values.PCollection;
 import org.apache.beam.sdk.values.TimestampedValue;
 import org.apache.beam.sdk.values.TypeDescriptor;
 import org.apache.beam.sdk.values.WindowingStrategy;
+import org.hamcrest.Matcher;
 import org.joda.time.Duration;
 import org.joda.time.Instant;
 import org.junit.Assert;
@@ -82,13 +88,13 @@ import org.junit.runners.JUnit4;
  */
 @RunWith(JUnit4.class)
 @SuppressWarnings({"rawtypes", "unchecked"})
-public class GroupByKeyTest {
+public class GroupByKeyTest implements Serializable {
 
   @Rule
-  public final TestPipeline p = TestPipeline.create();
+  public transient TestPipeline p = TestPipeline.create();
 
   @Rule
-  public ExpectedException thrown = ExpectedException.none();
+  public transient ExpectedException thrown = ExpectedException.none();
 
   @Test
   @Category(ValidatesRunner.class)
@@ -109,27 +115,18 @@ public class GroupByKeyTest {
     PCollection<KV<String, Iterable<Integer>>> output =
         input.apply(GroupByKey.<String, Integer>create());
 
-    PAssert.that(output)
-        .satisfies(new AssertThatHasExpectedContentsForTestGroupByKey());
+    SerializableFunction<Iterable<KV<String, Iterable<Integer>>>, Void> checker =
+        containsKvs(
+            kv("k1", 3, 4),
+            kv("k5", Integer.MIN_VALUE, Integer.MAX_VALUE),
+            kv("k2", 66, -33),
+            kv("k3", 0));
+    PAssert.that(output).satisfies(checker);
+    PAssert.that(output).inWindow(GlobalWindow.INSTANCE).satisfies(checker);
 
     p.run();
   }
 
-  static class AssertThatHasExpectedContentsForTestGroupByKey
-      implements SerializableFunction<Iterable<KV<String, Iterable<Integer>>>,
-                                      Void> {
-    @Override
-    public Void apply(Iterable<KV<String, Iterable<Integer>>> actual) {
-      assertThat(actual, containsInAnyOrder(
-          isKv(is("k1"), containsInAnyOrder(3, 4)),
-          isKv(is("k5"), containsInAnyOrder(Integer.MAX_VALUE,
-                                            Integer.MIN_VALUE)),
-          isKv(is("k2"), containsInAnyOrder(66, -33)),
-          isKv(is("k3"), containsInAnyOrder(0))));
-      return null;
-    }
-  }
-
   @Test
   @Category(ValidatesRunner.class)
   public void testGroupByKeyAndWindows() {
@@ -150,24 +147,115 @@ public class GroupByKeyTest {
              .apply(GroupByKey.<String, Integer>create());
 
     PAssert.that(output)
-        .satisfies(new AssertThatHasExpectedContentsForTestGroupByKeyAndWindows());
+        .satisfies(
+            containsKvs(
+                kv("k1", 3),
+                kv("k1", 4),
+                kv("k5", Integer.MAX_VALUE, Integer.MIN_VALUE),
+                kv("k2", 66),
+                kv("k2", -33),
+                kv("k3", 0)));
+    PAssert.that(output)
+        .inWindow(new IntervalWindow(new Instant(0L), Duration.millis(5L)))
+        .satisfies(
+            containsKvs(kv("k1", 3), kv("k5", Integer.MIN_VALUE, Integer.MAX_VALUE), kv("k2", 66)));
+    PAssert.that(output)
+        .inWindow(new IntervalWindow(new Instant(5L), Duration.millis(5L)))
+        .satisfies(containsKvs(kv("k1", 4), kv("k2", -33), kv("k3", 0)));
+
+    p.run();
+  }
+
+  @Test
+  @Category(ValidatesRunner.class)
+  public void testGroupByKeyMultipleWindows() {
+    PCollection<KV<String, Integer>> windowedInput =
+        p.apply(
+                Create.timestamped(
+                    TimestampedValue.of(KV.of("foo", 1), new Instant(1)),
+                    TimestampedValue.of(KV.of("foo", 4), new Instant(4)),
+                    TimestampedValue.of(KV.of("bar", 3), new Instant(3))))
+            .apply(
+                Window.<KV<String, Integer>>into(
+                    SlidingWindows.of(Duration.millis(5L)).every(Duration.millis(3L))));
+
+    PCollection<KV<String, Iterable<Integer>>> output =
+        windowedInput.apply(GroupByKey.<String, Integer>create());
+
+    PAssert.that(output)
+        .satisfies(
+            containsKvs(kv("foo", 1, 4), kv("foo", 1), kv("foo", 4), kv("bar", 3), kv("bar", 3)));
+    PAssert.that(output)
+        .inWindow(new IntervalWindow(new Instant(-3L), Duration.millis(5L)))
+        .satisfies(containsKvs(kv("foo", 1)));
+    PAssert.that(output)
+        .inWindow(new IntervalWindow(new Instant(0L), Duration.millis(5L)))
+        .satisfies(containsKvs(kv("foo", 1, 4), kv("bar", 3)));
+    PAssert.that(output)
+        .inWindow(new IntervalWindow(new Instant(3L), Duration.millis(5L)))
+        .satisfies(containsKvs(kv("foo", 4), kv("bar", 3)));
+
+    p.run();
+  }
+
+  @Test
+  @Category(ValidatesRunner.class)
+  public void testGroupByKeyMergingWindows() {
+    PCollection<KV<String, Integer>> windowedInput =
+        p.apply(
+                Create.timestamped(
+                    TimestampedValue.of(KV.of("foo", 1), new Instant(1)),
+                    TimestampedValue.of(KV.of("foo", 4), new Instant(4)),
+                    TimestampedValue.of(KV.of("bar", 3), new Instant(3)),
+                    TimestampedValue.of(KV.of("foo", 9), new Instant(9))))
+            .apply(Window.<KV<String, Integer>>into(Sessions.withGapDuration(Duration.millis(4L))));
+
+    PCollection<KV<String, Iterable<Integer>>> output =
+        windowedInput.apply(GroupByKey.<String, Integer>create());
+
+    PAssert.that(output).satisfies(containsKvs(kv("foo", 1, 4), kv("foo", 9), kv("bar", 3)));
+    PAssert.that(output)
+        .inWindow(new IntervalWindow(new Instant(1L), new Instant(8L)))
+        .satisfies(containsKvs(kv("foo", 1, 4)));
+    PAssert.that(output)
+        .inWindow(new IntervalWindow(new Instant(3L), new Instant(7L)))
+        .satisfies(containsKvs(kv("bar", 3)));
+    PAssert.that(output)
+        .inWindow(new IntervalWindow(new Instant(9L), new Instant(13L)))
+        .satisfies(containsKvs(kv("foo", 9)));
 
     p.run();
   }
 
-  static class AssertThatHasExpectedContentsForTestGroupByKeyAndWindows
-      implements SerializableFunction<Iterable<KV<String, Iterable<Integer>>>,
-                                      Void> {
+  private static KV<String, Collection<Integer>> kv(String key, Integer... values) {
+    return KV.<String, Collection<Integer>>of(key, ImmutableList.copyOf(values));
+  }
+
+  private static SerializableFunction<Iterable<KV<String, Iterable<Integer>>>, Void> containsKvs(
+      KV<String, Collection<Integer>>... kvs) {
+    return new ContainsKVs(ImmutableList.copyOf(kvs));
+  }
+
+  /**
+   * A function that asserts that its input contains the expected {@link KV KVs} in any order,
+   * where the values of each KV may appear in any order.
+   */
+  private static class ContainsKVs
+      implements SerializableFunction<Iterable<KV<String, Iterable<Integer>>>, Void> {
+    private final List<KV<String, Collection<Integer>>> expectedKvs;
+
+    private ContainsKVs(List<KV<String, Collection<Integer>>> expectedKvs) {
+      this.expectedKvs = expectedKvs;
+    }
+
     @Override
-      public Void apply(Iterable<KV<String, Iterable<Integer>>> actual) {
-      assertThat(actual, containsInAnyOrder(
-          isKv(is("k1"), containsInAnyOrder(3)),
-          isKv(is("k1"), containsInAnyOrder(4)),
-          isKv(is("k5"), containsInAnyOrder(Integer.MAX_VALUE,
-                                            Integer.MIN_VALUE)),
-          isKv(is("k2"), containsInAnyOrder(66)),
-          isKv(is("k2"), containsInAnyOrder(-33)),
-          isKv(is("k3"), containsInAnyOrder(0))));
+    public Void apply(Iterable<KV<String, Iterable<Integer>>> input) {
+      List<Matcher<? super KV<String, Iterable<Integer>>>> matchers = new ArrayList<>();
+      for (KV<String, Collection<Integer>> expected : expectedKvs) {
+        Integer[] values = expected.getValue().toArray(new Integer[0]);
+        matchers.add(isKv(equalTo(expected.getKey()), containsInAnyOrder(values)));
+      }
+      assertThat(input, containsInAnyOrder(matchers.toArray(new Matcher[0])));
       return null;
     }
   }


[46/50] [abbrv] beam git commit: Dynamic sizing of Datastore write RPCs.

Posted by jb...@apache.org.
Dynamic sizing of Datastore write RPCs.

This implements the same behaviour recently added to the Java SDK:
- start at 200 entities per RPC;
- size subsequent requests based on observed latency of previous requests.
Includes a MovingSum class to track recent latency.
Report RPC success & failure counts as metrics (again, as in the Java SDK).


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/0a5157e7
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/0a5157e7
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/0a5157e7

Branch: refs/heads/DSL_SQL
Commit: 0a5157e7e75eb1e1dbfeff4888857154150cc6b6
Parents: 2e51bde
Author: Colin Phipps <fi...@google.com>
Authored: Fri Jul 14 16:10:23 2017 +0000
Committer: chamikara@google.com <ch...@google.com>
Committed: Thu Jul 20 10:18:38 2017 -0700

----------------------------------------------------------------------
 .../io/gcp/datastore/v1/datastoreio.py          | 84 ++++++++++++++---
 .../io/gcp/datastore/v1/datastoreio_test.py     | 53 +++++++++--
 .../apache_beam/io/gcp/datastore/v1/helper.py   | 35 ++++++--
 .../apache_beam/io/gcp/datastore/v1/util.py     | 95 ++++++++++++++++++++
 .../io/gcp/datastore/v1/util_test.py            | 67 ++++++++++++++
 5 files changed, 310 insertions(+), 24 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/0a5157e7/sdks/python/apache_beam/io/gcp/datastore/v1/datastoreio.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/io/gcp/datastore/v1/datastoreio.py b/sdks/python/apache_beam/io/gcp/datastore/v1/datastoreio.py
index 89c2a93..0258814 100644
--- a/sdks/python/apache_beam/io/gcp/datastore/v1/datastoreio.py
+++ b/sdks/python/apache_beam/io/gcp/datastore/v1/datastoreio.py
@@ -18,6 +18,7 @@
 """A connector for reading from and writing to Google Cloud Datastore"""
 
 import logging
+import time
 
 # Protect against environments where datastore library is not available.
 # pylint: disable=wrong-import-order, wrong-import-position
@@ -30,6 +31,7 @@ except ImportError:
 
 from apache_beam.io.gcp.datastore.v1 import helper
 from apache_beam.io.gcp.datastore.v1 import query_splitter
+from apache_beam.io.gcp.datastore.v1 import util
 from apache_beam.transforms import Create
 from apache_beam.transforms import DoFn
 from apache_beam.transforms import FlatMap
@@ -38,6 +40,7 @@ from apache_beam.transforms import Map
 from apache_beam.transforms import PTransform
 from apache_beam.transforms import ParDo
 from apache_beam.transforms.util import Values
+from apache_beam.metrics.metric import Metrics
 
 __all__ = ['ReadFromDatastore', 'WriteToDatastore', 'DeleteFromDatastore']
 
@@ -313,12 +316,15 @@ class _Mutate(PTransform):
   supported, as the commits are retried when failures occur.
   """
 
+  _WRITE_BATCH_INITIAL_SIZE = 200
   # Max allowed Datastore writes per batch, and max bytes per batch.
   # Note that the max bytes per batch set here is lower than the 10MB limit
   # actually enforced by the API, to leave space for the CommitRequest wrapper
   # around the mutations.
-  _WRITE_BATCH_SIZE = 500
-  _WRITE_BATCH_BYTES_SIZE = 9000000
+  _WRITE_BATCH_MAX_SIZE = 500
+  _WRITE_BATCH_MAX_BYTES_SIZE = 9000000
+  _WRITE_BATCH_MIN_SIZE = 10
+  _WRITE_BATCH_TARGET_LATENCY_MS = 5000
 
   def __init__(self, project, mutation_fn):
     """Initializes a Mutate transform.
@@ -342,48 +348,102 @@ class _Mutate(PTransform):
     return {'project': self._project,
             'mutation_fn': self._mutation_fn.__class__.__name__}
 
+  class _DynamicBatchSizer(object):
+    """Determines request sizes for future Datastore RPCS."""
+    def __init__(self):
+      self._commit_time_per_entity_ms = util.MovingSum(window_ms=120000,
+                                                       bucket_ms=10000)
+
+    def get_batch_size(self, now):
+      """Returns the recommended size for datastore RPCs at this time."""
+      if not self._commit_time_per_entity_ms.has_data(now):
+        return _Mutate._WRITE_BATCH_INITIAL_SIZE
+
+      recent_mean_latency_ms = (self._commit_time_per_entity_ms.sum(now)
+                                / self._commit_time_per_entity_ms.count(now))
+      return max(_Mutate._WRITE_BATCH_MIN_SIZE,
+                 min(_Mutate._WRITE_BATCH_MAX_SIZE,
+                     _Mutate._WRITE_BATCH_TARGET_LATENCY_MS
+                     / max(recent_mean_latency_ms, 1)
+                    ))
+
+    def report_latency(self, now, latency_ms, num_mutations):
+      """Reports the latency of an RPC to Datastore.
+
+      Args:
+        now: double, completion time of the RPC as seconds since the epoch.
+        latency_ms: double, the observed latency in milliseconds for this RPC.
+        num_mutations: int, number of mutations contained in the RPC.
+      """
+      self._commit_time_per_entity_ms.add(now, latency_ms / num_mutations)
+
   class DatastoreWriteFn(DoFn):
     """A ``DoFn`` that write mutations to Datastore.
 
     Mutations are written in batches, where the maximum batch size is
-    `Mutate._WRITE_BATCH_SIZE`.
+    `_Mutate._WRITE_BATCH_MAX_SIZE`.
 
     Commits are non-transactional. If a commit fails because of a conflict over
     an entity group, the commit will be retried. This means that the mutation
     should be idempotent (`upsert` and `delete` mutations) to prevent duplicate
     data or errors.
     """
-    def __init__(self, project):
+    def __init__(self, project, fixed_batch_size=None):
+      """
+      Args:
+        project: str, the cloud project id.
+        fixed_batch_size: int, for testing only; forces all batches of
+           writes to be a fixed size, for easier unit testing.
+      """
       self._project = project
       self._datastore = None
-      self._mutations = []
-      self._mutations_size = 0  # Total size of entries in _mutations.
+      self._fixed_batch_size = fixed_batch_size
+      self._rpc_successes = Metrics.counter(
+          _Mutate.DatastoreWriteFn, "datastoreRpcSuccesses")
+      self._rpc_errors = Metrics.counter(
+          _Mutate.DatastoreWriteFn, "datastoreRpcErrors")
+
+    def _update_rpc_stats(self, successes=0, errors=0):
+      self._rpc_successes.inc(successes)
+      self._rpc_errors.inc(errors)
 
     def start_bundle(self):
       self._mutations = []
       self._mutations_size = 0
       self._datastore = helper.get_datastore(self._project)
+      if self._fixed_batch_size:
+        self._target_batch_size = self._fixed_batch_size
+      else:
+        self._batch_sizer = _Mutate._DynamicBatchSizer()
+        self._target_batch_size = self._batch_sizer.get_batch_size(time.time())
 
     def process(self, element):
       size = element.ByteSize()
       if (self._mutations and
-          size + self._mutations_size > _Mutate._WRITE_BATCH_BYTES_SIZE):
+          size + self._mutations_size > _Mutate._WRITE_BATCH_MAX_BYTES_SIZE):
         self._flush_batch()
       self._mutations.append(element)
       self._mutations_size += size
-      if len(self._mutations) >= _Mutate._WRITE_BATCH_SIZE:
+      if len(self._mutations) >= self._target_batch_size:
         self._flush_batch()
 
     def finish_bundle(self):
       if self._mutations:
         self._flush_batch()
-      self._mutations = []
-      self._mutations_size = 0
 
     def _flush_batch(self):
       # Flush the current batch of mutations to Cloud Datastore.
-      helper.write_mutations(self._datastore, self._project, self._mutations)
-      logging.debug("Successfully wrote %d mutations.", len(self._mutations))
+      _, latency_ms = helper.write_mutations(
+          self._datastore, self._project, self._mutations,
+          self._update_rpc_stats)
+      logging.debug("Successfully wrote %d mutations in %dms.",
+                    len(self._mutations), latency_ms)
+
+      if not self._fixed_batch_size:
+        now = time.time()
+        self._batch_sizer.report_latency(now, latency_ms, len(self._mutations))
+        self._target_batch_size = self._batch_sizer.get_batch_size(now)
+
       self._mutations = []
       self._mutations_size = 0
 

http://git-wip-us.apache.org/repos/asf/beam/blob/0a5157e7/sdks/python/apache_beam/io/gcp/datastore/v1/datastoreio_test.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/io/gcp/datastore/v1/datastoreio_test.py b/sdks/python/apache_beam/io/gcp/datastore/v1/datastoreio_test.py
index 94cac3e..72c4c8c 100644
--- a/sdks/python/apache_beam/io/gcp/datastore/v1/datastoreio_test.py
+++ b/sdks/python/apache_beam/io/gcp/datastore/v1/datastoreio_test.py
@@ -155,15 +155,15 @@ class DatastoreioTest(unittest.TestCase):
     self.check_DatastoreWriteFn(0)
 
   def test_DatastoreWriteFn_with_one_batch(self):
-    num_entities_to_write = _Mutate._WRITE_BATCH_SIZE * 1 - 50
+    num_entities_to_write = _Mutate._WRITE_BATCH_INITIAL_SIZE * 1 - 50
     self.check_DatastoreWriteFn(num_entities_to_write)
 
   def test_DatastoreWriteFn_with_multiple_batches(self):
-    num_entities_to_write = _Mutate._WRITE_BATCH_SIZE * 3 + 50
+    num_entities_to_write = _Mutate._WRITE_BATCH_INITIAL_SIZE * 3 + 50
     self.check_DatastoreWriteFn(num_entities_to_write)
 
   def test_DatastoreWriteFn_with_batch_size_exact_multiple(self):
-    num_entities_to_write = _Mutate._WRITE_BATCH_SIZE * 2
+    num_entities_to_write = _Mutate._WRITE_BATCH_INITIAL_SIZE * 2
     self.check_DatastoreWriteFn(num_entities_to_write)
 
   def check_DatastoreWriteFn(self, num_entities):
@@ -180,7 +180,8 @@ class DatastoreioTest(unittest.TestCase):
       self._mock_datastore.commit.side_effect = (
           fake_datastore.create_commit(actual_mutations))
 
-      datastore_write_fn = _Mutate.DatastoreWriteFn(self._PROJECT)
+      datastore_write_fn = _Mutate.DatastoreWriteFn(
+          self._PROJECT, fixed_batch_size=_Mutate._WRITE_BATCH_INITIAL_SIZE)
 
       datastore_write_fn.start_bundle()
       for mutation in expected_mutations:
@@ -188,8 +189,9 @@ class DatastoreioTest(unittest.TestCase):
       datastore_write_fn.finish_bundle()
 
       self.assertEqual(actual_mutations, expected_mutations)
-      self.assertEqual((num_entities - 1) / _Mutate._WRITE_BATCH_SIZE + 1,
-                       self._mock_datastore.commit.call_count)
+      self.assertEqual(
+          (num_entities - 1) / _Mutate._WRITE_BATCH_INITIAL_SIZE + 1,
+          self._mock_datastore.commit.call_count)
 
   def test_DatastoreWriteLargeEntities(self):
     """100*100kB entities gets split over two Commit RPCs."""
@@ -197,7 +199,8 @@ class DatastoreioTest(unittest.TestCase):
                       return_value=self._mock_datastore):
       entities = [e.entity for e in fake_datastore.create_entities(100)]
 
-      datastore_write_fn = _Mutate.DatastoreWriteFn(self._PROJECT)
+      datastore_write_fn = _Mutate.DatastoreWriteFn(
+          self._PROJECT, fixed_batch_size=_Mutate._WRITE_BATCH_INITIAL_SIZE)
       datastore_write_fn.start_bundle()
       for entity in entities:
         datastore_helper.add_properties(
@@ -258,5 +261,41 @@ class DatastoreioTest(unittest.TestCase):
     return split_queries
 
 
+@unittest.skipIf(datastore_pb2 is None, 'GCP dependencies are not installed')
+class DynamicWriteBatcherTest(unittest.TestCase):
+
+  def setUp(self):
+    self._batcher = _Mutate._DynamicBatchSizer()
+
+  # If possible, keep these test cases aligned with the Java test cases in
+  # DatastoreV1Test.java
+  def test_no_data(self):
+    self.assertEquals(_Mutate._WRITE_BATCH_INITIAL_SIZE,
+                      self._batcher.get_batch_size(0))
+
+  def test_fast_queries(self):
+    self._batcher.report_latency(0, 1000, 200)
+    self._batcher.report_latency(0, 1000, 200)
+    self.assertEquals(_Mutate._WRITE_BATCH_MAX_SIZE,
+                      self._batcher.get_batch_size(0))
+
+  def test_slow_queries(self):
+    self._batcher.report_latency(0, 10000, 200)
+    self._batcher.report_latency(0, 10000, 200)
+    self.assertEquals(100, self._batcher.get_batch_size(0))
+
+  def test_size_not_below_minimum(self):
+    self._batcher.report_latency(0, 30000, 50)
+    self._batcher.report_latency(0, 30000, 50)
+    self.assertEquals(_Mutate._WRITE_BATCH_MIN_SIZE,
+                      self._batcher.get_batch_size(0))
+
+  def test_sliding_window(self):
+    self._batcher.report_latency(0, 30000, 50)
+    self._batcher.report_latency(50000, 5000, 200)
+    self._batcher.report_latency(100000, 5000, 200)
+    self.assertEquals(200, self._batcher.get_batch_size(150000))
+
+
 if __name__ == '__main__':
   unittest.main()

http://git-wip-us.apache.org/repos/asf/beam/blob/0a5157e7/sdks/python/apache_beam/io/gcp/datastore/v1/helper.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/io/gcp/datastore/v1/helper.py b/sdks/python/apache_beam/io/gcp/datastore/v1/helper.py
index 996dace..da14cc4 100644
--- a/sdks/python/apache_beam/io/gcp/datastore/v1/helper.py
+++ b/sdks/python/apache_beam/io/gcp/datastore/v1/helper.py
@@ -23,6 +23,7 @@ For internal use only; no backwards-compatibility guarantees.
 import errno
 from socket import error as SocketError
 import sys
+import time
 
 # Protect against environments where datastore library is not available.
 # pylint: disable=wrong-import-order, wrong-import-position
@@ -166,13 +167,25 @@ def is_key_valid(key):
   return key.path[-1].HasField('id') or key.path[-1].HasField('name')
 
 
-def write_mutations(datastore, project, mutations):
+def write_mutations(datastore, project, mutations, rpc_stats_callback=None):
   """A helper function to write a batch of mutations to Cloud Datastore.
 
   If a commit fails, it will be retried up to 5 times. All mutations in the
   batch will be committed again, even if the commit was partially successful.
   If the retry limit is exceeded, the last exception from Cloud Datastore will
   be raised.
+
+  Args:
+    datastore: googledatastore.connection.Datastore
+    project: str, project id
+    mutations: list of google.cloud.proto.datastore.v1.datastore_pb2.Mutation
+    rpc_stats_callback: a function to call with keyword arguments `successes`
+        and `errors`; this is called to record successful and failed RPCs to
+        Datastore.
+
+  Returns a tuple of:
+    CommitResponse, the response from Datastore;
+    int, the latency of the successful RPC in milliseconds.
   """
   commit_request = datastore_pb2.CommitRequest()
   commit_request.mode = datastore_pb2.CommitRequest.NON_TRANSACTIONAL
@@ -182,10 +195,22 @@ def write_mutations(datastore, project, mutations):
 
   @retry.with_exponential_backoff(num_retries=5,
                                   retry_filter=retry_on_rpc_error)
-  def commit(req):
-    datastore.commit(req)
-
-  commit(commit_request)
+  def commit(request):
+    try:
+      start_time = time.time()
+      response = datastore.commit(request)
+      end_time = time.time()
+      if rpc_stats_callback:
+        rpc_stats_callback(successes=1)
+
+      commit_time_ms = int((end_time-start_time)*1000)
+      return response, commit_time_ms
+    except (RPCError, SocketError):
+      if rpc_stats_callback:
+        rpc_stats_callback(errors=1)
+      raise
+
+  response, commit_time_ms = commit(commit_request)
+  return response, commit_time_ms
 
 
 def make_latest_timestamp_query(namespace):

http://git-wip-us.apache.org/repos/asf/beam/blob/0a5157e7/sdks/python/apache_beam/io/gcp/datastore/v1/util.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/io/gcp/datastore/v1/util.py b/sdks/python/apache_beam/io/gcp/datastore/v1/util.py
new file mode 100644
index 0000000..5670a24
--- /dev/null
+++ b/sdks/python/apache_beam/io/gcp/datastore/v1/util.py
@@ -0,0 +1,95 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Utility functions & classes that are _not_ specific to the datastore client.
+#
+# For internal use only; no backwards-compatibility guarantees.
+
+import math
+
+
+class MovingSum(object):
+  """Class that keeps track of a rolling window sum.
+
+  For use in tracking recent performance of the connector.
+
+  Intended to be similar to
+  org.apache.beam.sdk.util.MovingFunction(..., Sum.ofLongs()), but for
+  convenience we expose the count of entries as well so this doubles as a
+  moving average tracker.
+  """
+
+  def __init__(self, window_ms, bucket_ms):
+    if window_ms <= bucket_ms or bucket_ms <= 0:
+      raise ValueError("window_ms > bucket_ms > 0 please")
+    self._num_buckets = int(math.ceil(window_ms / bucket_ms))
+    self._bucket_ms = bucket_ms
+    self._Reset(now=0)  # initialize the moving window members
+
+  def _Reset(self, now):
+    self._current_index = 0  # pointer into self._buckets
+    self._current_ms_since_epoch = math.floor(
+        now / self._bucket_ms) * self._bucket_ms
+
+    # _buckets is a list where each element is a list [sum, num_samples]
+    # This is a circular buffer where
+    # [_current_index] represents the time range
+    #     [_current_ms_since_epoch, _current_ms_since_epoch+_bucket_ms)
+    # [_current_index-1] represents the immediately prior time range
+    #     [_current_ms_since_epoch-_bucket_ms, _current_ms_since_epoch)
+    # etc, wrapping around from the start to the end of the array, so
+    # [_current_index+1] is the element representing the oldest bucket.
+    self._buckets = [[0, 0] for _ in range(0, self._num_buckets)]
+
+  def _Flush(self, now):
+    """
+
+    Args:
+      now: int, milliseconds since epoch
+    """
+    if now >= (self._current_ms_since_epoch
+               + self._bucket_ms * self._num_buckets):
+      # Time moved forward so far that all currently held data is outside of
+      # the window.  It is faster to simply reset our data.
+      self._Reset(now)
+      return
+
+    while now > self._current_ms_since_epoch + self._bucket_ms:
+      # Advance time by one _bucket_ms, setting the new bucket's counts to 0.
+      self._current_ms_since_epoch += self._bucket_ms
+      self._current_index = (self._current_index+1) % self._num_buckets
+      self._buckets[self._current_index] = [0, 0]
+      # Intentional dead reckoning here; we don't care about staying precisely
+      # aligned with multiples of _bucket_ms since the epoch, we just need our
+      # buckets to represent the most recent _window_ms time window.
+
+  def sum(self, now):
+    self._Flush(now)
+    return sum(bucket[0] for bucket in self._buckets)
+
+  def add(self, now, inc):
+    self._Flush(now)
+    bucket = self._buckets[self._current_index]
+    bucket[0] += inc
+    bucket[1] += 1
+
+  def count(self, now):
+    self._Flush(now)
+    return sum(bucket[1] for bucket in self._buckets)
+
+  def has_data(self, now):
+    return self.count(now) > 0

http://git-wip-us.apache.org/repos/asf/beam/blob/0a5157e7/sdks/python/apache_beam/io/gcp/datastore/v1/util_test.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/io/gcp/datastore/v1/util_test.py b/sdks/python/apache_beam/io/gcp/datastore/v1/util_test.py
new file mode 100644
index 0000000..8f17c21
--- /dev/null
+++ b/sdks/python/apache_beam/io/gcp/datastore/v1/util_test.py
@@ -0,0 +1,67 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""Tests for util.py."""
+import unittest
+
+from apache_beam.io.gcp.datastore.v1 import util
+
+
+class MovingSumTest(unittest.TestCase):
+
+  TIMESTAMP = 1500000000
+
+  def test_bad_bucket_size(self):
+    with self.assertRaises(ValueError):
+      _ = util.MovingSum(1, 0)
+
+  def test_bad_window_size(self):
+    with self.assertRaises(ValueError):
+      _ = util.MovingSum(1, 2)
+
+  def test_no_data(self):
+    ms = util.MovingSum(10, 1)
+    self.assertEqual(0, ms.sum(MovingSumTest.TIMESTAMP))
+    self.assertEqual(0, ms.count(MovingSumTest.TIMESTAMP))
+    self.assertFalse(ms.has_data(MovingSumTest.TIMESTAMP))
+
+  def test_one_data_point(self):
+    ms = util.MovingSum(10, 1)
+    ms.add(MovingSumTest.TIMESTAMP, 5)
+    self.assertEqual(5, ms.sum(MovingSumTest.TIMESTAMP))
+    self.assertEqual(1, ms.count(MovingSumTest.TIMESTAMP))
+    self.assertTrue(ms.has_data(MovingSumTest.TIMESTAMP))
+
+  def test_aggregates_within_window(self):
+    ms = util.MovingSum(10, 1)
+    ms.add(MovingSumTest.TIMESTAMP, 5)
+    ms.add(MovingSumTest.TIMESTAMP+1, 3)
+    ms.add(MovingSumTest.TIMESTAMP+2, 7)
+    self.assertEqual(15, ms.sum(MovingSumTest.TIMESTAMP+3))
+    self.assertEqual(3, ms.count(MovingSumTest.TIMESTAMP+3))
+
+  def test_data_expires_from_moving_window(self):
+    ms = util.MovingSum(5, 1)
+    ms.add(MovingSumTest.TIMESTAMP, 5)
+    ms.add(MovingSumTest.TIMESTAMP+3, 3)
+    ms.add(MovingSumTest.TIMESTAMP+6, 7)
+    self.assertEqual(10, ms.sum(MovingSumTest.TIMESTAMP+7))
+    self.assertEqual(2, ms.count(MovingSumTest.TIMESTAMP+7))
+
+
+if __name__ == '__main__':
+  unittest.main()


[22/50] [abbrv] beam git commit: This closes #2896

Posted by jb...@apache.org.
This closes #2896


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/d2201f9c
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/d2201f9c
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/d2201f9c

Branch: refs/heads/DSL_SQL
Commit: d2201f9cf4a45e5470bc5e1032eb497937946bbb
Parents: 7c36318 a48eefa
Author: Ben Chambers <bc...@bchambers-macbookpro2.roam.corp.google.com>
Authored: Tue Jul 18 09:58:57 2017 -0700
Committer: Ben Chambers <bc...@bchambers-macbookpro2.roam.corp.google.com>
Committed: Tue Jul 18 09:58:57 2017 -0700

----------------------------------------------------------------------
 .../org/apache/beam/examples/WordCount.java     |   4 +
 pom.xml                                         |   2 +-
 .../beam/runners/dataflow/DataflowMetrics.java  | 310 +++++++++++++------
 .../runners/dataflow/DataflowPipelineJob.java   |   4 +
 .../runners/dataflow/DataflowMetricsTest.java   | 174 ++++++++++-
 .../beam/sdk/metrics/MetricResultsMatchers.java |   2 +-
 6 files changed, 388 insertions(+), 108 deletions(-)
----------------------------------------------------------------------



[13/50] [abbrv] beam git commit: This closes #3534: [BEAM-933] Fix and enable findbugs in Java examples

Posted by jb...@apache.org.
This closes #3534: [BEAM-933] Fix and enable findbugs in Java examples


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/19968690
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/19968690
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/19968690

Branch: refs/heads/DSL_SQL
Commit: 19968690767d72fb384d4b3be899b3fb9efd6471
Parents: 02905c2 f6daad4
Author: Kenneth Knowles <kl...@google.com>
Authored: Mon Jul 17 15:52:24 2017 -0700
Committer: Kenneth Knowles <kl...@google.com>
Committed: Mon Jul 17 15:52:24 2017 -0700

----------------------------------------------------------------------
 examples/java/pom.xml                           | 12 ----------
 .../apache/beam/examples/complete/TfIdf.java    |  3 ++-
 .../examples/complete/TopWikipediaSessions.java | 24 ++++++++++----------
 .../beam/examples/complete/TrafficRoutes.java   | 19 ++++++++++++++++
 .../beam/examples/cookbook/TriggerExample.java  |  6 +++--
 5 files changed, 37 insertions(+), 27 deletions(-)
----------------------------------------------------------------------



[21/50] [abbrv] beam git commit: [BEAM-2084] Adding querying facility for distribution metrics in Java

Posted by jb...@apache.org.
[BEAM-2084] Adding querying facility for distribution metrics in Java


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/a48eefac
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/a48eefac
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/a48eefac

Branch: refs/heads/DSL_SQL
Commit: a48eeface8c5257f34e85c22f312ec03801b0f82
Parents: 7c36318
Author: Pablo <pa...@google.com>
Authored: Thu May 4 14:56:14 2017 -0700
Committer: Ben Chambers <bc...@bchambers-macbookpro2.roam.corp.google.com>
Committed: Tue Jul 18 09:58:47 2017 -0700

----------------------------------------------------------------------
 .../org/apache/beam/examples/WordCount.java     |   4 +
 pom.xml                                         |   2 +-
 .../beam/runners/dataflow/DataflowMetrics.java  | 310 +++++++++++++------
 .../runners/dataflow/DataflowPipelineJob.java   |   4 +
 .../runners/dataflow/DataflowMetricsTest.java   | 174 ++++++++++-
 .../beam/sdk/metrics/MetricResultsMatchers.java |   2 +-
 6 files changed, 388 insertions(+), 108 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/a48eefac/examples/java/src/main/java/org/apache/beam/examples/WordCount.java
----------------------------------------------------------------------
diff --git a/examples/java/src/main/java/org/apache/beam/examples/WordCount.java b/examples/java/src/main/java/org/apache/beam/examples/WordCount.java
index bfa7eb3..2d568ce 100644
--- a/examples/java/src/main/java/org/apache/beam/examples/WordCount.java
+++ b/examples/java/src/main/java/org/apache/beam/examples/WordCount.java
@@ -21,6 +21,7 @@ import org.apache.beam.examples.common.ExampleUtils;
 import org.apache.beam.sdk.Pipeline;
 import org.apache.beam.sdk.io.TextIO;
 import org.apache.beam.sdk.metrics.Counter;
+import org.apache.beam.sdk.metrics.Distribution;
 import org.apache.beam.sdk.metrics.Metrics;
 import org.apache.beam.sdk.options.Default;
 import org.apache.beam.sdk.options.Description;
@@ -88,9 +89,12 @@ public class WordCount {
    */
   static class ExtractWordsFn extends DoFn<String, String> {
     private final Counter emptyLines = Metrics.counter(ExtractWordsFn.class, "emptyLines");
+    private final Distribution lineLenDist = Metrics.distribution(
+        ExtractWordsFn.class, "lineLenDistro");
 
     @ProcessElement
     public void processElement(ProcessContext c) {
+      lineLenDist.update(c.element().length());
       if (c.element().trim().isEmpty()) {
         emptyLines.inc();
       }

http://git-wip-us.apache.org/repos/asf/beam/blob/a48eefac/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index d9ab9ae..d27d367 100644
--- a/pom.xml
+++ b/pom.xml
@@ -112,7 +112,7 @@
     <cloudresourcemanager.version>v1-rev6-1.22.0</cloudresourcemanager.version>
     <pubsubgrpc.version>0.1.0</pubsubgrpc.version>
     <clouddebugger.version>v2-rev8-1.22.0</clouddebugger.version>
-    <dataflow.version>v1b3-rev196-1.22.0</dataflow.version>
+    <dataflow.version>v1b3-rev198-1.20.0</dataflow.version>
     <dataflow.proto.version>0.5.160222</dataflow.proto.version>
     <datastore.client.version>1.4.0</datastore.client.version>
     <datastore.proto.version>1.3.0</datastore.proto.version>

http://git-wip-us.apache.org/repos/asf/beam/blob/a48eefac/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/DataflowMetrics.java
----------------------------------------------------------------------
diff --git a/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/DataflowMetrics.java b/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/DataflowMetrics.java
index 330cc7e..4c9c493 100644
--- a/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/DataflowMetrics.java
+++ b/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/DataflowMetrics.java
@@ -19,7 +19,9 @@ package org.apache.beam.runners.dataflow;
 
 import static com.google.common.base.MoreObjects.firstNonNull;
 
+import com.google.api.client.util.ArrayMap;
 import com.google.api.services.dataflow.model.JobMetrics;
+import com.google.api.services.dataflow.model.MetricUpdate;
 import com.google.auto.value.AutoValue;
 import com.google.common.base.Objects;
 import com.google.common.collect.ImmutableList;
@@ -28,6 +30,7 @@ import java.util.Collections;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.List;
+import javax.annotation.Nullable;
 import org.apache.beam.runners.core.construction.metrics.MetricFiltering;
 import org.apache.beam.runners.core.construction.metrics.MetricKey;
 import org.apache.beam.sdk.metrics.DistributionResult;
@@ -73,39 +76,6 @@ class DataflowMetrics extends MetricResults {
   }
 
   /**
-   * Build an immutable map that serves as a hash key for a metric update.
-   * @return a {@link MetricKey} that can be hashed and used to identify a metric.
-   */
-  private MetricKey metricHashKey(
-      com.google.api.services.dataflow.model.MetricUpdate metricUpdate) {
-    String fullStepName = metricUpdate.getName().getContext().get("step");
-    if (dataflowPipelineJob.transformStepNames == null
-        || !dataflowPipelineJob.transformStepNames.inverse().containsKey(fullStepName)) {
-      // If we can't translate internal step names to user step names, we just skip them
-      // altogether.
-      return null;
-    }
-    fullStepName = dataflowPipelineJob.transformStepNames
-        .inverse().get(fullStepName).getFullName();
-    return MetricKey.create(
-        fullStepName,
-        MetricName.named(
-            metricUpdate.getName().getContext().get("namespace"),
-            metricUpdate.getName().getName()));
-  }
-
-  /**
-   * Check whether a {@link com.google.api.services.dataflow.model.MetricUpdate} is a tentative
-   * update or not.
-   * @return true if update is tentative, false otherwise
-   */
-  private boolean isMetricTentative(
-      com.google.api.services.dataflow.model.MetricUpdate metricUpdate) {
-    return (metricUpdate.getName().getContext().containsKey("tentative")
-        && Objects.equal(metricUpdate.getName().getContext().get("tentative"), "true"));
-  }
-
-  /**
    * Take a list of metric updates coming from the Dataflow service, and format it into a
    * Metrics API MetricQueryResults instance.
    * @param metricUpdates
@@ -114,68 +84,8 @@ class DataflowMetrics extends MetricResults {
   private MetricQueryResults populateMetricQueryResults(
       List<com.google.api.services.dataflow.model.MetricUpdate> metricUpdates,
       MetricsFilter filter) {
-    // Separate metric updates by name and by tentative/committed.
-    HashMap<MetricKey, com.google.api.services.dataflow.model.MetricUpdate>
-        tentativeByName = new HashMap<>();
-    HashMap<MetricKey, com.google.api.services.dataflow.model.MetricUpdate>
-        committedByName = new HashMap<>();
-    HashSet<MetricKey> metricHashKeys = new HashSet<>();
-
-    // If the Context of the metric update does not have a namespace, then these are not
-    // actual metrics counters.
-    for (com.google.api.services.dataflow.model.MetricUpdate update : metricUpdates) {
-      if (Objects.equal(update.getName().getOrigin(), "user")
-          && update.getName().getContext().containsKey("namespace")) {
-        MetricKey key = metricHashKey(update);
-        if (key == null) {
-          continue;
-        }
-        metricHashKeys.add(key);
-        if (isMetricTentative(update)) {
-          tentativeByName.put(key, update);
-        } else {
-          committedByName.put(key, update);
-        }
-      }
-    }
-    // Create the lists with the metric result information.
-    ImmutableList.Builder<MetricResult<Long>> counterResults = ImmutableList.builder();
-    ImmutableList.Builder<MetricResult<DistributionResult>> distributionResults =
-        ImmutableList.builder();
-    ImmutableList.Builder<MetricResult<GaugeResult>> gaugeResults = ImmutableList.builder();
-    for (MetricKey metricKey : metricHashKeys) {
-      if (!MetricFiltering.matches(filter, metricKey)) {
-        // Skip unmatched metrics early.
-        continue;
-      }
-
-      // This code is not robust to evolutions in the types of metrics that can be returned, so
-      // wrap it in a try-catch and log errors.
-      try {
-        String metricName = metricKey.metricName().name();
-        if (metricName.endsWith("[MIN]") || metricName.endsWith("[MAX]")
-            || metricName.endsWith("[MEAN]") || metricName.endsWith("[COUNT]")) {
-          // Skip distribution metrics, as these are not yet properly supported.
-          LOG.warn("Distribution metrics are not yet supported. You can see them in the Dataflow"
-              + " User Interface");
-          continue;
-        }
-
-        String namespace = metricKey.metricName().namespace();
-        String step = metricKey.stepName();
-        Long committed = ((Number) committedByName.get(metricKey).getScalar()).longValue();
-        Long attempted = ((Number) tentativeByName.get(metricKey).getScalar()).longValue();
-        counterResults.add(
-            DataflowMetricResult.create(
-                MetricName.named(namespace, metricName), step, committed, attempted));
-      } catch (Exception e) {
-        LOG.warn("Error handling metric {} for filter {}, skipping result.", metricKey, filter);
-      }
-    }
-    return DataflowMetricQueryResults.create(
-        counterResults.build(),
-        distributionResults.build(),
-        gaugeResults.build());
+    return DataflowMetricQueryResultsFactory.create(dataflowPipelineJob, metricUpdates, filter)
+        .build();
   }
 
   private MetricQueryResults queryServiceForMetrics(MetricsFilter filter) {
@@ -214,6 +124,214 @@ class DataflowMetrics extends MetricResults {
     return result;
   }
 
+  private static class DataflowMetricResultExtractor {
+    private final ImmutableList.Builder<MetricResult<Long>> counterResults;
+    private final ImmutableList.Builder<MetricResult<DistributionResult>> distributionResults;
+    private final ImmutableList.Builder<MetricResult<GaugeResult>> gaugeResults;
+    private final boolean isStreamingJob;
+
+    DataflowMetricResultExtractor(boolean isStreamingJob) {
+      counterResults = ImmutableList.builder();
+      distributionResults = ImmutableList.builder();
+      gaugeResults = ImmutableList.builder();
+      this.isStreamingJob = isStreamingJob;
+    }
+
+    public void addMetricResult(
+        MetricKey metricKey,
+        @Nullable com.google.api.services.dataflow.model.MetricUpdate committed,
+        @Nullable com.google.api.services.dataflow.model.MetricUpdate attempted) {
+      if (committed == null || attempted == null) {
+        LOG.warn(
+            "Metric {} did not have both a committed ({}) and tentative value ({}).",
+            metricKey, committed, attempted);
+      } else if (committed.getDistribution() != null && attempted.getDistribution() != null) {
+        // distribution metric
+        DistributionResult value = getDistributionValue(committed);
+        distributionResults.add(
+            DataflowMetricResult.create(
+                metricKey.metricName(),
+                metricKey.stepName(),
+                isStreamingJob ? null : value, // Committed
+                isStreamingJob ? value : null)); // Attempted
+        /* In Dataflow streaming jobs, only ATTEMPTED metrics are available.
+         * In Dataflow batch jobs, only COMMITTED metrics are available.
+         * Reporting the appropriate metric depending on whether it's a batch/streaming job.
+         */
+      } else if (committed.getScalar() != null && attempted.getScalar() != null) {
+        // counter metric
+        Long value = getCounterValue(committed);
+        counterResults.add(
+            DataflowMetricResult.create(
+                metricKey.metricName(),
+                metricKey.stepName(),
+                isStreamingJob ? null : value, // Committed
+                isStreamingJob ? value : null)); // Attempted
+        /* In Dataflow streaming jobs, only ATTEMPTED metrics are available.
+         * In Dataflow batch jobs, only COMMITTED metrics are available.
+         * Reporting the appropriate metric depending on whether it's a batch/streaming job.
+         */
+      } else {
+        // This is exceptionally unexpected. We expect matching user metrics to only have the
+        // value types provided by the Metrics API.
+        LOG.warn("Unexpected / mismatched metric types."
+            + " Please report JOB ID to Dataflow Support. Metric key: {}."
+            + " Committed / attempted Metric updates: {} / {}",
+            metricKey.toString(), committed.toString(), attempted.toString());
+      }
+    }
+
+    private Long getCounterValue(com.google.api.services.dataflow.model.MetricUpdate metricUpdate) {
+      if (metricUpdate.getScalar() == null) {
+        return 0L;
+      }
+      return ((Number) metricUpdate.getScalar()).longValue();
+    }
+
+    private DistributionResult getDistributionValue(
+        com.google.api.services.dataflow.model.MetricUpdate metricUpdate) {
+      if (metricUpdate.getDistribution() == null) {
+        return DistributionResult.ZERO;
+      }
+      ArrayMap distributionMap = (ArrayMap) metricUpdate.getDistribution();
+      Long count = ((Number) distributionMap.get("count")).longValue();
+      Long min = ((Number) distributionMap.get("min")).longValue();
+      Long max = ((Number) distributionMap.get("max")).longValue();
+      Long sum = ((Number) distributionMap.get("sum")).longValue();
+      return DistributionResult.create(sum, count, min, max);
+    }
+
+    public Iterable<MetricResult<DistributionResult>> getDistributionResults() {
+      return distributionResults.build();
+    }
+
+    public Iterable<MetricResult<Long>> getCounterResults() {
+      return counterResults.build();
+    }
+
+    public Iterable<MetricResult<GaugeResult>> getGaugeResults() {
+      return gaugeResults.build();
+    }
+  }
+
+  private static class DataflowMetricQueryResultsFactory {
+    private final Iterable<com.google.api.services.dataflow.model.MetricUpdate> metricUpdates;
+    private final MetricsFilter filter;
+    private final HashMap<MetricKey, com.google.api.services.dataflow.model.MetricUpdate>
+        tentativeByName;
+    private final HashMap<MetricKey, com.google.api.services.dataflow.model.MetricUpdate>
+        committedByName;
+    private final HashSet<MetricKey> metricHashKeys;
+    private final DataflowPipelineJob dataflowPipelineJob;
+
+    public static DataflowMetricQueryResultsFactory create(DataflowPipelineJob dataflowPipelineJob,
+        Iterable<com.google.api.services.dataflow.model.MetricUpdate> metricUpdates,
+        MetricsFilter filter) {
+      return new DataflowMetricQueryResultsFactory(dataflowPipelineJob, metricUpdates, filter);
+    }
+
+    private DataflowMetricQueryResultsFactory(DataflowPipelineJob dataflowPipelineJob,
+        Iterable<com.google.api.services.dataflow.model.MetricUpdate> metricUpdates,
+        MetricsFilter filter) {
+      this.dataflowPipelineJob = dataflowPipelineJob;
+      this.metricUpdates = metricUpdates;
+      this.filter = filter;
+
+      tentativeByName = new HashMap<>();
+      committedByName = new HashMap<>();
+      metricHashKeys = new HashSet<>();
+    }
+
+    /**
+     * Check whether a {@link com.google.api.services.dataflow.model.MetricUpdate} is a tentative
+     * update or not.
+     * @return true if update is tentative, false otherwise
+     */
+    private boolean isMetricTentative(
+        com.google.api.services.dataflow.model.MetricUpdate metricUpdate) {
+      return (metricUpdate.getName().getContext().containsKey("tentative")
+          && Objects.equal(metricUpdate.getName().getContext().get("tentative"), "true"));
+    }
+
+    /**
+     * Build a {@link MetricKey} that serves as a hash key for a metric update.
+     * @return a {@link MetricKey} that can be hashed and used to identify a metric.
+     */
+    private MetricKey getMetricHashKey(
+        com.google.api.services.dataflow.model.MetricUpdate metricUpdate) {
+      String fullStepName = metricUpdate.getName().getContext().get("step");
+      if (dataflowPipelineJob.transformStepNames == null
+          || !dataflowPipelineJob.transformStepNames.inverse().containsKey(fullStepName)) {
+        // If we can't translate internal step names to user step names, we just skip them
+        // altogether.
+        return null;
+      }
+      fullStepName = dataflowPipelineJob.transformStepNames
+          .inverse().get(fullStepName).getFullName();
+      return MetricKey.create(
+          fullStepName,
+          MetricName.named(
+              metricUpdate.getName().getContext().get("namespace"),
+              metricUpdate.getName().getName()));
+    }
+
+    private void buildMetricsIndex() {
+      // If the Context of the metric update does not have a namespace, then these are not
+      // actual metrics counters.
+      for (com.google.api.services.dataflow.model.MetricUpdate update : metricUpdates) {
+        if (update.getName().getOrigin() != null
+            && (!update.getName().getOrigin().toLowerCase().equals("user")
+            || !update.getName().getContext().containsKey("namespace"))) {
+          // Skip non-user metrics, which should have both a "user" origin and a namespace.
+          continue;
+        }
+
+        MetricKey updateKey = getMetricHashKey(update);
+        if (updateKey == null || !MetricFiltering.matches(filter, updateKey)) {
+          // Skip unmatched metrics early.
+          continue;
+        }
+
+        metricHashKeys.add(updateKey);
+        if (isMetricTentative(update)) {
+          MetricUpdate previousUpdate = tentativeByName.put(updateKey, update);
+          if (previousUpdate != null) {
+            LOG.warn("Metric {} already had a tentative value of {}", updateKey, previousUpdate);
+          }
+        } else {
+          MetricUpdate previousUpdate = committedByName.put(updateKey, update);
+          if (previousUpdate != null) {
+            LOG.warn("Metric {} already had a committed value of {}", updateKey, previousUpdate);
+          }
+        }
+      }
+    }
+
+    public MetricQueryResults build() {
+      buildMetricsIndex();
+
+      DataflowMetricResultExtractor extractor = new DataflowMetricResultExtractor(
+          dataflowPipelineJob.getDataflowOptions().isStreaming());
+      for (MetricKey metricKey : metricHashKeys) {
+        String metricName = metricKey.metricName().name();
+        if (metricName.endsWith("[MIN]") || metricName.endsWith("[MAX]")
+            || metricName.endsWith("[MEAN]") || metricName.endsWith("[COUNT]")) {
+          // Skip distribution metrics, as these are not yet properly supported.
+          // TODO: remove this when distributions stop being broken up for the UI.
+          continue;
+        }
+
+        extractor.addMetricResult(metricKey,
+            committedByName.get(metricKey),
+            tentativeByName.get(metricKey));
+      }
+      return DataflowMetricQueryResults.create(
+          extractor.getCounterResults(),
+          extractor.getDistributionResults(),
+          extractor.getGaugeResults());
+    }
+  }
+
   @AutoValue
   abstract static class DataflowMetricQueryResults implements MetricQueryResults {
     public static MetricQueryResults create(
@@ -231,7 +349,9 @@ class DataflowMetrics extends MetricResults {
     // and the generated constructor is usable and consistent
     public abstract MetricName name();
     public abstract String step();
+    @Nullable
     public abstract T committed();
+    @Nullable
     public abstract T attempted();
 
     public static <T> MetricResult<T> create(MetricName name, String scope,

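For context on how these now-nullable values surface to a caller: after this change a Dataflow batch job only populates committed() and a streaming job only populates attempted(), so code reading the results should tolerate a null on the other side. A minimal sketch, not part of this commit, assuming `job` is the PipelineResult of a Dataflow run (namespace/name match the test fixtures below):

    MetricQueryResults results = job.metrics().queryMetrics(
        MetricsFilter.builder()
            .addNameFilter(MetricNameFilter.named("counterNamespace", "counterName"))
            .build());
    for (MetricResult<Long> counter : results.counters()) {
      Long committed = counter.committed();  // null for streaming jobs
      Long attempted = counter.attempted();  // null for batch jobs
      System.out.println(counter.name() + " @ " + counter.step()
          + ": committed=" + committed + ", attempted=" + attempted);
    }
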
http://git-wip-us.apache.org/repos/asf/beam/blob/a48eefac/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/DataflowPipelineJob.java
----------------------------------------------------------------------
diff --git a/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/DataflowPipelineJob.java b/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/DataflowPipelineJob.java
index 2d23983..e30d426 100644
--- a/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/DataflowPipelineJob.java
+++ b/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/DataflowPipelineJob.java
@@ -164,6 +164,10 @@ public class DataflowPipelineJob implements PipelineResult {
     return dataflowOptions.getProject();
   }
 
+  public DataflowPipelineOptions getDataflowOptions() {
+    return dataflowOptions;
+  }
+
   /**
    * Returns a new {@link DataflowPipelineJob} for the job that replaced this one, if applicable.
    *

http://git-wip-us.apache.org/repos/asf/beam/blob/a48eefac/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/DataflowMetricsTest.java
----------------------------------------------------------------------
diff --git a/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/DataflowMetricsTest.java b/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/DataflowMetricsTest.java
index c3c741c..05fe687 100644
--- a/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/DataflowMetricsTest.java
+++ b/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/DataflowMetricsTest.java
@@ -20,6 +20,7 @@ package org.apache.beam.runners.dataflow;
 import static org.apache.beam.sdk.metrics.MetricResultsMatchers.attemptedMetricsResult;
 import static org.apache.beam.sdk.metrics.MetricResultsMatchers.committedMetricsResult;
 import static org.hamcrest.MatcherAssert.assertThat;
+import static org.hamcrest.Matchers.contains;
 import static org.hamcrest.Matchers.containsInAnyOrder;
 import static org.hamcrest.Matchers.empty;
 import static org.hamcrest.Matchers.is;
@@ -28,6 +29,7 @@ import static org.mockito.Mockito.times;
 import static org.mockito.Mockito.verify;
 import static org.mockito.Mockito.when;
 
+import com.google.api.client.util.ArrayMap;
 import com.google.api.services.dataflow.Dataflow;
 import com.google.api.services.dataflow.model.Job;
 import com.google.api.services.dataflow.model.JobMetrics;
@@ -38,9 +40,11 @@ import com.google.common.collect.ImmutableList;
 import com.google.common.collect.ImmutableMap;
 import java.io.IOException;
 import java.math.BigDecimal;
+import org.apache.beam.runners.dataflow.options.DataflowPipelineOptions;
 import org.apache.beam.sdk.PipelineResult.State;
 import org.apache.beam.sdk.extensions.gcp.auth.TestCredential;
 import org.apache.beam.sdk.extensions.gcp.storage.NoopPathValidator;
+import org.apache.beam.sdk.metrics.DistributionResult;
 import org.apache.beam.sdk.metrics.MetricQueryResults;
 import org.apache.beam.sdk.options.PipelineOptionsFactory;
 import org.apache.beam.sdk.runners.AppliedPTransform;
@@ -95,6 +99,9 @@ public class DataflowMetricsTest {
     modelJob.setCurrentState(State.RUNNING.toString());
 
     DataflowPipelineJob job = mock(DataflowPipelineJob.class);
+    DataflowPipelineOptions options = mock(DataflowPipelineOptions.class);
+    when(options.isStreaming()).thenReturn(false);
+    when(job.getDataflowOptions()).thenReturn(options);
     when(job.getState()).thenReturn(State.RUNNING);
     job.jobId = JOB_ID;
 
@@ -115,6 +122,9 @@ public class DataflowMetricsTest {
     modelJob.setCurrentState(State.RUNNING.toString());
 
     DataflowPipelineJob job = mock(DataflowPipelineJob.class);
+    DataflowPipelineOptions options = mock(DataflowPipelineOptions.class);
+    when(options.isStreaming()).thenReturn(false);
+    when(job.getDataflowOptions()).thenReturn(options);
     when(job.getState()).thenReturn(State.DONE);
     job.jobId = JOB_ID;
 
@@ -131,11 +141,8 @@ public class DataflowMetricsTest {
     verify(dataflowClient, times(1)).getJobMetrics(JOB_ID);
   }
 
-  private MetricUpdate makeCounterMetricUpdate(String name, String namespace, String step,
-      long scalar, boolean tentative) {
-    MetricUpdate update = new MetricUpdate();
-    update.setScalar(new BigDecimal(scalar));
-
+  private MetricUpdate setStructuredName(MetricUpdate update, String name, String namespace,
+      String step, boolean tentative) {
     MetricStructuredName structuredName = new MetricStructuredName();
     structuredName.setName(name);
     structuredName.setOrigin("user");
@@ -150,10 +157,34 @@ public class DataflowMetricsTest {
     return update;
   }
 
+  private MetricUpdate makeDistributionMetricUpdate(String name, String namespace, String step,
+      Long sum, Long count, Long min, Long max, boolean tentative) {
+    MetricUpdate update = new MetricUpdate();
+    ArrayMap<String, BigDecimal> distribution = ArrayMap.create();
+    distribution.add("count", new BigDecimal(count));
+    distribution.add("mean", new BigDecimal(sum / count));
+    distribution.add("sum", new BigDecimal(sum));
+    distribution.add("min", new BigDecimal(min));
+    distribution.add("max", new BigDecimal(max));
+    update.setDistribution(distribution);
+    return setStructuredName(update, name, namespace, step, tentative);
+  }
+
+  private MetricUpdate makeCounterMetricUpdate(String name, String namespace, String step,
+      long scalar, boolean tentative) {
+    MetricUpdate update = new MetricUpdate();
+    update.setScalar(new BigDecimal(scalar));
+    return setStructuredName(update, name, namespace, step, tentative);
+
+  }
+
   @Test
   public void testSingleCounterUpdates() throws IOException {
     JobMetrics jobMetrics = new JobMetrics();
     DataflowPipelineJob job = mock(DataflowPipelineJob.class);
+    DataflowPipelineOptions options = mock(DataflowPipelineOptions.class);
+    when(options.isStreaming()).thenReturn(false);
+    when(job.getDataflowOptions()).thenReturn(options);
     when(job.getState()).thenReturn(State.RUNNING);
     job.jobId = JOB_ID;
 
@@ -179,7 +210,7 @@ public class DataflowMetricsTest {
     DataflowMetrics dataflowMetrics = new DataflowMetrics(job, dataflowClient);
     MetricQueryResults result = dataflowMetrics.queryMetrics(null);
     assertThat(result.counters(), containsInAnyOrder(
-        attemptedMetricsResult("counterNamespace", "counterName", "myStepName", 1233L)));
+        attemptedMetricsResult("counterNamespace", "counterName", "myStepName", (Long) null)));
     assertThat(result.counters(), containsInAnyOrder(
         committedMetricsResult("counterNamespace", "counterName", "myStepName", 1234L)));
   }
@@ -190,6 +221,9 @@ public class DataflowMetricsTest {
     DataflowClient dataflowClient = mock(DataflowClient.class);
     when(dataflowClient.getJobMetrics(JOB_ID)).thenReturn(jobMetrics);
     DataflowPipelineJob job = mock(DataflowPipelineJob.class);
+    DataflowPipelineOptions options = mock(DataflowPipelineOptions.class);
+    when(options.isStreaming()).thenReturn(false);
+    when(job.getDataflowOptions()).thenReturn(options);
     when(job.getState()).thenReturn(State.RUNNING);
     job.jobId = JOB_ID;
 
@@ -202,24 +236,97 @@ public class DataflowMetricsTest {
     // the job metrics results.
     jobMetrics.setMetrics(ImmutableList.of(
         makeCounterMetricUpdate("counterName", "counterNamespace", "s2", 1233L, false),
-        makeCounterMetricUpdate("counterName", "counterNamespace", "s2", 1234L, true),
+        makeCounterMetricUpdate("counterName", "counterNamespace", "s2", 1233L, true),
         makeCounterMetricUpdate("otherCounter[MIN]", "otherNamespace", "s2", 0L, false),
         makeCounterMetricUpdate("otherCounter[MIN]", "otherNamespace", "s2", 0L, true)));
 
     DataflowMetrics dataflowMetrics = new DataflowMetrics(job, dataflowClient);
     MetricQueryResults result = dataflowMetrics.queryMetrics(null);
     assertThat(result.counters(), containsInAnyOrder(
-        attemptedMetricsResult("counterNamespace", "counterName", "myStepName", 1234L)));
+        attemptedMetricsResult("counterNamespace", "counterName", "myStepName", (Long) null)));
     assertThat(result.counters(), containsInAnyOrder(
         committedMetricsResult("counterNamespace", "counterName", "myStepName", 1233L)));
   }
 
   @Test
+  public void testDistributionUpdates() throws IOException {
+    JobMetrics jobMetrics = new JobMetrics();
+    DataflowClient dataflowClient = mock(DataflowClient.class);
+    when(dataflowClient.getJobMetrics(JOB_ID)).thenReturn(jobMetrics);
+    DataflowPipelineJob job = mock(DataflowPipelineJob.class);
+    DataflowPipelineOptions options = mock(DataflowPipelineOptions.class);
+    when(options.isStreaming()).thenReturn(false);
+    when(job.getDataflowOptions()).thenReturn(options);
+    when(job.getState()).thenReturn(State.RUNNING);
+    job.jobId = JOB_ID;
+
+    AppliedPTransform<?, ?, ?> myStep2 = mock(AppliedPTransform.class);
+    when(myStep2.getFullName()).thenReturn("myStepName");
+    job.transformStepNames = HashBiMap.create();
+    job.transformStepNames.put(myStep2, "s2");
+
+    // The parser relies on the fact that one tentative and one committed metric update exist in
+    // the job metrics results.
+    jobMetrics.setMetrics(ImmutableList.of(
+        makeDistributionMetricUpdate("distributionName", "distributionNamespace", "s2",
+            18L, 2L, 2L, 16L, false),
+        makeDistributionMetricUpdate("distributionName", "distributionNamespace", "s2",
+            18L, 2L, 2L, 16L, true)));
+
+    DataflowMetrics dataflowMetrics = new DataflowMetrics(job, dataflowClient);
+    MetricQueryResults result = dataflowMetrics.queryMetrics(null);
+    assertThat(result.distributions(), contains(
+        attemptedMetricsResult("distributionNamespace", "distributionName", "myStepName",
+            (DistributionResult) null)));
+    assertThat(result.distributions(), contains(
+        committedMetricsResult("distributionNamespace", "distributionName", "myStepName",
+            DistributionResult.create(18, 2, 2, 16))));
+  }
+
+  @Test
+  public void testDistributionUpdatesStreaming() throws IOException {
+    JobMetrics jobMetrics = new JobMetrics();
+    DataflowClient dataflowClient = mock(DataflowClient.class);
+    when(dataflowClient.getJobMetrics(JOB_ID)).thenReturn(jobMetrics);
+    DataflowPipelineJob job = mock(DataflowPipelineJob.class);
+    DataflowPipelineOptions options = mock(DataflowPipelineOptions.class);
+    when(options.isStreaming()).thenReturn(true);
+    when(job.getDataflowOptions()).thenReturn(options);
+    when(job.getState()).thenReturn(State.RUNNING);
+    job.jobId = JOB_ID;
+
+    AppliedPTransform<?, ?, ?> myStep2 = mock(AppliedPTransform.class);
+    when(myStep2.getFullName()).thenReturn("myStepName");
+    job.transformStepNames = HashBiMap.create();
+    job.transformStepNames.put(myStep2, "s2");
+
+    // The parser relies on the fact that one tentative and one committed metric update exist in
+    // the job metrics results.
+    jobMetrics.setMetrics(ImmutableList.of(
+        makeDistributionMetricUpdate("distributionName", "distributionNamespace", "s2",
+            18L, 2L, 2L, 16L, false),
+        makeDistributionMetricUpdate("distributionName", "distributionNamespace", "s2",
+            18L, 2L, 2L, 16L, true)));
+
+    DataflowMetrics dataflowMetrics = new DataflowMetrics(job, dataflowClient);
+    MetricQueryResults result = dataflowMetrics.queryMetrics(null);
+    assertThat(result.distributions(), contains(
+        committedMetricsResult("distributionNamespace", "distributionName", "myStepName",
+            (DistributionResult) null)));
+    assertThat(result.distributions(), contains(
+        attemptedMetricsResult("distributionNamespace", "distributionName", "myStepName",
+            DistributionResult.create(18, 2, 2, 16))));
+  }
+
+  @Test
   public void testMultipleCounterUpdates() throws IOException {
     JobMetrics jobMetrics = new JobMetrics();
     DataflowClient dataflowClient = mock(DataflowClient.class);
     when(dataflowClient.getJobMetrics(JOB_ID)).thenReturn(jobMetrics);
     DataflowPipelineJob job = mock(DataflowPipelineJob.class);
+    DataflowPipelineOptions options = mock(DataflowPipelineOptions.class);
+    when(options.isStreaming()).thenReturn(false);
+    when(job.getDataflowOptions()).thenReturn(options);
     when(job.getState()).thenReturn(State.RUNNING);
     job.jobId = JOB_ID;
 
@@ -251,12 +358,57 @@ public class DataflowMetricsTest {
     DataflowMetrics dataflowMetrics = new DataflowMetrics(job, dataflowClient);
     MetricQueryResults result = dataflowMetrics.queryMetrics(null);
     assertThat(result.counters(), containsInAnyOrder(
-        attemptedMetricsResult("counterNamespace", "counterName", "myStepName", 1234L),
-        attemptedMetricsResult("otherNamespace", "otherCounter", "myStepName3", 12L),
-        attemptedMetricsResult("otherNamespace", "counterName", "myStepName4", 1233L)));
+        attemptedMetricsResult("counterNamespace", "counterName", "myStepName", (Long) null),
+        attemptedMetricsResult("otherNamespace", "otherCounter", "myStepName3", (Long) null),
+        attemptedMetricsResult("otherNamespace", "counterName", "myStepName4", (Long) null)));
     assertThat(result.counters(), containsInAnyOrder(
         committedMetricsResult("counterNamespace", "counterName", "myStepName", 1233L),
         committedMetricsResult("otherNamespace", "otherCounter", "myStepName3", 12L),
         committedMetricsResult("otherNamespace", "counterName", "myStepName4", 1200L)));
   }
+
+  @Test
+  public void testMultipleCounterUpdatesStreaming() throws IOException {
+    JobMetrics jobMetrics = new JobMetrics();
+    DataflowClient dataflowClient = mock(DataflowClient.class);
+    when(dataflowClient.getJobMetrics(JOB_ID)).thenReturn(jobMetrics);
+    DataflowPipelineJob job = mock(DataflowPipelineJob.class);
+    DataflowPipelineOptions options = mock(DataflowPipelineOptions.class);
+    when(options.isStreaming()).thenReturn(true);
+    when(job.getDataflowOptions()).thenReturn(options);
+    when(job.getState()).thenReturn(State.RUNNING);
+    job.jobId = JOB_ID;
+
+    AppliedPTransform<?, ?, ?> myStep2 = mock(AppliedPTransform.class);
+    when(myStep2.getFullName()).thenReturn("myStepName");
+    job.transformStepNames = HashBiMap.create();
+    job.transformStepNames.put(myStep2, "s2");
+    AppliedPTransform<?, ?, ?> myStep3 = mock(AppliedPTransform.class);
+    when(myStep3.getFullName()).thenReturn("myStepName3");
+    job.transformStepNames.put(myStep3, "s3");
+    AppliedPTransform<?, ?, ?> myStep4 = mock(AppliedPTransform.class);
+    when(myStep4.getFullName()).thenReturn("myStepName4");
+    job.transformStepNames.put(myStep4, "s4");
+
+    // The parser relies on the fact that one tentative and one committed metric update exist in
+    // the job metrics results.
+    jobMetrics.setMetrics(ImmutableList.of(
+        makeCounterMetricUpdate("counterName", "counterNamespace", "s2", 1233L, false),
+        makeCounterMetricUpdate("counterName", "counterNamespace", "s2", 1234L, true),
+        makeCounterMetricUpdate("otherCounter", "otherNamespace", "s3", 12L, false),
+        makeCounterMetricUpdate("otherCounter", "otherNamespace", "s3", 12L, true),
+        makeCounterMetricUpdate("counterName", "otherNamespace", "s4", 1200L, false),
+        makeCounterMetricUpdate("counterName", "otherNamespace", "s4", 1233L, true)));
+
+    DataflowMetrics dataflowMetrics = new DataflowMetrics(job, dataflowClient);
+    MetricQueryResults result = dataflowMetrics.queryMetrics(null);
+    assertThat(result.counters(), containsInAnyOrder(
+        committedMetricsResult("counterNamespace", "counterName", "myStepName", (Long) null),
+        committedMetricsResult("otherNamespace", "otherCounter", "myStepName3", (Long) null),
+        committedMetricsResult("otherNamespace", "counterName", "myStepName4", (Long) null)));
+    assertThat(result.counters(), containsInAnyOrder(
+        attemptedMetricsResult("counterNamespace", "counterName", "myStepName", 1233L),
+        attemptedMetricsResult("otherNamespace", "otherCounter", "myStepName3", 12L),
+        attemptedMetricsResult("otherNamespace", "counterName", "myStepName4", 1200L)));
+  }
 }
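
The distribution updates these tests construct come, in a real pipeline, from user code updating a Distribution metric. A minimal sketch of that producing side, purely for illustration (the DoFn and element type are assumptions, not part of this commit):

    // Illustrative user code whose updates surface as the sum/count/min/max
    // values parsed by DataflowMetrics above.
    static class RecordSizeFn extends DoFn<String, String> {
      private final Distribution sizes =
          Metrics.distribution("distributionNamespace", "distributionName");

      @ProcessElement
      public void processElement(ProcessContext c) {
        sizes.update(c.element().length());
        c.output(c.element());
      }
    }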

http://git-wip-us.apache.org/repos/asf/beam/blob/a48eefac/sdks/java/core/src/test/java/org/apache/beam/sdk/metrics/MetricResultsMatchers.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/metrics/MetricResultsMatchers.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/metrics/MetricResultsMatchers.java
index 5031952..030a759 100644
--- a/sdks/java/core/src/test/java/org/apache/beam/sdk/metrics/MetricResultsMatchers.java
+++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/metrics/MetricResultsMatchers.java
@@ -96,7 +96,7 @@ public class MetricResultsMatchers {
     if (result1 instanceof GaugeResult) {
       return (((GaugeResult) result1).value()) == (((GaugeResult) result2).value());
     } else {
-      return result1.equals(result2);
+      return Objects.equals(result1, result2);
     }
   }
 

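The switch from result1.equals(result2) to Objects.equals(result1, result2) is what lets these matchers compare against the null committed/attempted values that DataflowMetricResult can now return: java.util.Objects.equals is null-safe, for example:

    Objects.equals(null, 1233L);   // false, no NullPointerException
    Objects.equals(null, null);    // true
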

[37/50] [abbrv] beam git commit: This closes #3584: [BEAM-2532] add a Serializable TableSchema Supplier in BigQuerySourceBase

Posted by jb...@apache.org.
This closes #3584: [BEAM-2532] add a Serializable TableSchema Supplier in BigQuerySourceBase


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/7fde976d
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/7fde976d
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/7fde976d

Branch: refs/heads/DSL_SQL
Commit: 7fde976d14fe697dd88d2b161540c73d5cb01517
Parents: d510175 e86c004
Author: Eugene Kirpichov <ki...@google.com>
Authored: Tue Jul 18 22:33:58 2017 -0700
Committer: Eugene Kirpichov <ki...@google.com>
Committed: Tue Jul 18 22:33:58 2017 -0700

----------------------------------------------------------------------
 .../sdk/io/gcp/bigquery/BigQuerySourceBase.java  | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
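
The diff body for this change is not reproduced here. As a rough sketch of the idea, a schema supplier that travels with a serialized source typically combines Serializable with Supplier and carries the schema in a serializable form, since TableSchema itself is not java.io.Serializable. The names and the JSON round-trip below are assumptions for illustration only, not the actual BigQuerySourceBase change:

    interface SerializableSchemaSupplier extends Serializable, Supplier<TableSchema> {}

    class JsonSchemaSupplier implements SerializableSchemaSupplier {
      private final String jsonSchema;  // TableSchema serialized as JSON

      JsonSchemaSupplier(String jsonSchema) {
        this.jsonSchema = jsonSchema;
      }

      @Override
      public TableSchema get() {
        try {
          return new JacksonFactory().fromString(jsonSchema, TableSchema.class);
        } catch (IOException e) {
          throw new RuntimeException("Unable to parse table schema", e);
        }
      }
    }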



[06/50] [abbrv] beam git commit: Fix split package in SDK harness

Posted by jb...@apache.org.
http://git-wip-us.apache.org/repos/asf/beam/blob/f1b4700f/sdks/java/harness/src/main/java/org/apache/beam/runners/core/FnApiDoFnRunner.java
----------------------------------------------------------------------
diff --git a/sdks/java/harness/src/main/java/org/apache/beam/runners/core/FnApiDoFnRunner.java b/sdks/java/harness/src/main/java/org/apache/beam/runners/core/FnApiDoFnRunner.java
deleted file mode 100644
index b3cf3a7..0000000
--- a/sdks/java/harness/src/main/java/org/apache/beam/runners/core/FnApiDoFnRunner.java
+++ /dev/null
@@ -1,547 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.beam.runners.core;
-
-import static com.google.common.base.Preconditions.checkArgument;
-
-import com.google.auto.service.AutoService;
-import com.google.common.collect.Collections2;
-import com.google.common.collect.ImmutableMap;
-import com.google.common.collect.ImmutableMultimap;
-import com.google.common.collect.Multimap;
-import com.google.protobuf.ByteString;
-import com.google.protobuf.BytesValue;
-import com.google.protobuf.InvalidProtocolBufferException;
-import java.util.Collection;
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.Map;
-import java.util.Objects;
-import java.util.function.Consumer;
-import java.util.function.Supplier;
-import org.apache.beam.fn.harness.data.BeamFnDataClient;
-import org.apache.beam.fn.harness.fn.ThrowingConsumer;
-import org.apache.beam.fn.harness.fn.ThrowingRunnable;
-import org.apache.beam.runners.core.construction.ParDoTranslation;
-import org.apache.beam.runners.dataflow.util.DoFnInfo;
-import org.apache.beam.sdk.common.runner.v1.RunnerApi;
-import org.apache.beam.sdk.options.PipelineOptions;
-import org.apache.beam.sdk.state.State;
-import org.apache.beam.sdk.state.TimeDomain;
-import org.apache.beam.sdk.state.Timer;
-import org.apache.beam.sdk.transforms.DoFn;
-import org.apache.beam.sdk.transforms.DoFn.OnTimerContext;
-import org.apache.beam.sdk.transforms.DoFn.ProcessContext;
-import org.apache.beam.sdk.transforms.reflect.DoFnInvoker;
-import org.apache.beam.sdk.transforms.reflect.DoFnInvokers;
-import org.apache.beam.sdk.transforms.splittabledofn.RestrictionTracker;
-import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
-import org.apache.beam.sdk.transforms.windowing.PaneInfo;
-import org.apache.beam.sdk.util.SerializableUtils;
-import org.apache.beam.sdk.util.UserCodeException;
-import org.apache.beam.sdk.util.WindowedValue;
-import org.apache.beam.sdk.values.PCollectionView;
-import org.apache.beam.sdk.values.TupleTag;
-import org.apache.beam.sdk.values.WindowingStrategy;
-import org.joda.time.Instant;
-
-/**
- * A {@link DoFnRunner} specific to integrating with the Fn Api. This is to remove the layers
- * of abstraction caused by StateInternals/TimerInternals since they model state and timer
- * concepts differently.
- */
-public class FnApiDoFnRunner<InputT, OutputT> implements DoFnRunner<InputT, OutputT> {
-  /**
-   * A registrar which provides a factory to handle Java {@link DoFn}s.
-   */
-  @AutoService(PTransformRunnerFactory.Registrar.class)
-  public static class Registrar implements
-      PTransformRunnerFactory.Registrar {
-
-    @Override
-    public Map<String, PTransformRunnerFactory> getPTransformRunnerFactories() {
-      return ImmutableMap.of(ParDoTranslation.CUSTOM_JAVA_DO_FN_URN, new Factory());
-    }
-  }
-
-  /** A factory for {@link FnApiDoFnRunner}. */
-  static class Factory<InputT, OutputT>
-      implements PTransformRunnerFactory<DoFnRunner<InputT, OutputT>> {
-
-    @Override
-    public DoFnRunner<InputT, OutputT> createRunnerForPTransform(
-        PipelineOptions pipelineOptions,
-        BeamFnDataClient beamFnDataClient,
-        String pTransformId,
-        RunnerApi.PTransform pTransform,
-        Supplier<String> processBundleInstructionId,
-        Map<String, RunnerApi.PCollection> pCollections,
-        Map<String, RunnerApi.Coder> coders,
-        Multimap<String, ThrowingConsumer<WindowedValue<?>>> pCollectionIdsToConsumers,
-        Consumer<ThrowingRunnable> addStartFunction,
-        Consumer<ThrowingRunnable> addFinishFunction) {
-
-      // For every output PCollection, create a map from output name to Consumer
-      ImmutableMap.Builder<String, Collection<ThrowingConsumer<WindowedValue<?>>>>
-          outputMapBuilder = ImmutableMap.builder();
-      for (Map.Entry<String, String> entry : pTransform.getOutputsMap().entrySet()) {
-        outputMapBuilder.put(
-            entry.getKey(),
-            pCollectionIdsToConsumers.get(entry.getValue()));
-      }
-      ImmutableMap<String, Collection<ThrowingConsumer<WindowedValue<?>>>> outputMap =
-          outputMapBuilder.build();
-
-      // Get the DoFnInfo from the serialized blob.
-      ByteString serializedFn;
-      try {
-        serializedFn = pTransform.getSpec().getParameter().unpack(BytesValue.class).getValue();
-      } catch (InvalidProtocolBufferException e) {
-        throw new IllegalArgumentException(
-            String.format("Unable to unwrap DoFn %s", pTransform.getSpec()), e);
-      }
-      @SuppressWarnings({"unchecked", "rawtypes"})
-      DoFnInfo<InputT, OutputT> doFnInfo = (DoFnInfo) SerializableUtils.deserializeFromByteArray(
-          serializedFn.toByteArray(), "DoFnInfo");
-
-      // Verify that the DoFnInfo tag to output map matches the output map on the PTransform.
-      checkArgument(
-          Objects.equals(
-              new HashSet<>(Collections2.transform(outputMap.keySet(), Long::parseLong)),
-              doFnInfo.getOutputMap().keySet()),
-          "Unexpected mismatch between transform output map %s and DoFnInfo output map %s.",
-          outputMap.keySet(),
-          doFnInfo.getOutputMap());
-
-      ImmutableMultimap.Builder<TupleTag<?>,
-          ThrowingConsumer<WindowedValue<?>>> tagToOutputMapBuilder =
-          ImmutableMultimap.builder();
-      for (Map.Entry<Long, TupleTag<?>> entry : doFnInfo.getOutputMap().entrySet()) {
-        @SuppressWarnings({"unchecked", "rawtypes"})
-        Collection<ThrowingConsumer<WindowedValue<?>>> consumers =
-            outputMap.get(Long.toString(entry.getKey()));
-        tagToOutputMapBuilder.putAll(entry.getValue(), consumers);
-      }
-
-      ImmutableMultimap<TupleTag<?>, ThrowingConsumer<WindowedValue<?>>> tagToOutputMap =
-          tagToOutputMapBuilder.build();
-
-      @SuppressWarnings({"unchecked", "rawtypes"})
-      DoFnRunner<InputT, OutputT> runner = new FnApiDoFnRunner<>(
-          pipelineOptions,
-          doFnInfo.getDoFn(),
-          (Collection<ThrowingConsumer<WindowedValue<OutputT>>>) (Collection)
-              tagToOutputMap.get(doFnInfo.getOutputMap().get(doFnInfo.getMainOutput())),
-          tagToOutputMap,
-          doFnInfo.getWindowingStrategy());
-
-      // Register the appropriate handlers.
-      addStartFunction.accept(runner::startBundle);
-      for (String pcollectionId : pTransform.getInputsMap().values()) {
-        pCollectionIdsToConsumers.put(
-            pcollectionId,
-            (ThrowingConsumer) (ThrowingConsumer<WindowedValue<InputT>>) runner::processElement);
-      }
-      addFinishFunction.accept(runner::finishBundle);
-      return runner;
-    }
-  }
-
-  //////////////////////////////////////////////////////////////////////////////////////////////////
-
-  private final PipelineOptions pipelineOptions;
-  private final DoFn<InputT, OutputT> doFn;
-  private final Collection<ThrowingConsumer<WindowedValue<OutputT>>> mainOutputConsumers;
-  private final Multimap<TupleTag<?>, ThrowingConsumer<WindowedValue<?>>> outputMap;
-  private final DoFnInvoker<InputT, OutputT> doFnInvoker;
-  private final StartBundleContext startBundleContext;
-  private final ProcessBundleContext processBundleContext;
-  private final FinishBundleContext finishBundleContext;
-
-  /**
-   * The lifetime of this member is only valid during {@link #processElement(WindowedValue)}.
-   */
-  private WindowedValue<InputT> currentElement;
-
-  /**
-   * The lifetime of this member is only valid during {@link #processElement(WindowedValue)}.
-   */
-  private BoundedWindow currentWindow;
-
-  FnApiDoFnRunner(
-      PipelineOptions pipelineOptions,
-      DoFn<InputT, OutputT> doFn,
-      Collection<ThrowingConsumer<WindowedValue<OutputT>>> mainOutputConsumers,
-      Multimap<TupleTag<?>, ThrowingConsumer<WindowedValue<?>>> outputMap,
-      WindowingStrategy windowingStrategy) {
-    this.pipelineOptions = pipelineOptions;
-    this.doFn = doFn;
-    this.mainOutputConsumers = mainOutputConsumers;
-    this.outputMap = outputMap;
-    this.doFnInvoker = DoFnInvokers.invokerFor(doFn);
-    this.startBundleContext = new StartBundleContext();
-    this.processBundleContext = new ProcessBundleContext();
-    this.finishBundleContext = new FinishBundleContext();
-  }
-
-  @Override
-  public void startBundle() {
-    doFnInvoker.invokeStartBundle(startBundleContext);
-  }
-
-  @Override
-  public void processElement(WindowedValue<InputT> elem) {
-    currentElement = elem;
-    try {
-      Iterator<BoundedWindow> windowIterator =
-          (Iterator<BoundedWindow>) elem.getWindows().iterator();
-      while (windowIterator.hasNext()) {
-        currentWindow = windowIterator.next();
-        doFnInvoker.invokeProcessElement(processBundleContext);
-      }
-    } finally {
-      currentElement = null;
-      currentWindow = null;
-    }
-  }
-
-  @Override
-  public void onTimer(
-      String timerId,
-      BoundedWindow window,
-      Instant timestamp,
-      TimeDomain timeDomain) {
-    throw new UnsupportedOperationException("TODO: Add support for timers");
-  }
-
-  @Override
-  public void finishBundle() {
-    doFnInvoker.invokeFinishBundle(finishBundleContext);
-  }
-
-  /**
-   * Outputs the given element to the specified set of consumers wrapping any exceptions.
-   */
-  private <T> void outputTo(
-      Collection<ThrowingConsumer<WindowedValue<T>>> consumers,
-      WindowedValue<T> output) {
-    Iterator<ThrowingConsumer<WindowedValue<T>>> consumerIterator;
-    try {
-      for (ThrowingConsumer<WindowedValue<T>> consumer : consumers) {
-        consumer.accept(output);
-      }
-    } catch (Throwable t) {
-      throw UserCodeException.wrap(t);
-    }
-  }
-
-  /**
-   * Provides arguments for a {@link DoFnInvoker} for {@link DoFn.StartBundle @StartBundle}.
-   */
-  private class StartBundleContext
-      extends DoFn<InputT, OutputT>.StartBundleContext
-      implements DoFnInvoker.ArgumentProvider<InputT, OutputT> {
-
-    private StartBundleContext() {
-      doFn.super();
-    }
-
-    @Override
-    public PipelineOptions getPipelineOptions() {
-      return pipelineOptions;
-    }
-
-    @Override
-    public PipelineOptions pipelineOptions() {
-      return pipelineOptions;
-    }
-
-    @Override
-    public BoundedWindow window() {
-      throw new UnsupportedOperationException(
-          "Cannot access window outside of @ProcessElement and @OnTimer methods.");
-    }
-
-    @Override
-    public DoFn<InputT, OutputT>.StartBundleContext startBundleContext(
-        DoFn<InputT, OutputT> doFn) {
-      return this;
-    }
-
-    @Override
-    public DoFn<InputT, OutputT>.FinishBundleContext finishBundleContext(
-        DoFn<InputT, OutputT> doFn) {
-      throw new UnsupportedOperationException(
-          "Cannot access FinishBundleContext outside of @FinishBundle method.");
-    }
-
-    @Override
-    public DoFn<InputT, OutputT>.ProcessContext processContext(DoFn<InputT, OutputT> doFn) {
-      throw new UnsupportedOperationException(
-          "Cannot access ProcessContext outside of @ProcessElement method.");
-    }
-
-    @Override
-    public DoFn<InputT, OutputT>.OnTimerContext onTimerContext(DoFn<InputT, OutputT> doFn) {
-      throw new UnsupportedOperationException(
-          "Cannot access OnTimerContext outside of @OnTimer methods.");
-    }
-
-    @Override
-    public RestrictionTracker<?> restrictionTracker() {
-      throw new UnsupportedOperationException(
-          "Cannot access RestrictionTracker outside of @ProcessElement method.");
-    }
-
-    @Override
-    public State state(String stateId) {
-      throw new UnsupportedOperationException(
-          "Cannot access state outside of @ProcessElement and @OnTimer methods.");
-    }
-
-    @Override
-    public Timer timer(String timerId) {
-      throw new UnsupportedOperationException(
-          "Cannot access timers outside of @ProcessElement and @OnTimer methods.");
-    }
-  }
-
-  /**
-   * Provides arguments for a {@link DoFnInvoker} for {@link DoFn.ProcessElement @ProcessElement}.
-   */
-  private class ProcessBundleContext
-      extends DoFn<InputT, OutputT>.ProcessContext
-      implements DoFnInvoker.ArgumentProvider<InputT, OutputT> {
-
-    private ProcessBundleContext() {
-      doFn.super();
-    }
-
-    @Override
-    public BoundedWindow window() {
-      return currentWindow;
-    }
-
-    @Override
-    public DoFn.StartBundleContext startBundleContext(DoFn<InputT, OutputT> doFn) {
-      throw new UnsupportedOperationException(
-          "Cannot access StartBundleContext outside of @StartBundle method.");
-    }
-
-    @Override
-    public DoFn.FinishBundleContext finishBundleContext(DoFn<InputT, OutputT> doFn) {
-      throw new UnsupportedOperationException(
-          "Cannot access FinishBundleContext outside of @FinishBundle method.");
-    }
-
-    @Override
-    public ProcessContext processContext(DoFn<InputT, OutputT> doFn) {
-      return this;
-    }
-
-    @Override
-    public OnTimerContext onTimerContext(DoFn<InputT, OutputT> doFn) {
-      throw new UnsupportedOperationException("TODO: Add support for timers");
-    }
-
-    @Override
-    public RestrictionTracker<?> restrictionTracker() {
-      throw new UnsupportedOperationException("TODO: Add support for SplittableDoFn");
-    }
-
-    @Override
-    public State state(String stateId) {
-      throw new UnsupportedOperationException("TODO: Add support for state");
-    }
-
-    @Override
-    public Timer timer(String timerId) {
-      throw new UnsupportedOperationException("TODO: Add support for timers");
-    }
-
-    @Override
-    public PipelineOptions getPipelineOptions() {
-      return pipelineOptions;
-    }
-
-    @Override
-    public PipelineOptions pipelineOptions() {
-      return pipelineOptions;
-    }
-
-    @Override
-    public void output(OutputT output) {
-      outputTo(mainOutputConsumers,
-          WindowedValue.of(
-              output,
-              currentElement.getTimestamp(),
-              currentWindow,
-              currentElement.getPane()));
-    }
-
-    @Override
-    public void outputWithTimestamp(OutputT output, Instant timestamp) {
-      outputTo(mainOutputConsumers,
-          WindowedValue.of(
-              output,
-              timestamp,
-              currentWindow,
-              currentElement.getPane()));
-    }
-
-    @Override
-    public <T> void output(TupleTag<T> tag, T output) {
-      Collection<ThrowingConsumer<WindowedValue<T>>> consumers = (Collection) outputMap.get(tag);
-      if (consumers == null) {
-        throw new IllegalArgumentException(String.format("Unknown output tag %s", tag));
-      }
-      outputTo(consumers,
-          WindowedValue.of(
-              output,
-              currentElement.getTimestamp(),
-              currentWindow,
-              currentElement.getPane()));
-    }
-
-    @Override
-    public <T> void outputWithTimestamp(TupleTag<T> tag, T output, Instant timestamp) {
-      Collection<ThrowingConsumer<WindowedValue<T>>> consumers = (Collection) outputMap.get(tag);
-      if (consumers == null) {
-        throw new IllegalArgumentException(String.format("Unknown output tag %s", tag));
-      }
-      outputTo(consumers,
-          WindowedValue.of(
-              output,
-              timestamp,
-              currentWindow,
-              currentElement.getPane()));
-    }
-
-    @Override
-    public InputT element() {
-      return currentElement.getValue();
-    }
-
-    @Override
-    public <T> T sideInput(PCollectionView<T> view) {
-      throw new UnsupportedOperationException("TODO: Support side inputs");
-    }
-
-    @Override
-    public Instant timestamp() {
-      return currentElement.getTimestamp();
-    }
-
-    @Override
-    public PaneInfo pane() {
-      return currentElement.getPane();
-    }
-
-    @Override
-    public void updateWatermark(Instant watermark) {
-      throw new UnsupportedOperationException("TODO: Add support for SplittableDoFn");
-    }
-  }
-
-  /**
-   * Provides arguments for a {@link DoFnInvoker} for {@link DoFn.FinishBundle @FinishBundle}.
-   */
-  private class FinishBundleContext
-      extends DoFn<InputT, OutputT>.FinishBundleContext
-      implements DoFnInvoker.ArgumentProvider<InputT, OutputT> {
-
-    private FinishBundleContext() {
-      doFn.super();
-    }
-
-    @Override
-    public PipelineOptions getPipelineOptions() {
-      return pipelineOptions;
-    }
-
-    @Override
-    public PipelineOptions pipelineOptions() {
-      return pipelineOptions;
-    }
-
-    @Override
-    public BoundedWindow window() {
-      throw new UnsupportedOperationException(
-          "Cannot access window outside of @ProcessElement and @OnTimer methods.");
-    }
-
-    @Override
-    public DoFn<InputT, OutputT>.StartBundleContext startBundleContext(
-        DoFn<InputT, OutputT> doFn) {
-      throw new UnsupportedOperationException(
-          "Cannot access StartBundleContext outside of @StartBundle method.");
-    }
-
-    @Override
-    public DoFn<InputT, OutputT>.FinishBundleContext finishBundleContext(
-        DoFn<InputT, OutputT> doFn) {
-      return this;
-    }
-
-    @Override
-    public DoFn<InputT, OutputT>.ProcessContext processContext(DoFn<InputT, OutputT> doFn) {
-      throw new UnsupportedOperationException(
-          "Cannot access ProcessContext outside of @ProcessElement method.");
-    }
-
-    @Override
-    public DoFn<InputT, OutputT>.OnTimerContext onTimerContext(DoFn<InputT, OutputT> doFn) {
-      throw new UnsupportedOperationException(
-          "Cannot access OnTimerContext outside of @OnTimer methods.");
-    }
-
-    @Override
-    public RestrictionTracker<?> restrictionTracker() {
-      throw new UnsupportedOperationException(
-          "Cannot access RestrictionTracker outside of @ProcessElement method.");
-    }
-
-    @Override
-    public State state(String stateId) {
-      throw new UnsupportedOperationException(
-          "Cannot access state outside of @ProcessElement and @OnTimer methods.");
-    }
-
-    @Override
-    public Timer timer(String timerId) {
-      throw new UnsupportedOperationException(
-          "Cannot access timers outside of @ProcessElement and @OnTimer methods.");
-    }
-
-    @Override
-    public void output(OutputT output, Instant timestamp, BoundedWindow window) {
-      outputTo(mainOutputConsumers,
-          WindowedValue.of(output, timestamp, window, PaneInfo.NO_FIRING));
-    }
-
-    @Override
-    public <T> void output(TupleTag<T> tag, T output, Instant timestamp, BoundedWindow window) {
-      Collection<ThrowingConsumer<WindowedValue<T>>> consumers = (Collection) outputMap.get(tag);
-      if (consumers == null) {
-        throw new IllegalArgumentException(String.format("Unknown output tag %s", tag));
-      }
-      outputTo(consumers,
-          WindowedValue.of(output, timestamp, window, PaneInfo.NO_FIRING));
-    }
-  }
-}

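The ProcessBundleContext above is what a user's @ProcessElement method sees: element(), timestamp(), pane() and output() are all backed by the currentElement/currentWindow fields. For illustration, a trivial DoFn such a runner could drive (the transform itself is an assumption, not part of this commit):

    // Illustrative user DoFn; its ProcessContext calls map onto ProcessBundleContext above.
    static class FormatCountsFn extends DoFn<KV<String, Long>, String> {
      @ProcessElement
      public void processElement(ProcessContext c) {
        c.output(c.element().getKey() + ": " + c.element().getValue());
      }
    }
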
http://git-wip-us.apache.org/repos/asf/beam/blob/f1b4700f/sdks/java/harness/src/main/java/org/apache/beam/runners/core/PTransformRunnerFactory.java
----------------------------------------------------------------------
diff --git a/sdks/java/harness/src/main/java/org/apache/beam/runners/core/PTransformRunnerFactory.java b/sdks/java/harness/src/main/java/org/apache/beam/runners/core/PTransformRunnerFactory.java
deleted file mode 100644
index b325db4..0000000
--- a/sdks/java/harness/src/main/java/org/apache/beam/runners/core/PTransformRunnerFactory.java
+++ /dev/null
@@ -1,81 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.beam.runners.core;
-
-import com.google.common.collect.Multimap;
-import java.io.IOException;
-import java.util.Map;
-import java.util.function.Consumer;
-import java.util.function.Supplier;
-import org.apache.beam.fn.harness.data.BeamFnDataClient;
-import org.apache.beam.fn.harness.fn.ThrowingConsumer;
-import org.apache.beam.fn.harness.fn.ThrowingRunnable;
-import org.apache.beam.sdk.common.runner.v1.RunnerApi;
-import org.apache.beam.sdk.options.PipelineOptions;
-import org.apache.beam.sdk.util.WindowedValue;
-
-/**
- * A factory able to instantiate an appropriate handler for a given PTransform.
- */
-public interface PTransformRunnerFactory<T> {
-
-  /**
-   * Creates and returns a handler for a given PTransform. Note that the handler must support
-   * processing multiple bundles. The handler will be discarded if an error is thrown during
-   * element processing, or during execution of start/finish.
-   *
-   * @param pipelineOptions Pipeline options
-   * @param beamFnDataClient
-   * @param pTransformId The id of the PTransform.
-   * @param pTransform The PTransform definition.
-   * @param processBundleInstructionId A supplier containing the active process bundle instruction
-   * id.
-   * @param pCollections A mapping from PCollection id to PCollection definition.
-   * @param coders A mapping from coder id to coder definition.
-   * @param pCollectionIdsToConsumers A mapping from PCollection id to a collection of consumers.
-   * Note that if this handler is a consumer, it should register itself within this multimap under
-   * the appropriate PCollection ids. Also note that all output consumers needed by this PTransform
-   * (based on the values of the {@link RunnerApi.PTransform#getOutputsMap()} will have already
-   * registered within this multimap.
-   * @param addStartFunction A consumer to register a start bundle handler with.
-   * @param addFinishFunction A consumer to register a finish bundle handler with.
-   */
-  T createRunnerForPTransform(
-      PipelineOptions pipelineOptions,
-      BeamFnDataClient beamFnDataClient,
-      String pTransformId,
-      RunnerApi.PTransform pTransform,
-      Supplier<String> processBundleInstructionId,
-      Map<String, RunnerApi.PCollection> pCollections,
-      Map<String, RunnerApi.Coder> coders,
-      Multimap<String, ThrowingConsumer<WindowedValue<?>>> pCollectionIdsToConsumers,
-      Consumer<ThrowingRunnable> addStartFunction,
-      Consumer<ThrowingRunnable> addFinishFunction) throws IOException;
-
-  /**
-   * A registrar which can return a mapping from {@link RunnerApi.FunctionSpec#getUrn()} to
-   * a factory capable of instantiating an appropriate handler.
-   */
-  interface Registrar {
-    /**
-     * Returns a mapping from {@link RunnerApi.FunctionSpec#getUrn()} to a factory capable of
-     * instantiating an appropriate handler.
-     */
-    Map<String, PTransformRunnerFactory> getPTransformRunnerFactories();
-  }
-}

http://git-wip-us.apache.org/repos/asf/beam/blob/f1b4700f/sdks/java/harness/src/main/java/org/apache/beam/runners/core/package-info.java
----------------------------------------------------------------------
diff --git a/sdks/java/harness/src/main/java/org/apache/beam/runners/core/package-info.java b/sdks/java/harness/src/main/java/org/apache/beam/runners/core/package-info.java
deleted file mode 100644
index d250a6a..0000000
--- a/sdks/java/harness/src/main/java/org/apache/beam/runners/core/package-info.java
+++ /dev/null
@@ -1,22 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * Provides utilities for Beam runner authors.
- */
-package org.apache.beam.runners.core;

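The split package this commit fixes is that the harness jar was contributing classes to org.apache.beam.runners.core, a package owned by the runners-core artifact; the classes deleted above are re-homed under org.apache.beam.fn.harness, as the new test below reflects (it lives in org.apache.beam.fn.harness and imports org.apache.beam.fn.harness.PTransformRunnerFactory.Registrar). A sketch of how the relocated Registrar contract is consumed, assuming the harness discovers registrars via ServiceLoader as the new test's imports suggest (variable names are illustrative):

    Map<String, PTransformRunnerFactory> urnToFactory = new HashMap<>();
    for (PTransformRunnerFactory.Registrar registrar
        : ServiceLoader.load(PTransformRunnerFactory.Registrar.class)) {
      urnToFactory.putAll(registrar.getPTransformRunnerFactories());
    }
    // e.g. "urn:org.apache.beam:source:runner:0.1" -> BeamFnDataReadRunner.Factory
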
http://git-wip-us.apache.org/repos/asf/beam/blob/f1b4700f/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/BeamFnDataReadRunnerTest.java
----------------------------------------------------------------------
diff --git a/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/BeamFnDataReadRunnerTest.java b/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/BeamFnDataReadRunnerTest.java
new file mode 100644
index 0000000..a7c6666
--- /dev/null
+++ b/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/BeamFnDataReadRunnerTest.java
@@ -0,0 +1,281 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.beam.fn.harness;
+
+import static org.apache.beam.sdk.util.WindowedValue.valueInGlobalWindow;
+import static org.hamcrest.Matchers.contains;
+import static org.hamcrest.Matchers.containsInAnyOrder;
+import static org.junit.Assert.assertThat;
+import static org.junit.Assert.fail;
+import static org.mockito.Matchers.any;
+import static org.mockito.Matchers.eq;
+import static org.mockito.Mockito.verify;
+import static org.mockito.Mockito.verifyNoMoreInteractions;
+import static org.mockito.Mockito.verifyZeroInteractions;
+import static org.mockito.Mockito.when;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.google.common.base.Suppliers;
+import com.google.common.collect.HashMultimap;
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.Iterables;
+import com.google.common.collect.Multimap;
+import com.google.common.util.concurrent.Uninterruptibles;
+import com.google.protobuf.Any;
+import com.google.protobuf.ByteString;
+import com.google.protobuf.BytesValue;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.ServiceLoader;
+import java.util.concurrent.CompletableFuture;
+import java.util.concurrent.Executors;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicReference;
+import org.apache.beam.fn.harness.PTransformRunnerFactory.Registrar;
+import org.apache.beam.fn.harness.data.BeamFnDataClient;
+import org.apache.beam.fn.harness.fn.ThrowingConsumer;
+import org.apache.beam.fn.harness.fn.ThrowingRunnable;
+import org.apache.beam.fn.harness.test.TestExecutors;
+import org.apache.beam.fn.harness.test.TestExecutors.TestExecutorService;
+import org.apache.beam.fn.v1.BeamFnApi;
+import org.apache.beam.runners.dataflow.util.CloudObjects;
+import org.apache.beam.sdk.coders.Coder;
+import org.apache.beam.sdk.coders.StringUtf8Coder;
+import org.apache.beam.sdk.common.runner.v1.RunnerApi;
+import org.apache.beam.sdk.options.PipelineOptionsFactory;
+import org.apache.beam.sdk.transforms.windowing.GlobalWindow;
+import org.apache.beam.sdk.util.WindowedValue;
+import org.apache.beam.sdk.values.KV;
+import org.hamcrest.collection.IsMapContaining;
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+import org.mockito.ArgumentCaptor;
+import org.mockito.Captor;
+import org.mockito.Mock;
+import org.mockito.MockitoAnnotations;
+
+/** Tests for {@link BeamFnDataReadRunner}. */
+@RunWith(JUnit4.class)
+public class BeamFnDataReadRunnerTest {
+
+  private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
+  private static final BeamFnApi.RemoteGrpcPort PORT_SPEC = BeamFnApi.RemoteGrpcPort.newBuilder()
+      .setApiServiceDescriptor(BeamFnApi.ApiServiceDescriptor.getDefaultInstance()).build();
+  private static final RunnerApi.FunctionSpec FUNCTION_SPEC = RunnerApi.FunctionSpec.newBuilder()
+      .setParameter(Any.pack(PORT_SPEC)).build();
+  private static final Coder<WindowedValue<String>> CODER =
+      WindowedValue.getFullCoder(StringUtf8Coder.of(), GlobalWindow.Coder.INSTANCE);
+  private static final String CODER_SPEC_ID = "string-coder-id";
+  private static final RunnerApi.Coder CODER_SPEC;
+  private static final String URN = "urn:org.apache.beam:source:runner:0.1";
+
+  static {
+    try {
+      CODER_SPEC = RunnerApi.Coder.newBuilder().setSpec(
+          RunnerApi.SdkFunctionSpec.newBuilder().setSpec(
+              RunnerApi.FunctionSpec.newBuilder().setParameter(
+                  Any.pack(BytesValue.newBuilder().setValue(ByteString.copyFrom(
+                      OBJECT_MAPPER.writeValueAsBytes(CloudObjects.asCloudObject(CODER))))
+                      .build()))
+                  .build())
+              .build())
+          .build();
+    } catch (IOException e) {
+      throw new ExceptionInInitializerError(e);
+    }
+  }
+  private static final BeamFnApi.Target INPUT_TARGET = BeamFnApi.Target.newBuilder()
+      .setPrimitiveTransformReference("1")
+      .setName("out")
+      .build();
+
+  @Rule public TestExecutorService executor = TestExecutors.from(Executors::newCachedThreadPool);
+  @Mock private BeamFnDataClient mockBeamFnDataClient;
+  @Captor private ArgumentCaptor<ThrowingConsumer<WindowedValue<String>>> consumerCaptor;
+
+  @Before
+  public void setUp() {
+    MockitoAnnotations.initMocks(this);
+  }
+
+  @Test
+  public void testCreatingAndProcessingBeamFnDataReadRunner() throws Exception {
+    String bundleId = "57";
+    String outputId = "101";
+
+    List<WindowedValue<String>> outputValues = new ArrayList<>();
+
+    Multimap<String, ThrowingConsumer<WindowedValue<?>>> consumers = HashMultimap.create();
+    consumers.put("outputPC",
+        (ThrowingConsumer) (ThrowingConsumer<WindowedValue<String>>) outputValues::add);
+    List<ThrowingRunnable> startFunctions = new ArrayList<>();
+    List<ThrowingRunnable> finishFunctions = new ArrayList<>();
+
+    RunnerApi.FunctionSpec functionSpec = RunnerApi.FunctionSpec.newBuilder()
+        .setUrn("urn:org.apache.beam:source:runner:0.1")
+        .setParameter(Any.pack(PORT_SPEC))
+        .build();
+
+    RunnerApi.PTransform pTransform = RunnerApi.PTransform.newBuilder()
+        .setSpec(functionSpec)
+        .putOutputs(outputId, "outputPC")
+        .build();
+
+    new BeamFnDataReadRunner.Factory<String>().createRunnerForPTransform(
+        PipelineOptionsFactory.create(),
+        mockBeamFnDataClient,
+        "pTransformId",
+        pTransform,
+        Suppliers.ofInstance(bundleId)::get,
+        ImmutableMap.of("outputPC",
+            RunnerApi.PCollection.newBuilder().setCoderId(CODER_SPEC_ID).build()),
+        ImmutableMap.of(CODER_SPEC_ID, CODER_SPEC),
+        consumers,
+        startFunctions::add,
+        finishFunctions::add);
+
+    verifyZeroInteractions(mockBeamFnDataClient);
+
+    CompletableFuture<Void> completionFuture = new CompletableFuture<>();
+    when(mockBeamFnDataClient.forInboundConsumer(any(), any(), any(), any()))
+        .thenReturn(completionFuture);
+    Iterables.getOnlyElement(startFunctions).run();
+    verify(mockBeamFnDataClient).forInboundConsumer(
+        eq(PORT_SPEC.getApiServiceDescriptor()),
+        eq(KV.of(bundleId, BeamFnApi.Target.newBuilder()
+            .setPrimitiveTransformReference("pTransformId")
+            .setName(outputId)
+            .build())),
+        eq(CODER),
+        consumerCaptor.capture());
+
+    consumerCaptor.getValue().accept(valueInGlobalWindow("TestValue"));
+    assertThat(outputValues, contains(valueInGlobalWindow("TestValue")));
+    outputValues.clear();
+
+    assertThat(consumers.keySet(), containsInAnyOrder("outputPC"));
+
+    completionFuture.complete(null);
+    Iterables.getOnlyElement(finishFunctions).run();
+
+    verifyNoMoreInteractions(mockBeamFnDataClient);
+  }
+
+  @Test
+  public void testReuseForMultipleBundles() throws Exception {
+    CompletableFuture<Void> bundle1Future = new CompletableFuture<>();
+    CompletableFuture<Void> bundle2Future = new CompletableFuture<>();
+    when(mockBeamFnDataClient.forInboundConsumer(
+        any(),
+        any(),
+        any(),
+        any())).thenReturn(bundle1Future).thenReturn(bundle2Future);
+    List<WindowedValue<String>> valuesA = new ArrayList<>();
+    List<WindowedValue<String>> valuesB = new ArrayList<>();
+
+    AtomicReference<String> bundleId = new AtomicReference<>("0");
+    BeamFnDataReadRunner<String> readRunner = new BeamFnDataReadRunner<>(
+        FUNCTION_SPEC,
+        bundleId::get,
+        INPUT_TARGET,
+        CODER_SPEC,
+        mockBeamFnDataClient,
+        ImmutableList.of(valuesA::add, valuesB::add));
+
+    // Process for bundle id 0
+    readRunner.registerInputLocation();
+
+    verify(mockBeamFnDataClient).forInboundConsumer(
+        eq(PORT_SPEC.getApiServiceDescriptor()),
+        eq(KV.of(bundleId.get(), INPUT_TARGET)),
+        eq(CODER),
+        consumerCaptor.capture());
+
+    executor.submit(new Runnable() {
+      @Override
+      public void run() {
+        // Sleep for some small amount of time simulating the parent blocking
+        Uninterruptibles.sleepUninterruptibly(100, TimeUnit.MILLISECONDS);
+        try {
+          consumerCaptor.getValue().accept(valueInGlobalWindow("ABC"));
+          consumerCaptor.getValue().accept(valueInGlobalWindow("DEF"));
+        } catch (Exception e) {
+          bundle1Future.completeExceptionally(e);
+        } finally {
+          bundle1Future.complete(null);
+        }
+      }
+    });
+
+    readRunner.blockTillReadFinishes();
+    assertThat(valuesA, contains(valueInGlobalWindow("ABC"), valueInGlobalWindow("DEF")));
+    assertThat(valuesB, contains(valueInGlobalWindow("ABC"), valueInGlobalWindow("DEF")));
+
+    // Process for bundle id 1
+    bundleId.set("1");
+    valuesA.clear();
+    valuesB.clear();
+    readRunner.registerInputLocation();
+
+    verify(mockBeamFnDataClient).forInboundConsumer(
+        eq(PORT_SPEC.getApiServiceDescriptor()),
+        eq(KV.of(bundleId.get(), INPUT_TARGET)),
+        eq(CODER),
+        consumerCaptor.capture());
+
+    executor.submit(new Runnable() {
+      @Override
+      public void run() {
+        // Sleep for some small amount of time simulating the parent blocking
+        Uninterruptibles.sleepUninterruptibly(100, TimeUnit.MILLISECONDS);
+        try {
+          consumerCaptor.getValue().accept(valueInGlobalWindow("GHI"));
+          consumerCaptor.getValue().accept(valueInGlobalWindow("JKL"));
+        } catch (Exception e) {
+          bundle2Future.completeExceptionally(e);
+        } finally {
+          bundle2Future.complete(null);
+        }
+      }
+    });
+
+    readRunner.blockTillReadFinishes();
+    assertThat(valuesA, contains(valueInGlobalWindow("GHI"), valueInGlobalWindow("JKL")));
+    assertThat(valuesB, contains(valueInGlobalWindow("GHI"), valueInGlobalWindow("JKL")));
+
+    verifyNoMoreInteractions(mockBeamFnDataClient);
+  }
+
+  @Test
+  public void testRegistration() {
+    for (Registrar registrar :
+        ServiceLoader.load(Registrar.class)) {
+      if (registrar instanceof BeamFnDataReadRunner.Registrar) {
+        assertThat(registrar.getPTransformRunnerFactories(), IsMapContaining.hasKey(URN));
+        return;
+      }
+    }
+    fail("Expected registrar not found.");
+  }
+}

http://git-wip-us.apache.org/repos/asf/beam/blob/f1b4700f/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/BeamFnDataWriteRunnerTest.java
----------------------------------------------------------------------
diff --git a/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/BeamFnDataWriteRunnerTest.java b/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/BeamFnDataWriteRunnerTest.java
new file mode 100644
index 0000000..28838b1
--- /dev/null
+++ b/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/BeamFnDataWriteRunnerTest.java
@@ -0,0 +1,269 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.beam.fn.harness;
+
+import static org.apache.beam.sdk.util.WindowedValue.valueInGlobalWindow;
+import static org.hamcrest.Matchers.contains;
+import static org.hamcrest.Matchers.containsInAnyOrder;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertThat;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+import static org.mockito.Matchers.any;
+import static org.mockito.Matchers.eq;
+import static org.mockito.Mockito.verify;
+import static org.mockito.Mockito.verifyNoMoreInteractions;
+import static org.mockito.Mockito.verifyZeroInteractions;
+import static org.mockito.Mockito.when;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.google.common.base.Suppliers;
+import com.google.common.collect.HashMultimap;
+import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.Iterables;
+import com.google.common.collect.Multimap;
+import com.google.protobuf.Any;
+import com.google.protobuf.ByteString;
+import com.google.protobuf.BytesValue;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.ServiceLoader;
+import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.concurrent.atomic.AtomicReference;
+import org.apache.beam.fn.harness.PTransformRunnerFactory.Registrar;
+import org.apache.beam.fn.harness.data.BeamFnDataClient;
+import org.apache.beam.fn.harness.fn.CloseableThrowingConsumer;
+import org.apache.beam.fn.harness.fn.ThrowingConsumer;
+import org.apache.beam.fn.harness.fn.ThrowingRunnable;
+import org.apache.beam.fn.v1.BeamFnApi;
+import org.apache.beam.runners.dataflow.util.CloudObjects;
+import org.apache.beam.sdk.coders.Coder;
+import org.apache.beam.sdk.coders.StringUtf8Coder;
+import org.apache.beam.sdk.common.runner.v1.RunnerApi;
+import org.apache.beam.sdk.options.PipelineOptionsFactory;
+import org.apache.beam.sdk.transforms.windowing.GlobalWindow;
+import org.apache.beam.sdk.util.WindowedValue;
+import org.apache.beam.sdk.values.KV;
+import org.hamcrest.collection.IsMapContaining;
+import org.junit.Before;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+import org.mockito.Matchers;
+import org.mockito.Mock;
+import org.mockito.MockitoAnnotations;
+
+/** Tests for {@link BeamFnDataWriteRunner}. */
+@RunWith(JUnit4.class)
+public class BeamFnDataWriteRunnerTest {
+
+  private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
+  private static final BeamFnApi.RemoteGrpcPort PORT_SPEC = BeamFnApi.RemoteGrpcPort.newBuilder()
+      .setApiServiceDescriptor(BeamFnApi.ApiServiceDescriptor.getDefaultInstance()).build();
+  private static final RunnerApi.FunctionSpec FUNCTION_SPEC = RunnerApi.FunctionSpec.newBuilder()
+      .setParameter(Any.pack(PORT_SPEC)).build();
+  private static final String CODER_ID = "string-coder-id";
+  private static final Coder<WindowedValue<String>> CODER =
+      WindowedValue.getFullCoder(StringUtf8Coder.of(), GlobalWindow.Coder.INSTANCE);
+  private static final RunnerApi.Coder CODER_SPEC;
+  private static final String URN = "urn:org.apache.beam:sink:runner:0.1";
+
+  static {
+    try {
+      CODER_SPEC = RunnerApi.Coder.newBuilder().setSpec(
+          RunnerApi.SdkFunctionSpec.newBuilder().setSpec(
+              RunnerApi.FunctionSpec.newBuilder().setParameter(
+                  Any.pack(BytesValue.newBuilder().setValue(ByteString.copyFrom(
+                      OBJECT_MAPPER.writeValueAsBytes(CloudObjects.asCloudObject(CODER))))
+                      .build()))
+                  .build())
+              .build())
+          .build();
+    } catch (IOException e) {
+      throw new ExceptionInInitializerError(e);
+    }
+  }
+  private static final BeamFnApi.Target OUTPUT_TARGET = BeamFnApi.Target.newBuilder()
+      .setPrimitiveTransformReference("1")
+      .setName("out")
+      .build();
+
+  @Mock private BeamFnDataClient mockBeamFnDataClient;
+
+  @Before
+  public void setUp() {
+    MockitoAnnotations.initMocks(this);
+  }
+
+
+  @Test
+  public void testCreatingAndProcessingBeamFnDataWriteRunner() throws Exception {
+    String bundleId = "57L";
+    String inputId = "100L";
+
+    Multimap<String, ThrowingConsumer<WindowedValue<?>>> consumers = HashMultimap.create();
+    List<ThrowingRunnable> startFunctions = new ArrayList<>();
+    List<ThrowingRunnable> finishFunctions = new ArrayList<>();
+
+    RunnerApi.FunctionSpec functionSpec = RunnerApi.FunctionSpec.newBuilder()
+        .setUrn("urn:org.apache.beam:sink:runner:0.1")
+        .setParameter(Any.pack(PORT_SPEC))
+        .build();
+
+    RunnerApi.PTransform pTransform = RunnerApi.PTransform.newBuilder()
+        .setSpec(functionSpec)
+        .putInputs(inputId, "inputPC")
+        .build();
+
+    new BeamFnDataWriteRunner.Factory<String>().createRunnerForPTransform(
+        PipelineOptionsFactory.create(),
+        mockBeamFnDataClient,
+        "ptransformId",
+        pTransform,
+        Suppliers.ofInstance(bundleId)::get,
+        ImmutableMap.of("inputPC",
+            RunnerApi.PCollection.newBuilder().setCoderId(CODER_ID).build()),
+        ImmutableMap.of(CODER_ID, CODER_SPEC),
+        consumers,
+        startFunctions::add,
+        finishFunctions::add);
+
+    verifyZeroInteractions(mockBeamFnDataClient);
+
+    List<WindowedValue<String>> outputValues = new ArrayList<>();
+    AtomicBoolean wasCloseCalled = new AtomicBoolean();
+    CloseableThrowingConsumer<WindowedValue<String>> outputConsumer =
+        new CloseableThrowingConsumer<WindowedValue<String>>(){
+          @Override
+          public void close() throws Exception {
+            wasCloseCalled.set(true);
+          }
+
+          @Override
+          public void accept(WindowedValue<String> t) throws Exception {
+            outputValues.add(t);
+          }
+        };
+
+    when(mockBeamFnDataClient.forOutboundConsumer(
+        any(),
+        any(),
+        Matchers.<Coder<WindowedValue<String>>>any())).thenReturn(outputConsumer);
+    Iterables.getOnlyElement(startFunctions).run();
+    verify(mockBeamFnDataClient).forOutboundConsumer(
+        eq(PORT_SPEC.getApiServiceDescriptor()),
+        eq(KV.of(bundleId, BeamFnApi.Target.newBuilder()
+            .setPrimitiveTransformReference("ptransformId")
+            .setName(inputId)
+            .build())),
+        eq(CODER));
+
+    assertThat(consumers.keySet(), containsInAnyOrder("inputPC"));
+    Iterables.getOnlyElement(consumers.get("inputPC")).accept(valueInGlobalWindow("TestValue"));
+    assertThat(outputValues, contains(valueInGlobalWindow("TestValue")));
+    outputValues.clear();
+
+    assertFalse(wasCloseCalled.get());
+    Iterables.getOnlyElement(finishFunctions).run();
+    assertTrue(wasCloseCalled.get());
+
+    verifyNoMoreInteractions(mockBeamFnDataClient);
+  }
+
+  @Test
+  public void testReuseForMultipleBundles() throws Exception {
+    RecordingConsumer<WindowedValue<String>> valuesA = new RecordingConsumer<>();
+    RecordingConsumer<WindowedValue<String>> valuesB = new RecordingConsumer<>();
+    when(mockBeamFnDataClient.forOutboundConsumer(
+        any(),
+        any(),
+        Matchers.<Coder<WindowedValue<String>>>any())).thenReturn(valuesA).thenReturn(valuesB);
+    AtomicReference<String> bundleId = new AtomicReference<>("0");
+    BeamFnDataWriteRunner<String> writeRunner = new BeamFnDataWriteRunner<>(
+        FUNCTION_SPEC,
+        bundleId::get,
+        OUTPUT_TARGET,
+        CODER_SPEC,
+        mockBeamFnDataClient);
+
+    // Process for bundle id 0
+    writeRunner.registerForOutput();
+
+    verify(mockBeamFnDataClient).forOutboundConsumer(
+        eq(PORT_SPEC.getApiServiceDescriptor()),
+        eq(KV.of(bundleId.get(), OUTPUT_TARGET)),
+        eq(CODER));
+
+    writeRunner.consume(valueInGlobalWindow("ABC"));
+    writeRunner.consume(valueInGlobalWindow("DEF"));
+    writeRunner.close();
+
+    assertTrue(valuesA.closed);
+    assertThat(valuesA, contains(valueInGlobalWindow("ABC"), valueInGlobalWindow("DEF")));
+
+    // Process for bundle id 1
+    bundleId.set("1");
+    valuesA.clear();
+    valuesB.clear();
+    writeRunner.registerForOutput();
+
+    verify(mockBeamFnDataClient).forOutboundConsumer(
+        eq(PORT_SPEC.getApiServiceDescriptor()),
+        eq(KV.of(bundleId.get(), OUTPUT_TARGET)),
+        eq(CODER));
+
+    writeRunner.consume(valueInGlobalWindow("GHI"));
+    writeRunner.consume(valueInGlobalWindow("JKL"));
+    writeRunner.close();
+
+    assertTrue(valuesB.closed);
+    assertThat(valuesB, contains(valueInGlobalWindow("GHI"), valueInGlobalWindow("JKL")));
+    verifyNoMoreInteractions(mockBeamFnDataClient);
+  }
+
+  private static class RecordingConsumer<T> extends ArrayList<T>
+      implements CloseableThrowingConsumer<T> {
+    private boolean closed;
+    @Override
+    public void close() throws Exception {
+      closed = true;
+    }
+
+    @Override
+    public void accept(T t) throws Exception {
+      if (closed) {
+        throw new IllegalStateException("Consumer is closed but attempting to consume " + t);
+      }
+      add(t);
+    }
+  }
+
+  @Test
+  public void testRegistration() {
+    for (Registrar registrar :
+        ServiceLoader.load(Registrar.class)) {
+      if (registrar instanceof BeamFnDataWriteRunner.Registrar) {
+        assertThat(registrar.getPTransformRunnerFactories(), IsMapContaining.hasKey(URN));
+        return;
+      }
+    }
+    fail("Expected registrar not found.");
+  }
+}

http://git-wip-us.apache.org/repos/asf/beam/blob/f1b4700f/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/BoundedSourceRunnerTest.java
----------------------------------------------------------------------
diff --git a/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/BoundedSourceRunnerTest.java b/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/BoundedSourceRunnerTest.java
new file mode 100644
index 0000000..7aec161
--- /dev/null
+++ b/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/BoundedSourceRunnerTest.java
@@ -0,0 +1,187 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.beam.fn.harness;
+
+import static org.apache.beam.sdk.util.WindowedValue.valueInGlobalWindow;
+import static org.hamcrest.Matchers.contains;
+import static org.hamcrest.Matchers.containsInAnyOrder;
+import static org.hamcrest.collection.IsEmptyCollection.empty;
+import static org.junit.Assert.assertThat;
+import static org.junit.Assert.fail;
+
+import com.google.common.base.Suppliers;
+import com.google.common.collect.HashMultimap;
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.Iterables;
+import com.google.common.collect.Multimap;
+import com.google.protobuf.Any;
+import com.google.protobuf.ByteString;
+import com.google.protobuf.BytesValue;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+import java.util.ServiceLoader;
+import org.apache.beam.fn.harness.PTransformRunnerFactory.Registrar;
+import org.apache.beam.fn.harness.fn.ThrowingConsumer;
+import org.apache.beam.fn.harness.fn.ThrowingRunnable;
+import org.apache.beam.sdk.common.runner.v1.RunnerApi;
+import org.apache.beam.sdk.io.BoundedSource;
+import org.apache.beam.sdk.io.CountingSource;
+import org.apache.beam.sdk.options.PipelineOptionsFactory;
+import org.apache.beam.sdk.util.SerializableUtils;
+import org.apache.beam.sdk.util.WindowedValue;
+import org.hamcrest.Matchers;
+import org.hamcrest.collection.IsMapContaining;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+/** Tests for {@link BoundedSourceRunner}. */
+@RunWith(JUnit4.class)
+public class BoundedSourceRunnerTest {
+
+  public static final String URN = "urn:org.apache.beam:source:java:0.1";
+
+  @Test
+  public void testRunReadLoopWithMultipleSources() throws Exception {
+    List<WindowedValue<Long>> out1Values = new ArrayList<>();
+    List<WindowedValue<Long>> out2Values = new ArrayList<>();
+    Collection<ThrowingConsumer<WindowedValue<Long>>> consumers =
+        ImmutableList.of(out1Values::add, out2Values::add);
+
+    BoundedSourceRunner<BoundedSource<Long>, Long> runner = new BoundedSourceRunner<>(
+        PipelineOptionsFactory.create(),
+        RunnerApi.FunctionSpec.getDefaultInstance(),
+        consumers);
+
+    runner.runReadLoop(valueInGlobalWindow(CountingSource.upTo(2)));
+    runner.runReadLoop(valueInGlobalWindow(CountingSource.upTo(1)));
+
+    assertThat(out1Values,
+        contains(valueInGlobalWindow(0L), valueInGlobalWindow(1L), valueInGlobalWindow(0L)));
+    assertThat(out2Values,
+        contains(valueInGlobalWindow(0L), valueInGlobalWindow(1L), valueInGlobalWindow(0L)));
+  }
+
+  @Test
+  public void testRunReadLoopWithEmptySource() throws Exception {
+    List<WindowedValue<Long>> outValues = new ArrayList<>();
+    Collection<ThrowingConsumer<WindowedValue<Long>>> consumers =
+        ImmutableList.of(outValues::add);
+
+    BoundedSourceRunner<BoundedSource<Long>, Long> runner = new BoundedSourceRunner<>(
+        PipelineOptionsFactory.create(),
+        RunnerApi.FunctionSpec.getDefaultInstance(),
+        consumers);
+
+    runner.runReadLoop(valueInGlobalWindow(CountingSource.upTo(0)));
+
+    assertThat(outValues, empty());
+  }
+
+  @Test
+  public void testStart() throws Exception {
+    List<WindowedValue<Long>> outValues = new ArrayList<>();
+    Collection<ThrowingConsumer<WindowedValue<Long>>> consumers =
+        ImmutableList.of(outValues::add);
+
+    ByteString encodedSource =
+        ByteString.copyFrom(SerializableUtils.serializeToByteArray(CountingSource.upTo(3)));
+
+    BoundedSourceRunner<BoundedSource<Long>, Long> runner = new BoundedSourceRunner<>(
+        PipelineOptionsFactory.create(),
+        RunnerApi.FunctionSpec.newBuilder().setParameter(
+            Any.pack(BytesValue.newBuilder().setValue(encodedSource).build())).build(),
+        consumers);
+
+    runner.start();
+
+    assertThat(outValues,
+        contains(valueInGlobalWindow(0L), valueInGlobalWindow(1L), valueInGlobalWindow(2L)));
+  }
+
+  @Test
+  public void testCreatingAndProcessingSourceFromFactory() throws Exception {
+    List<WindowedValue<String>> outputValues = new ArrayList<>();
+
+    Multimap<String, ThrowingConsumer<WindowedValue<?>>> consumers = HashMultimap.create();
+    consumers.put("outputPC",
+        (ThrowingConsumer) (ThrowingConsumer<WindowedValue<String>>) outputValues::add);
+    List<ThrowingRunnable> startFunctions = new ArrayList<>();
+    List<ThrowingRunnable> finishFunctions = new ArrayList<>();
+
+    RunnerApi.FunctionSpec functionSpec = RunnerApi.FunctionSpec.newBuilder()
+        .setUrn("urn:org.apache.beam:source:java:0.1")
+        .setParameter(Any.pack(BytesValue.newBuilder()
+            .setValue(ByteString.copyFrom(
+                SerializableUtils.serializeToByteArray(CountingSource.upTo(3))))
+            .build()))
+        .build();
+
+    RunnerApi.PTransform pTransform = RunnerApi.PTransform.newBuilder()
+        .setSpec(functionSpec)
+        .putInputs("input", "inputPC")
+        .putOutputs("output", "outputPC")
+        .build();
+
+    new BoundedSourceRunner.Factory<>().createRunnerForPTransform(
+        PipelineOptionsFactory.create(),
+        null /* beamFnDataClient */,
+        "pTransformId",
+        pTransform,
+        Suppliers.ofInstance("57L")::get,
+        ImmutableMap.of(),
+        ImmutableMap.of(),
+        consumers,
+        startFunctions::add,
+        finishFunctions::add);
+
+    // This is testing a deprecated way of running sources and should be removed
+    // once all source definitions are instead propagated along the input edge.
+    Iterables.getOnlyElement(startFunctions).run();
+    assertThat(outputValues, contains(
+        valueInGlobalWindow(0L),
+        valueInGlobalWindow(1L),
+        valueInGlobalWindow(2L)));
+    outputValues.clear();
+
+    // Check that when passing a source along as an input, the source is processed.
+    assertThat(consumers.keySet(), containsInAnyOrder("inputPC", "outputPC"));
+    Iterables.getOnlyElement(consumers.get("inputPC")).accept(
+        valueInGlobalWindow(CountingSource.upTo(2)));
+    assertThat(outputValues, contains(
+        valueInGlobalWindow(0L),
+        valueInGlobalWindow(1L)));
+
+    assertThat(finishFunctions, Matchers.empty());
+  }
+
+  @Test
+  public void testRegistration() {
+    for (Registrar registrar :
+        ServiceLoader.load(Registrar.class)) {
+      if (registrar instanceof BoundedSourceRunner.Registrar) {
+        assertThat(registrar.getPTransformRunnerFactories(), IsMapContaining.hasKey(URN));
+        return;
+      }
+    }
+    fail("Expected registrar not found.");
+  }
+}

http://git-wip-us.apache.org/repos/asf/beam/blob/f1b4700f/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/FnApiDoFnRunnerTest.java
----------------------------------------------------------------------
diff --git a/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/FnApiDoFnRunnerTest.java b/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/FnApiDoFnRunnerTest.java
new file mode 100644
index 0000000..98362a2
--- /dev/null
+++ b/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/FnApiDoFnRunnerTest.java
@@ -0,0 +1,210 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.beam.fn.harness;
+
+import static org.apache.beam.sdk.util.WindowedValue.timestampedValueInGlobalWindow;
+import static org.apache.beam.sdk.util.WindowedValue.valueInGlobalWindow;
+import static org.hamcrest.Matchers.contains;
+import static org.hamcrest.Matchers.containsInAnyOrder;
+import static org.junit.Assert.assertThat;
+import static org.junit.Assert.fail;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.google.common.base.Suppliers;
+import com.google.common.collect.HashMultimap;
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.Iterables;
+import com.google.common.collect.Multimap;
+import com.google.protobuf.Any;
+import com.google.protobuf.ByteString;
+import com.google.protobuf.BytesValue;
+import com.google.protobuf.Message;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.ServiceLoader;
+import org.apache.beam.fn.harness.PTransformRunnerFactory.Registrar;
+import org.apache.beam.fn.harness.fn.ThrowingConsumer;
+import org.apache.beam.fn.harness.fn.ThrowingRunnable;
+import org.apache.beam.runners.core.construction.ParDoTranslation;
+import org.apache.beam.runners.dataflow.util.CloudObjects;
+import org.apache.beam.runners.dataflow.util.DoFnInfo;
+import org.apache.beam.sdk.coders.Coder;
+import org.apache.beam.sdk.coders.StringUtf8Coder;
+import org.apache.beam.sdk.common.runner.v1.RunnerApi;
+import org.apache.beam.sdk.options.PipelineOptionsFactory;
+import org.apache.beam.sdk.transforms.DoFn;
+import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
+import org.apache.beam.sdk.transforms.windowing.GlobalWindow;
+import org.apache.beam.sdk.util.SerializableUtils;
+import org.apache.beam.sdk.util.WindowedValue;
+import org.apache.beam.sdk.values.TupleTag;
+import org.apache.beam.sdk.values.WindowingStrategy;
+import org.hamcrest.collection.IsMapContaining;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+/** Tests for {@link FnApiDoFnRunner}. */
+@RunWith(JUnit4.class)
+public class FnApiDoFnRunnerTest {
+
+  private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
+  private static final Coder<WindowedValue<String>> STRING_CODER =
+      WindowedValue.getFullCoder(StringUtf8Coder.of(), GlobalWindow.Coder.INSTANCE);
+  private static final String STRING_CODER_SPEC_ID = "999L";
+  private static final RunnerApi.Coder STRING_CODER_SPEC;
+
+  static {
+    try {
+      STRING_CODER_SPEC = RunnerApi.Coder.newBuilder()
+          .setSpec(RunnerApi.SdkFunctionSpec.newBuilder()
+              .setSpec(RunnerApi.FunctionSpec.newBuilder()
+                  .setParameter(Any.pack(BytesValue.newBuilder().setValue(ByteString.copyFrom(
+                      OBJECT_MAPPER.writeValueAsBytes(CloudObjects.asCloudObject(STRING_CODER))))
+                      .build())))
+              .build())
+          .build();
+    } catch (IOException e) {
+      throw new ExceptionInInitializerError(e);
+    }
+  }
+
+  private static class TestDoFn extends DoFn<String, String> {
+    private static final TupleTag<String> mainOutput = new TupleTag<>("mainOutput");
+    private static final TupleTag<String> additionalOutput = new TupleTag<>("output");
+
+    private BoundedWindow window;
+
+    @ProcessElement
+    public void processElement(ProcessContext context, BoundedWindow window) {
+      context.output("MainOutput" + context.element());
+      context.output(additionalOutput, "AdditionalOutput" + context.element());
+      this.window = window;
+    }
+
+    @FinishBundle
+    public void finishBundle(FinishBundleContext context) {
+      if (window != null) {
+        context.output("FinishBundle", window.maxTimestamp(), window);
+        window = null;
+      }
+    }
+  }
+
+  /**
+   * Create a {@link DoFn} that has 2 inputs (inputATarget, inputBTarget) and 2 outputs
+   * (mainOutputTarget, additionalOutputTarget). Validate that inputs are fed to the {@link DoFn}
+   * and that outputs are directed to the correct consumers.
+   */
+  @Test
+  public void testCreatingAndProcessingDoFn() throws Exception {
+    Map<String, Message> fnApiRegistry = ImmutableMap.of(STRING_CODER_SPEC_ID, STRING_CODER_SPEC);
+    String pTransformId = "pTransformId";
+    String mainOutputId = "101";
+    String additionalOutputId = "102";
+
+    DoFnInfo<?, ?> doFnInfo = DoFnInfo.forFn(
+        new TestDoFn(),
+        WindowingStrategy.globalDefault(),
+        ImmutableList.of(),
+        StringUtf8Coder.of(),
+        Long.parseLong(mainOutputId),
+        ImmutableMap.of(
+            Long.parseLong(mainOutputId), TestDoFn.mainOutput,
+            Long.parseLong(additionalOutputId), TestDoFn.additionalOutput));
+    RunnerApi.FunctionSpec functionSpec = RunnerApi.FunctionSpec.newBuilder()
+        .setUrn(ParDoTranslation.CUSTOM_JAVA_DO_FN_URN)
+        .setParameter(Any.pack(BytesValue.newBuilder()
+            .setValue(ByteString.copyFrom(SerializableUtils.serializeToByteArray(doFnInfo)))
+            .build()))
+        .build();
+    RunnerApi.PTransform pTransform = RunnerApi.PTransform.newBuilder()
+        .setSpec(functionSpec)
+        .putInputs("inputA", "inputATarget")
+        .putInputs("inputB", "inputBTarget")
+        .putOutputs(mainOutputId, "mainOutputTarget")
+        .putOutputs(additionalOutputId, "additionalOutputTarget")
+        .build();
+
+    List<WindowedValue<String>> mainOutputValues = new ArrayList<>();
+    List<WindowedValue<String>> additionalOutputValues = new ArrayList<>();
+    Multimap<String, ThrowingConsumer<WindowedValue<?>>> consumers = HashMultimap.create();
+    consumers.put("mainOutputTarget",
+        (ThrowingConsumer) (ThrowingConsumer<WindowedValue<String>>) mainOutputValues::add);
+    consumers.put("additionalOutputTarget",
+        (ThrowingConsumer) (ThrowingConsumer<WindowedValue<String>>) additionalOutputValues::add);
+    List<ThrowingRunnable> startFunctions = new ArrayList<>();
+    List<ThrowingRunnable> finishFunctions = new ArrayList<>();
+
+    new FnApiDoFnRunner.Factory<>().createRunnerForPTransform(
+        PipelineOptionsFactory.create(),
+        null /* beamFnDataClient */,
+        pTransformId,
+        pTransform,
+        Suppliers.ofInstance("57L")::get,
+        ImmutableMap.of(),
+        ImmutableMap.of(),
+        consumers,
+        startFunctions::add,
+        finishFunctions::add);
+
+    Iterables.getOnlyElement(startFunctions).run();
+    mainOutputValues.clear();
+
+    assertThat(consumers.keySet(), containsInAnyOrder(
+        "inputATarget", "inputBTarget", "mainOutputTarget", "additionalOutputTarget"));
+
+    Iterables.getOnlyElement(consumers.get("inputATarget")).accept(valueInGlobalWindow("A1"));
+    Iterables.getOnlyElement(consumers.get("inputATarget")).accept(valueInGlobalWindow("A2"));
+    Iterables.getOnlyElement(consumers.get("inputATarget")).accept(valueInGlobalWindow("B"));
+    assertThat(mainOutputValues, contains(
+        valueInGlobalWindow("MainOutputA1"),
+        valueInGlobalWindow("MainOutputA2"),
+        valueInGlobalWindow("MainOutputB")));
+    assertThat(additionalOutputValues, contains(
+        valueInGlobalWindow("AdditionalOutputA1"),
+        valueInGlobalWindow("AdditionalOutputA2"),
+        valueInGlobalWindow("AdditionalOutputB")));
+    mainOutputValues.clear();
+    additionalOutputValues.clear();
+
+    Iterables.getOnlyElement(finishFunctions).run();
+    assertThat(
+        mainOutputValues,
+        contains(
+            timestampedValueInGlobalWindow("FinishBundle", GlobalWindow.INSTANCE.maxTimestamp())));
+    mainOutputValues.clear();
+  }
+
+  @Test
+  public void testRegistration() {
+    for (Registrar registrar :
+        ServiceLoader.load(Registrar.class)) {
+      if (registrar instanceof FnApiDoFnRunner.Registrar) {
+        assertThat(registrar.getPTransformRunnerFactories(),
+            IsMapContaining.hasKey(ParDoTranslation.CUSTOM_JAVA_DO_FN_URN));
+        return;
+      }
+    }
+    fail("Expected registrar not found.");
+  }
+}

http://git-wip-us.apache.org/repos/asf/beam/blob/f1b4700f/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/control/ProcessBundleHandlerTest.java
----------------------------------------------------------------------
diff --git a/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/control/ProcessBundleHandlerTest.java b/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/control/ProcessBundleHandlerTest.java
index a616b2c..0a94b5b 100644
--- a/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/control/ProcessBundleHandlerTest.java
+++ b/sdks/java/harness/src/test/java/org/apache/beam/fn/harness/control/ProcessBundleHandlerTest.java
@@ -31,11 +31,11 @@ import java.util.List;
 import java.util.Map;
 import java.util.function.Consumer;
 import java.util.function.Supplier;
+import org.apache.beam.fn.harness.PTransformRunnerFactory;
 import org.apache.beam.fn.harness.data.BeamFnDataClient;
 import org.apache.beam.fn.harness.fn.ThrowingConsumer;
 import org.apache.beam.fn.harness.fn.ThrowingRunnable;
 import org.apache.beam.fn.v1.BeamFnApi;
-import org.apache.beam.runners.core.PTransformRunnerFactory;
 import org.apache.beam.sdk.common.runner.v1.RunnerApi;
 import org.apache.beam.sdk.options.PipelineOptions;
 import org.apache.beam.sdk.options.PipelineOptionsFactory;

http://git-wip-us.apache.org/repos/asf/beam/blob/f1b4700f/sdks/java/harness/src/test/java/org/apache/beam/runners/core/BeamFnDataReadRunnerTest.java
----------------------------------------------------------------------
diff --git a/sdks/java/harness/src/test/java/org/apache/beam/runners/core/BeamFnDataReadRunnerTest.java b/sdks/java/harness/src/test/java/org/apache/beam/runners/core/BeamFnDataReadRunnerTest.java
deleted file mode 100644
index d6a476e..0000000
--- a/sdks/java/harness/src/test/java/org/apache/beam/runners/core/BeamFnDataReadRunnerTest.java
+++ /dev/null
@@ -1,281 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.beam.runners.core;
-
-import static org.apache.beam.sdk.util.WindowedValue.valueInGlobalWindow;
-import static org.hamcrest.Matchers.contains;
-import static org.hamcrest.Matchers.containsInAnyOrder;
-import static org.junit.Assert.assertThat;
-import static org.junit.Assert.fail;
-import static org.mockito.Matchers.any;
-import static org.mockito.Matchers.eq;
-import static org.mockito.Mockito.verify;
-import static org.mockito.Mockito.verifyNoMoreInteractions;
-import static org.mockito.Mockito.verifyZeroInteractions;
-import static org.mockito.Mockito.when;
-
-import com.fasterxml.jackson.databind.ObjectMapper;
-import com.google.common.base.Suppliers;
-import com.google.common.collect.HashMultimap;
-import com.google.common.collect.ImmutableList;
-import com.google.common.collect.ImmutableMap;
-import com.google.common.collect.Iterables;
-import com.google.common.collect.Multimap;
-import com.google.common.util.concurrent.Uninterruptibles;
-import com.google.protobuf.Any;
-import com.google.protobuf.ByteString;
-import com.google.protobuf.BytesValue;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.ServiceLoader;
-import java.util.concurrent.CompletableFuture;
-import java.util.concurrent.Executors;
-import java.util.concurrent.TimeUnit;
-import java.util.concurrent.atomic.AtomicReference;
-import org.apache.beam.fn.harness.data.BeamFnDataClient;
-import org.apache.beam.fn.harness.fn.ThrowingConsumer;
-import org.apache.beam.fn.harness.fn.ThrowingRunnable;
-import org.apache.beam.fn.harness.test.TestExecutors;
-import org.apache.beam.fn.harness.test.TestExecutors.TestExecutorService;
-import org.apache.beam.fn.v1.BeamFnApi;
-import org.apache.beam.runners.core.PTransformRunnerFactory.Registrar;
-import org.apache.beam.runners.dataflow.util.CloudObjects;
-import org.apache.beam.sdk.coders.Coder;
-import org.apache.beam.sdk.coders.StringUtf8Coder;
-import org.apache.beam.sdk.common.runner.v1.RunnerApi;
-import org.apache.beam.sdk.options.PipelineOptionsFactory;
-import org.apache.beam.sdk.transforms.windowing.GlobalWindow;
-import org.apache.beam.sdk.util.WindowedValue;
-import org.apache.beam.sdk.values.KV;
-import org.hamcrest.collection.IsMapContaining;
-import org.junit.Before;
-import org.junit.Rule;
-import org.junit.Test;
-import org.junit.runner.RunWith;
-import org.junit.runners.JUnit4;
-import org.mockito.ArgumentCaptor;
-import org.mockito.Captor;
-import org.mockito.Mock;
-import org.mockito.MockitoAnnotations;
-
-/** Tests for {@link BeamFnDataReadRunner}. */
-@RunWith(JUnit4.class)
-public class BeamFnDataReadRunnerTest {
-
-  private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
-  private static final BeamFnApi.RemoteGrpcPort PORT_SPEC = BeamFnApi.RemoteGrpcPort.newBuilder()
-      .setApiServiceDescriptor(BeamFnApi.ApiServiceDescriptor.getDefaultInstance()).build();
-  private static final RunnerApi.FunctionSpec FUNCTION_SPEC = RunnerApi.FunctionSpec.newBuilder()
-      .setParameter(Any.pack(PORT_SPEC)).build();
-  private static final Coder<WindowedValue<String>> CODER =
-      WindowedValue.getFullCoder(StringUtf8Coder.of(), GlobalWindow.Coder.INSTANCE);
-  private static final String CODER_SPEC_ID = "string-coder-id";
-  private static final RunnerApi.Coder CODER_SPEC;
-  private static final String URN = "urn:org.apache.beam:source:runner:0.1";
-
-  static {
-    try {
-      CODER_SPEC = RunnerApi.Coder.newBuilder().setSpec(
-          RunnerApi.SdkFunctionSpec.newBuilder().setSpec(
-              RunnerApi.FunctionSpec.newBuilder().setParameter(
-                  Any.pack(BytesValue.newBuilder().setValue(ByteString.copyFrom(
-                      OBJECT_MAPPER.writeValueAsBytes(CloudObjects.asCloudObject(CODER))))
-                      .build()))
-                  .build())
-              .build())
-          .build();
-    } catch (IOException e) {
-      throw new ExceptionInInitializerError(e);
-    }
-  }
-  private static final BeamFnApi.Target INPUT_TARGET = BeamFnApi.Target.newBuilder()
-      .setPrimitiveTransformReference("1")
-      .setName("out")
-      .build();
-
-  @Rule public TestExecutorService executor = TestExecutors.from(Executors::newCachedThreadPool);
-  @Mock private BeamFnDataClient mockBeamFnDataClient;
-  @Captor private ArgumentCaptor<ThrowingConsumer<WindowedValue<String>>> consumerCaptor;
-
-  @Before
-  public void setUp() {
-    MockitoAnnotations.initMocks(this);
-  }
-
-  @Test
-  public void testCreatingAndProcessingBeamFnDataReadRunner() throws Exception {
-    String bundleId = "57";
-    String outputId = "101";
-
-    List<WindowedValue<String>> outputValues = new ArrayList<>();
-
-    Multimap<String, ThrowingConsumer<WindowedValue<?>>> consumers = HashMultimap.create();
-    consumers.put("outputPC",
-        (ThrowingConsumer) (ThrowingConsumer<WindowedValue<String>>) outputValues::add);
-    List<ThrowingRunnable> startFunctions = new ArrayList<>();
-    List<ThrowingRunnable> finishFunctions = new ArrayList<>();
-
-    RunnerApi.FunctionSpec functionSpec = RunnerApi.FunctionSpec.newBuilder()
-        .setUrn("urn:org.apache.beam:source:runner:0.1")
-        .setParameter(Any.pack(PORT_SPEC))
-        .build();
-
-    RunnerApi.PTransform pTransform = RunnerApi.PTransform.newBuilder()
-        .setSpec(functionSpec)
-        .putOutputs(outputId, "outputPC")
-        .build();
-
-    new BeamFnDataReadRunner.Factory<String>().createRunnerForPTransform(
-        PipelineOptionsFactory.create(),
-        mockBeamFnDataClient,
-        "pTransformId",
-        pTransform,
-        Suppliers.ofInstance(bundleId)::get,
-        ImmutableMap.of("outputPC",
-            RunnerApi.PCollection.newBuilder().setCoderId(CODER_SPEC_ID).build()),
-        ImmutableMap.of(CODER_SPEC_ID, CODER_SPEC),
-        consumers,
-        startFunctions::add,
-        finishFunctions::add);
-
-    verifyZeroInteractions(mockBeamFnDataClient);
-
-    CompletableFuture<Void> completionFuture = new CompletableFuture<>();
-    when(mockBeamFnDataClient.forInboundConsumer(any(), any(), any(), any()))
-        .thenReturn(completionFuture);
-    Iterables.getOnlyElement(startFunctions).run();
-    verify(mockBeamFnDataClient).forInboundConsumer(
-        eq(PORT_SPEC.getApiServiceDescriptor()),
-        eq(KV.of(bundleId, BeamFnApi.Target.newBuilder()
-            .setPrimitiveTransformReference("pTransformId")
-            .setName(outputId)
-            .build())),
-        eq(CODER),
-        consumerCaptor.capture());
-
-    consumerCaptor.getValue().accept(valueInGlobalWindow("TestValue"));
-    assertThat(outputValues, contains(valueInGlobalWindow("TestValue")));
-    outputValues.clear();
-
-    assertThat(consumers.keySet(), containsInAnyOrder("outputPC"));
-
-    completionFuture.complete(null);
-    Iterables.getOnlyElement(finishFunctions).run();
-
-    verifyNoMoreInteractions(mockBeamFnDataClient);
-  }
-
-  @Test
-  public void testReuseForMultipleBundles() throws Exception {
-    CompletableFuture<Void> bundle1Future = new CompletableFuture<>();
-    CompletableFuture<Void> bundle2Future = new CompletableFuture<>();
-    when(mockBeamFnDataClient.forInboundConsumer(
-        any(),
-        any(),
-        any(),
-        any())).thenReturn(bundle1Future).thenReturn(bundle2Future);
-    List<WindowedValue<String>> valuesA = new ArrayList<>();
-    List<WindowedValue<String>> valuesB = new ArrayList<>();
-
-    AtomicReference<String> bundleId = new AtomicReference<>("0");
-    BeamFnDataReadRunner<String> readRunner = new BeamFnDataReadRunner<>(
-        FUNCTION_SPEC,
-        bundleId::get,
-        INPUT_TARGET,
-        CODER_SPEC,
-        mockBeamFnDataClient,
-        ImmutableList.of(valuesA::add, valuesB::add));
-
-    // Process for bundle id 0
-    readRunner.registerInputLocation();
-
-    verify(mockBeamFnDataClient).forInboundConsumer(
-        eq(PORT_SPEC.getApiServiceDescriptor()),
-        eq(KV.of(bundleId.get(), INPUT_TARGET)),
-        eq(CODER),
-        consumerCaptor.capture());
-
-    executor.submit(new Runnable() {
-      @Override
-      public void run() {
-        // Sleep for some small amount of time simulating the parent blocking
-        Uninterruptibles.sleepUninterruptibly(100, TimeUnit.MILLISECONDS);
-        try {
-          consumerCaptor.getValue().accept(valueInGlobalWindow("ABC"));
-          consumerCaptor.getValue().accept(valueInGlobalWindow("DEF"));
-        } catch (Exception e) {
-          bundle1Future.completeExceptionally(e);
-        } finally {
-          bundle1Future.complete(null);
-        }
-      }
-    });
-
-    readRunner.blockTillReadFinishes();
-    assertThat(valuesA, contains(valueInGlobalWindow("ABC"), valueInGlobalWindow("DEF")));
-    assertThat(valuesB, contains(valueInGlobalWindow("ABC"), valueInGlobalWindow("DEF")));
-
-    // Process for bundle id 1
-    bundleId.set("1");
-    valuesA.clear();
-    valuesB.clear();
-    readRunner.registerInputLocation();
-
-    verify(mockBeamFnDataClient).forInboundConsumer(
-        eq(PORT_SPEC.getApiServiceDescriptor()),
-        eq(KV.of(bundleId.get(), INPUT_TARGET)),
-        eq(CODER),
-        consumerCaptor.capture());
-
-    executor.submit(new Runnable() {
-      @Override
-      public void run() {
-        // Sleep for some small amount of time simulating the parent blocking
-        Uninterruptibles.sleepUninterruptibly(100, TimeUnit.MILLISECONDS);
-        try {
-          consumerCaptor.getValue().accept(valueInGlobalWindow("GHI"));
-          consumerCaptor.getValue().accept(valueInGlobalWindow("JKL"));
-        } catch (Exception e) {
-          bundle2Future.completeExceptionally(e);
-        } finally {
-          bundle2Future.complete(null);
-        }
-      }
-    });
-
-    readRunner.blockTillReadFinishes();
-    assertThat(valuesA, contains(valueInGlobalWindow("GHI"), valueInGlobalWindow("JKL")));
-    assertThat(valuesB, contains(valueInGlobalWindow("GHI"), valueInGlobalWindow("JKL")));
-
-    verifyNoMoreInteractions(mockBeamFnDataClient);
-  }
-
-  @Test
-  public void testRegistration() {
-    for (Registrar registrar :
-        ServiceLoader.load(Registrar.class)) {
-      if (registrar instanceof BeamFnDataReadRunner.Registrar) {
-        assertThat(registrar.getPTransformRunnerFactories(), IsMapContaining.hasKey(URN));
-        return;
-      }
-    }
-    fail("Expected registrar not found.");
-  }
-}


[43/50] [abbrv] beam git commit: Increase the gRPC message size to max value

Posted by jb...@apache.org.
Increase the gRPC message size to max value

This closes #3586


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/eb0850ef
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/eb0850ef
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/eb0850ef

Branch: refs/heads/DSL_SQL
Commit: eb0850ef889891eac7a2c96d744aa0ef18afb8f8
Parents: 4d1db22 b424aa0
Author: Luke Cwik <lc...@google.com>
Authored: Wed Jul 19 13:18:09 2017 -0700
Committer: Luke Cwik <lc...@google.com>
Committed: Wed Jul 19 13:18:09 2017 -0700

----------------------------------------------------------------------
 .../beam/fn/harness/channel/ManagedChannelFactory.java       | 6 ++++++
 sdks/python/apache_beam/runners/worker/data_plane.py         | 8 +++++++-
 2 files changed, 13 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
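 
For illustration, the idea behind the Java half of this change can be sketched with the public
grpc-java API. This is a minimal, hypothetical example (the class and helper names are invented
here); the actual change lives in Beam's ManagedChannelFactory and in the Python data_plane.py:

import io.grpc.ManagedChannel;
import io.grpc.ManagedChannelBuilder;

/** Illustrative sketch: lift the default 4 MiB inbound gRPC message cap to the maximum. */
public final class MaxMessageSizeChannelSketch {

  // Hypothetical helper, not the Beam factory method.
  public static ManagedChannel createChannel(String target) {
    return ManagedChannelBuilder.forTarget(target)
        // gRPC rejects inbound messages larger than the configured limit, so raise it to the max.
        .maxInboundMessageSize(Integer.MAX_VALUE)
        .build();
  }

  private MaxMessageSizeChannelSketch() {}
}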



[08/50] [abbrv] beam git commit: Closes #3520

Posted by jb...@apache.org.
Closes #3520


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/532256e8
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/532256e8
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/532256e8

Branch: refs/heads/DSL_SQL
Commit: 532256e8811b790fdf25fb4e11b7c2b89383761a
Parents: 7e4719c 7257507
Author: Robert Bradshaw <ro...@google.com>
Authored: Mon Jul 17 14:33:01 2017 -0700
Committer: Robert Bradshaw <ro...@google.com>
Committed: Mon Jul 17 14:33:01 2017 -0700

----------------------------------------------------------------------
 .../runners/dataflow/dataflow_runner.py           | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)
----------------------------------------------------------------------



[03/50] [abbrv] beam git commit: datastoreio: retry on socket errors

Posted by jb...@apache.org.
datastoreio: retry on socket errors


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/095e7916
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/095e7916
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/095e7916

Branch: refs/heads/DSL_SQL
Commit: 095e7916d23e49859acb42b9316ddf4222fbc9d9
Parents: ae0de1b
Author: Vikas Kedigehalli <vi...@google.com>
Authored: Thu Jul 13 10:29:23 2017 -0700
Committer: Ahmet Altay <al...@google.com>
Committed: Mon Jul 17 09:16:06 2017 -0700

----------------------------------------------------------------------
 .../apache_beam/io/gcp/datastore/v1/helper.py   |  8 +++++++
 .../io/gcp/datastore/v1/helper_test.py          | 22 ++++++++++++++++----
 2 files changed, 26 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/095e7916/sdks/python/apache_beam/io/gcp/datastore/v1/helper.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/io/gcp/datastore/v1/helper.py b/sdks/python/apache_beam/io/gcp/datastore/v1/helper.py
index f977536..996dace 100644
--- a/sdks/python/apache_beam/io/gcp/datastore/v1/helper.py
+++ b/sdks/python/apache_beam/io/gcp/datastore/v1/helper.py
@@ -19,6 +19,9 @@
 
 For internal use only; no backwards-compatibility guarantees.
 """
+
+import errno
+from socket import error as SocketError
 import sys
 
 # Protect against environments where datastore library is not available.
@@ -130,6 +133,11 @@ def retry_on_rpc_error(exception):
             err_code == code_pb2.UNAVAILABLE or
             err_code == code_pb2.UNKNOWN or
             err_code == code_pb2.INTERNAL)
+
+  if isinstance(exception, SocketError):
+    return (exception.errno == errno.ECONNRESET or
+            exception.errno == errno.ETIMEDOUT)
+
   return False
 
 

http://git-wip-us.apache.org/repos/asf/beam/blob/095e7916/sdks/python/apache_beam/io/gcp/datastore/v1/helper_test.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/io/gcp/datastore/v1/helper_test.py b/sdks/python/apache_beam/io/gcp/datastore/v1/helper_test.py
index a804c09..a8b1bb1 100644
--- a/sdks/python/apache_beam/io/gcp/datastore/v1/helper_test.py
+++ b/sdks/python/apache_beam/io/gcp/datastore/v1/helper_test.py
@@ -16,6 +16,9 @@
 #
 
 """Tests for datastore helper."""
+import errno
+import random
+from socket import error as SocketError
 import sys
 import unittest
 
@@ -49,6 +52,16 @@ class HelperTest(unittest.TestCase):
     self._query = query_pb2.Query()
     self._query.kind.add().name = 'dummy_kind'
     patch_retry(self, helper)
+    self._retriable_errors = [
+        RPCError("dummy", code_pb2.INTERNAL, "failed"),
+        SocketError(errno.ECONNRESET, "Connection Reset"),
+        SocketError(errno.ETIMEDOUT, "Timed out")
+    ]
+
+    self._non_retriable_errors = [
+        RPCError("dummy", code_pb2.UNAUTHENTICATED, "failed"),
+        SocketError(errno.EADDRNOTAVAIL, "Address not available")
+    ]
 
   def permanent_retriable_datastore_failure(self, req):
     raise RPCError("dummy", code_pb2.UNAVAILABLE, "failed")
@@ -56,12 +69,12 @@ class HelperTest(unittest.TestCase):
   def transient_retriable_datastore_failure(self, req):
     if self._transient_fail_count:
       self._transient_fail_count -= 1
-      raise RPCError("dummy", code_pb2.INTERNAL, "failed")
+      raise random.choice(self._retriable_errors)
     else:
       return datastore_pb2.RunQueryResponse()
 
   def non_retriable_datastore_failure(self, req):
-    raise RPCError("dummy", code_pb2.UNAUTHENTICATED, "failed")
+    raise random.choice(self._non_retriable_errors)
 
   def test_query_iterator(self):
     self._mock_datastore.run_query.side_effect = (
@@ -76,7 +89,7 @@ class HelperTest(unittest.TestCase):
         self.transient_retriable_datastore_failure)
     query_iterator = helper.QueryIterator("project", None, self._query,
                                           self._mock_datastore)
-    fail_count = 2
+    fail_count = 5
     self._transient_fail_count = fail_count
     for _ in query_iterator:
       pass
@@ -89,7 +102,8 @@ class HelperTest(unittest.TestCase):
         self.non_retriable_datastore_failure)
     query_iterator = helper.QueryIterator("project", None, self._query,
                                           self._mock_datastore)
-    self.assertRaises(RPCError, iter(query_iterator).next)
+    self.assertRaises(tuple(map(type, self._non_retriable_errors)),
+                      iter(query_iterator).next)
     self.assertEqual(1, len(self._mock_datastore.run_query.call_args_list))
 
   def test_query_iterator_with_single_batch(self):
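 
To make the retry behavior concrete in the Java SDK's terms, here is a minimal, hypothetical
sketch of the same idea -- retrying a call when a transient socket-level failure is raised. It is
not Beam code and the class and method names are invented; the authoritative logic is the
retry_on_rpc_error predicate shown in the diff above:

import java.io.IOException;
import java.net.SocketException;
import java.net.SocketTimeoutException;
import java.util.concurrent.Callable;

/** Illustrative sketch of retrying a call on transient socket failures (not Beam code). */
public final class RetryOnSocketErrorSketch {

  /** Invokes the callable, retrying when a connection reset or timeout style error is thrown. */
  public static <T> T callWithRetries(Callable<T> callable, int maxAttempts) throws Exception {
    if (maxAttempts < 1) {
      throw new IllegalArgumentException("maxAttempts must be at least 1");
    }
    IOException lastError = null;
    for (int attempt = 1; attempt <= maxAttempts; attempt++) {
      try {
        return callable.call();
      } catch (SocketException | SocketTimeoutException e) {
        // Mirror the predicate above: connection resets and timeouts are treated as transient.
        lastError = e;
        Thread.sleep(100L * attempt); // crude linear backoff, purely for illustration
      }
    }
    throw lastError;
  }

  private RetryOnSocketErrorSketch() {}
}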


[34/50] [abbrv] beam git commit: [BEAM-2630] TestPipeline: construct job/app names based on Description in junit TestRule.

Posted by jb...@apache.org.
[BEAM-2630] TestPipeline: construct job/app names based on Description in junit TestRule.


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/bdf5bd6e
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/bdf5bd6e
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/bdf5bd6e

Branch: refs/heads/DSL_SQL
Commit: bdf5bd6e50fa9f44ad7560714cd41ac3f346d124
Parents: 0d927ef
Author: Pei He <pe...@apache.org>
Authored: Mon Jul 17 23:34:27 2017 +0800
Committer: Pei He <pe...@apache.org>
Committed: Wed Jul 19 11:30:12 2017 +0800

----------------------------------------------------------------------
 .../apache/beam/sdk/testing/TestPipeline.java   | 63 ++++----------------
 .../beam/sdk/testing/TestPipelineTest.java      | 38 +-----------
 2 files changed, 13 insertions(+), 88 deletions(-)
----------------------------------------------------------------------
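 
The change derives the application name from the JUnit Description supplied to the TestRule
instead of walking the stack trace. A minimal, hypothetical sketch of that naming scheme follows
(the helper class below is invented for illustration; the real code is in the TestPipeline diff
after this note):

import org.junit.runner.Description;

/** Illustrative sketch: build an application name from a JUnit Description (not the Beam code). */
public final class AppNameFromDescriptionSketch {

  /** Returns "SimpleClassName-methodName", falling back to a default when either is unavailable. */
  public static String appNameOf(Description description) {
    String methodName = description.getMethodName();
    String className = description.getClassName();
    if (className == null || methodName == null) {
      return "UnitTest";
    }
    // Keep only the simple class name so the app name stays short.
    if (className.contains(".")) {
      className = className.substring(className.lastIndexOf('.') + 1);
    }
    return className + "-" + methodName;
  }

  private AppNameFromDescriptionSketch() {}
}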


http://git-wip-us.apache.org/repos/asf/beam/blob/bdf5bd6e/sdks/java/core/src/main/java/org/apache/beam/sdk/testing/TestPipeline.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/testing/TestPipeline.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/testing/TestPipeline.java
index 9206e04..34f1c83 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/testing/TestPipeline.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/testing/TestPipeline.java
@@ -31,9 +31,7 @@ import com.google.common.base.Predicate;
 import com.google.common.base.Predicates;
 import com.google.common.base.Strings;
 import com.google.common.collect.FluentIterable;
-import com.google.common.collect.Iterators;
 import java.io.IOException;
-import java.lang.reflect.Method;
 import java.util.ArrayList;
 import java.util.Iterator;
 import java.util.LinkedList;
@@ -307,6 +305,7 @@ public class TestPipeline extends Pipeline implements TestRule {
 
       @Override
       public void evaluate() throws Throwable {
+        options.as(ApplicationNameOptions.class).setAppName(getAppName(description));
 
         setDeducedEnforcementLevel();
 
@@ -402,7 +401,6 @@ public class TestPipeline extends Pipeline implements TestRule {
               MAPPER.readValue(beamTestPipelineOptions, String[].class))
               .as(TestPipelineOptions.class);
 
-      options.as(ApplicationNameOptions.class).setAppName(getAppName());
       // If no options were specified, set some reasonable defaults
       if (Strings.isNullOrEmpty(beamTestPipelineOptions)) {
         // If there are no provided options, check to see if a dummy runner should be used.
@@ -450,56 +448,17 @@ public class TestPipeline extends Pipeline implements TestRule {
     }
   }
 
-  /** Returns the class + method name of the test, or a default name. */
-  private static String getAppName() {
-    Optional<StackTraceElement> stackTraceElement = findCallersStackTrace();
-    if (stackTraceElement.isPresent()) {
-      String methodName = stackTraceElement.get().getMethodName();
-      String className = stackTraceElement.get().getClassName();
-      if (className.contains(".")) {
-        className = className.substring(className.lastIndexOf(".") + 1);
-      }
-      return className + "-" + methodName;
-    }
-    return "UnitTest";
-  }
-
-  /** Returns the {@link StackTraceElement} of the calling class. */
-  private static Optional<StackTraceElement> findCallersStackTrace() {
-    Iterator<StackTraceElement> elements =
-        Iterators.forArray(Thread.currentThread().getStackTrace());
-    // First find the TestPipeline class in the stack trace.
-    while (elements.hasNext()) {
-      StackTraceElement next = elements.next();
-      if (TestPipeline.class.getName().equals(next.getClassName())) {
-        break;
-      }
-    }
-    // Then find the first instance after that is not the TestPipeline
-    Optional<StackTraceElement> firstInstanceAfterTestPipeline = Optional.absent();
-    while (elements.hasNext()) {
-      StackTraceElement next = elements.next();
-      if (!TestPipeline.class.getName().equals(next.getClassName())) {
-        if (!firstInstanceAfterTestPipeline.isPresent()) {
-          firstInstanceAfterTestPipeline = Optional.of(next);
-        }
-        try {
-          Class<?> nextClass = Class.forName(next.getClassName());
-          for (Method method : nextClass.getMethods()) {
-            if (method.getName().equals(next.getMethodName())) {
-              if (method.isAnnotationPresent(org.junit.Test.class)) {
-                return Optional.of(next);
-              } else if (method.isAnnotationPresent(org.junit.Before.class)) {
-                break;
-              }
-            }
-          }
-        } catch (Throwable t) {
-          break;
-        }
-      }
+  /** Returns the class + method name of the test. */
+  private String getAppName(Description description) {
+    String methodName = description.getMethodName();
+    Class<?> testClass = description.getTestClass();
+    if (testClass.isMemberClass()) {
+      return String.format(
+          "%s$%s-%s",
+          testClass.getEnclosingClass().getSimpleName(), testClass.getSimpleName(), methodName);
+    } else {
+      return String.format("%s-%s", testClass.getSimpleName(), methodName);
     }
-    return firstInstanceAfterTestPipeline;
   }
 
   /**

http://git-wip-us.apache.org/repos/asf/beam/blob/bdf5bd6e/sdks/java/core/src/test/java/org/apache/beam/sdk/testing/TestPipelineTest.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/testing/TestPipelineTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/testing/TestPipelineTest.java
index 05abb59..664f2f4 100644
--- a/sdks/java/core/src/test/java/org/apache/beam/sdk/testing/TestPipelineTest.java
+++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/testing/TestPipelineTest.java
@@ -100,7 +100,7 @@ public class TestPipelineTest implements Serializable {
 
     @Test
     public void testCreationOfPipelineOptionsFromReallyVerboselyNamedTestCase() throws Exception {
-      PipelineOptions options = TestPipeline.testingPipelineOptions();
+      PipelineOptions options = pipeline.getOptions();
       assertThat(
           options.as(ApplicationNameOptions.class).getAppName(),
           startsWith(
@@ -112,23 +112,7 @@ public class TestPipelineTest implements Serializable {
     public void testToString() {
       assertEquals(
           "TestPipeline#TestPipelineTest$TestPipelineCreationTest-testToString",
-          TestPipeline.create().toString());
-    }
-
-    @Test
-    public void testToStringNestedMethod() {
-      TestPipeline p = nestedMethod();
-
-      assertEquals(
-          "TestPipeline#TestPipelineTest$TestPipelineCreationTest-testToStringNestedMethod",
-          p.toString());
-      assertEquals(
-          "TestPipelineTest$TestPipelineCreationTest-testToStringNestedMethod",
-          p.getOptions().as(ApplicationNameOptions.class).getAppName());
-    }
-
-    private TestPipeline nestedMethod() {
-      return TestPipeline.create();
+          pipeline.toString());
     }
 
     @Test
@@ -144,24 +128,6 @@ public class TestPipelineTest implements Serializable {
     }
 
     @Test
-    public void testToStringNestedClassMethod() {
-      TestPipeline p = new NestedTester().p();
-
-      assertEquals(
-          "TestPipeline#TestPipelineTest$TestPipelineCreationTest-testToStringNestedClassMethod",
-          p.toString());
-      assertEquals(
-          "TestPipelineTest$TestPipelineCreationTest-testToStringNestedClassMethod",
-          p.getOptions().as(ApplicationNameOptions.class).getAppName());
-    }
-
-    private static class NestedTester {
-      public TestPipeline p() {
-        return TestPipeline.create();
-      }
-    }
-
-    @Test
     public void testRunWithDummyEnvironmentVariableFails() {
       System.getProperties()
           .setProperty(TestPipeline.PROPERTY_USE_DEFAULT_DUMMY_RUNNER, Boolean.toString(true));


[44/50] [abbrv] beam git commit: [BEAM-2636] Make sure we only override the correct class

Posted by jb...@apache.org.
[BEAM-2636] Make sure we only override the correct class


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/d128c3b3
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/d128c3b3
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/d128c3b3

Branch: refs/heads/DSL_SQL
Commit: d128c3b378a58b0c2c31c2d30fd29e211e118324
Parents: eb0850e
Author: Sourabh Bajaj <so...@google.com>
Authored: Wed Jul 19 10:08:14 2017 -0700
Committer: Ahmet Altay <al...@google.com>
Committed: Wed Jul 19 14:07:54 2017 -0700

----------------------------------------------------------------------
 sdks/python/apache_beam/runners/dataflow/dataflow_runner.py | 3 +++
 1 file changed, 3 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/d128c3b3/sdks/python/apache_beam/runners/dataflow/dataflow_runner.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/runners/dataflow/dataflow_runner.py b/sdks/python/apache_beam/runners/dataflow/dataflow_runner.py
index 89c18d4..aec7d00 100644
--- a/sdks/python/apache_beam/runners/dataflow/dataflow_runner.py
+++ b/sdks/python/apache_beam/runners/dataflow/dataflow_runner.py
@@ -418,6 +418,9 @@ class DataflowRunner(PipelineRunner):
           PropertyNames.OUTPUT_NAME: PropertyNames.OUT}])
 
   def apply_WriteToBigQuery(self, transform, pcoll):
+    # Make sure this is the WriteToBigQuery class that we expected
+    if not isinstance(transform, beam.io.WriteToBigQuery):
+      return self.apply_PTransform(transform, pcoll)
     standard_options = pcoll.pipeline._options.view_as(StandardOptions)
     if standard_options.streaming:
       if (transform.write_disposition ==


[29/50] [abbrv] beam git commit: Accept Region in Dataflow Monitoring Page URL

Posted by jb...@apache.org.
Accept Region in Dataflow Monitoring Page URL

Update the Google Cloud Dataflow front-end (monitoring console) URLs emitted by the
Dataflow Runners to regionalized paths.
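
A minimal sketch of the regionalized URL scheme, assuming only the JDK: the monitoring
page is addressed as
https://console.cloud.google.com/dataflow/jobsDetail/locations/<region>/jobs/<job>?project=<project>,
mirroring the MonitoringUtil.getJobMonitoringPageURL overload added in the diff below.
The class name MonitoringUrlSketch and the project/region/job values are placeholders
for illustration only.

    import java.io.UnsupportedEncodingException;
    import java.net.URLEncoder;

    /** Sketch: build the regionalized Dataflow monitoring console URL. */
    public class MonitoringUrlSketch {

      static String jobMonitoringPageUrl(String projectName, String regionId, String jobId) {
        try {
          // A project name is accepted in place of the project id; the console redirects.
          return String.format(
              "https://console.cloud.google.com/dataflow/jobsDetail/locations/%s/jobs/%s?project=%s",
              URLEncoder.encode(regionId, "UTF-8"),
              URLEncoder.encode(jobId, "UTF-8"),
              URLEncoder.encode(projectName, "UTF-8"));
        } catch (UnsupportedEncodingException e) {
          // UTF-8 is always available, so this should never happen.
          throw new AssertionError("UTF-8 encoding is not supported by the environment", e);
        }
      }

      public static void main(String[] args) {
        System.out.println(
            jobMonitoringPageUrl("some-project", "us-central1", "2017-07-18_00_00_00-1234"));
      }
    }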


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/111603a9
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/111603a9
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/111603a9

Branch: refs/heads/DSL_SQL
Commit: 111603a9952f415fa1386046f7a2d3bde5b6532d
Parents: 2d5b6d7
Author: Robert Burke <ro...@frantil.com>
Authored: Tue Jun 27 15:41:56 2017 -0700
Committer: Thomas Groh <tg...@google.com>
Committed: Tue Jul 18 14:49:56 2017 -0700

----------------------------------------------------------------------
 .../beam/runners/dataflow/DataflowPipelineJob.java  | 14 ++++++++++++--
 .../beam/runners/dataflow/DataflowRunner.java       |  3 ++-
 .../beam/runners/dataflow/util/MonitoringUtil.java  | 16 +++++++++++++---
 .../dataflow/BatchStatefulParDoOverridesTest.java   |  1 +
 .../dataflow/DataflowPipelineTranslatorTest.java    |  1 +
 .../runners/dataflow/internal/apiclient.py          |  7 +++++--
 .../runners/dataflow/test_dataflow_runner.py        |  5 +++--
 7 files changed, 37 insertions(+), 10 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/111603a9/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/DataflowPipelineJob.java
----------------------------------------------------------------------
diff --git a/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/DataflowPipelineJob.java b/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/DataflowPipelineJob.java
index e30d426..e736373 100644
--- a/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/DataflowPipelineJob.java
+++ b/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/DataflowPipelineJob.java
@@ -169,6 +169,13 @@ public class DataflowPipelineJob implements PipelineResult {
   }
 
   /**
+   * Get the region this job exists in.
+   */
+  public String getRegion() {
+    return dataflowOptions.getRegion();
+  }
+
+  /**
    * Returns a new {@link DataflowPipelineJob} for the job that replaced this one, if applicable.
    *
    * @throws IllegalStateException if called before the job has terminated or if the job terminated
@@ -344,7 +351,9 @@ public class DataflowPipelineJob implements PipelineResult {
                   getJobId(),
                   getReplacedByJob().getJobId(),
                   MonitoringUtil.getJobMonitoringPageURL(
-                      getReplacedByJob().getProjectId(), getReplacedByJob().getJobId()));
+                      getReplacedByJob().getProjectId(),
+                      getRegion(),
+                      getReplacedByJob().getJobId()));
               break;
             default:
               LOG.info("Job {} failed with status {}.", getJobId(), state);
@@ -422,7 +431,8 @@ public class DataflowPipelineJob implements PipelineResult {
                 "Failed to cancel job in state %s, "
                     + "please go to the Developers Console to cancel it manually: %s",
                 state,
-                MonitoringUtil.getJobMonitoringPageURL(getProjectId(), getJobId()));
+                MonitoringUtil.getJobMonitoringPageURL(
+                    getProjectId(), getRegion(), getJobId()));
             LOG.warn(errorMsg);
             throw new IOException(errorMsg, e);
           }

http://git-wip-us.apache.org/repos/asf/beam/blob/111603a9/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/DataflowRunner.java
----------------------------------------------------------------------
diff --git a/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/DataflowRunner.java b/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/DataflowRunner.java
index 8935759..57a5ea5 100644
--- a/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/DataflowRunner.java
+++ b/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/DataflowRunner.java
@@ -679,7 +679,8 @@ public class DataflowRunner extends PipelineRunner<DataflowPipelineJob> {
     }
 
     LOG.info("To access the Dataflow monitoring console, please navigate to {}",
-        MonitoringUtil.getJobMonitoringPageURL(options.getProject(), jobResult.getId()));
+        MonitoringUtil.getJobMonitoringPageURL(
+          options.getProject(), options.getRegion(), jobResult.getId()));
     System.out.println("Submitted job: " + jobResult.getId());
 
     LOG.info("To cancel the job using the 'gcloud' tool, run:\n> {}",

http://git-wip-us.apache.org/repos/asf/beam/blob/111603a9/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/util/MonitoringUtil.java
----------------------------------------------------------------------
diff --git a/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/util/MonitoringUtil.java b/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/util/MonitoringUtil.java
index 759387c..780a979 100644
--- a/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/util/MonitoringUtil.java
+++ b/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/util/MonitoringUtil.java
@@ -180,14 +180,24 @@ public class MonitoringUtil {
     return allMessages;
   }
 
+  /**
+   * @deprecated this method defaults the region to "us-central1". Prefer using the overload with
+   * an explicit regionId parameter.
+   */
+  @Deprecated
   public static String getJobMonitoringPageURL(String projectName, String jobId) {
+    return getJobMonitoringPageURL(projectName, "us-central1", jobId);
+  }
+
+  public static String getJobMonitoringPageURL(String projectName, String regionId, String jobId) {
     try {
       // Project name is allowed in place of the project id: the user will be redirected to a URL
       // that has the project name replaced with project id.
       return String.format(
-          "https://console.developers.google.com/project/%s/dataflow/job/%s",
-          URLEncoder.encode(projectName, "UTF-8"),
-          URLEncoder.encode(jobId, "UTF-8"));
+          "https://console.cloud.google.com/dataflow/jobsDetail/locations/%s/jobs/%s?project=%s",
+          URLEncoder.encode(regionId, "UTF-8"),
+          URLEncoder.encode(jobId, "UTF-8"),
+          URLEncoder.encode(projectName, "UTF-8"));
     } catch (UnsupportedEncodingException e) {
       // Should never happen.
       throw new AssertionError("UTF-8 encoding is not supported by the environment", e);

http://git-wip-us.apache.org/repos/asf/beam/blob/111603a9/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/BatchStatefulParDoOverridesTest.java
----------------------------------------------------------------------
diff --git a/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/BatchStatefulParDoOverridesTest.java b/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/BatchStatefulParDoOverridesTest.java
index d2ab357..e62a8b8 100644
--- a/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/BatchStatefulParDoOverridesTest.java
+++ b/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/BatchStatefulParDoOverridesTest.java
@@ -161,6 +161,7 @@ public class BatchStatefulParDoOverridesTest implements Serializable {
     options.setGcpCredential(new TestCredential());
     options.setJobName("some-job-name");
     options.setProject("some-project");
+    options.setRegion("some-region");
     options.setTempLocation(GcsPath.fromComponents("somebucket", "some/path").toString());
     options.setFilesToStage(new LinkedList<String>());
     options.setGcsUtil(mockGcsUtil);

http://git-wip-us.apache.org/repos/asf/beam/blob/111603a9/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/DataflowPipelineTranslatorTest.java
----------------------------------------------------------------------
diff --git a/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/DataflowPipelineTranslatorTest.java b/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/DataflowPipelineTranslatorTest.java
index 43b2788..9a0bdf8 100644
--- a/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/DataflowPipelineTranslatorTest.java
+++ b/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/DataflowPipelineTranslatorTest.java
@@ -200,6 +200,7 @@ public class DataflowPipelineTranslatorTest implements Serializable {
     options.setGcpCredential(new TestCredential());
     options.setJobName("some-job-name");
     options.setProject("some-project");
+    options.setRegion("some-region");
     options.setTempLocation(GcsPath.fromComponents("somebucket", "some/path").toString());
     options.setFilesToStage(new LinkedList<String>());
     options.setDataflowClient(buildMockDataflow(new IsValidCreateRequest()));

http://git-wip-us.apache.org/repos/asf/beam/blob/111603a9/sdks/python/apache_beam/runners/dataflow/internal/apiclient.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/runners/dataflow/internal/apiclient.py b/sdks/python/apache_beam/runners/dataflow/internal/apiclient.py
index 33dfe19..dcaf74e 100644
--- a/sdks/python/apache_beam/runners/dataflow/internal/apiclient.py
+++ b/sdks/python/apache_beam/runners/dataflow/internal/apiclient.py
@@ -500,8 +500,11 @@ class DataflowApplicationClient(object):
     logging.info('Created job with id: [%s]', response.id)
     logging.info(
         'To access the Dataflow monitoring console, please navigate to '
-        'https://console.developers.google.com/project/%s/dataflow/job/%s',
-        self.google_cloud_options.project, response.id)
+        'https://console.cloud.google.com/dataflow/jobsDetail'
+        '/locations/%s/jobs/%s?project=%s',
+        self.google_cloud_options.region,
+        response.id,
+        self.google_cloud_options.project)
 
     return response
 

http://git-wip-us.apache.org/repos/asf/beam/blob/111603a9/sdks/python/apache_beam/runners/dataflow/test_dataflow_runner.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/runners/dataflow/test_dataflow_runner.py b/sdks/python/apache_beam/runners/dataflow/test_dataflow_runner.py
index b339882..96e6a66 100644
--- a/sdks/python/apache_beam/runners/dataflow/test_dataflow_runner.py
+++ b/sdks/python/apache_beam/runners/dataflow/test_dataflow_runner.py
@@ -38,12 +38,13 @@ class TestDataflowRunner(DataflowRunner):
     self.result = super(TestDataflowRunner, self).run(pipeline)
     if self.result.has_job:
       project = pipeline._options.view_as(GoogleCloudOptions).project
+      region_id = pipeline._options.view_as(GoogleCloudOptions).region
       job_id = self.result.job_id()
       # TODO(markflyhigh)(BEAM-1890): Use print since Nose dosen't show logs
       # in some cases.
       print (
-          'Found: https://console.cloud.google.com/dataflow/job/%s?project=%s' %
-          (job_id, project))
+          'Found: https://console.cloud.google.com/dataflow/jobsDetail'
+          '/locations/%s/jobs/%s?project=%s' % (region_id, job_id, project))
     self.result.wait_until_finish()
 
     if on_success_matcher:


[45/50] [abbrv] beam git commit: This closes #3595

Posted by jb...@apache.org.
This closes #3595


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/2e51bde5
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/2e51bde5
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/2e51bde5

Branch: refs/heads/DSL_SQL
Commit: 2e51bde5bd3fc2589b0e04f2ced8bd7c24d1046a
Parents: eb0850e d128c3b
Author: Ahmet Altay <al...@google.com>
Authored: Wed Jul 19 14:08:01 2017 -0700
Committer: Ahmet Altay <al...@google.com>
Committed: Wed Jul 19 14:08:01 2017 -0700

----------------------------------------------------------------------
 sdks/python/apache_beam/runners/dataflow/dataflow_runner.py | 3 +++
 1 file changed, 3 insertions(+)
----------------------------------------------------------------------



[25/50] [abbrv] beam git commit: This closes #3577: Fix split package in SDK harness

Posted by jb...@apache.org.
This closes #3577: Fix split package in SDK harness


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/2c2d8a35
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/2c2d8a35
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/2c2d8a35

Branch: refs/heads/DSL_SQL
Commit: 2c2d8a35f154a6c5615917a859f58f8fcf7f2789
Parents: 5a0b74c f1b4700
Author: Kenneth Knowles <kl...@google.com>
Authored: Tue Jul 18 12:52:32 2017 -0700
Committer: Kenneth Knowles <kl...@google.com>
Committed: Tue Jul 18 12:52:32 2017 -0700

----------------------------------------------------------------------
 .../beam/fn/harness/BeamFnDataReadRunner.java   | 173 ++++++
 .../beam/fn/harness/BeamFnDataWriteRunner.java  | 159 ++++++
 .../beam/fn/harness/BoundedSourceRunner.java    | 167 ++++++
 .../apache/beam/fn/harness/FnApiDoFnRunner.java | 548 +++++++++++++++++++
 .../fn/harness/PTransformRunnerFactory.java     |  81 +++
 .../harness/control/ProcessBundleHandler.java   |   4 +-
 .../beam/runners/core/BeamFnDataReadRunner.java | 173 ------
 .../runners/core/BeamFnDataWriteRunner.java     | 159 ------
 .../beam/runners/core/BoundedSourceRunner.java  | 167 ------
 .../beam/runners/core/FnApiDoFnRunner.java      | 547 ------------------
 .../runners/core/PTransformRunnerFactory.java   |  81 ---
 .../apache/beam/runners/core/package-info.java  |  22 -
 .../fn/harness/BeamFnDataReadRunnerTest.java    | 281 ++++++++++
 .../fn/harness/BeamFnDataWriteRunnerTest.java   | 269 +++++++++
 .../fn/harness/BoundedSourceRunnerTest.java     | 187 +++++++
 .../beam/fn/harness/FnApiDoFnRunnerTest.java    | 210 +++++++
 .../control/ProcessBundleHandlerTest.java       |   2 +-
 .../runners/core/BeamFnDataReadRunnerTest.java  | 281 ----------
 .../runners/core/BeamFnDataWriteRunnerTest.java | 269 ---------
 .../runners/core/BoundedSourceRunnerTest.java   | 187 -------
 .../beam/runners/core/FnApiDoFnRunnerTest.java  | 210 -------
 21 files changed, 2078 insertions(+), 2099 deletions(-)
----------------------------------------------------------------------



[50/50] [abbrv] beam git commit: This closes #3603

Posted by jb...@apache.org.
This closes #3603


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/ada24c05
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/ada24c05
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/ada24c05

Branch: refs/heads/DSL_SQL
Commit: ada24c059b1337fe02517c9f66fa9d29fb8bcc61
Parents: 152115e afeba37
Author: Jean-Baptiste Onofré <jb...@apache.org>
Authored: Thu Jul 20 21:52:35 2017 +0200
Committer: Jean-Baptiste Onofré <jb...@apache.org>
Committed: Thu Jul 20 21:52:35 2017 +0200

----------------------------------------------------------------------
 .github/PULL_REQUEST_TEMPLATE.md                |   16 +-
 .gitignore                                      |    2 +-
 .../jenkins/common_job_properties.groovy        |    9 +-
 .../job_beam_PerformanceTests_Python.groovy     |   58 +
 ..._beam_PostCommit_Java_JDKVersionsTest.groovy |    2 +
 ..._PostCommit_Java_MavenInstall_Windows.groovy |    3 +-
 .../job_beam_PreCommit_Website_Merge.groovy     |   59 +
 README.md                                       |    4 +-
 examples/java/pom.xml                           |   32 +-
 .../org/apache/beam/examples/WordCount.java     |    4 +
 .../examples/common/WriteOneFilePerWindow.java  |   59 +-
 .../apache/beam/examples/complete/TfIdf.java    |    3 +-
 .../examples/complete/TopWikipediaSessions.java |   24 +-
 .../beam/examples/complete/TrafficRoutes.java   |   19 +
 .../beam/examples/cookbook/TriggerExample.java  |    6 +-
 .../beam/examples/DebuggingWordCountTest.java   |   11 +-
 .../beam/examples/WindowedWordCountIT.java      |    4 +-
 examples/java8/pom.xml                          |   20 +-
 .../complete/game/utils/WriteToText.java        |   49 +-
 .../examples/complete/game/LeaderBoardTest.java |    2 +
 examples/pom.xml                                |    2 +-
 pom.xml                                         |  127 +-
 runners/apex/pom.xml                            |   20 +-
 .../apache/beam/runners/apex/ApexRunner.java    |   61 +-
 .../translation/ApexPipelineTranslator.java     |   16 +-
 .../apex/translation/TranslationContext.java    |    4 +-
 .../operators/ApexParDoOperator.java            |   21 +-
 .../runners/apex/examples/WordCountTest.java    |    8 +-
 .../utils/ApexStateInternalsTest.java           |  411 ++-----
 runners/core-construction-java/pom.xml          |    2 +-
 .../CreatePCollectionViewTranslation.java       |   15 +-
 .../construction/ElementAndRestriction.java     |   42 -
 .../ElementAndRestrictionCoder.java             |   88 --
 .../construction/PCollectionTranslation.java    |   16 +
 .../core/construction/PTransformMatchers.java   |  109 +-
 .../construction/PTransformTranslation.java     |   11 +-
 .../core/construction/ParDoTranslation.java     |   82 +-
 .../construction/RunnerPCollectionView.java     |   31 +-
 .../core/construction/SplittableParDo.java      |  124 +-
 .../construction/TestStreamTranslation.java     |   49 +-
 .../core/construction/TransformInputs.java      |   50 +
 .../WindowingStrategyTranslation.java           |   27 +-
 .../construction/WriteFilesTranslation.java     |   67 +-
 .../construction/metrics/MetricFiltering.java   |  102 ++
 .../core/construction/metrics/MetricKey.java    |   43 +
 .../core/construction/metrics/package-info.java |   22 +
 .../runners/core/metrics/MetricFiltering.java   |  102 --
 .../beam/runners/core/metrics/MetricKey.java    |   43 -
 .../beam/runners/core/metrics/package-info.java |   22 -
 .../ElementAndRestrictionCoderTest.java         |  126 --
 .../PCollectionTranslationTest.java             |   22 +
 .../construction/PTransformMatchersTest.java    |   54 +-
 .../core/construction/ParDoTranslationTest.java |   28 +-
 .../core/construction/SplittableParDoTest.java  |   18 +-
 .../core/construction/TransformInputsTest.java  |  166 +++
 .../WindowingStrategyTranslationTest.java       |    3 +
 .../construction/WriteFilesTranslationTest.java |   68 +-
 .../metrics/MetricFilteringTest.java            |  148 +++
 .../core/metrics/MetricFilteringTest.java       |  148 ---
 runners/core-java/pom.xml                       |    2 +-
 .../runners/core/InMemoryTimerInternals.java    |    9 +
 .../core/LateDataDroppingDoFnRunner.java        |   33 +-
 ...eBoundedSplittableProcessElementInvoker.java |   40 +-
 .../beam/runners/core/ProcessFnRunner.java      |   16 +-
 .../beam/runners/core/ReduceFnRunner.java       |  135 ++-
 .../beam/runners/core/SimpleDoFnRunner.java     |   20 +
 .../core/SplittableParDoViaKeyedWorkItems.java  |   58 +-
 .../core/SplittableProcessElementInvoker.java   |   25 +-
 .../org/apache/beam/runners/core/StateTags.java |    3 +
 .../beam/runners/core/SystemReduceFn.java       |    6 +
 .../runners/core/metrics/MetricUpdates.java     |    1 +
 .../core/metrics/MetricsContainerImpl.java      |    1 +
 .../core/metrics/MetricsContainerStepMap.java   |    2 +
 .../core/triggers/AfterAllStateMachine.java     |   25 +-
 .../AfterDelayFromFirstElementStateMachine.java |    6 +-
 .../core/triggers/AfterFirstStateMachine.java   |   20 +-
 .../core/triggers/AfterPaneStateMachine.java    |    6 +-
 .../triggers/AfterWatermarkStateMachine.java    |    7 +-
 .../triggers/ExecutableTriggerStateMachine.java |   23 +-
 .../core/triggers/NeverStateMachine.java        |    5 +-
 .../core/triggers/TriggerStateMachine.java      |   27 -
 .../core/InMemoryStateInternalsTest.java        |  569 +--------
 ...ndedSplittableProcessElementInvokerTest.java |   47 +-
 .../beam/runners/core/ReduceFnRunnerTest.java   |  374 +++++-
 .../beam/runners/core/ReduceFnTester.java       |   48 +-
 .../core/SplittableParDoProcessFnTest.java      |  117 +-
 .../beam/runners/core/StateInternalsTest.java   |  613 ++++++++++
 .../beam/runners/core/WindowMatchers.java       |   15 +
 .../triggers/AfterFirstStateMachineTest.java    |    5 +-
 .../AfterWatermarkStateMachineTest.java         |    7 +-
 .../core/triggers/StubTriggerStateMachine.java  |    7 +-
 runners/direct-java/pom.xml                     |    7 +-
 .../beam/runners/direct/CommittedResult.java    |   12 +-
 .../apache/beam/runners/direct/DirectGraph.java |   38 +-
 .../beam/runners/direct/DirectGraphVisitor.java |   48 +-
 .../beam/runners/direct/DirectGroupByKey.java   |   13 +-
 .../direct/DirectGroupByKeyOverrideFactory.java |   14 +-
 .../beam/runners/direct/DirectMetrics.java      |    4 +-
 .../beam/runners/direct/DirectRegistrar.java    |    2 +-
 .../beam/runners/direct/DirectRunner.java       |   64 +-
 .../beam/runners/direct/DirectTestOptions.java  |   42 +
 .../runners/direct/DirectTimerInternals.java    |    9 +
 .../beam/runners/direct/EvaluationContext.java  |   26 +-
 .../direct/ExecutorServiceParallelExecutor.java |   27 +-
 .../runners/direct/ParDoEvaluatorFactory.java   |    9 +-
 .../direct/ParDoMultiOverrideFactory.java       |  121 +-
 ...littableProcessElementsEvaluatorFactory.java |   37 +-
 .../direct/StatefulParDoEvaluatorFactory.java   |   12 +-
 .../direct/TestStreamEvaluatorFactory.java      |   20 +-
 .../runners/direct/ViewEvaluatorFactory.java    |    8 +-
 .../runners/direct/ViewOverrideFactory.java     |   69 +-
 .../beam/runners/direct/WatermarkManager.java   |   18 +-
 .../direct/WriteWithShardingFactory.java        |   34 +-
 .../runners/direct/CommittedResultTest.java     |   17 +-
 .../runners/direct/DirectGraphVisitorTest.java  |   10 +-
 .../beam/runners/direct/DirectGraphs.java       |    7 +
 .../beam/runners/direct/DirectMetricsTest.java  |    2 +-
 .../runners/direct/DirectRegistrarTest.java     |    2 +-
 .../runners/direct/EvaluationContextTest.java   |    7 +-
 .../ImmutabilityEnforcementFactoryTest.java     |    4 +-
 .../beam/runners/direct/ParDoEvaluatorTest.java |    7 +-
 .../StatefulParDoEvaluatorFactoryTest.java      |   65 +-
 .../runners/direct/TransformExecutorTest.java   |   12 +-
 .../direct/ViewEvaluatorFactoryTest.java        |    8 +-
 .../runners/direct/ViewOverrideFactoryTest.java |   37 +-
 .../direct/WatermarkCallbackExecutorTest.java   |    1 +
 .../runners/direct/WatermarkManagerTest.java    |   16 +-
 .../direct/WriteWithShardingFactoryTest.java    |   44 +-
 runners/flink/pom.xml                           |   11 +-
 .../runners/flink/CreateStreamingFlinkView.java |  154 +++
 .../flink/FlinkBatchTranslationContext.java     |    3 +-
 .../FlinkPipelineExecutionEnvironment.java      |    2 +
 .../flink/FlinkStreamingPipelineTranslator.java |   86 +-
 .../FlinkStreamingTransformTranslators.java     |   36 +-
 .../flink/FlinkStreamingTranslationContext.java |    3 +-
 .../flink/FlinkStreamingViewOverrides.java      |  372 ------
 .../runners/flink/FlinkTransformOverrides.java  |   53 +
 .../wrappers/streaming/DoFnOperator.java        |    9 +
 .../streaming/SplittableDoFnOperator.java       |   16 +-
 .../streaming/state/FlinkStateInternals.java    |  425 +++----
 .../FlinkBroadcastStateInternalsTest.java       |  242 +---
 .../FlinkKeyGroupStateInternalsTest.java        |  359 +++---
 .../streaming/FlinkSplitStateInternalsTest.java |  132 ++-
 .../streaming/FlinkStateInternalsTest.java      |  343 +-----
 runners/google-cloud-dataflow-java/pom.xml      |   10 +-
 .../dataflow/BatchStatefulParDoOverrides.java   |    4 +
 .../runners/dataflow/BatchViewOverrides.java    |  182 +--
 .../runners/dataflow/CreateDataflowView.java    |    8 +-
 .../beam/runners/dataflow/DataflowMetrics.java  |  306 +++--
 .../runners/dataflow/DataflowPipelineJob.java   |   18 +-
 .../dataflow/DataflowPipelineTranslator.java    |   62 +-
 .../beam/runners/dataflow/DataflowRunner.java   |  139 ++-
 .../dataflow/SplittableParDoOverrides.java      |   76 ++
 .../dataflow/StreamingViewOverrides.java        |   10 +-
 .../runners/dataflow/TransformTranslator.java   |    4 +-
 .../DataflowPipelineWorkerPoolOptions.java      |    3 +
 .../runners/dataflow/util/MonitoringUtil.java   |   16 +-
 .../runners/dataflow/util/PropertyNames.java    |    1 +
 .../beam/runners/dataflow/util/TimeUtil.java    |   24 +-
 .../BatchStatefulParDoOverridesTest.java        |    1 +
 .../runners/dataflow/DataflowMetricsTest.java   |  215 +++-
 .../DataflowPipelineTranslatorTest.java         |   96 +-
 .../runners/dataflow/DataflowRunnerTest.java    |  198 +++-
 .../runners/dataflow/util/TimeUtilTest.java     |    6 +
 runners/pom.xml                                 |    2 +-
 runners/spark/pom.xml                           |   70 +-
 .../spark/SparkNativePipelineVisitor.java       |    3 +-
 .../apache/beam/runners/spark/SparkRunner.java  |    9 +-
 .../beam/runners/spark/TestSparkRunner.java     |    2 +-
 .../SparkGroupAlsoByWindowViaWindowSet.java     |    6 +-
 .../spark/stateful/SparkTimerInternals.java     |   18 +-
 .../spark/translation/EvaluationContext.java    |    4 +-
 .../spark/translation/TransformTranslator.java  |   50 +-
 .../spark/util/GlobalWatermarkHolder.java       |  127 +-
 .../spark/GlobalWatermarkHolderTest.java        |   18 +-
 .../runners/spark/SparkRunnerDebuggerTest.java  |   26 +-
 .../spark/stateful/SparkStateInternalsTest.java |   66 ++
 .../spark/translation/StorageLevelTest.java     |    4 +-
 sdks/common/fn-api/pom.xml                      |    2 +-
 .../fn-api/src/main/proto/beam_fn_api.proto     |  237 +---
 sdks/common/pom.xml                             |    2 +-
 sdks/common/runner-api/pom.xml                  |    2 +-
 .../src/main/proto/beam_runner_api.proto        |   26 +-
 sdks/java/build-tools/pom.xml                   |    2 +-
 .../src/main/resources/beam/checkstyle.xml      |    8 +
 .../src/main/resources/beam/findbugs-filter.xml |    9 +
 sdks/java/core/pom.xml                          |    2 +-
 .../java/org/apache/beam/sdk/coders/Coder.java  |   12 +-
 .../apache/beam/sdk/coders/CoderRegistry.java   |    9 +
 .../apache/beam/sdk/coders/ShardedKeyCoder.java |   66 ++
 .../java/org/apache/beam/sdk/io/AvroIO.java     |  220 ++--
 .../java/org/apache/beam/sdk/io/AvroSink.java   |   32 +-
 .../java/org/apache/beam/sdk/io/AvroSource.java |    6 -
 .../apache/beam/sdk/io/CompressedSource.java    |   40 +-
 .../beam/sdk/io/DefaultFilenamePolicy.java      |  326 ++++--
 .../beam/sdk/io/DynamicFileDestinations.java    |  115 ++
 .../org/apache/beam/sdk/io/FileBasedSink.java   |  684 +++++------
 .../apache/beam/sdk/io/OffsetBasedSource.java   |   22 +-
 .../java/org/apache/beam/sdk/io/TFRecordIO.java |   44 +-
 .../java/org/apache/beam/sdk/io/TextIO.java     |  713 +++++++++---
 .../java/org/apache/beam/sdk/io/TextSink.java   |   22 +-
 .../java/org/apache/beam/sdk/io/WriteFiles.java |  704 +++++++----
 .../beam/sdk/io/range/ByteKeyRangeTracker.java  |   22 +-
 .../apache/beam/sdk/io/range/OffsetRange.java   |  101 ++
 .../beam/sdk/io/range/OffsetRangeTracker.java   |    3 +
 .../sdk/options/PipelineOptionsFactory.java     |   18 +-
 .../sdk/options/PipelineOptionsValidator.java   |   34 +-
 .../sdk/options/ProxyInvocationHandler.java     |   19 +-
 .../beam/sdk/runners/TransformHierarchy.java    |  165 ++-
 .../org/apache/beam/sdk/testing/PAssert.java    |    5 +-
 .../apache/beam/sdk/testing/StaticWindows.java  |    5 +
 .../apache/beam/sdk/testing/StreamingIT.java    |    4 +
 .../apache/beam/sdk/testing/TestPipeline.java   |   63 +-
 .../org/apache/beam/sdk/testing/TestStream.java |   12 +
 .../org/apache/beam/sdk/transforms/Combine.java |   31 +-
 .../org/apache/beam/sdk/transforms/DoFn.java    |   55 +-
 .../apache/beam/sdk/transforms/DoFnTester.java  |   21 +-
 .../org/apache/beam/sdk/transforms/ParDo.java   |   41 +-
 .../sdk/transforms/SerializableFunctions.java   |   50 +
 .../org/apache/beam/sdk/transforms/View.java    |   40 +-
 .../reflect/ByteBuddyDoFnInvokerFactory.java    |   27 +
 .../reflect/ByteBuddyOnTimerInvokerFactory.java |   73 +-
 .../sdk/transforms/reflect/DoFnInvoker.java     |   17 +-
 .../sdk/transforms/reflect/DoFnInvokers.java    |    9 -
 .../sdk/transforms/reflect/DoFnSignature.java   |   33 +-
 .../sdk/transforms/reflect/DoFnSignatures.java  |   44 +-
 .../reflect/OnTimerMethodSpecifier.java         |   37 +
 .../transforms/splittabledofn/OffsetRange.java  |   77 --
 .../splittabledofn/OffsetRangeTracker.java      |   11 +
 .../splittabledofn/RestrictionTracker.java      |   11 +-
 .../sdk/transforms/windowing/GlobalWindows.java |    5 +
 .../windowing/PartitioningWindowFn.java         |    5 +
 .../transforms/windowing/SlidingWindows.java    |    5 +
 .../beam/sdk/transforms/windowing/Window.java   |   32 +
 .../beam/sdk/transforms/windowing/WindowFn.java |   11 +
 .../apache/beam/sdk/util/IdentityWindowFn.java  |    6 +-
 .../org/apache/beam/sdk/values/PCollection.java |   12 +
 .../beam/sdk/values/PCollectionViews.java       |   37 +
 .../java/org/apache/beam/sdk/values/PValue.java |    4 +-
 .../org/apache/beam/sdk/values/PValueBase.java  |   12 -
 .../org/apache/beam/sdk/values/ShardedKey.java  |   65 ++
 .../beam/sdk/values/WindowingStrategy.java      |   46 +-
 .../beam/sdk/coders/DefaultCoderTest.java       |    3 +-
 .../java/org/apache/beam/sdk/io/AvroIOTest.java |  366 +++---
 .../beam/sdk/io/DefaultFilenamePolicyTest.java  |  135 ++-
 .../sdk/io/DrunkWritableByteChannelFactory.java |    2 +-
 .../apache/beam/sdk/io/FileBasedSinkTest.java   |  175 ++-
 .../java/org/apache/beam/sdk/io/SimpleSink.java |   56 +-
 .../org/apache/beam/sdk/io/TextIOReadTest.java  |  847 ++++++++++++++
 .../java/org/apache/beam/sdk/io/TextIOTest.java | 1095 +-----------------
 .../org/apache/beam/sdk/io/TextIOWriteTest.java |  604 ++++++++++
 .../org/apache/beam/sdk/io/WriteFilesTest.java  |  504 ++++++--
 .../beam/sdk/metrics/MetricResultsMatchers.java |    2 +-
 .../options/PipelineOptionsValidatorTest.java   |   44 +
 .../sdk/options/ProxyInvocationHandlerTest.java |   19 +
 .../sdk/runners/TransformHierarchyTest.java     |  197 ++++
 .../sdk/testing/PCollectionViewTesting.java     |    8 +
 .../beam/sdk/testing/TestPipelineTest.java      |   38 +-
 .../apache/beam/sdk/transforms/CombineTest.java |  365 ++++--
 .../beam/sdk/transforms/DoFnTesterTest.java     |   32 +
 .../beam/sdk/transforms/GroupByKeyTest.java     |  195 +++-
 .../apache/beam/sdk/transforms/ParDoTest.java   |  202 ++++
 .../beam/sdk/transforms/SplittableDoFnTest.java |  155 ++-
 .../transforms/reflect/DoFnInvokersTest.java    |   93 +-
 .../DoFnSignaturesProcessElementTest.java       |    2 +-
 .../DoFnSignaturesSplittableDoFnTest.java       |   83 +-
 .../transforms/reflect/DoFnSignaturesTest.java  |   14 +
 .../splittabledofn/OffsetRangeTrackerTest.java  |    1 +
 .../windowing/SlidingWindowsTest.java           |   30 +-
 .../google-cloud-platform-core/pom.xml          |    2 +-
 .../java/org/apache/beam/sdk/util/GcsUtil.java  |    2 +-
 .../sdk/util/RetryHttpRequestInitializer.java   |  147 ++-
 .../extensions/gcp/GcpCoreApiSurfaceTest.java   |   48 +-
 .../util/RetryHttpRequestInitializerTest.java   |   31 +-
 sdks/java/extensions/jackson/pom.xml            |    2 +-
 sdks/java/extensions/join-library/pom.xml       |    2 +-
 sdks/java/extensions/pom.xml                    |    2 +-
 sdks/java/extensions/protobuf/pom.xml           |    2 +-
 sdks/java/extensions/sorter/pom.xml             |    8 +-
 sdks/java/harness/pom.xml                       |   18 +-
 .../beam/fn/harness/BeamFnDataReadRunner.java   |  173 +++
 .../beam/fn/harness/BeamFnDataWriteRunner.java  |  159 +++
 .../beam/fn/harness/BoundedSourceRunner.java    |  167 +++
 .../apache/beam/fn/harness/FnApiDoFnRunner.java |  548 +++++++++
 .../fn/harness/PTransformRunnerFactory.java     |   81 ++
 .../harness/channel/ManagedChannelFactory.java  |    6 +
 .../harness/control/ProcessBundleHandler.java   |  295 ++---
 .../fn/harness/control/RegisterHandler.java     |    2 +-
 .../beam/runners/core/BeamFnDataReadRunner.java |  115 --
 .../runners/core/BeamFnDataWriteRunner.java     |   98 --
 .../beam/runners/core/BoundedSourceRunner.java  |  105 --
 .../apache/beam/runners/core/package-info.java  |   22 -
 .../fn/harness/BeamFnDataReadRunnerTest.java    |  281 +++++
 .../fn/harness/BeamFnDataWriteRunnerTest.java   |  269 +++++
 .../fn/harness/BoundedSourceRunnerTest.java     |  187 +++
 .../beam/fn/harness/FnApiDoFnRunnerTest.java    |  210 ++++
 .../control/ProcessBundleHandlerTest.java       |  521 ++-------
 .../fn/harness/control/RegisterHandlerTest.java |    8 +-
 .../runners/core/BeamFnDataReadRunnerTest.java  |  195 ----
 .../runners/core/BeamFnDataWriteRunnerTest.java |  163 ---
 .../runners/core/BoundedSourceRunnerTest.java   |  113 --
 sdks/java/io/amqp/pom.xml                       |  100 ++
 .../org/apache/beam/sdk/io/amqp/AmqpIO.java     |  399 +++++++
 .../beam/sdk/io/amqp/AmqpMessageCoder.java      |   79 ++
 .../amqp/AmqpMessageCoderProviderRegistrar.java |   44 +
 .../apache/beam/sdk/io/amqp/package-info.java   |   22 +
 .../org/apache/beam/sdk/io/amqp/AmqpIOTest.java |  148 +++
 .../beam/sdk/io/amqp/AmqpMessageCoderTest.java  |   89 ++
 sdks/java/io/cassandra/pom.xml                  |    2 +-
 .../beam/sdk/io/cassandra/CassandraIO.java      |    2 +-
 sdks/java/io/common/pom.xml                     |   12 +-
 .../sdk/io/common/IOTestPipelineOptions.java    |    6 +-
 .../org/apache/beam/sdk/io/common/TestRow.java  |  114 ++
 sdks/java/io/elasticsearch/pom.xml              |   10 +-
 .../sdk/io/elasticsearch/ElasticsearchIO.java   |   17 +-
 .../elasticsearch/ElasticSearchIOTestUtils.java |   81 +-
 .../sdk/io/elasticsearch/ElasticsearchIOIT.java |   14 +-
 .../io/elasticsearch/ElasticsearchIOTest.java   |   36 +-
 .../elasticsearch/ElasticsearchTestDataSet.java |   37 +-
 sdks/java/io/google-cloud-platform/pom.xml      |   14 +-
 .../beam/sdk/io/gcp/bigquery/BatchLoads.java    |    2 +
 .../beam/sdk/io/gcp/bigquery/BigQueryIO.java    |  181 ++-
 .../sdk/io/gcp/bigquery/BigQuerySourceBase.java |   64 +-
 .../io/gcp/bigquery/DynamicDestinations.java    |   29 +-
 .../io/gcp/bigquery/GenerateShardedTable.java   |    1 +
 .../io/gcp/bigquery/PassThroughThenCleanup.java |   46 +-
 .../beam/sdk/io/gcp/bigquery/ShardedKey.java    |   67 --
 .../sdk/io/gcp/bigquery/ShardedKeyCoder.java    |   74 --
 .../sdk/io/gcp/bigquery/StreamingWriteFn.java   |    1 +
 .../io/gcp/bigquery/StreamingWriteTables.java   |    2 +
 .../sdk/io/gcp/bigquery/TagWithUniqueIds.java   |    1 +
 .../io/gcp/bigquery/WriteBundlesToFiles.java    |    2 +
 .../bigquery/WriteGroupedRecordsToFiles.java    |    1 +
 .../sdk/io/gcp/bigquery/WritePartition.java     |    1 +
 .../beam/sdk/io/gcp/bigquery/WriteTables.java   |    1 +
 .../beam/sdk/io/gcp/bigtable/BigtableIO.java    |    8 +-
 .../io/gcp/bigtable/BigtableServiceImpl.java    |    9 +-
 .../sdk/io/gcp/datastore/AdaptiveThrottler.java |  103 ++
 .../beam/sdk/io/gcp/datastore/DatastoreV1.java  |  194 +++-
 .../sdk/io/gcp/datastore/MovingAverage.java     |   50 +
 .../sdk/io/gcp/spanner/AbstractSpannerFn.java   |   58 +
 .../sdk/io/gcp/spanner/CreateTransactionFn.java |   51 +
 .../beam/sdk/io/gcp/spanner/MutationGroup.java  |   67 ++
 .../io/gcp/spanner/MutationSizeEstimator.java   |    9 +
 .../sdk/io/gcp/spanner/NaiveSpannerReadFn.java  |   74 ++
 .../beam/sdk/io/gcp/spanner/ReadOperation.java  |   96 ++
 .../beam/sdk/io/gcp/spanner/SpannerConfig.java  |  137 +++
 .../beam/sdk/io/gcp/spanner/SpannerIO.java      |  715 +++++++++---
 .../sdk/io/gcp/spanner/SpannerWriteGroupFn.java |  125 ++
 .../beam/sdk/io/gcp/spanner/Transaction.java    |   33 +
 .../beam/sdk/io/gcp/GcpApiSurfaceTest.java      |   10 +
 .../sdk/io/gcp/bigquery/BigQueryIOTest.java     |   76 +-
 .../sdk/io/gcp/bigquery/FakeDatasetService.java |    5 +-
 .../sdk/io/gcp/bigtable/BigtableReadIT.java     |    5 +-
 .../io/gcp/bigtable/BigtableTestOptions.java    |    5 -
 .../sdk/io/gcp/bigtable/BigtableWriteIT.java    |    4 +-
 .../io/gcp/datastore/AdaptiveThrottlerTest.java |  111 ++
 .../sdk/io/gcp/datastore/DatastoreV1Test.java   |  141 ++-
 .../beam/sdk/io/gcp/datastore/V1TestUtil.java   |    2 +-
 .../sdk/io/gcp/spanner/FakeServiceFactory.java  |   82 ++
 .../gcp/spanner/MutationSizeEstimatorTest.java  |   12 +
 .../beam/sdk/io/gcp/spanner/RandomUtils.java    |   41 +
 .../sdk/io/gcp/spanner/SpannerIOReadTest.java   |  332 ++++++
 .../beam/sdk/io/gcp/spanner/SpannerIOTest.java  |  244 ----
 .../sdk/io/gcp/spanner/SpannerIOWriteTest.java  |  258 +++++
 .../beam/sdk/io/gcp/spanner/SpannerReadIT.java  |  166 +++
 .../beam/sdk/io/gcp/spanner/SpannerWriteIT.java |   26 +-
 sdks/java/io/hadoop-common/pom.xml              |    2 +-
 sdks/java/io/hadoop-file-system/pom.xml         |   33 +-
 sdks/java/io/hadoop/input-format/pom.xml        |    2 +-
 .../hadoop/inputformat/HadoopInputFormatIO.java |    2 +-
 sdks/java/io/hadoop/jdk1.8-tests/pom.xml        |    4 +-
 .../inputformat/HIFIOWithElasticTest.java       |   11 +-
 sdks/java/io/hadoop/pom.xml                     |    2 +-
 sdks/java/io/hbase/pom.xml                      |   26 +-
 .../io/hbase/HBaseCoderProviderRegistrar.java   |   40 +
 .../org/apache/beam/sdk/io/hbase/HBaseIO.java   |   48 +-
 .../beam/sdk/io/hbase/HBaseMutationCoder.java   |   42 +
 .../hbase/HBaseCoderProviderRegistrarTest.java  |   45 +
 .../apache/beam/sdk/io/hbase/HBaseIOTest.java   |   49 +-
 sdks/java/io/hcatalog/pom.xml                   |  175 +++
 .../apache/beam/sdk/io/hcatalog/HCatalogIO.java |  492 ++++++++
 .../beam/sdk/io/hcatalog/package-info.java      |   22 +
 .../io/hcatalog/EmbeddedMetastoreService.java   |   87 ++
 .../beam/sdk/io/hcatalog/HCatalogIOTest.java    |  277 +++++
 .../sdk/io/hcatalog/HCatalogIOTestUtils.java    |  108 ++
 .../hcatalog/src/test/resources/hive-site.xml   |  301 +++++
 sdks/java/io/jdbc/pom.xml                       |   14 +-
 .../org/apache/beam/sdk/io/jdbc/JdbcIO.java     |    2 +-
 .../org/apache/beam/sdk/io/jdbc/JdbcIOIT.java   |  203 ++--
 .../org/apache/beam/sdk/io/jdbc/JdbcIOTest.java |  115 +-
 .../beam/sdk/io/jdbc/JdbcTestDataSet.java       |  130 ---
 .../apache/beam/sdk/io/jdbc/JdbcTestHelper.java |   81 ++
 sdks/java/io/jms/pom.xml                        |    2 +-
 .../java/org/apache/beam/sdk/io/jms/JmsIO.java  |    2 +-
 sdks/java/io/kafka/pom.xml                      |    2 +-
 .../org/apache/beam/sdk/io/kafka/KafkaIO.java   |  132 ++-
 .../apache/beam/sdk/io/kafka/KafkaIOTest.java   |   30 +
 sdks/java/io/kinesis/pom.xml                    |    2 +-
 .../sdk/io/kinesis/CheckpointGenerator.java     |    6 +-
 .../beam/sdk/io/kinesis/CustomOptional.java     |  111 +-
 .../io/kinesis/DynamicCheckpointGenerator.java  |   52 +-
 .../sdk/io/kinesis/GetKinesisRecordsResult.java |   49 +-
 .../sdk/io/kinesis/KinesisClientProvider.java   |    4 +-
 .../apache/beam/sdk/io/kinesis/KinesisIO.java   |  281 ++---
 .../beam/sdk/io/kinesis/KinesisReader.java      |  206 ++--
 .../sdk/io/kinesis/KinesisReaderCheckpoint.java |   97 +-
 .../beam/sdk/io/kinesis/KinesisRecord.java      |  177 +--
 .../beam/sdk/io/kinesis/KinesisRecordCoder.java |   68 +-
 .../beam/sdk/io/kinesis/KinesisSource.java      |  147 +--
 .../beam/sdk/io/kinesis/RecordFilter.java       |   18 +-
 .../apache/beam/sdk/io/kinesis/RoundRobin.java  |   37 +-
 .../beam/sdk/io/kinesis/ShardCheckpoint.java    |  241 ++--
 .../sdk/io/kinesis/ShardRecordsIterator.java    |  106 +-
 .../sdk/io/kinesis/SimplifiedKinesisClient.java |  215 ++--
 .../beam/sdk/io/kinesis/StartingPoint.java      |   84 +-
 .../io/kinesis/StaticCheckpointGenerator.java   |   27 +-
 .../io/kinesis/TransientKinesisException.java   |    7 +-
 .../beam/sdk/io/kinesis/AmazonKinesisMock.java  |  539 ++++-----
 .../beam/sdk/io/kinesis/CustomOptionalTest.java |   27 +-
 .../kinesis/DynamicCheckpointGeneratorTest.java |   33 +-
 .../sdk/io/kinesis/KinesisMockReadTest.java     |   97 +-
 .../io/kinesis/KinesisReaderCheckpointTest.java |   52 +-
 .../beam/sdk/io/kinesis/KinesisReaderIT.java    |  127 +-
 .../beam/sdk/io/kinesis/KinesisReaderTest.java  |  166 +--
 .../sdk/io/kinesis/KinesisRecordCoderTest.java  |   34 +-
 .../beam/sdk/io/kinesis/KinesisTestOptions.java |   43 +-
 .../beam/sdk/io/kinesis/KinesisUploader.java    |   70 +-
 .../beam/sdk/io/kinesis/RecordFilterTest.java   |   52 +-
 .../beam/sdk/io/kinesis/RoundRobinTest.java     |   42 +-
 .../sdk/io/kinesis/ShardCheckpointTest.java     |  203 ++--
 .../io/kinesis/ShardRecordsIteratorTest.java    |  216 ++--
 .../io/kinesis/SimplifiedKinesisClientTest.java |  351 +++---
 sdks/java/io/mongodb/pom.xml                    |    2 +-
 .../beam/sdk/io/mongodb/MongoDbGridFSIO.java    |    2 +-
 .../apache/beam/sdk/io/mongodb/MongoDbIO.java   |  317 +++--
 .../beam/sdk/io/mongodb/MongoDbIOTest.java      |   37 +
 sdks/java/io/mqtt/pom.xml                       |    2 +-
 .../org/apache/beam/sdk/io/mqtt/MqttIO.java     |    2 +-
 sdks/java/io/pom.xml                            |   35 +-
 sdks/java/io/xml/pom.xml                        |    2 +-
 .../java/org/apache/beam/sdk/io/xml/XmlIO.java  |    4 +-
 .../org/apache/beam/sdk/io/xml/XmlSink.java     |   21 +-
 .../org/apache/beam/sdk/io/xml/XmlSinkTest.java |    4 +-
 sdks/java/java8tests/pom.xml                    |    2 +-
 sdks/java/javadoc/pom.xml                       |   19 +-
 .../maven-archetypes/examples-java8/pom.xml     |    2 +-
 .../main/resources/archetype-resources/pom.xml  |    1 -
 sdks/java/maven-archetypes/examples/pom.xml     |    2 +-
 .../main/resources/archetype-resources/pom.xml  |    1 -
 sdks/java/maven-archetypes/pom.xml              |    2 +-
 sdks/java/maven-archetypes/starter/pom.xml      |    2 +-
 .../resources/projects/basic/reference/pom.xml  |    2 +-
 sdks/java/pom.xml                               |    2 +-
 sdks/pom.xml                                    |    2 +-
 sdks/python/apache_beam/coders/coder_impl.py    |    4 +
 sdks/python/apache_beam/coders/coders.py        |    7 +-
 .../apache_beam/coders/coders_test_common.py    |    8 +
 .../apache_beam/examples/snippets/snippets.py   |    2 +-
 .../examples/snippets/snippets_test.py          |   16 +
 .../apache_beam/examples/streaming_wordcount.py |   25 +-
 .../apache_beam/examples/windowed_wordcount.py  |   93 ++
 sdks/python/apache_beam/io/filesystem.py        |   22 +-
 sdks/python/apache_beam/io/gcp/bigquery.py      |  100 +-
 sdks/python/apache_beam/io/gcp/bigquery_test.py |  105 +-
 .../io/gcp/datastore/v1/datastoreio.py          |   84 +-
 .../io/gcp/datastore/v1/datastoreio_test.py     |   53 +-
 .../apache_beam/io/gcp/datastore/v1/helper.py   |   43 +-
 .../io/gcp/datastore/v1/helper_test.py          |   22 +-
 .../apache_beam/io/gcp/datastore/v1/util.py     |   95 ++
 .../io/gcp/datastore/v1/util_test.py            |   67 ++
 sdks/python/apache_beam/io/gcp/gcsio.py         |   10 +-
 sdks/python/apache_beam/io/gcp/pubsub.py        |  180 ++-
 sdks/python/apache_beam/io/gcp/pubsub_test.py   |  101 +-
 .../io/gcp/tests/bigquery_matcher.py            |    6 +-
 .../io/gcp/tests/bigquery_matcher_test.py       |    2 +-
 sdks/python/apache_beam/io/range_trackers.py    |  130 ---
 .../apache_beam/io/range_trackers_test.py       |  186 ---
 .../apache_beam/options/pipeline_options.py     |   35 +-
 .../options/pipeline_options_test.py            |   39 +-
 .../apache_beam/options/value_provider_test.py  |   93 +-
 sdks/python/apache_beam/pipeline.py             |  230 +++-
 sdks/python/apache_beam/pipeline_test.py        |   53 +
 sdks/python/apache_beam/portability/__init__.py |   18 +
 .../apache_beam/portability/api/__init__.py     |   21 +
 sdks/python/apache_beam/pvalue.py               |    2 +-
 sdks/python/apache_beam/runners/api/__init__.py |   21 -
 .../runners/dataflow/dataflow_runner.py         |  112 +-
 .../runners/dataflow/dataflow_runner_test.py    |   24 +-
 .../runners/dataflow/internal/apiclient.py      |   42 +-
 .../runners/dataflow/internal/apiclient_test.py |   29 +-
 .../runners/dataflow/internal/dependency.py     |   69 +-
 .../runners/dataflow/native_io/iobase_test.py   |   39 +-
 .../dataflow/native_io/streaming_create.py      |   72 ++
 .../runners/dataflow/ptransform_overrides.py    |   52 +
 .../runners/dataflow/test_dataflow_runner.py    |    5 +-
 .../runners/direct/bundle_factory.py            |    2 +-
 .../apache_beam/runners/direct/direct_runner.py |  108 ++
 .../runners/direct/evaluation_context.py        |   73 +-
 .../apache_beam/runners/direct/executor.py      |  135 ++-
 .../runners/direct/transform_evaluator.py       |  447 ++++++-
 .../runners/direct/transform_result.py          |   41 -
 sdks/python/apache_beam/runners/direct/util.py  |   67 ++
 .../runners/direct/watermark_manager.py         |  100 +-
 .../apache_beam/runners/pipeline_context.py     |   19 +-
 .../runners/portability/fn_api_runner.py        |  306 +++--
 .../runners/portability/fn_api_runner_test.py   |   31 +-
 .../runners/worker/bundle_processor.py          |  426 +++++++
 .../apache_beam/runners/worker/data_plane.py    |   36 +-
 .../runners/worker/data_plane_test.py           |    2 +-
 .../apache_beam/runners/worker/log_handler.py   |    2 +-
 .../runners/worker/log_handler_test.py          |    2 +-
 .../runners/worker/operation_specs.py           |    9 +-
 .../apache_beam/runners/worker/operations.py    |    1 +
 .../apache_beam/runners/worker/sdk_worker.py    |  370 +-----
 .../runners/worker/sdk_worker_main.py           |    2 +-
 .../runners/worker/sdk_worker_test.py           |   95 +-
 sdks/python/apache_beam/testing/test_stream.py  |    5 +
 .../apache_beam/testing/test_stream_test.py     |   68 ++
 sdks/python/apache_beam/transforms/combiners.py |    8 +
 .../apache_beam/transforms/combiners_test.py    |    7 +-
 sdks/python/apache_beam/transforms/core.py      |  104 +-
 .../python/apache_beam/transforms/ptransform.py |   43 +-
 sdks/python/apache_beam/transforms/trigger.py   |   49 +-
 sdks/python/apache_beam/transforms/window.py    |    4 +-
 .../apache_beam/typehints/trivial_inference.py  |    3 +-
 .../typehints/trivial_inference_test.py         |    7 +
 sdks/python/apache_beam/utils/plugin.py         |   42 +
 sdks/python/apache_beam/utils/timestamp.py      |    5 +
 sdks/python/apache_beam/utils/urns.py           |    2 +-
 sdks/python/apache_beam/version.py              |    2 +-
 sdks/python/gen_protos.py                       |    2 +-
 sdks/python/pom.xml                             |    2 +-
 sdks/python/run_pylint.sh                       |    2 +-
 sdks/python/setup.py                            |    5 +-
 535 files changed, 26736 insertions(+), 13977 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/ada24c05/pom.xml
----------------------------------------------------------------------


[30/50] [abbrv] beam git commit: [BEAM-2544] Fix flaky AvroIOTest by eliminating race condition in "write then read" tests.

Posted by jb...@apache.org.
[BEAM-2544] Fix flaky AvroIOTest by eliminating a race condition in "write then read" tests.
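
The pattern behind the fix, sketched below under the assumption of a Beam 2.x test
classpath: use two TestPipeline rules, run the write pipeline and block on
waitUntilFinish() so the output files actually exist, and only then run the read
pipeline with its PAssert. The sketch uses TextIO and the hypothetical class name
WriteThenReadSketch purely for brevity; the actual change applies the same pattern to
AvroIO in AvroIOTest (see the diff below).

    import java.io.File;
    import org.apache.beam.sdk.io.TextIO;
    import org.apache.beam.sdk.testing.PAssert;
    import org.apache.beam.sdk.testing.TestPipeline;
    import org.apache.beam.sdk.transforms.Create;
    import org.apache.beam.sdk.values.PCollection;
    import org.junit.Rule;
    import org.junit.Test;
    import org.junit.rules.TemporaryFolder;

    /** Sketch: split a "write then read" round-trip test across two pipelines. */
    public class WriteThenReadSketch {

      @Rule public TestPipeline writePipeline = TestPipeline.create();
      @Rule public TestPipeline readPipeline = TestPipeline.create();
      @Rule public TemporaryFolder tmpFolder = new TemporaryFolder();

      @Test
      public void writeThenRead() throws Exception {
        File outputFile = tmpFolder.newFile("output.txt");

        writePipeline
            .apply(Create.of("a", "b", "c"))
            .apply(TextIO.write().to(outputFile.getAbsolutePath()).withoutSharding());
        // Block until the write has completed so the file is on disk before it is read.
        writePipeline.run().waitUntilFinish();

        PCollection<String> readBack =
            readPipeline.apply(TextIO.read().from(outputFile.getAbsolutePath()));
        PAssert.that(readBack).containsInAnyOrder("a", "b", "c");
        readPipeline.run();
      }
    }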


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/911edbad
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/911edbad
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/911edbad

Branch: refs/heads/DSL_SQL
Commit: 911edbade388a63626e0ad6f8b7c2ad7a9f9b7c2
Parents: dd9e866
Author: Alex Filatov <al...@users.noreply.github.com>
Authored: Thu Jun 29 23:23:04 2017 +0300
Committer: Eugene Kirpichov <ki...@google.com>
Committed: Tue Jul 18 15:49:44 2017 -0700

----------------------------------------------------------------------
 .../java/org/apache/beam/sdk/io/AvroIOTest.java | 46 +++++++++++---------
 1 file changed, 25 insertions(+), 21 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/911edbad/sdks/java/core/src/test/java/org/apache/beam/sdk/io/AvroIOTest.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/io/AvroIOTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/io/AvroIOTest.java
index 4a1386c..4380c57 100644
--- a/sdks/java/core/src/test/java/org/apache/beam/sdk/io/AvroIOTest.java
+++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/io/AvroIOTest.java
@@ -90,7 +90,11 @@ import org.junit.runners.JUnit4;
 @RunWith(JUnit4.class)
 public class AvroIOTest {
 
-  @Rule public TestPipeline p = TestPipeline.create();
+  @Rule
+  public TestPipeline writePipeline = TestPipeline.create();
+
+  @Rule
+  public TestPipeline readPipeline = TestPipeline.create();
 
   @Rule public TemporaryFolder tmpFolder = new TemporaryFolder();
 
@@ -144,15 +148,15 @@ public class AvroIOTest {
         ImmutableList.of(new GenericClass(3, "hi"), new GenericClass(5, "bar"));
     File outputFile = tmpFolder.newFile("output.avro");
 
-    p.apply(Create.of(values))
+    writePipeline.apply(Create.of(values))
         .apply(AvroIO.write(GenericClass.class).to(outputFile.getAbsolutePath()).withoutSharding());
-    p.run();
+    writePipeline.run().waitUntilFinish();
 
     PCollection<GenericClass> input =
-        p.apply(AvroIO.read(GenericClass.class).from(outputFile.getAbsolutePath()));
+        readPipeline.apply(AvroIO.read(GenericClass.class).from(outputFile.getAbsolutePath()));
 
     PAssert.that(input).containsInAnyOrder(values);
-    p.run();
+    readPipeline.run();
   }
 
   @Test
@@ -163,19 +167,19 @@ public class AvroIOTest {
         ImmutableList.of(new GenericClass(3, "hi"), new GenericClass(5, "bar"));
     File outputFile = tmpFolder.newFile("output.avro");
 
-    p.apply(Create.of(values))
+    writePipeline.apply(Create.of(values))
         .apply(
             AvroIO.write(GenericClass.class)
                 .to(outputFile.getAbsolutePath())
                 .withoutSharding()
                 .withCodec(CodecFactory.deflateCodec(9)));
-    p.run();
+    writePipeline.run().waitUntilFinish();
 
     PCollection<GenericClass> input =
-        p.apply(AvroIO.read(GenericClass.class).from(outputFile.getAbsolutePath()));
+        readPipeline.apply(AvroIO.read(GenericClass.class).from(outputFile.getAbsolutePath()));
 
     PAssert.that(input).containsInAnyOrder(values);
-    p.run();
+    readPipeline.run();
     DataFileStream dataFileStream =
         new DataFileStream(new FileInputStream(outputFile), new GenericDatumReader());
     assertEquals("deflate", dataFileStream.getMetaString("avro.codec"));
@@ -189,19 +193,19 @@ public class AvroIOTest {
         ImmutableList.of(new GenericClass(3, "hi"), new GenericClass(5, "bar"));
     File outputFile = tmpFolder.newFile("output.avro");
 
-    p.apply(Create.of(values))
+    writePipeline.apply(Create.of(values))
         .apply(
             AvroIO.write(GenericClass.class)
                 .to(outputFile.getAbsolutePath())
                 .withoutSharding()
                 .withCodec(CodecFactory.nullCodec()));
-    p.run();
+    writePipeline.run().waitUntilFinish();
 
     PCollection<GenericClass> input =
-        p.apply(AvroIO.read(GenericClass.class).from(outputFile.getAbsolutePath()));
+        readPipeline.apply(AvroIO.read(GenericClass.class).from(outputFile.getAbsolutePath()));
 
     PAssert.that(input).containsInAnyOrder(values);
-    p.run();
+    readPipeline.run();
     DataFileStream dataFileStream =
         new DataFileStream(new FileInputStream(outputFile), new GenericDatumReader());
     assertEquals("null", dataFileStream.getMetaString("avro.codec"));
@@ -261,18 +265,18 @@ public class AvroIOTest {
         ImmutableList.of(new GenericClass(3, "hi"), new GenericClass(5, "bar"));
     File outputFile = tmpFolder.newFile("output.avro");
 
-    p.apply(Create.of(values))
+    writePipeline.apply(Create.of(values))
         .apply(AvroIO.write(GenericClass.class).to(outputFile.getAbsolutePath()).withoutSharding());
-    p.run();
+    writePipeline.run().waitUntilFinish();
 
     List<GenericClassV2> expected =
         ImmutableList.of(new GenericClassV2(3, "hi", null), new GenericClassV2(5, "bar", null));
 
     PCollection<GenericClassV2> input =
-        p.apply(AvroIO.read(GenericClassV2.class).from(outputFile.getAbsolutePath()));
+        readPipeline.apply(AvroIO.read(GenericClassV2.class).from(outputFile.getAbsolutePath()));
 
     PAssert.that(input).containsInAnyOrder(expected);
-    p.run();
+    readPipeline.run();
   }
 
   private static class WindowedFilenamePolicy extends FilenamePolicy {
@@ -467,7 +471,7 @@ public class AvroIOTest {
         ImmutableList.of(new GenericClass(3, "hi"), new GenericClass(5, "bar"));
     File outputFile = tmpFolder.newFile("output.avro");
 
-    p.apply(Create.of(values))
+    writePipeline.apply(Create.of(values))
         .apply(
             AvroIO.write(GenericClass.class)
                 .to(outputFile.getAbsolutePath())
@@ -480,7 +484,7 @@ public class AvroIOTest {
                         100L,
                         "bytesKey",
                         "bytesValue".getBytes())));
-    p.run();
+    writePipeline.run();
 
     DataFileStream dataFileStream =
         new DataFileStream(new FileInputStream(outputFile), new GenericDatumReader());
@@ -502,8 +506,8 @@ public class AvroIOTest {
       System.out.println("no sharding");
       write = write.withoutSharding();
     }
-    p.apply(Create.of(ImmutableList.copyOf(expectedElements))).apply(write);
-    p.run();
+    writePipeline.apply(Create.of(ImmutableList.copyOf(expectedElements))).apply(write);
+    writePipeline.run();
 
     String shardNameTemplate =
         firstNonNull(


[04/50] [abbrv] beam git commit: This closes #3559

Posted by jb...@apache.org.
This closes #3559


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/7e4719cd
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/7e4719cd
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/7e4719cd

Branch: refs/heads/DSL_SQL
Commit: 7e4719cd01f412c000bab61f99c687ab5c5491cc
Parents: ae0de1b 095e791
Author: Ahmet Altay <al...@google.com>
Authored: Mon Jul 17 09:16:14 2017 -0700
Committer: Ahmet Altay <al...@google.com>
Committed: Mon Jul 17 09:16:14 2017 -0700

----------------------------------------------------------------------
 .../apache_beam/io/gcp/datastore/v1/helper.py   |  8 +++++++
 .../io/gcp/datastore/v1/helper_test.py          | 22 ++++++++++++++++----
 2 files changed, 26 insertions(+), 4 deletions(-)
----------------------------------------------------------------------



[28/50] [abbrv] beam git commit: This closes #3455

Posted by jb...@apache.org.
This closes #3455


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/dd9e866e
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/dd9e866e
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/dd9e866e

Branch: refs/heads/DSL_SQL
Commit: dd9e866e087351e395e464e07d512b2e8db107c4
Parents: 2d5b6d7 111603a
Author: Thomas Groh <tg...@google.com>
Authored: Tue Jul 18 14:49:56 2017 -0700
Committer: Thomas Groh <tg...@google.com>
Committed: Tue Jul 18 14:49:56 2017 -0700

----------------------------------------------------------------------
 .../beam/runners/dataflow/DataflowPipelineJob.java  | 14 ++++++++++++--
 .../beam/runners/dataflow/DataflowRunner.java       |  3 ++-
 .../beam/runners/dataflow/util/MonitoringUtil.java  | 16 +++++++++++++---
 .../dataflow/BatchStatefulParDoOverridesTest.java   |  1 +
 .../dataflow/DataflowPipelineTranslatorTest.java    |  1 +
 .../runners/dataflow/internal/apiclient.py          |  7 +++++--
 .../runners/dataflow/test_dataflow_runner.py        |  5 +++--
 7 files changed, 37 insertions(+), 10 deletions(-)
----------------------------------------------------------------------



[16/50] [abbrv] beam git commit: This closes #3463

Posted by jb...@apache.org.
This closes #3463


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/0f06eb25
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/0f06eb25
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/0f06eb25

Branch: refs/heads/DSL_SQL
Commit: 0f06eb25bcc9c6bf9fb596a6ddc3a853f339b74d
Parents: 04d364d c5ebbff
Author: Thomas Groh <tg...@google.com>
Authored: Mon Jul 17 16:01:38 2017 -0700
Committer: Thomas Groh <tg...@google.com>
Committed: Mon Jul 17 16:01:38 2017 -0700

----------------------------------------------------------------------
 .../beam/runners/dataflow/DataflowMetrics.java  | 30 +++++++----
 .../runners/dataflow/DataflowMetricsTest.java   | 53 +++++++++++++++-----
 2 files changed, 59 insertions(+), 24 deletions(-)
----------------------------------------------------------------------



[33/50] [abbrv] beam git commit: This closes #3576

Posted by jb...@apache.org.
This closes #3576


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/0d927ef6
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/0d927ef6
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/0d927ef6

Branch: refs/heads/DSL_SQL
Commit: 0d927ef6ab0fa5dd03a6b38ea9fe9bf4444eacd8
Parents: be5b934 1e94704
Author: Thomas Groh <tg...@google.com>
Authored: Tue Jul 18 17:52:56 2017 -0700
Committer: Thomas Groh <tg...@google.com>
Committed: Tue Jul 18 17:52:56 2017 -0700

----------------------------------------------------------------------
 .../beam/sdk/transforms/GroupByKeyTest.java     | 156 +++++++++++++++----
 1 file changed, 122 insertions(+), 34 deletions(-)
----------------------------------------------------------------------



[38/50] [abbrv] beam git commit: [BEAM-2306] Add checkstyle check to fail the build when @Deprecated is used without @deprecated javadoc (or vice versa).

Posted by jb...@apache.org.
[BEAM-2306] Add checkstyle check to fail the build when @Deprecated is used without @deprecated javadoc (or vice versa).

The check is disabled for existing violations where the reason for deprecation and/or the alternative is not clear.
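
For illustration, a declaration that the new check accepts pairs the @Deprecated annotation with a @deprecated javadoc tag; omitting either one would now fail checkstyle. The class and method names below are hypothetical:

/** Hypothetical example of a declaration that satisfies the MissingDeprecated check. */
public class LegacyOps {
  /**
   * Runs the legacy code path.
   *
   * @deprecated use {@link #runV2()} instead; this method will be removed in a future release.
   */
  @Deprecated
  public void run() {
    runV2();
  }

  /** Runs the current code path. */
  public void runV2() {}
}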


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/d2901145
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/d2901145
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/d2901145

Branch: refs/heads/DSL_SQL
Commit: d290114549c0b379774dbabe119a79d3ee1b2b56
Parents: 7fde976
Author: Alex Filatov <al...@users.noreply.github.com>
Authored: Mon Jul 10 13:20:49 2017 +0300
Committer: Kenneth Knowles <kl...@google.com>
Committed: Wed Jul 19 09:03:31 2017 -0700

----------------------------------------------------------------------
 .../construction/CreatePCollectionViewTranslation.java  | 11 ++++++++++-
 .../core/construction/PTransformTranslation.java        |  4 ++++
 .../beam/runners/core/InMemoryTimerInternals.java       |  9 +++++++++
 .../java/org/apache/beam/runners/core/StateTags.java    |  3 +++
 .../beam/runners/direct/DirectTimerInternals.java       |  9 +++++++++
 .../translation/wrappers/streaming/DoFnOperator.java    |  9 +++++++++
 .../apache/beam/runners/dataflow/DataflowRunner.java    |  3 ++-
 .../options/DataflowPipelineWorkerPoolOptions.java      |  3 +++
 .../build-tools/src/main/resources/beam/checkstyle.xml  |  8 ++++++++
 .../src/main/java/org/apache/beam/sdk/coders/Coder.java | 12 +++++++++++-
 .../java/org/apache/beam/sdk/coders/CoderRegistry.java  |  9 +++++++++
 .../main/java/org/apache/beam/sdk/io/AvroSource.java    |  6 ------
 .../main/java/org/apache/beam/sdk/testing/PAssert.java  |  5 +++--
 .../java/org/apache/beam/sdk/testing/StreamingIT.java   |  4 ++++
 .../java/org/apache/beam/sdk/transforms/Combine.java    |  1 -
 .../main/java/org/apache/beam/sdk/transforms/DoFn.java  |  3 +++
 .../main/java/org/apache/beam/sdk/transforms/View.java  |  2 +-
 .../beam/sdk/transforms/reflect/DoFnInvokers.java       |  9 ---------
 .../java/org/apache/beam/sdk/util/IdentityWindowFn.java |  1 -
 .../org/apache/beam/sdk/values/PCollectionViews.java    |  1 -
 .../main/java/org/apache/beam/sdk/values/PValue.java    |  4 ++--
 .../org/apache/beam/sdk/coders/DefaultCoderTest.java    |  3 ++-
 .../org/apache/beam/fn/harness/BoundedSourceRunner.java |  6 +++---
 23 files changed, 95 insertions(+), 30 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/d2901145/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/CreatePCollectionViewTranslation.java
----------------------------------------------------------------------
diff --git a/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/CreatePCollectionViewTranslation.java b/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/CreatePCollectionViewTranslation.java
index 8fc99b9..c67d688 100644
--- a/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/CreatePCollectionViewTranslation.java
+++ b/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/CreatePCollectionViewTranslation.java
@@ -86,6 +86,10 @@ public class CreatePCollectionViewTranslation {
             PCollectionView.class.getSimpleName());
   }
 
+  /**
+   * @deprecated runners should move away from translating `CreatePCollectionView` and treat this
+   * as part of the translation for a `ParDo` side input.
+   */
   @Deprecated
   static class CreatePCollectionViewTranslator
       implements TransformPayloadTranslator<View.CreatePCollectionView<?, ?>> {
@@ -112,7 +116,12 @@ public class CreatePCollectionViewTranslation {
     }
   }
 
-  /** Registers {@link CreatePCollectionViewTranslator}. */
+  /**
+   * Registers {@link CreatePCollectionViewTranslator}.
+   *
+   * @deprecated runners should move away from translating `CreatePCollectionView` and treat this
+   * as part of the translation for a `ParDo` side input.
+   */
   @AutoService(TransformPayloadTranslatorRegistrar.class)
   @Deprecated
   public static class Registrar implements TransformPayloadTranslatorRegistrar {

http://git-wip-us.apache.org/repos/asf/beam/blob/d2901145/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/PTransformTranslation.java
----------------------------------------------------------------------
diff --git a/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/PTransformTranslation.java b/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/PTransformTranslation.java
index bae7b05..0b4a2ab 100644
--- a/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/PTransformTranslation.java
+++ b/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/PTransformTranslation.java
@@ -55,6 +55,10 @@ public class PTransformTranslation {
   // Less well-known. And where shall these live?
   public static final String WRITE_FILES_TRANSFORM_URN = "urn:beam:transform:write_files:0.1";
 
+  /**
+   * @deprecated runners should move away from translating `CreatePCollectionView` and treat this
+   * as part of the translation for a `ParDo` side input.
+   */
   @Deprecated
   public static final String CREATE_VIEW_TRANSFORM_URN = "urn:beam:transform:create_view:v1";
 

http://git-wip-us.apache.org/repos/asf/beam/blob/d2901145/runners/core-java/src/main/java/org/apache/beam/runners/core/InMemoryTimerInternals.java
----------------------------------------------------------------------
diff --git a/runners/core-java/src/main/java/org/apache/beam/runners/core/InMemoryTimerInternals.java b/runners/core-java/src/main/java/org/apache/beam/runners/core/InMemoryTimerInternals.java
index e68bb24..c29ea19 100644
--- a/runners/core-java/src/main/java/org/apache/beam/runners/core/InMemoryTimerInternals.java
+++ b/runners/core-java/src/main/java/org/apache/beam/runners/core/InMemoryTimerInternals.java
@@ -107,6 +107,9 @@ public class InMemoryTimerInternals implements TimerInternals {
     setTimer(TimerData.of(timerId, namespace, target, timeDomain));
   }
 
+  /**
+   * @deprecated use {@link #setTimer(StateNamespace, String, Instant, TimeDomain)}.
+   */
   @Deprecated
   @Override
   public void setTimer(TimerData timerData) {
@@ -136,6 +139,9 @@ public class InMemoryTimerInternals implements TimerInternals {
     throw new UnsupportedOperationException("Canceling a timer by ID is not yet supported.");
   }
 
+  /**
+   * @deprecated use {@link #deleteTimer(StateNamespace, String, TimeDomain)}.
+   */
   @Deprecated
   @Override
   public void deleteTimer(StateNamespace namespace, String timerId) {
@@ -145,6 +151,9 @@ public class InMemoryTimerInternals implements TimerInternals {
     }
   }
 
+  /**
+   * @deprecated use {@link #deleteTimer(StateNamespace, String, TimeDomain)}.
+   */
   @Deprecated
   @Override
   public void deleteTimer(TimerData timer) {

http://git-wip-us.apache.org/repos/asf/beam/blob/d2901145/runners/core-java/src/main/java/org/apache/beam/runners/core/StateTags.java
----------------------------------------------------------------------
diff --git a/runners/core-java/src/main/java/org/apache/beam/runners/core/StateTags.java b/runners/core-java/src/main/java/org/apache/beam/runners/core/StateTags.java
index 53f9edc..a98f47d 100644
--- a/runners/core-java/src/main/java/org/apache/beam/runners/core/StateTags.java
+++ b/runners/core-java/src/main/java/org/apache/beam/runners/core/StateTags.java
@@ -302,6 +302,9 @@ public class StateTags {
       this.spec = spec;
     }
 
+    /**
+     * @deprecated use {@link StateSpec#bind} method via {@link #getSpec} for now.
+     */
     @Override
     @Deprecated
     public StateT bind(StateTag.StateBinder binder) {

http://git-wip-us.apache.org/repos/asf/beam/blob/d2901145/runners/direct-java/src/main/java/org/apache/beam/runners/direct/DirectTimerInternals.java
----------------------------------------------------------------------
diff --git a/runners/direct-java/src/main/java/org/apache/beam/runners/direct/DirectTimerInternals.java b/runners/direct-java/src/main/java/org/apache/beam/runners/direct/DirectTimerInternals.java
index a099368..7db12a4 100644
--- a/runners/direct-java/src/main/java/org/apache/beam/runners/direct/DirectTimerInternals.java
+++ b/runners/direct-java/src/main/java/org/apache/beam/runners/direct/DirectTimerInternals.java
@@ -52,6 +52,9 @@ class DirectTimerInternals implements TimerInternals {
     timerUpdateBuilder.setTimer(TimerData.of(timerId, namespace, target, timeDomain));
   }
 
+  /**
+   * @deprecated use {@link #setTimer(StateNamespace, String, Instant, TimeDomain)}.
+   */
   @Deprecated
   @Override
   public void setTimer(TimerData timerData) {
@@ -63,12 +66,18 @@ class DirectTimerInternals implements TimerInternals {
     throw new UnsupportedOperationException("Canceling of timer by ID is not yet supported.");
   }
 
+  /**
+   * @deprecated use {@link #deleteTimer(StateNamespace, String, TimeDomain)}.
+   */
   @Deprecated
   @Override
   public void deleteTimer(StateNamespace namespace, String timerId) {
     throw new UnsupportedOperationException("Canceling of timer by ID is not yet supported.");
   }
 
+  /**
+   * @deprecated use {@link #deleteTimer(StateNamespace, String, TimeDomain)}.
+   */
   @Deprecated
   @Override
   public void deleteTimer(TimerData timerKey) {

http://git-wip-us.apache.org/repos/asf/beam/blob/d2901145/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/DoFnOperator.java
----------------------------------------------------------------------
diff --git a/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/DoFnOperator.java b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/DoFnOperator.java
index 350f323..a80f7b6 100644
--- a/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/DoFnOperator.java
+++ b/runners/flink/src/main/java/org/apache/beam/runners/flink/translation/wrappers/streaming/DoFnOperator.java
@@ -752,6 +752,9 @@ public class DoFnOperator<InputT, OutputT>
       setTimer(TimerData.of(timerId, namespace, target, timeDomain));
     }
 
+    /**
+     * @deprecated use {@link #setTimer(StateNamespace, String, Instant, TimeDomain)}.
+     */
     @Deprecated
     @Override
     public void setTimer(TimerData timerKey) {
@@ -770,6 +773,9 @@ public class DoFnOperator<InputT, OutputT>
       }
     }
 
+    /**
+     * @deprecated use {@link #deleteTimer(StateNamespace, String, TimeDomain)}.
+     */
     @Deprecated
     @Override
     public void deleteTimer(StateNamespace namespace, String timerId) {
@@ -783,6 +789,9 @@ public class DoFnOperator<InputT, OutputT>
           "Canceling of a timer by ID is not yet supported.");
     }
 
+    /**
+     * @deprecated use {@link #deleteTimer(StateNamespace, String, TimeDomain)}.
+     */
     @Deprecated
     @Override
     public void deleteTimer(TimerData timerKey) {

http://git-wip-us.apache.org/repos/asf/beam/blob/d2901145/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/DataflowRunner.java
----------------------------------------------------------------------
diff --git a/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/DataflowRunner.java b/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/DataflowRunner.java
index 57a5ea5..7335ef7 100644
--- a/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/DataflowRunner.java
+++ b/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/DataflowRunner.java
@@ -82,6 +82,7 @@ import org.apache.beam.sdk.Pipeline.PipelineVisitor;
 import org.apache.beam.sdk.PipelineResult.State;
 import org.apache.beam.sdk.PipelineRunner;
 import org.apache.beam.sdk.annotations.Experimental;
+import org.apache.beam.sdk.annotations.Internal;
 import org.apache.beam.sdk.coders.ByteArrayCoder;
 import org.apache.beam.sdk.coders.Coder;
 import org.apache.beam.sdk.coders.Coder.NonDeterministicException;
@@ -1293,7 +1294,7 @@ public class DataflowRunner extends PipelineRunner<DataflowPipelineJob> {
    * A marker {@link DoFn} for writing the contents of a {@link PCollection} to a streaming
    * {@link PCollectionView} backend implementation.
    */
-  @Deprecated
+  @Internal
   public static class StreamingPCollectionViewWriterFn<T> extends DoFn<Iterable<T>, T> {
     private final PCollectionView<?> view;
     private final Coder<T> dataCoder;

http://git-wip-us.apache.org/repos/asf/beam/blob/d2901145/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/options/DataflowPipelineWorkerPoolOptions.java
----------------------------------------------------------------------
diff --git a/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/options/DataflowPipelineWorkerPoolOptions.java b/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/options/DataflowPipelineWorkerPoolOptions.java
index 00d2194..2239462 100644
--- a/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/options/DataflowPipelineWorkerPoolOptions.java
+++ b/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/options/DataflowPipelineWorkerPoolOptions.java
@@ -53,6 +53,9 @@ public interface DataflowPipelineWorkerPoolOptions extends PipelineOptions {
     /** Use numWorkers machines. Do not autoscale the worker pool. */
     NONE("AUTOSCALING_ALGORITHM_NONE"),
 
+    /**
+     * @deprecated use {@link #THROUGHPUT_BASED}.
+     */
     @Deprecated
     BASIC("AUTOSCALING_ALGORITHM_BASIC"),
 

http://git-wip-us.apache.org/repos/asf/beam/blob/d2901145/sdks/java/build-tools/src/main/resources/beam/checkstyle.xml
----------------------------------------------------------------------
diff --git a/sdks/java/build-tools/src/main/resources/beam/checkstyle.xml b/sdks/java/build-tools/src/main/resources/beam/checkstyle.xml
index ebbaa7d..b2a74a7 100644
--- a/sdks/java/build-tools/src/main/resources/beam/checkstyle.xml
+++ b/sdks/java/build-tools/src/main/resources/beam/checkstyle.xml
@@ -81,6 +81,14 @@ page at http://checkstyle.sourceforge.net/config.html -->
 
     <!--
 
+    ANNOTATIONS CHECKS
+
+    -->
+
+    <module name="MissingDeprecated" />
+
+    <!--
+
     IMPORT CHECKS
 
     -->

http://git-wip-us.apache.org/repos/asf/beam/blob/d2901145/sdks/java/core/src/main/java/org/apache/beam/sdk/coders/Coder.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/coders/Coder.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/coders/Coder.java
index edcc3a8..78a4a02 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/coders/Coder.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/coders/Coder.java
@@ -56,7 +56,13 @@ import org.apache.beam.sdk.values.TypeDescriptor;
  * @param <T> the type of values being encoded and decoded
  */
 public abstract class Coder<T> implements Serializable {
-  /** The context in which encoding or decoding is being done. */
+  /**
+   * The context in which encoding or decoding is being done.
+   *
+   * @deprecated to implement a coder, do not use any `Context`. Just implement only those abstract
+   * methods which do not accept a `Context` and leave the default implementations for methods
+   * accepting a `Context`.
+   */
   @Deprecated
   @Experimental(Kind.CODER_CONTEXT)
   public static class Context {
@@ -127,6 +133,8 @@ public abstract class Coder<T> implements Serializable {
    * @throws IOException if writing to the {@code OutputStream} fails
    * for some reason
    * @throws CoderException if the value could not be encoded for some reason
+   *
+   * @deprecated only implement and call {@link #encode(Object value, OutputStream)}
    */
   @Deprecated
   @Experimental(Kind.CODER_CONTEXT)
@@ -152,6 +160,8 @@ public abstract class Coder<T> implements Serializable {
    * @throws IOException if reading from the {@code InputStream} fails
    * for some reason
    * @throws CoderException if the value could not be decoded for some reason
+   *
+   * @deprecated only implement and call {@link #decode(InputStream)}
    */
   @Deprecated
   @Experimental(Kind.CODER_CONTEXT)
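
As a rough sketch of the approach the updated javadoc recommends, a coder can implement only the overloads that do not take a Context and inherit the Context-accepting defaults. The coder below is illustrative only, not part of this change:

import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.Collections;
import java.util.List;
import org.apache.beam.sdk.coders.Coder;

/** Hypothetical coder implementing only the overloads that do not take a Context. */
public class BigEndianIntCoder extends Coder<Integer> {
  @Override
  public void encode(Integer value, OutputStream outStream) throws IOException {
    new DataOutputStream(outStream).writeInt(value);
  }

  @Override
  public Integer decode(InputStream inStream) throws IOException {
    return new DataInputStream(inStream).readInt();
  }

  @Override
  public List<? extends Coder<?>> getCoderArguments() {
    return Collections.emptyList();
  }

  @Override
  public void verifyDeterministic() {
    // Fixed-width big-endian encoding is deterministic, so nothing to check.
  }
}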

http://git-wip-us.apache.org/repos/asf/beam/blob/d2901145/sdks/java/core/src/main/java/org/apache/beam/sdk/coders/CoderRegistry.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/coders/CoderRegistry.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/coders/CoderRegistry.java
index 2ba548a..53cb6d3 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/coders/CoderRegistry.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/coders/CoderRegistry.java
@@ -234,6 +234,9 @@ public class CoderRegistry {
    * type uses the given {@link Coder}.
    *
    * @throws CannotProvideCoderException if a {@link Coder} cannot be provided
+   *
+   * @deprecated This method is to change in an unknown backwards incompatible way once support for
+   * this functionality is refined.
    */
   @Deprecated
   @Internal
@@ -254,6 +257,9 @@ public class CoderRegistry {
    * used for its input elements.
    *
    * @throws CannotProvideCoderException if a {@link Coder} cannot be provided
+   *
+   * @deprecated This method is to change in an unknown backwards incompatible way once support for
+   * this functionality is refined.
    */
   @Deprecated
   @Internal
@@ -276,6 +282,9 @@ public class CoderRegistry {
    * subclass, given {@link Coder Coders} to use for all other type parameters (if any).
    *
    * @throws CannotProvideCoderException if a {@link Coder} cannot be provided
+   *
+   * @deprecated This method is to change in an unknown backwards incompatible way once support for
+   * this functionality is refined.
    */
   @Deprecated
   @Internal

http://git-wip-us.apache.org/repos/asf/beam/blob/d2901145/sdks/java/core/src/main/java/org/apache/beam/sdk/io/AvroSource.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/AvroSource.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/AvroSource.java
index 37bbe46..575218b 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/io/AvroSource.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/io/AvroSource.java
@@ -237,12 +237,6 @@ public class AvroSource<T> extends BlockBasedSource<T> {
     super.validate();
   }
 
-  @Deprecated // Added to let DataflowRunner migrate off of this; to be deleted.
-  public BlockBasedSource<T> createForSubrangeOfFile(String fileName, long start, long end)
-      throws IOException {
-    return createForSubrangeOfFile(FileSystems.matchSingleFileSpec(fileName), start, end);
-  }
-
   @Override
   public BlockBasedSource<T> createForSubrangeOfFile(Metadata fileMetadata, long start, long end) {
     byte[] syncMarker = this.syncMarker;

http://git-wip-us.apache.org/repos/asf/beam/blob/d2901145/sdks/java/core/src/main/java/org/apache/beam/sdk/testing/PAssert.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/testing/PAssert.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/testing/PAssert.java
index 6e2b8c6..ed80f2f 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/testing/PAssert.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/testing/PAssert.java
@@ -861,8 +861,9 @@ public class PAssert {
     }
 
     /**
-     * Always throws an {@link UnsupportedOperationException}: users are probably looking for
-     * {@link #isEqualTo}.
+     * @throws UnsupportedOperationException always
+     * @deprecated {@link Object#equals(Object)} is not supported on PAssert objects. If you meant
+     * to test object equality, use {@link #isEqualTo} instead.
      */
     @Deprecated
     @Override

http://git-wip-us.apache.org/repos/asf/beam/blob/d2901145/sdks/java/core/src/main/java/org/apache/beam/sdk/testing/StreamingIT.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/testing/StreamingIT.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/testing/StreamingIT.java
index 427b908..475372d 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/testing/StreamingIT.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/testing/StreamingIT.java
@@ -29,6 +29,10 @@ package org.apache.beam.sdk.testing;
  *       StreamingPipeline.main(...);
  *     }
  * </code></pre>
+ *
+ * @deprecated tests which use unbounded PCollections should be in the category
+ * {@link UsesUnboundedPCollections}. Beyond that, it is up to the runner and test configuration
+ * to decide whether to run in streaming mode.
  */
 @Deprecated
 public interface StreamingIT {
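
A minimal sketch of the suggested replacement, assuming a JUnit test tagged with the UsesUnboundedPCollections category (test class and method names are hypothetical):

import org.apache.beam.sdk.testing.UsesUnboundedPCollections;
import org.apache.beam.sdk.testing.ValidatesRunner;
import org.junit.Test;
import org.junit.experimental.categories.Category;

/** Hypothetical test class showing the category-based replacement for StreamingIT. */
public class MyStreamingPipelineIT {
  @Test
  @Category({ValidatesRunner.class, UsesUnboundedPCollections.class})
  public void testWithUnboundedInput() {
    // Pipeline construction and assertions would go here; the categories let the
    // runner and test configuration decide whether to execute in streaming mode.
  }
}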

http://git-wip-us.apache.org/repos/asf/beam/blob/d2901145/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/Combine.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/Combine.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/Combine.java
index d7effb5..c195352 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/Combine.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/Combine.java
@@ -1416,7 +1416,6 @@ public class Combine {
      * Returns a {@code CombineFn} that uses the given
      * {@code SerializableFunction} to combine values.
      */
-    @Deprecated
     public static <V> SimpleCombineFn<V> of(
         SerializableFunction<Iterable<V>, V> combiner) {
       return new SimpleCombineFn<>(combiner);

http://git-wip-us.apache.org/repos/asf/beam/blob/d2901145/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/DoFn.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/DoFn.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/DoFn.java
index 1b809c2..37c6263 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/DoFn.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/DoFn.java
@@ -728,6 +728,9 @@ public abstract class DoFn<InputT, OutputT> implements Serializable, HasDisplayD
   /**
    * Finalize the {@link DoFn} construction to prepare for processing.
    * This method should be called by runners before any processing methods.
+   *
+   * @deprecated use {@link Setup} or {@link StartBundle} instead. This method will be removed in a
+   * future release.
    */
   @Deprecated
   public final void prepareForProcessing() {}

http://git-wip-us.apache.org/repos/asf/beam/blob/d2901145/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/View.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/View.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/View.java
index 331b143..c94fad6 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/View.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/View.java
@@ -497,7 +497,7 @@ public class View {
     /**
      * Return the {@link PCollectionView} that is returned by applying this {@link PTransform}.
      *
-     * <p>This should not be used to obtain the output of any given application of this
+     * @deprecated This should not be used to obtain the output of any given application of this
      * {@link PTransform}. That should be obtained by inspecting the {@link Node}
      * that contains this {@link CreatePCollectionView}, as this view may have been replaced within
      * pipeline surgery.

http://git-wip-us.apache.org/repos/asf/beam/blob/d2901145/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/reflect/DoFnInvokers.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/reflect/DoFnInvokers.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/reflect/DoFnInvokers.java
index 33c5a6a..44b87a0 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/reflect/DoFnInvokers.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/reflect/DoFnInvokers.java
@@ -17,7 +17,6 @@
  */
 package org.apache.beam.sdk.transforms.reflect;
 
-import java.io.Serializable;
 import org.apache.beam.sdk.transforms.DoFn;
 
 /** Static utilities for working with {@link DoFnInvoker}. */
@@ -36,13 +35,5 @@ public class DoFnInvokers {
     return ByteBuddyDoFnInvokerFactory.only().newByteBuddyInvoker(fn);
   }
 
-  /** TODO: remove this when Dataflow worker uses the DoFn overload. */
-  @Deprecated
-  @SuppressWarnings({"unchecked"})
-  public static <InputT, OutputT> DoFnInvoker<InputT, OutputT> invokerFor(
-      Serializable fn) {
-    return invokerFor((DoFn) fn);
-  }
-
   private DoFnInvokers() {}
 }

http://git-wip-us.apache.org/repos/asf/beam/blob/d2901145/sdks/java/core/src/main/java/org/apache/beam/sdk/util/IdentityWindowFn.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/util/IdentityWindowFn.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/util/IdentityWindowFn.java
index ef6d833..54ac77c 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/util/IdentityWindowFn.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/util/IdentityWindowFn.java
@@ -111,7 +111,6 @@ public class IdentityWindowFn<T> extends NonMergingWindowFn<T, BoundedWindow> {
             getClass().getCanonicalName()));
   }
 
-  @Deprecated
   @Override
   public Instant getOutputTime(Instant inputTimestamp, BoundedWindow window) {
     return inputTimestamp;

http://git-wip-us.apache.org/repos/asf/beam/blob/d2901145/sdks/java/core/src/main/java/org/apache/beam/sdk/values/PCollectionViews.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/values/PCollectionViews.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/values/PCollectionViews.java
index e17e146..f2a3097 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/values/PCollectionViews.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/values/PCollectionViews.java
@@ -186,7 +186,6 @@ public class PCollectionViews {
     /**
      * Returns if a default value was specified.
      */
-    @Deprecated
     @Internal
     public boolean hasDefault() {
       return hasDefault;

http://git-wip-us.apache.org/repos/asf/beam/blob/d2901145/sdks/java/core/src/main/java/org/apache/beam/sdk/values/PValue.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/main/java/org/apache/beam/sdk/values/PValue.java b/sdks/java/core/src/main/java/org/apache/beam/sdk/values/PValue.java
index 1089028..71f9465 100644
--- a/sdks/java/core/src/main/java/org/apache/beam/sdk/values/PValue.java
+++ b/sdks/java/core/src/main/java/org/apache/beam/sdk/values/PValue.java
@@ -37,8 +37,8 @@ public interface PValue extends POutput, PInput {
   /**
    * {@inheritDoc}.
    *
-   * <p>A {@link PValue} always expands into itself. Calling {@link #expand()} on a PValue is almost
-   * never appropriate.
+   * @deprecated A {@link PValue} always expands into itself. Calling {@link #expand()} on a PValue
+   * is almost never appropriate.
    */
   @Deprecated
   Map<TupleTag<?>, PValue> expand();

http://git-wip-us.apache.org/repos/asf/beam/blob/d2901145/sdks/java/core/src/test/java/org/apache/beam/sdk/coders/DefaultCoderTest.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/coders/DefaultCoderTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/coders/DefaultCoderTest.java
index aa8d94c..274fef4 100644
--- a/sdks/java/core/src/test/java/org/apache/beam/sdk/coders/DefaultCoderTest.java
+++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/coders/DefaultCoderTest.java
@@ -89,7 +89,8 @@ public class DefaultCoderTest {
 
   private static class OldCustomSerializableCoder extends SerializableCoder<OldCustomRecord> {
     // Extending SerializableCoder isn't trivial, but it can be done.
-    @Deprecated // old form using a Class
+
+    // Old form using a Class.
     @SuppressWarnings("unchecked")
     public static <T extends Serializable> SerializableCoder<T> of(Class<T> recordType) {
        checkArgument(OldCustomRecord.class.isAssignableFrom(recordType));

http://git-wip-us.apache.org/repos/asf/beam/blob/d2901145/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/BoundedSourceRunner.java
----------------------------------------------------------------------
diff --git a/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/BoundedSourceRunner.java b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/BoundedSourceRunner.java
index 977e803..4702e05 100644
--- a/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/BoundedSourceRunner.java
+++ b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/BoundedSourceRunner.java
@@ -114,9 +114,9 @@ public class BoundedSourceRunner<InputT extends BoundedSource<OutputT>, OutputT>
   }
 
   /**
-   * The runner harness is meant to send the source over the Beam Fn Data API which would be
-   * consumed by the {@link #runReadLoop}. Drop this method once the runner harness sends the
-   * source instead of unpacking it from the data block of the function specification.
+   * @deprecated The runner harness is meant to send the source over the Beam Fn Data API which
+   * would be consumed by the {@link #runReadLoop}. Drop this method once the runner harness sends
+   * the source instead of unpacking it from the data block of the function specification.
    */
   @Deprecated
   public void start() throws Exception {


[18/50] [abbrv] beam git commit: Splits large TextIOTest into TextIOReadTest and TextIOWriteTest

Posted by jb...@apache.org.
http://git-wip-us.apache.org/repos/asf/beam/blob/d495d151/sdks/java/core/src/test/java/org/apache/beam/sdk/io/TextIOWriteTest.java
----------------------------------------------------------------------
diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/io/TextIOWriteTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/io/TextIOWriteTest.java
new file mode 100644
index 0000000..a73ed7d
--- /dev/null
+++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/io/TextIOWriteTest.java
@@ -0,0 +1,604 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.beam.sdk.io;
+
+import static com.google.common.base.MoreObjects.firstNonNull;
+import static org.apache.beam.sdk.TestUtils.LINES2_ARRAY;
+import static org.apache.beam.sdk.TestUtils.LINES_ARRAY;
+import static org.apache.beam.sdk.TestUtils.NO_LINES_ARRAY;
+import static org.apache.beam.sdk.transforms.display.DisplayDataMatchers.hasDisplayItem;
+import static org.hamcrest.Matchers.containsInAnyOrder;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertThat;
+import static org.junit.Assert.assertTrue;
+
+import com.google.common.base.Function;
+import com.google.common.base.Functions;
+import com.google.common.base.Predicate;
+import com.google.common.base.Predicates;
+import com.google.common.collect.FluentIterable;
+import com.google.common.collect.Iterables;
+import com.google.common.collect.Lists;
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.io.IOException;
+import java.nio.file.FileVisitResult;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.SimpleFileVisitor;
+import java.nio.file.attribute.BasicFileAttributes;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+import javax.annotation.Nullable;
+import org.apache.beam.sdk.coders.AvroCoder;
+import org.apache.beam.sdk.coders.Coder;
+import org.apache.beam.sdk.coders.DefaultCoder;
+import org.apache.beam.sdk.coders.StringUtf8Coder;
+import org.apache.beam.sdk.io.FileBasedSink.WritableByteChannelFactory;
+import org.apache.beam.sdk.io.fs.MatchResult;
+import org.apache.beam.sdk.io.fs.MatchResult.Metadata;
+import org.apache.beam.sdk.io.fs.ResolveOptions;
+import org.apache.beam.sdk.io.fs.ResourceId;
+import org.apache.beam.sdk.options.PipelineOptions;
+import org.apache.beam.sdk.options.PipelineOptionsFactory;
+import org.apache.beam.sdk.options.ValueProvider;
+import org.apache.beam.sdk.testing.NeedsRunner;
+import org.apache.beam.sdk.testing.TestPipeline;
+import org.apache.beam.sdk.transforms.Create;
+import org.apache.beam.sdk.transforms.SerializableFunction;
+import org.apache.beam.sdk.transforms.display.DisplayData;
+import org.apache.beam.sdk.util.CoderUtils;
+import org.apache.beam.sdk.values.PCollection;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+import org.junit.rules.ExpectedException;
+
+/** Tests for {@link TextIO.Write}. */
+public class TextIOWriteTest {
+  private static final String MY_HEADER = "myHeader";
+  private static final String MY_FOOTER = "myFooter";
+
+  private static Path tempFolder;
+
+  @Rule public TestPipeline p = TestPipeline.create();
+
+  @Rule public ExpectedException expectedException = ExpectedException.none();
+
+  @BeforeClass
+  public static void setupClass() throws IOException {
+    tempFolder = Files.createTempDirectory("TextIOTest");
+  }
+
+  @AfterClass
+  public static void teardownClass() throws IOException {
+    Files.walkFileTree(
+        tempFolder,
+        new SimpleFileVisitor<Path>() {
+          @Override
+          public FileVisitResult visitFile(Path file, BasicFileAttributes attrs)
+              throws IOException {
+            Files.delete(file);
+            return FileVisitResult.CONTINUE;
+          }
+
+          @Override
+          public FileVisitResult postVisitDirectory(Path dir, IOException exc) throws IOException {
+            Files.delete(dir);
+            return FileVisitResult.CONTINUE;
+          }
+        });
+  }
+
+  static class TestDynamicDestinations extends FileBasedSink.DynamicDestinations<String, String> {
+    ResourceId baseDir;
+
+    TestDynamicDestinations(ResourceId baseDir) {
+      this.baseDir = baseDir;
+    }
+
+    @Override
+    public String getDestination(String element) {
+      // Destination is based on first character of string.
+      return element.substring(0, 1);
+    }
+
+    @Override
+    public String getDefaultDestination() {
+      return "";
+    }
+
+    @Nullable
+    @Override
+    public Coder<String> getDestinationCoder() {
+      return StringUtf8Coder.of();
+    }
+
+    @Override
+    public FileBasedSink.FilenamePolicy getFilenamePolicy(String destination) {
+      return DefaultFilenamePolicy.fromStandardParameters(
+          ValueProvider.StaticValueProvider.of(
+              baseDir.resolve(
+                  "file_" + destination + ".txt",
+                  ResolveOptions.StandardResolveOptions.RESOLVE_FILE)),
+          null,
+          null,
+          false);
+    }
+  }
+
+  class StartsWith implements Predicate<String> {
+    String prefix;
+
+    StartsWith(String prefix) {
+      this.prefix = prefix;
+    }
+
+    @Override
+    public boolean apply(@Nullable String input) {
+      return input.startsWith(prefix);
+    }
+  }
+
+  @Test
+  @Category(NeedsRunner.class)
+  public void testDynamicDestinations() throws Exception {
+    ResourceId baseDir =
+        FileSystems.matchNewResource(
+            Files.createTempDirectory(tempFolder, "testDynamicDestinations").toString(), true);
+
+    List<String> elements = Lists.newArrayList("aaaa", "aaab", "baaa", "baab", "caaa", "caab");
+    PCollection<String> input = p.apply(Create.of(elements).withCoder(StringUtf8Coder.of()));
+    input.apply(
+        TextIO.write()
+            .to(new TestDynamicDestinations(baseDir))
+            .withTempDirectory(FileSystems.matchNewResource(baseDir.toString(), true)));
+    p.run();
+
+    assertOutputFiles(
+        Iterables.toArray(Iterables.filter(elements, new StartsWith("a")), String.class),
+        null,
+        null,
+        0,
+        baseDir.resolve("file_a.txt", ResolveOptions.StandardResolveOptions.RESOLVE_FILE),
+        DefaultFilenamePolicy.DEFAULT_UNWINDOWED_SHARD_TEMPLATE);
+    assertOutputFiles(
+        Iterables.toArray(Iterables.filter(elements, new StartsWith("b")), String.class),
+        null,
+        null,
+        0,
+        baseDir.resolve("file_b.txt", ResolveOptions.StandardResolveOptions.RESOLVE_FILE),
+        DefaultFilenamePolicy.DEFAULT_UNWINDOWED_SHARD_TEMPLATE);
+    assertOutputFiles(
+        Iterables.toArray(Iterables.filter(elements, new StartsWith("c")), String.class),
+        null,
+        null,
+        0,
+        baseDir.resolve("file_c.txt", ResolveOptions.StandardResolveOptions.RESOLVE_FILE),
+        DefaultFilenamePolicy.DEFAULT_UNWINDOWED_SHARD_TEMPLATE);
+  }
+
+  @DefaultCoder(AvroCoder.class)
+  private static class UserWriteType {
+    String destination;
+    String metadata;
+
+    UserWriteType() {
+      this.destination = "";
+      this.metadata = "";
+    }
+
+    UserWriteType(String destination, String metadata) {
+      this.destination = destination;
+      this.metadata = metadata;
+    }
+
+    @Override
+    public String toString() {
+      return String.format("destination: %s metadata : %s", destination, metadata);
+    }
+  }
+
+  private static class SerializeUserWrite implements SerializableFunction<UserWriteType, String> {
+    @Override
+    public String apply(UserWriteType input) {
+      return input.toString();
+    }
+  }
+
+  private static class UserWriteDestination
+      implements SerializableFunction<UserWriteType, DefaultFilenamePolicy.Params> {
+    private ResourceId baseDir;
+
+    UserWriteDestination(ResourceId baseDir) {
+      this.baseDir = baseDir;
+    }
+
+    @Override
+    public DefaultFilenamePolicy.Params apply(UserWriteType input) {
+      return new DefaultFilenamePolicy.Params()
+          .withBaseFilename(
+              baseDir.resolve(
+                  "file_" + input.destination.substring(0, 1) + ".txt",
+                  ResolveOptions.StandardResolveOptions.RESOLVE_FILE));
+    }
+  }
+
+  private static class ExtractWriteDestination implements Function<UserWriteType, String> {
+    @Override
+    public String apply(@Nullable UserWriteType input) {
+      return input.destination;
+    }
+  }
+
+  @Test
+  @Category(NeedsRunner.class)
+  public void testDynamicDefaultFilenamePolicy() throws Exception {
+    ResourceId baseDir =
+        FileSystems.matchNewResource(
+            Files.createTempDirectory(tempFolder, "testDynamicDestinations").toString(), true);
+
+    List<UserWriteType> elements =
+        Lists.newArrayList(
+            new UserWriteType("aaaa", "first"),
+            new UserWriteType("aaab", "second"),
+            new UserWriteType("baaa", "third"),
+            new UserWriteType("baab", "fourth"),
+            new UserWriteType("caaa", "fifth"),
+            new UserWriteType("caab", "sixth"));
+    PCollection<UserWriteType> input = p.apply(Create.of(elements));
+    input.apply(
+        TextIO.writeCustomType(new SerializeUserWrite())
+            .to(new UserWriteDestination(baseDir), new DefaultFilenamePolicy.Params())
+            .withTempDirectory(FileSystems.matchNewResource(baseDir.toString(), true)));
+    p.run();
+
+    String[] aElements =
+        Iterables.toArray(
+            Iterables.transform(
+                Iterables.filter(
+                    elements,
+                    Predicates.compose(new StartsWith("a"), new ExtractWriteDestination())),
+                Functions.toStringFunction()),
+            String.class);
+    String[] bElements =
+        Iterables.toArray(
+            Iterables.transform(
+                Iterables.filter(
+                    elements,
+                    Predicates.compose(new StartsWith("b"), new ExtractWriteDestination())),
+                Functions.toStringFunction()),
+            String.class);
+    String[] cElements =
+        Iterables.toArray(
+            Iterables.transform(
+                Iterables.filter(
+                    elements,
+                    Predicates.compose(new StartsWith("c"), new ExtractWriteDestination())),
+                Functions.toStringFunction()),
+            String.class);
+    assertOutputFiles(
+        aElements,
+        null,
+        null,
+        0,
+        baseDir.resolve("file_a.txt", ResolveOptions.StandardResolveOptions.RESOLVE_FILE),
+        DefaultFilenamePolicy.DEFAULT_UNWINDOWED_SHARD_TEMPLATE);
+    assertOutputFiles(
+        bElements,
+        null,
+        null,
+        0,
+        baseDir.resolve("file_b.txt", ResolveOptions.StandardResolveOptions.RESOLVE_FILE),
+        DefaultFilenamePolicy.DEFAULT_UNWINDOWED_SHARD_TEMPLATE);
+    assertOutputFiles(
+        cElements,
+        null,
+        null,
+        0,
+        baseDir.resolve("file_c.txt", ResolveOptions.StandardResolveOptions.RESOLVE_FILE),
+        DefaultFilenamePolicy.DEFAULT_UNWINDOWED_SHARD_TEMPLATE);
+  }
+
+  private void runTestWrite(String[] elems) throws Exception {
+    runTestWrite(elems, null, null, 1);
+  }
+
+  private void runTestWrite(String[] elems, int numShards) throws Exception {
+    runTestWrite(elems, null, null, numShards);
+  }
+
+  private void runTestWrite(String[] elems, String header, String footer) throws Exception {
+    runTestWrite(elems, header, footer, 1);
+  }
+
+  private void runTestWrite(String[] elems, String header, String footer, int numShards)
+      throws Exception {
+    String outputName = "file.txt";
+    Path baseDir = Files.createTempDirectory(tempFolder, "testwrite");
+    ResourceId baseFilename =
+        FileBasedSink.convertToFileResourceIfPossible(baseDir.resolve(outputName).toString());
+
+    PCollection<String> input =
+        p.apply(Create.of(Arrays.asList(elems)).withCoder(StringUtf8Coder.of()));
+
+    TextIO.Write write = TextIO.write().to(baseFilename).withHeader(header).withFooter(footer);
+
+    if (numShards == 1) {
+      write = write.withoutSharding();
+    } else if (numShards > 0) {
+      write = write.withNumShards(numShards).withShardNameTemplate(ShardNameTemplate.INDEX_OF_MAX);
+    }
+
+    input.apply(write);
+
+    p.run();
+
+    assertOutputFiles(
+        elems,
+        header,
+        footer,
+        numShards,
+        baseFilename,
+        firstNonNull(
+            write.inner.getShardTemplate(),
+            DefaultFilenamePolicy.DEFAULT_UNWINDOWED_SHARD_TEMPLATE));
+  }
+
+  private static void assertOutputFiles(
+      String[] elems,
+      final String header,
+      final String footer,
+      int numShards,
+      ResourceId outputPrefix,
+      String shardNameTemplate)
+      throws Exception {
+    List<File> expectedFiles = new ArrayList<>();
+    if (numShards == 0) {
+      String pattern = outputPrefix.toString() + "*";
+      List<MatchResult> matches = FileSystems.match(Collections.singletonList(pattern));
+      for (Metadata expectedFile : Iterables.getOnlyElement(matches).metadata()) {
+        expectedFiles.add(new File(expectedFile.resourceId().toString()));
+      }
+    } else {
+      for (int i = 0; i < numShards; i++) {
+        expectedFiles.add(
+            new File(
+                DefaultFilenamePolicy.constructName(
+                    outputPrefix, shardNameTemplate, "", i, numShards, null, null)
+                    .toString()));
+      }
+    }
+
+    List<List<String>> actual = new ArrayList<>();
+
+    for (File tmpFile : expectedFiles) {
+      try (BufferedReader reader = new BufferedReader(new FileReader(tmpFile))) {
+        List<String> currentFile = new ArrayList<>();
+        while (true) {
+          String line = reader.readLine();
+          if (line == null) {
+            break;
+          }
+          currentFile.add(line);
+        }
+        actual.add(currentFile);
+      }
+    }
+
+    List<String> expectedElements = new ArrayList<>(elems.length);
+    for (String elem : elems) {
+      byte[] encodedElem = CoderUtils.encodeToByteArray(StringUtf8Coder.of(), elem);
+      String line = new String(encodedElem);
+      expectedElements.add(line);
+    }
+
+    List<String> actualElements =
+        Lists.newArrayList(
+            Iterables.concat(
+                FluentIterable.from(actual)
+                    .transform(removeHeaderAndFooter(header, footer))
+                    .toList()));
+
+    assertThat(actualElements, containsInAnyOrder(expectedElements.toArray()));
+
+    assertTrue(Iterables.all(actual, haveProperHeaderAndFooter(header, footer)));
+  }
+
+  private static Function<List<String>, List<String>> removeHeaderAndFooter(
+      final String header, final String footer) {
+    return new Function<List<String>, List<String>>() {
+      @Nullable
+      @Override
+      public List<String> apply(List<String> lines) {
+        ArrayList<String> newLines = Lists.newArrayList(lines);
+        if (header != null) {
+          newLines.remove(0);
+        }
+        if (footer != null) {
+          int last = newLines.size() - 1;
+          newLines.remove(last);
+        }
+        return newLines;
+      }
+    };
+  }
+
+  private static Predicate<List<String>> haveProperHeaderAndFooter(
+      final String header, final String footer) {
+    return new Predicate<List<String>>() {
+      @Override
+      public boolean apply(List<String> fileLines) {
+        int last = fileLines.size() - 1;
+        return (header == null || fileLines.get(0).equals(header))
+            && (footer == null || fileLines.get(last).equals(footer));
+      }
+    };
+  }
+
+  @Test
+  @Category(NeedsRunner.class)
+  public void testWriteStrings() throws Exception {
+    runTestWrite(LINES_ARRAY);
+  }
+
+  @Test
+  @Category(NeedsRunner.class)
+  public void testWriteEmptyStringsNoSharding() throws Exception {
+    runTestWrite(NO_LINES_ARRAY, 0);
+  }
+
+  @Test
+  @Category(NeedsRunner.class)
+  public void testWriteEmptyStrings() throws Exception {
+    runTestWrite(NO_LINES_ARRAY);
+  }
+
+  @Test
+  @Category(NeedsRunner.class)
+  public void testShardedWrite() throws Exception {
+    runTestWrite(LINES_ARRAY, 5);
+  }
+
+  @Test
+  @Category(NeedsRunner.class)
+  public void testWriteWithHeader() throws Exception {
+    runTestWrite(LINES_ARRAY, MY_HEADER, null);
+  }
+
+  @Test
+  @Category(NeedsRunner.class)
+  public void testWriteWithFooter() throws Exception {
+    runTestWrite(LINES_ARRAY, null, MY_FOOTER);
+  }
+
+  @Test
+  @Category(NeedsRunner.class)
+  public void testWriteWithHeaderAndFooter() throws Exception {
+    runTestWrite(LINES_ARRAY, MY_HEADER, MY_FOOTER);
+  }
+
+  @Test
+  @Category(NeedsRunner.class)
+  public void testWriteWithWritableByteChannelFactory() throws Exception {
+    Coder<String> coder = StringUtf8Coder.of();
+    String outputName = "file.txt";
+    ResourceId baseDir =
+        FileSystems.matchNewResource(
+            Files.createTempDirectory(tempFolder, "testwrite").toString(), true);
+
+    PCollection<String> input = p.apply(Create.of(Arrays.asList(LINES2_ARRAY)).withCoder(coder));
+
+    final WritableByteChannelFactory writableByteChannelFactory =
+        new DrunkWritableByteChannelFactory();
+    TextIO.Write write =
+        TextIO.write()
+            .to(
+                baseDir
+                    .resolve(outputName, ResolveOptions.StandardResolveOptions.RESOLVE_FILE)
+                    .toString())
+            .withoutSharding()
+            .withWritableByteChannelFactory(writableByteChannelFactory);
+    DisplayData displayData = DisplayData.from(write);
+    assertThat(displayData, hasDisplayItem("writableByteChannelFactory", "DRUNK"));
+
+    input.apply(write);
+
+    p.run();
+
+    final List<String> drunkElems = new ArrayList<>(LINES2_ARRAY.length * 2 + 2);
+    for (String elem : LINES2_ARRAY) {
+      drunkElems.add(elem);
+      drunkElems.add(elem);
+    }
+    assertOutputFiles(
+        drunkElems.toArray(new String[0]),
+        null,
+        null,
+        1,
+        baseDir.resolve(
+            outputName + writableByteChannelFactory.getSuggestedFilenameSuffix(),
+            ResolveOptions.StandardResolveOptions.RESOLVE_FILE),
+        write.inner.getShardTemplate());
+  }
+
+  @Test
+  public void testWriteDisplayData() {
+    TextIO.Write write =
+        TextIO.write()
+            .to("/foo")
+            .withSuffix("bar")
+            .withShardNameTemplate("-SS-of-NN-")
+            .withNumShards(100)
+            .withFooter("myFooter")
+            .withHeader("myHeader");
+
+    DisplayData displayData = DisplayData.from(write);
+
+    assertThat(displayData, hasDisplayItem("filePrefix", "/foo"));
+    assertThat(displayData, hasDisplayItem("fileSuffix", "bar"));
+    assertThat(displayData, hasDisplayItem("fileHeader", "myHeader"));
+    assertThat(displayData, hasDisplayItem("fileFooter", "myFooter"));
+    assertThat(displayData, hasDisplayItem("shardNameTemplate", "-SS-of-NN-"));
+    assertThat(displayData, hasDisplayItem("numShards", 100));
+    assertThat(displayData, hasDisplayItem("writableByteChannelFactory", "UNCOMPRESSED"));
+  }
+
+  @Test
+  public void testWriteDisplayDataValidateThenHeader() {
+    TextIO.Write write = TextIO.write().to("foo").withHeader("myHeader");
+
+    DisplayData displayData = DisplayData.from(write);
+
+    assertThat(displayData, hasDisplayItem("fileHeader", "myHeader"));
+  }
+
+  @Test
+  public void testWriteDisplayDataValidateThenFooter() {
+    TextIO.Write write = TextIO.write().to("foo").withFooter("myFooter");
+
+    DisplayData displayData = DisplayData.from(write);
+
+    assertThat(displayData, hasDisplayItem("fileFooter", "myFooter"));
+  }
+
+  @Test
+  public void testGetName() {
+    assertEquals("TextIO.Write", TextIO.write().to("somefile").getName());
+  }
+
+  /** Options for testing. */
+  public interface RuntimeTestOptions extends PipelineOptions {
+    ValueProvider<String> getOutput();
+    void setOutput(ValueProvider<String> value);
+  }
+
+  @Test
+  public void testRuntimeOptionsNotCalledInApply() throws Exception {
+    p.enableAbandonedNodeEnforcement(false);
+
+    RuntimeTestOptions options = PipelineOptionsFactory.as(RuntimeTestOptions.class);
+
+    p.apply(Create.of("")).apply(TextIO.write().to(options.getOutput()));
+  }
+}
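
For reference, a minimal standalone sketch of the TextIO.write() configuration these tests exercise: an explicit output filename prefix, fixed sharding with a shard-name template, and a per-shard header and footer. This is not part of the commit; the output path and element values are placeholders.

import org.apache.beam.sdk.Pipeline;
import org.apache.beam.sdk.io.ShardNameTemplate;
import org.apache.beam.sdk.io.TextIO;
import org.apache.beam.sdk.options.PipelineOptionsFactory;
import org.apache.beam.sdk.transforms.Create;

public class TextIOWriteSketch {
  public static void main(String[] args) {
    Pipeline p = Pipeline.create(PipelineOptionsFactory.fromArgs(args).create());
    p.apply(Create.of("alpha", "beta", "gamma"))
        .apply(
            TextIO.write()
                .to("/tmp/textio/file")            // output filename prefix (placeholder)
                .withHeader("myHeader")            // first line of every shard
                .withFooter("myFooter")            // last line of every shard
                .withNumShards(5)                  // fixed number of output shards
                .withShardNameTemplate(ShardNameTemplate.INDEX_OF_MAX));
    p.run().waitUntilFinish();
  }
}

Calling withoutSharding() instead of withNumShards(5) produces a single output file, which is the numShards == 1 branch the helper above takes.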


[42/50] [abbrv] beam git commit: Increase the gRPC message size to max value

Posted by jb...@apache.org.
Increase the gRPC message size to max value


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/b424aa04
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/b424aa04
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/b424aa04

Branch: refs/heads/DSL_SQL
Commit: b424aa0409b507fe1c0c56a5f652d9be6458de66
Parents: 4d1db22
Author: Vikas Kedigehalli <vi...@google.com>
Authored: Tue Jul 18 10:06:46 2017 -0700
Committer: Luke Cwik <lc...@google.com>
Committed: Wed Jul 19 13:17:37 2017 -0700

----------------------------------------------------------------------
 .../beam/fn/harness/channel/ManagedChannelFactory.java       | 6 ++++++
 sdks/python/apache_beam/runners/worker/data_plane.py         | 8 +++++++-
 2 files changed, 13 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/b424aa04/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/channel/ManagedChannelFactory.java
----------------------------------------------------------------------
diff --git a/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/channel/ManagedChannelFactory.java b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/channel/ManagedChannelFactory.java
index d26f4a5..3138bab 100644
--- a/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/channel/ManagedChannelFactory.java
+++ b/sdks/java/harness/src/main/java/org/apache/beam/fn/harness/channel/ManagedChannelFactory.java
@@ -61,6 +61,9 @@ public abstract class ManagedChannelFactory {
               ? EpollDomainSocketChannel.class : EpollSocketChannel.class)
           .eventLoopGroup(new EpollEventLoopGroup())
           .usePlaintext(true)
+          // Set the message size to max value here. The actual size is governed by the
+          // buffer size in the layers above.
+          .maxInboundMessageSize(Integer.MAX_VALUE)
           .build();
     }
   }
@@ -74,6 +77,9 @@ public abstract class ManagedChannelFactory {
     public ManagedChannel forDescriptor(ApiServiceDescriptor apiServiceDescriptor) {
       return ManagedChannelBuilder.forTarget(apiServiceDescriptor.getUrl())
           .usePlaintext(true)
+          // Set the message size to max value here. The actual size is governed by the
+          // buffer size in the layers above.
+          .maxInboundMessageSize(Integer.MAX_VALUE)
           .build();
     }
   }
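
The same unbounded message size is applied to the Python data plane below. As a minimal sketch, assuming only the grpc-java ManagedChannelBuilder API and a placeholder target address, the relevant channel configuration looks like this outside of Beam:

import io.grpc.ManagedChannel;
import io.grpc.ManagedChannelBuilder;

public class UnboundedMessageSizeChannel {
  // Builds a plaintext channel with the inbound message size limit removed.
  // gRPC's default inbound limit is 4 MB, so large data-plane messages would
  // otherwise be rejected; flow control in the layers above is expected to
  // bound the actual buffer sizes.
  public static ManagedChannel create(String target) {
    return ManagedChannelBuilder.forTarget(target)
        .usePlaintext(true)
        .maxInboundMessageSize(Integer.MAX_VALUE)
        .build();
  }

  public static void main(String[] args) {
    ManagedChannel channel = create("localhost:12345"); // placeholder address
    channel.shutdownNow();
  }
}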

http://git-wip-us.apache.org/repos/asf/beam/blob/b424aa04/sdks/python/apache_beam/runners/worker/data_plane.py
----------------------------------------------------------------------
diff --git a/sdks/python/apache_beam/runners/worker/data_plane.py b/sdks/python/apache_beam/runners/worker/data_plane.py
index 26f65ee..e713041 100644
--- a/sdks/python/apache_beam/runners/worker/data_plane.py
+++ b/sdks/python/apache_beam/runners/worker/data_plane.py
@@ -269,7 +269,13 @@ class GrpcClientDataChannelFactory(DataChannelFactory):
     url = remote_grpc_port.api_service_descriptor.url
     if url not in self._data_channel_cache:
       logging.info('Creating channel for %s', url)
-      grpc_channel = grpc.insecure_channel(url)
+      grpc_channel = grpc.insecure_channel(
+          url,
+          # Options to have no limits (-1) on the size of the messages
+          # received or sent over the data plane. The actual buffer size is
+          # controlled in a layer above.
+          options=[("grpc.max_receive_message_length", -1),
+                   ("grpc.max_send_message_length", -1)])
       self._data_channel_cache[url] = GrpcClientDataChannel(
           beam_fn_api_pb2.BeamFnDataStub(grpc_channel))
     return self._data_channel_cache[url]


[02/50] [abbrv] beam git commit: [BEAM-2578] This closes #3535

Posted by jb...@apache.org.
[BEAM-2578] This closes #3535


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/ae0de1bb
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/ae0de1bb
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/ae0de1bb

Branch: refs/heads/DSL_SQL
Commit: ae0de1bb5f44ab39969442932c662ecde668bce3
Parents: 36c55eb 53ce582
Author: Jean-Baptiste Onofré <jb...@apache.org>
Authored: Sun Jul 16 21:52:57 2017 +0200
Committer: Jean-Baptiste Onofré <jb...@apache.org>
Committed: Sun Jul 16 21:52:57 2017 +0200

----------------------------------------------------------------------
 .../org/apache/beam/examples/DebuggingWordCountTest.java | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)
----------------------------------------------------------------------



[26/50] [abbrv] beam git commit: [BEAM-1963] Update Quickstart link in README

Posted by jb...@apache.org.
[BEAM-1963] Update Quickstart link in README


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/d14cef0c
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/d14cef0c
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/d14cef0c

Branch: refs/heads/DSL_SQL
Commit: d14cef0c8fd963db9865ba6a5aad647fdc6f954e
Parents: 2c2d8a3
Author: Mark Liu <ma...@google.com>
Authored: Tue Jul 18 12:25:42 2017 -0700
Committer: Ahmet Altay <al...@google.com>
Committed: Tue Jul 18 13:11:03 2017 -0700

----------------------------------------------------------------------
 README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/d14cef0c/README.md
----------------------------------------------------------------------
diff --git a/README.md b/README.md
index 52c056f..8190baf 100644
--- a/README.md
+++ b/README.md
@@ -69,7 +69,7 @@ Have ideas for new Runners? See the [JIRA](https://issues.apache.org/jira/browse
 
 ## Getting Started
 
-Please refer to the [Quickstart](http://beam.apache.org/get-started/quickstart/) available on our website.
+Please refer to the Quickstart[[Java](https://beam.apache.org/get-started/quickstart-java), [Python](https://beam.apache.org/get-started/quickstart-py)] available on our website.
 
 If you'd like to build and install the whole project from the source distribution, you may need some additional tools installed
 in your system. In a Debian-based distribution:
@@ -102,4 +102,4 @@ We also have a [contributor's guide](https://beam.apache.org/contribute/contribu
 
 * [Apache Beam](http://beam.apache.org)
 * [Overview](http://beam.apache.org/use/beam-overview/)
-* [Quickstart](http://beam.apache.org/use/quickstart/)
+* Quickstart: [Java](https://beam.apache.org/get-started/quickstart-java), [Python](https://beam.apache.org/get-started/quickstart-py)


[05/50] [abbrv] beam git commit: Fix split package in SDK harness

Posted by jb...@apache.org.
http://git-wip-us.apache.org/repos/asf/beam/blob/f1b4700f/sdks/java/harness/src/test/java/org/apache/beam/runners/core/BeamFnDataWriteRunnerTest.java
----------------------------------------------------------------------
diff --git a/sdks/java/harness/src/test/java/org/apache/beam/runners/core/BeamFnDataWriteRunnerTest.java b/sdks/java/harness/src/test/java/org/apache/beam/runners/core/BeamFnDataWriteRunnerTest.java
deleted file mode 100644
index 64d9ea6..0000000
--- a/sdks/java/harness/src/test/java/org/apache/beam/runners/core/BeamFnDataWriteRunnerTest.java
+++ /dev/null
@@ -1,269 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.beam.runners.core;
-
-import static org.apache.beam.sdk.util.WindowedValue.valueInGlobalWindow;
-import static org.hamcrest.Matchers.contains;
-import static org.hamcrest.Matchers.containsInAnyOrder;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertThat;
-import static org.junit.Assert.assertTrue;
-import static org.junit.Assert.fail;
-import static org.mockito.Matchers.any;
-import static org.mockito.Matchers.eq;
-import static org.mockito.Mockito.verify;
-import static org.mockito.Mockito.verifyNoMoreInteractions;
-import static org.mockito.Mockito.verifyZeroInteractions;
-import static org.mockito.Mockito.when;
-
-import com.fasterxml.jackson.databind.ObjectMapper;
-import com.google.common.base.Suppliers;
-import com.google.common.collect.HashMultimap;
-import com.google.common.collect.ImmutableMap;
-import com.google.common.collect.Iterables;
-import com.google.common.collect.Multimap;
-import com.google.protobuf.Any;
-import com.google.protobuf.ByteString;
-import com.google.protobuf.BytesValue;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.ServiceLoader;
-import java.util.concurrent.atomic.AtomicBoolean;
-import java.util.concurrent.atomic.AtomicReference;
-import org.apache.beam.fn.harness.data.BeamFnDataClient;
-import org.apache.beam.fn.harness.fn.CloseableThrowingConsumer;
-import org.apache.beam.fn.harness.fn.ThrowingConsumer;
-import org.apache.beam.fn.harness.fn.ThrowingRunnable;
-import org.apache.beam.fn.v1.BeamFnApi;
-import org.apache.beam.runners.core.PTransformRunnerFactory.Registrar;
-import org.apache.beam.runners.dataflow.util.CloudObjects;
-import org.apache.beam.sdk.coders.Coder;
-import org.apache.beam.sdk.coders.StringUtf8Coder;
-import org.apache.beam.sdk.common.runner.v1.RunnerApi;
-import org.apache.beam.sdk.options.PipelineOptionsFactory;
-import org.apache.beam.sdk.transforms.windowing.GlobalWindow;
-import org.apache.beam.sdk.util.WindowedValue;
-import org.apache.beam.sdk.values.KV;
-import org.hamcrest.collection.IsMapContaining;
-import org.junit.Before;
-import org.junit.Test;
-import org.junit.runner.RunWith;
-import org.junit.runners.JUnit4;
-import org.mockito.Matchers;
-import org.mockito.Mock;
-import org.mockito.MockitoAnnotations;
-
-/** Tests for {@link BeamFnDataWriteRunner}. */
-@RunWith(JUnit4.class)
-public class BeamFnDataWriteRunnerTest {
-
-  private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
-  private static final BeamFnApi.RemoteGrpcPort PORT_SPEC = BeamFnApi.RemoteGrpcPort.newBuilder()
-      .setApiServiceDescriptor(BeamFnApi.ApiServiceDescriptor.getDefaultInstance()).build();
-  private static final RunnerApi.FunctionSpec FUNCTION_SPEC = RunnerApi.FunctionSpec.newBuilder()
-      .setParameter(Any.pack(PORT_SPEC)).build();
-  private static final String CODER_ID = "string-coder-id";
-  private static final Coder<WindowedValue<String>> CODER =
-      WindowedValue.getFullCoder(StringUtf8Coder.of(), GlobalWindow.Coder.INSTANCE);
-  private static final RunnerApi.Coder CODER_SPEC;
-  private static final String URN = "urn:org.apache.beam:sink:runner:0.1";
-
-  static {
-    try {
-      CODER_SPEC = RunnerApi.Coder.newBuilder().setSpec(
-          RunnerApi.SdkFunctionSpec.newBuilder().setSpec(
-              RunnerApi.FunctionSpec.newBuilder().setParameter(
-                  Any.pack(BytesValue.newBuilder().setValue(ByteString.copyFrom(
-                      OBJECT_MAPPER.writeValueAsBytes(CloudObjects.asCloudObject(CODER))))
-                      .build()))
-                  .build())
-              .build())
-          .build();
-    } catch (IOException e) {
-      throw new ExceptionInInitializerError(e);
-    }
-  }
-  private static final BeamFnApi.Target OUTPUT_TARGET = BeamFnApi.Target.newBuilder()
-      .setPrimitiveTransformReference("1")
-      .setName("out")
-      .build();
-
-  @Mock private BeamFnDataClient mockBeamFnDataClient;
-
-  @Before
-  public void setUp() {
-    MockitoAnnotations.initMocks(this);
-  }
-
-
-  @Test
-  public void testCreatingAndProcessingBeamFnDataWriteRunner() throws Exception {
-    String bundleId = "57L";
-    String inputId = "100L";
-
-    Multimap<String, ThrowingConsumer<WindowedValue<?>>> consumers = HashMultimap.create();
-    List<ThrowingRunnable> startFunctions = new ArrayList<>();
-    List<ThrowingRunnable> finishFunctions = new ArrayList<>();
-
-    RunnerApi.FunctionSpec functionSpec = RunnerApi.FunctionSpec.newBuilder()
-        .setUrn("urn:org.apache.beam:sink:runner:0.1")
-        .setParameter(Any.pack(PORT_SPEC))
-        .build();
-
-    RunnerApi.PTransform pTransform = RunnerApi.PTransform.newBuilder()
-        .setSpec(functionSpec)
-        .putInputs(inputId, "inputPC")
-        .build();
-
-    new BeamFnDataWriteRunner.Factory<String>().createRunnerForPTransform(
-        PipelineOptionsFactory.create(),
-        mockBeamFnDataClient,
-        "ptransformId",
-        pTransform,
-        Suppliers.ofInstance(bundleId)::get,
-        ImmutableMap.of("inputPC",
-            RunnerApi.PCollection.newBuilder().setCoderId(CODER_ID).build()),
-        ImmutableMap.of(CODER_ID, CODER_SPEC),
-        consumers,
-        startFunctions::add,
-        finishFunctions::add);
-
-    verifyZeroInteractions(mockBeamFnDataClient);
-
-    List<WindowedValue<String>> outputValues = new ArrayList<>();
-    AtomicBoolean wasCloseCalled = new AtomicBoolean();
-    CloseableThrowingConsumer<WindowedValue<String>> outputConsumer =
-        new CloseableThrowingConsumer<WindowedValue<String>>(){
-          @Override
-          public void close() throws Exception {
-            wasCloseCalled.set(true);
-          }
-
-          @Override
-          public void accept(WindowedValue<String> t) throws Exception {
-            outputValues.add(t);
-          }
-        };
-
-    when(mockBeamFnDataClient.forOutboundConsumer(
-        any(),
-        any(),
-        Matchers.<Coder<WindowedValue<String>>>any())).thenReturn(outputConsumer);
-    Iterables.getOnlyElement(startFunctions).run();
-    verify(mockBeamFnDataClient).forOutboundConsumer(
-        eq(PORT_SPEC.getApiServiceDescriptor()),
-        eq(KV.of(bundleId, BeamFnApi.Target.newBuilder()
-            .setPrimitiveTransformReference("ptransformId")
-            .setName(inputId)
-            .build())),
-        eq(CODER));
-
-    assertThat(consumers.keySet(), containsInAnyOrder("inputPC"));
-    Iterables.getOnlyElement(consumers.get("inputPC")).accept(valueInGlobalWindow("TestValue"));
-    assertThat(outputValues, contains(valueInGlobalWindow("TestValue")));
-    outputValues.clear();
-
-    assertFalse(wasCloseCalled.get());
-    Iterables.getOnlyElement(finishFunctions).run();
-    assertTrue(wasCloseCalled.get());
-
-    verifyNoMoreInteractions(mockBeamFnDataClient);
-  }
-
-  @Test
-  public void testReuseForMultipleBundles() throws Exception {
-    RecordingConsumer<WindowedValue<String>> valuesA = new RecordingConsumer<>();
-    RecordingConsumer<WindowedValue<String>> valuesB = new RecordingConsumer<>();
-    when(mockBeamFnDataClient.forOutboundConsumer(
-        any(),
-        any(),
-        Matchers.<Coder<WindowedValue<String>>>any())).thenReturn(valuesA).thenReturn(valuesB);
-    AtomicReference<String> bundleId = new AtomicReference<>("0");
-    BeamFnDataWriteRunner<String> writeRunner = new BeamFnDataWriteRunner<>(
-        FUNCTION_SPEC,
-        bundleId::get,
-        OUTPUT_TARGET,
-        CODER_SPEC,
-        mockBeamFnDataClient);
-
-    // Process for bundle id 0
-    writeRunner.registerForOutput();
-
-    verify(mockBeamFnDataClient).forOutboundConsumer(
-        eq(PORT_SPEC.getApiServiceDescriptor()),
-        eq(KV.of(bundleId.get(), OUTPUT_TARGET)),
-        eq(CODER));
-
-    writeRunner.consume(valueInGlobalWindow("ABC"));
-    writeRunner.consume(valueInGlobalWindow("DEF"));
-    writeRunner.close();
-
-    assertTrue(valuesA.closed);
-    assertThat(valuesA, contains(valueInGlobalWindow("ABC"), valueInGlobalWindow("DEF")));
-
-    // Process for bundle id 1
-    bundleId.set("1");
-    valuesA.clear();
-    valuesB.clear();
-    writeRunner.registerForOutput();
-
-    verify(mockBeamFnDataClient).forOutboundConsumer(
-        eq(PORT_SPEC.getApiServiceDescriptor()),
-        eq(KV.of(bundleId.get(), OUTPUT_TARGET)),
-        eq(CODER));
-
-    writeRunner.consume(valueInGlobalWindow("GHI"));
-    writeRunner.consume(valueInGlobalWindow("JKL"));
-    writeRunner.close();
-
-    assertTrue(valuesB.closed);
-    assertThat(valuesB, contains(valueInGlobalWindow("GHI"), valueInGlobalWindow("JKL")));
-    verifyNoMoreInteractions(mockBeamFnDataClient);
-  }
-
-  private static class RecordingConsumer<T> extends ArrayList<T>
-      implements CloseableThrowingConsumer<T> {
-    private boolean closed;
-    @Override
-    public void close() throws Exception {
-      closed = true;
-    }
-
-    @Override
-    public void accept(T t) throws Exception {
-      if (closed) {
-        throw new IllegalStateException("Consumer is closed but attempting to consume " + t);
-      }
-      add(t);
-    }
-  }
-
-  @Test
-  public void testRegistration() {
-    for (Registrar registrar :
-        ServiceLoader.load(Registrar.class)) {
-      if (registrar instanceof BeamFnDataWriteRunner.Registrar) {
-        assertThat(registrar.getPTransformRunnerFactories(), IsMapContaining.hasKey(URN));
-        return;
-      }
-    }
-    fail("Expected registrar not found.");
-  }
-}

http://git-wip-us.apache.org/repos/asf/beam/blob/f1b4700f/sdks/java/harness/src/test/java/org/apache/beam/runners/core/BoundedSourceRunnerTest.java
----------------------------------------------------------------------
diff --git a/sdks/java/harness/src/test/java/org/apache/beam/runners/core/BoundedSourceRunnerTest.java b/sdks/java/harness/src/test/java/org/apache/beam/runners/core/BoundedSourceRunnerTest.java
deleted file mode 100644
index 6c9a4cb..0000000
--- a/sdks/java/harness/src/test/java/org/apache/beam/runners/core/BoundedSourceRunnerTest.java
+++ /dev/null
@@ -1,187 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.beam.runners.core;
-
-import static org.apache.beam.sdk.util.WindowedValue.valueInGlobalWindow;
-import static org.hamcrest.Matchers.contains;
-import static org.hamcrest.Matchers.containsInAnyOrder;
-import static org.hamcrest.collection.IsEmptyCollection.empty;
-import static org.junit.Assert.assertThat;
-import static org.junit.Assert.fail;
-
-import com.google.common.base.Suppliers;
-import com.google.common.collect.HashMultimap;
-import com.google.common.collect.ImmutableList;
-import com.google.common.collect.ImmutableMap;
-import com.google.common.collect.Iterables;
-import com.google.common.collect.Multimap;
-import com.google.protobuf.Any;
-import com.google.protobuf.ByteString;
-import com.google.protobuf.BytesValue;
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.List;
-import java.util.ServiceLoader;
-import org.apache.beam.fn.harness.fn.ThrowingConsumer;
-import org.apache.beam.fn.harness.fn.ThrowingRunnable;
-import org.apache.beam.runners.core.PTransformRunnerFactory.Registrar;
-import org.apache.beam.sdk.common.runner.v1.RunnerApi;
-import org.apache.beam.sdk.io.BoundedSource;
-import org.apache.beam.sdk.io.CountingSource;
-import org.apache.beam.sdk.options.PipelineOptionsFactory;
-import org.apache.beam.sdk.util.SerializableUtils;
-import org.apache.beam.sdk.util.WindowedValue;
-import org.hamcrest.Matchers;
-import org.hamcrest.collection.IsMapContaining;
-import org.junit.Test;
-import org.junit.runner.RunWith;
-import org.junit.runners.JUnit4;
-
-/** Tests for {@link BoundedSourceRunner}. */
-@RunWith(JUnit4.class)
-public class BoundedSourceRunnerTest {
-
-  public static final String URN = "urn:org.apache.beam:source:java:0.1";
-
-  @Test
-  public void testRunReadLoopWithMultipleSources() throws Exception {
-    List<WindowedValue<Long>> out1Values = new ArrayList<>();
-    List<WindowedValue<Long>> out2Values = new ArrayList<>();
-    Collection<ThrowingConsumer<WindowedValue<Long>>> consumers =
-        ImmutableList.of(out1Values::add, out2Values::add);
-
-    BoundedSourceRunner<BoundedSource<Long>, Long> runner = new BoundedSourceRunner<>(
-        PipelineOptionsFactory.create(),
-        RunnerApi.FunctionSpec.getDefaultInstance(),
-        consumers);
-
-    runner.runReadLoop(valueInGlobalWindow(CountingSource.upTo(2)));
-    runner.runReadLoop(valueInGlobalWindow(CountingSource.upTo(1)));
-
-    assertThat(out1Values,
-        contains(valueInGlobalWindow(0L), valueInGlobalWindow(1L), valueInGlobalWindow(0L)));
-    assertThat(out2Values,
-        contains(valueInGlobalWindow(0L), valueInGlobalWindow(1L), valueInGlobalWindow(0L)));
-  }
-
-  @Test
-  public void testRunReadLoopWithEmptySource() throws Exception {
-    List<WindowedValue<Long>> outValues = new ArrayList<>();
-    Collection<ThrowingConsumer<WindowedValue<Long>>> consumers =
-        ImmutableList.of(outValues::add);
-
-    BoundedSourceRunner<BoundedSource<Long>, Long> runner = new BoundedSourceRunner<>(
-        PipelineOptionsFactory.create(),
-        RunnerApi.FunctionSpec.getDefaultInstance(),
-        consumers);
-
-    runner.runReadLoop(valueInGlobalWindow(CountingSource.upTo(0)));
-
-    assertThat(outValues, empty());
-  }
-
-  @Test
-  public void testStart() throws Exception {
-    List<WindowedValue<Long>> outValues = new ArrayList<>();
-    Collection<ThrowingConsumer<WindowedValue<Long>>> consumers =
-        ImmutableList.of(outValues::add);
-
-    ByteString encodedSource =
-        ByteString.copyFrom(SerializableUtils.serializeToByteArray(CountingSource.upTo(3)));
-
-    BoundedSourceRunner<BoundedSource<Long>, Long> runner = new BoundedSourceRunner<>(
-        PipelineOptionsFactory.create(),
-        RunnerApi.FunctionSpec.newBuilder().setParameter(
-            Any.pack(BytesValue.newBuilder().setValue(encodedSource).build())).build(),
-        consumers);
-
-    runner.start();
-
-    assertThat(outValues,
-        contains(valueInGlobalWindow(0L), valueInGlobalWindow(1L), valueInGlobalWindow(2L)));
-  }
-
-  @Test
-  public void testCreatingAndProcessingSourceFromFactory() throws Exception {
-    List<WindowedValue<String>> outputValues = new ArrayList<>();
-
-    Multimap<String, ThrowingConsumer<WindowedValue<?>>> consumers = HashMultimap.create();
-    consumers.put("outputPC",
-        (ThrowingConsumer) (ThrowingConsumer<WindowedValue<String>>) outputValues::add);
-    List<ThrowingRunnable> startFunctions = new ArrayList<>();
-    List<ThrowingRunnable> finishFunctions = new ArrayList<>();
-
-    RunnerApi.FunctionSpec functionSpec = RunnerApi.FunctionSpec.newBuilder()
-        .setUrn("urn:org.apache.beam:source:java:0.1")
-        .setParameter(Any.pack(BytesValue.newBuilder()
-            .setValue(ByteString.copyFrom(
-                SerializableUtils.serializeToByteArray(CountingSource.upTo(3))))
-            .build()))
-        .build();
-
-    RunnerApi.PTransform pTransform = RunnerApi.PTransform.newBuilder()
-        .setSpec(functionSpec)
-        .putInputs("input", "inputPC")
-        .putOutputs("output", "outputPC")
-        .build();
-
-    new BoundedSourceRunner.Factory<>().createRunnerForPTransform(
-        PipelineOptionsFactory.create(),
-        null /* beamFnDataClient */,
-        "pTransformId",
-        pTransform,
-        Suppliers.ofInstance("57L")::get,
-        ImmutableMap.of(),
-        ImmutableMap.of(),
-        consumers,
-        startFunctions::add,
-        finishFunctions::add);
-
-    // This is testing a deprecated way of running sources and should be removed
-    // once all source definitions are instead propagated along the input edge.
-    Iterables.getOnlyElement(startFunctions).run();
-    assertThat(outputValues, contains(
-        valueInGlobalWindow(0L),
-        valueInGlobalWindow(1L),
-        valueInGlobalWindow(2L)));
-    outputValues.clear();
-
-    // Check that when passing a source along as an input, the source is processed.
-    assertThat(consumers.keySet(), containsInAnyOrder("inputPC", "outputPC"));
-    Iterables.getOnlyElement(consumers.get("inputPC")).accept(
-        valueInGlobalWindow(CountingSource.upTo(2)));
-    assertThat(outputValues, contains(
-        valueInGlobalWindow(0L),
-        valueInGlobalWindow(1L)));
-
-    assertThat(finishFunctions, Matchers.empty());
-  }
-
-  @Test
-  public void testRegistration() {
-    for (Registrar registrar :
-        ServiceLoader.load(Registrar.class)) {
-      if (registrar instanceof BoundedSourceRunner.Registrar) {
-        assertThat(registrar.getPTransformRunnerFactories(), IsMapContaining.hasKey(URN));
-        return;
-      }
-    }
-    fail("Expected registrar not found.");
-  }
-}

http://git-wip-us.apache.org/repos/asf/beam/blob/f1b4700f/sdks/java/harness/src/test/java/org/apache/beam/runners/core/FnApiDoFnRunnerTest.java
----------------------------------------------------------------------
diff --git a/sdks/java/harness/src/test/java/org/apache/beam/runners/core/FnApiDoFnRunnerTest.java b/sdks/java/harness/src/test/java/org/apache/beam/runners/core/FnApiDoFnRunnerTest.java
deleted file mode 100644
index c4df77a..0000000
--- a/sdks/java/harness/src/test/java/org/apache/beam/runners/core/FnApiDoFnRunnerTest.java
+++ /dev/null
@@ -1,210 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.beam.runners.core;
-
-import static org.apache.beam.sdk.util.WindowedValue.timestampedValueInGlobalWindow;
-import static org.apache.beam.sdk.util.WindowedValue.valueInGlobalWindow;
-import static org.hamcrest.Matchers.contains;
-import static org.hamcrest.Matchers.containsInAnyOrder;
-import static org.junit.Assert.assertThat;
-import static org.junit.Assert.fail;
-
-import com.fasterxml.jackson.databind.ObjectMapper;
-import com.google.common.base.Suppliers;
-import com.google.common.collect.HashMultimap;
-import com.google.common.collect.ImmutableList;
-import com.google.common.collect.ImmutableMap;
-import com.google.common.collect.Iterables;
-import com.google.common.collect.Multimap;
-import com.google.protobuf.Any;
-import com.google.protobuf.ByteString;
-import com.google.protobuf.BytesValue;
-import com.google.protobuf.Message;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Map;
-import java.util.ServiceLoader;
-import org.apache.beam.fn.harness.fn.ThrowingConsumer;
-import org.apache.beam.fn.harness.fn.ThrowingRunnable;
-import org.apache.beam.runners.core.PTransformRunnerFactory.Registrar;
-import org.apache.beam.runners.core.construction.ParDoTranslation;
-import org.apache.beam.runners.dataflow.util.CloudObjects;
-import org.apache.beam.runners.dataflow.util.DoFnInfo;
-import org.apache.beam.sdk.coders.Coder;
-import org.apache.beam.sdk.coders.StringUtf8Coder;
-import org.apache.beam.sdk.common.runner.v1.RunnerApi;
-import org.apache.beam.sdk.options.PipelineOptionsFactory;
-import org.apache.beam.sdk.transforms.DoFn;
-import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
-import org.apache.beam.sdk.transforms.windowing.GlobalWindow;
-import org.apache.beam.sdk.util.SerializableUtils;
-import org.apache.beam.sdk.util.WindowedValue;
-import org.apache.beam.sdk.values.TupleTag;
-import org.apache.beam.sdk.values.WindowingStrategy;
-import org.hamcrest.collection.IsMapContaining;
-import org.junit.Test;
-import org.junit.runner.RunWith;
-import org.junit.runners.JUnit4;
-
-/** Tests for {@link FnApiDoFnRunner}. */
-@RunWith(JUnit4.class)
-public class FnApiDoFnRunnerTest {
-
-  private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
-  private static final Coder<WindowedValue<String>> STRING_CODER =
-      WindowedValue.getFullCoder(StringUtf8Coder.of(), GlobalWindow.Coder.INSTANCE);
-  private static final String STRING_CODER_SPEC_ID = "999L";
-  private static final RunnerApi.Coder STRING_CODER_SPEC;
-
-  static {
-    try {
-      STRING_CODER_SPEC = RunnerApi.Coder.newBuilder()
-          .setSpec(RunnerApi.SdkFunctionSpec.newBuilder()
-              .setSpec(RunnerApi.FunctionSpec.newBuilder()
-                  .setParameter(Any.pack(BytesValue.newBuilder().setValue(ByteString.copyFrom(
-                      OBJECT_MAPPER.writeValueAsBytes(CloudObjects.asCloudObject(STRING_CODER))))
-                      .build())))
-              .build())
-          .build();
-    } catch (IOException e) {
-      throw new ExceptionInInitializerError(e);
-    }
-  }
-
-  private static class TestDoFn extends DoFn<String, String> {
-    private static final TupleTag<String> mainOutput = new TupleTag<>("mainOutput");
-    private static final TupleTag<String> additionalOutput = new TupleTag<>("output");
-
-    private BoundedWindow window;
-
-    @ProcessElement
-    public void processElement(ProcessContext context, BoundedWindow window) {
-      context.output("MainOutput" + context.element());
-      context.output(additionalOutput, "AdditionalOutput" + context.element());
-      this.window = window;
-    }
-
-    @FinishBundle
-    public void finishBundle(FinishBundleContext context) {
-      if (window != null) {
-        context.output("FinishBundle", window.maxTimestamp(), window);
-        window = null;
-      }
-    }
-  }
-
-  /**
-   * Create a DoFn that has 3 inputs (inputATarget1, inputATarget2, inputBTarget) and 2 outputs
-   * (mainOutput, output). Validate that inputs are fed to the {@link DoFn} and that outputs
-   * are directed to the correct consumers.
-   */
-  @Test
-  public void testCreatingAndProcessingDoFn() throws Exception {
-    Map<String, Message> fnApiRegistry = ImmutableMap.of(STRING_CODER_SPEC_ID, STRING_CODER_SPEC);
-    String pTransformId = "pTransformId";
-    String mainOutputId = "101";
-    String additionalOutputId = "102";
-
-    DoFnInfo<?, ?> doFnInfo = DoFnInfo.forFn(
-        new TestDoFn(),
-        WindowingStrategy.globalDefault(),
-        ImmutableList.of(),
-        StringUtf8Coder.of(),
-        Long.parseLong(mainOutputId),
-        ImmutableMap.of(
-            Long.parseLong(mainOutputId), TestDoFn.mainOutput,
-            Long.parseLong(additionalOutputId), TestDoFn.additionalOutput));
-    RunnerApi.FunctionSpec functionSpec = RunnerApi.FunctionSpec.newBuilder()
-        .setUrn(ParDoTranslation.CUSTOM_JAVA_DO_FN_URN)
-        .setParameter(Any.pack(BytesValue.newBuilder()
-            .setValue(ByteString.copyFrom(SerializableUtils.serializeToByteArray(doFnInfo)))
-            .build()))
-        .build();
-    RunnerApi.PTransform pTransform = RunnerApi.PTransform.newBuilder()
-        .setSpec(functionSpec)
-        .putInputs("inputA", "inputATarget")
-        .putInputs("inputB", "inputBTarget")
-        .putOutputs(mainOutputId, "mainOutputTarget")
-        .putOutputs(additionalOutputId, "additionalOutputTarget")
-        .build();
-
-    List<WindowedValue<String>> mainOutputValues = new ArrayList<>();
-    List<WindowedValue<String>> additionalOutputValues = new ArrayList<>();
-    Multimap<String, ThrowingConsumer<WindowedValue<?>>> consumers = HashMultimap.create();
-    consumers.put("mainOutputTarget",
-        (ThrowingConsumer) (ThrowingConsumer<WindowedValue<String>>) mainOutputValues::add);
-    consumers.put("additionalOutputTarget",
-        (ThrowingConsumer) (ThrowingConsumer<WindowedValue<String>>) additionalOutputValues::add);
-    List<ThrowingRunnable> startFunctions = new ArrayList<>();
-    List<ThrowingRunnable> finishFunctions = new ArrayList<>();
-
-    new FnApiDoFnRunner.Factory<>().createRunnerForPTransform(
-        PipelineOptionsFactory.create(),
-        null /* beamFnDataClient */,
-        pTransformId,
-        pTransform,
-        Suppliers.ofInstance("57L")::get,
-        ImmutableMap.of(),
-        ImmutableMap.of(),
-        consumers,
-        startFunctions::add,
-        finishFunctions::add);
-
-    Iterables.getOnlyElement(startFunctions).run();
-    mainOutputValues.clear();
-
-    assertThat(consumers.keySet(), containsInAnyOrder(
-        "inputATarget", "inputBTarget", "mainOutputTarget", "additionalOutputTarget"));
-
-    Iterables.getOnlyElement(consumers.get("inputATarget")).accept(valueInGlobalWindow("A1"));
-    Iterables.getOnlyElement(consumers.get("inputATarget")).accept(valueInGlobalWindow("A2"));
-    Iterables.getOnlyElement(consumers.get("inputATarget")).accept(valueInGlobalWindow("B"));
-    assertThat(mainOutputValues, contains(
-        valueInGlobalWindow("MainOutputA1"),
-        valueInGlobalWindow("MainOutputA2"),
-        valueInGlobalWindow("MainOutputB")));
-    assertThat(additionalOutputValues, contains(
-        valueInGlobalWindow("AdditionalOutputA1"),
-        valueInGlobalWindow("AdditionalOutputA2"),
-        valueInGlobalWindow("AdditionalOutputB")));
-    mainOutputValues.clear();
-    additionalOutputValues.clear();
-
-    Iterables.getOnlyElement(finishFunctions).run();
-    assertThat(
-        mainOutputValues,
-        contains(
-            timestampedValueInGlobalWindow("FinishBundle", GlobalWindow.INSTANCE.maxTimestamp())));
-    mainOutputValues.clear();
-  }
-
-  @Test
-  public void testRegistration() {
-    for (Registrar registrar :
-        ServiceLoader.load(Registrar.class)) {
-      if (registrar instanceof FnApiDoFnRunner.Registrar) {
-        assertThat(registrar.getPTransformRunnerFactories(),
-            IsMapContaining.hasKey(ParDoTranslation.CUSTOM_JAVA_DO_FN_URN));
-        return;
-      }
-    }
-    fail("Expected registrar not found.");
-  }
-}


[17/50] [abbrv] beam git commit: Make Dataflow Counter Name Parsing more Robust

Posted by jb...@apache.org.
Make Dataflow Counter Name Parsing more Robust


Project: http://git-wip-us.apache.org/repos/asf/beam/repo
Commit: http://git-wip-us.apache.org/repos/asf/beam/commit/c5ebbff5
Tree: http://git-wip-us.apache.org/repos/asf/beam/tree/c5ebbff5
Diff: http://git-wip-us.apache.org/repos/asf/beam/diff/c5ebbff5

Branch: refs/heads/DSL_SQL
Commit: c5ebbff584834d16e3aff9859c90122cf9ed5ef2
Parents: 04d364d
Author: Pablo <pa...@google.com>
Authored: Wed Jun 28 15:20:53 2017 -0700
Committer: Thomas Groh <tg...@google.com>
Committed: Mon Jul 17 16:01:38 2017 -0700

----------------------------------------------------------------------
 .../beam/runners/dataflow/DataflowMetrics.java  | 30 +++++++----
 .../runners/dataflow/DataflowMetricsTest.java   | 53 +++++++++++++++-----
 2 files changed, 59 insertions(+), 24 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/beam/blob/c5ebbff5/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/DataflowMetrics.java
----------------------------------------------------------------------
diff --git a/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/DataflowMetrics.java b/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/DataflowMetrics.java
index 31b6cda..330cc7e 100644
--- a/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/DataflowMetrics.java
+++ b/runners/google-cloud-dataflow-java/src/main/java/org/apache/beam/runners/dataflow/DataflowMetrics.java
@@ -79,9 +79,14 @@ class DataflowMetrics extends MetricResults {
   private MetricKey metricHashKey(
       com.google.api.services.dataflow.model.MetricUpdate metricUpdate) {
     String fullStepName = metricUpdate.getName().getContext().get("step");
-    fullStepName = (dataflowPipelineJob.transformStepNames != null
-        ? dataflowPipelineJob.transformStepNames
-        .inverse().get(fullStepName).getFullName() : fullStepName);
+    if (dataflowPipelineJob.transformStepNames == null
+        || !dataflowPipelineJob.transformStepNames.inverse().containsKey(fullStepName)) {
+      // If we can't translate internal step names to user step names, we just skip them
+      // altogether.
+      return null;
+    }
+    fullStepName = dataflowPipelineJob.transformStepNames
+        .inverse().get(fullStepName).getFullName();
     return MetricKey.create(
         fullStepName,
         MetricName.named(
@@ -119,15 +124,18 @@ class DataflowMetrics extends MetricResults {
     // If the Context of the metric update does not have a namespace, then these are not
     // actual metrics counters.
     for (com.google.api.services.dataflow.model.MetricUpdate update : metricUpdates) {
-      if (Objects.equal(update.getName().getOrigin(), "user") && isMetricTentative(update)
+      if (Objects.equal(update.getName().getOrigin(), "user")
           && update.getName().getContext().containsKey("namespace")) {
-        tentativeByName.put(metricHashKey(update), update);
-        metricHashKeys.add(metricHashKey(update));
-      } else if (Objects.equal(update.getName().getOrigin(), "user")
-          && update.getName().getContext().containsKey("namespace")
-          && !isMetricTentative(update)) {
-        committedByName.put(metricHashKey(update), update);
-        metricHashKeys.add(metricHashKey(update));
+        MetricKey key = metricHashKey(update);
+        if (key == null) {
+          continue;
+        }
+        metricHashKeys.add(key);
+        if (isMetricTentative(update)) {
+          tentativeByName.put(key, update);
+        } else {
+          committedByName.put(key, update);
+        }
       }
     }
     // Create the lists with the metric result information.
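
A minimal sketch of the lookup-then-skip pattern introduced here, assuming a Guava BiMap keyed by user step name (the real code maps AppliedPTransform to internal step name, and the names below are placeholders): updates whose internal step name has no user-level counterpart are dropped rather than passed through untranslated.

import com.google.common.collect.BiMap;
import com.google.common.collect.HashBiMap;

public class StepNameTranslation {
  private final BiMap<String, String> userToInternalStepNames;

  public StepNameTranslation(BiMap<String, String> userToInternalStepNames) {
    this.userToInternalStepNames = userToInternalStepNames;
  }

  /** Returns the user-level step name, or null if the internal name is unknown. */
  public String toUserStepName(String internalStepName) {
    if (!userToInternalStepNames.inverse().containsKey(internalStepName)) {
      // Unknown internal step names are skipped altogether, mirroring
      // metricHashKey above, which now returns null for untranslatable names.
      return null;
    }
    return userToInternalStepNames.inverse().get(internalStepName);
  }

  public static void main(String[] args) {
    BiMap<String, String> names = HashBiMap.create();
    names.put("myStepName", "s2"); // placeholder mapping
    StepNameTranslation translation = new StepNameTranslation(names);
    System.out.println(translation.toUserStepName("s2")); // myStepName
    System.out.println(translation.toUserStepName("s5")); // null -> update skipped
  }
}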

http://git-wip-us.apache.org/repos/asf/beam/blob/c5ebbff5/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/DataflowMetricsTest.java
----------------------------------------------------------------------
diff --git a/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/DataflowMetricsTest.java b/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/DataflowMetricsTest.java
index 85a0979..c3c741c 100644
--- a/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/DataflowMetricsTest.java
+++ b/runners/google-cloud-dataflow-java/src/test/java/org/apache/beam/runners/dataflow/DataflowMetricsTest.java
@@ -33,6 +33,7 @@ import com.google.api.services.dataflow.model.Job;
 import com.google.api.services.dataflow.model.JobMetrics;
 import com.google.api.services.dataflow.model.MetricStructuredName;
 import com.google.api.services.dataflow.model.MetricUpdate;
+import com.google.common.collect.HashBiMap;
 import com.google.common.collect.ImmutableList;
 import com.google.common.collect.ImmutableMap;
 import java.io.IOException;
@@ -42,6 +43,7 @@ import org.apache.beam.sdk.extensions.gcp.auth.TestCredential;
 import org.apache.beam.sdk.extensions.gcp.storage.NoopPathValidator;
 import org.apache.beam.sdk.metrics.MetricQueryResults;
 import org.apache.beam.sdk.options.PipelineOptionsFactory;
+import org.apache.beam.sdk.runners.AppliedPTransform;
 import org.junit.Before;
 import org.junit.Test;
 import org.junit.runner.RunWith;
@@ -155,6 +157,11 @@ public class DataflowMetricsTest {
     when(job.getState()).thenReturn(State.RUNNING);
     job.jobId = JOB_ID;
 
+    AppliedPTransform<?, ?, ?> myStep = mock(AppliedPTransform.class);
+    when(myStep.getFullName()).thenReturn("myStepName");
+    job.transformStepNames = HashBiMap.create();
+    job.transformStepNames.put(myStep, "s2");
+
     MetricUpdate update = new MetricUpdate();
     long stepValue = 1234L;
     update.setScalar(new BigDecimal(stepValue));
@@ -172,9 +179,9 @@ public class DataflowMetricsTest {
     DataflowMetrics dataflowMetrics = new DataflowMetrics(job, dataflowClient);
     MetricQueryResults result = dataflowMetrics.queryMetrics(null);
     assertThat(result.counters(), containsInAnyOrder(
-        attemptedMetricsResult("counterNamespace", "counterName", "s2", 1233L)));
+        attemptedMetricsResult("counterNamespace", "counterName", "myStepName", 1233L)));
     assertThat(result.counters(), containsInAnyOrder(
-        committedMetricsResult("counterNamespace", "counterName", "s2", 1234L)));
+        committedMetricsResult("counterNamespace", "counterName", "myStepName", 1234L)));
   }
 
   @Test
@@ -186,20 +193,25 @@ public class DataflowMetricsTest {
     when(job.getState()).thenReturn(State.RUNNING);
     job.jobId = JOB_ID;
 
+    AppliedPTransform<?, ?, ?> myStep = mock(AppliedPTransform.class);
+    when(myStep.getFullName()).thenReturn("myStepName");
+    job.transformStepNames = HashBiMap.create();
+    job.transformStepNames.put(myStep, "s2");
+
     // The parser relies on the fact that one tentative and one committed metric update exist in
     // the job metrics results.
     jobMetrics.setMetrics(ImmutableList.of(
         makeCounterMetricUpdate("counterName", "counterNamespace", "s2", 1233L, false),
         makeCounterMetricUpdate("counterName", "counterNamespace", "s2", 1234L, true),
-        makeCounterMetricUpdate("otherCounter[MIN]", "otherNamespace", "s3", 0L, false),
-        makeCounterMetricUpdate("otherCounter[MIN]", "otherNamespace", "s3", 0L, true)));
+        makeCounterMetricUpdate("otherCounter[MIN]", "otherNamespace", "s2", 0L, false),
+        makeCounterMetricUpdate("otherCounter[MIN]", "otherNamespace", "s2", 0L, true)));
 
     DataflowMetrics dataflowMetrics = new DataflowMetrics(job, dataflowClient);
     MetricQueryResults result = dataflowMetrics.queryMetrics(null);
     assertThat(result.counters(), containsInAnyOrder(
-        attemptedMetricsResult("counterNamespace", "counterName", "s2", 1234L)));
+        attemptedMetricsResult("counterNamespace", "counterName", "myStepName", 1234L)));
     assertThat(result.counters(), containsInAnyOrder(
-        committedMetricsResult("counterNamespace", "counterName", "s2", 1233L)));
+        committedMetricsResult("counterNamespace", "counterName", "myStepName", 1233L)));
   }
 
   @Test
@@ -211,6 +223,18 @@ public class DataflowMetricsTest {
     when(job.getState()).thenReturn(State.RUNNING);
     job.jobId = JOB_ID;
 
+    AppliedPTransform<?, ?, ?> myStep2 = mock(AppliedPTransform.class);
+    when(myStep2.getFullName()).thenReturn("myStepName");
+    job.transformStepNames = HashBiMap.create();
+    job.transformStepNames.put(myStep2, "s2");
+    AppliedPTransform<?, ?, ?> myStep3 = mock(AppliedPTransform.class);
+    when(myStep3.getFullName()).thenReturn("myStepName3");
+    job.transformStepNames.put(myStep3, "s3");
+    AppliedPTransform<?, ?, ?> myStep4 = mock(AppliedPTransform.class);
+    when(myStep4.getFullName()).thenReturn("myStepName4");
+    job.transformStepNames.put(myStep4, "s4");
+
+
     // The parser relies on the fact that one tentative and one committed metric update exist in
     // the job metrics results.
     jobMetrics.setMetrics(ImmutableList.of(
@@ -219,17 +243,20 @@ public class DataflowMetricsTest {
         makeCounterMetricUpdate("otherCounter", "otherNamespace", "s3", 12L, false),
         makeCounterMetricUpdate("otherCounter", "otherNamespace", "s3", 12L, true),
         makeCounterMetricUpdate("counterName", "otherNamespace", "s4", 1200L, false),
-        makeCounterMetricUpdate("counterName", "otherNamespace", "s4", 1233L, true)));
+        makeCounterMetricUpdate("counterName", "otherNamespace", "s4", 1233L, true),
+        // The following counter cannot have its name translated, so it won't appear.
+        makeCounterMetricUpdate("lostName", "otherNamespace", "s5", 1200L, false),
+        makeCounterMetricUpdate("lostName", "otherNamespace", "s5", 1200L, true)));
 
     DataflowMetrics dataflowMetrics = new DataflowMetrics(job, dataflowClient);
     MetricQueryResults result = dataflowMetrics.queryMetrics(null);
     assertThat(result.counters(), containsInAnyOrder(
-        attemptedMetricsResult("counterNamespace", "counterName", "s2", 1234L),
-        attemptedMetricsResult("otherNamespace", "otherCounter", "s3", 12L),
-        attemptedMetricsResult("otherNamespace", "counterName", "s4", 1233L)));
+        attemptedMetricsResult("counterNamespace", "counterName", "myStepName", 1234L),
+        attemptedMetricsResult("otherNamespace", "otherCounter", "myStepName3", 12L),
+        attemptedMetricsResult("otherNamespace", "counterName", "myStepName4", 1233L)));
     assertThat(result.counters(), containsInAnyOrder(
-        committedMetricsResult("counterNamespace", "counterName", "s2", 1233L),
-        committedMetricsResult("otherNamespace", "otherCounter", "s3", 12L),
-        committedMetricsResult("otherNamespace", "counterName", "s4", 1200L)));
+        committedMetricsResult("counterNamespace", "counterName", "myStepName", 1233L),
+        committedMetricsResult("otherNamespace", "otherCounter", "myStepName3", 12L),
+        committedMetricsResult("otherNamespace", "counterName", "myStepName4", 1200L)));
   }
 }